Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,7 +12,6 @@ from diffusers import StableVideoDiffusionPipeline
|
|
| 12 |
from diffusers.utils import export_to_video
|
| 13 |
|
| 14 |
# ── ENV SETUP ────────────────────────────────────────────────────────────────
|
| 15 |
-
# Support BOTH HF_TOKEN and HF_KEY (screenshot shows HF_KEY)
|
| 16 |
HF_TOKEN = (
|
| 17 |
os.environ.get("HF_TOKEN", "")
|
| 18 |
or os.environ.get("HF_KEY", "")
|
|
@@ -20,9 +19,17 @@ HF_TOKEN = (
|
|
| 20 |
|
| 21 |
if HF_TOKEN:
|
| 22 |
login(token=HF_TOKEN)
|
| 23 |
-
print("β
|
| 24 |
else:
|
| 25 |
-
print("β οΈ No HF token found
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
# ── DEVICE SETUP ─────────────────────────────────────────────────────────────
|
| 28 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
@@ -42,10 +49,24 @@ print("✅ SVD model ready.")
|
|
| 42 |
|
| 43 |
# ── GEMINI HELPER ─────────────────────────────────────────────────────────────
|
| 44 |
def call_gemini(pil_image: Image.Image, user_desc: str, language: str, style: str) -> dict:
|
| 45 |
-
#
|
| 46 |
-
api_key =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
if not api_key:
|
| 48 |
-
raise ValueError(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
client = genai.Client(api_key=api_key)
|
| 51 |
|
|
@@ -66,7 +87,7 @@ def call_gemini(pil_image: Image.Image, user_desc: str, language: str, style: st
|
|
| 66 |
Language rule : {lang_map.get(language, lang_map['English'])}
|
| 67 |
Style rule : {style_map.get(style, style_map['Fun'])}
|
| 68 |
|
| 69 |
-
CRITICAL: Return ONLY a raw JSON object. No markdown
|
| 70 |
{{
|
| 71 |
"hook": "attention-grabbing opening line (1-2 sentences)",
|
| 72 |
"script": "full 15-20 second voiceover script",
|
|
@@ -79,7 +100,7 @@ CRITICAL: Return ONLY a raw JSON object. No markdown fences. No ```json. No expl
|
|
| 79 |
image_bytes = buf.getvalue()
|
| 80 |
|
| 81 |
response = client.models.generate_content(
|
| 82 |
-
model="gemini-2.5-flash",
|
| 83 |
contents=[
|
| 84 |
types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg"),
|
| 85 |
types.Part.from_text(text=prompt),
|
|
@@ -87,8 +108,6 @@ CRITICAL: Return ONLY a raw JSON object. No markdown fences. No ```json. No expl
|
|
| 87 |
)
|
| 88 |
|
| 89 |
raw = response.text.strip()
|
| 90 |
-
|
| 91 |
-
# Strip markdown fences if present
|
| 92 |
if "```" in raw:
|
| 93 |
raw = raw.split("```")[1]
|
| 94 |
if raw.lower().startswith("json"):
|
|
@@ -101,7 +120,6 @@ CRITICAL: Return ONLY a raw JSON object. No markdown fences. No ```json. No expl
|
|
| 101 |
# ── VIDEO GENERATION ──────────────────────────────────────────────────────────
|
| 102 |
def generate_video(pil_image: Image.Image) -> str:
|
| 103 |
img = pil_image.convert("RGB").resize((1024, 576))
|
| 104 |
-
|
| 105 |
frames = svd_pipe(
|
| 106 |
image=img,
|
| 107 |
num_frames=14,
|
|
@@ -109,7 +127,6 @@ def generate_video(pil_image: Image.Image) -> str:
|
|
| 109 |
decode_chunk_size=4,
|
| 110 |
generator=torch.manual_seed(42),
|
| 111 |
).frames[0]
|
| 112 |
-
|
| 113 |
tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
|
| 114 |
export_to_video(frames, tmp.name, fps=7)
|
| 115 |
return tmp.name
|
|
@@ -122,7 +139,6 @@ def generate_ad(image, user_desc, language, style):
|
|
| 122 |
|
| 123 |
pil_image = image if isinstance(image, Image.Image) else Image.fromarray(image)
|
| 124 |
|
| 125 |
-
# STEP 1 β Gemini ad copy
|
| 126 |
try:
|
| 127 |
ad_data = call_gemini(pil_image, user_desc or "", language, style)
|
| 128 |
except Exception as e:
|
|
@@ -132,7 +148,6 @@ def generate_ad(image, user_desc, language, style):
|
|
| 132 |
script = ad_data.get("script", "")
|
| 133 |
cta = ad_data.get("cta", "")
|
| 134 |
|
| 135 |
-
# STEP 2 β Video generation
|
| 136 |
try:
|
| 137 |
video_path = generate_video(pil_image)
|
| 138 |
except Exception as e:
|
|
@@ -154,10 +169,8 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="violet")) as demo:
|
|
| 154 |
|
| 155 |
with gr.Row():
|
| 156 |
with gr.Column(scale=1):
|
| 157 |
-
image_input = gr.Image(
|
| 158 |
-
|
| 159 |
-
)
|
| 160 |
-
desc_input = gr.Textbox(
|
| 161 |
label="π Describe your product (optional)",
|
| 162 |
placeholder="e.g. Organic honey sourced from Himalayan farms …",
|
| 163 |
lines=3,
|
|
@@ -187,8 +200,8 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="violet")) as demo:
|
|
| 187 |
|
| 188 |
gr.Markdown(
|
| 189 |
"---\n**How it works:** "
|
| 190 |
-
"1οΈβ£ Gemini
|
| 191 |
-
"2️⃣ Stable Video Diffusion → short cinematic video. "
|
| 192 |
"3οΈβ£ Ready-to-post reel! π"
|
| 193 |
)
|
| 194 |
|
|
|
|
| 12 |
from diffusers.utils import export_to_video
|
| 13 |
|
| 14 |
# ── ENV SETUP ────────────────────────────────────────────────────────────────
|
|
|
|
| 15 |
HF_TOKEN = (
|
| 16 |
os.environ.get("HF_TOKEN", "")
|
| 17 |
or os.environ.get("HF_KEY", "")
|
|
|
|
| 19 |
|
| 20 |
if HF_TOKEN:
|
| 21 |
login(token=HF_TOKEN)
|
| 22 |
+
print(f"✅ HF login OK — token starts: {HF_TOKEN[:8]}...")
|
| 23 |
else:
|
| 24 |
+
print("⚠️ No HF token found.")
|
| 25 |
+
|
| 26 |
+
# ── DEBUG: Print ALL env vars that contain KEY or TOKEN ──────────────────────
|
| 27 |
+
print("\n=== ENV DEBUG (secrets check) ===")
|
| 28 |
+
for k, v in os.environ.items():
|
| 29 |
+
if any(word in k.upper() for word in ["KEY", "TOKEN", "GEMINI", "API"]):
|
| 30 |
+
masked = v[:6] + "..." + v[-4:] if len(v) > 10 else "TOO_SHORT"
|
| 31 |
+
print(f" {k} = {masked} (len={len(v)})")
|
| 32 |
+
print("=================================\n")
|
| 33 |
|
| 34 |
# ── DEVICE SETUP ─────────────────────────────────────────────────────────────
|
| 35 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
| 49 |
|
| 50 |
# ── GEMINI HELPER ─────────────────────────────────────────────────────────────
|
| 51 |
def call_gemini(pil_image: Image.Image, user_desc: str, language: str, style: str) -> dict:
|
| 52 |
+
# Try every possible env var name user might have used
|
| 53 |
+
api_key = (
|
| 54 |
+
os.environ.get("GEMINI_API_KEY", "")
|
| 55 |
+
or os.environ.get("GEMINI_KEY", "")
|
| 56 |
+
or os.environ.get("GOOGLE_API_KEY", "")
|
| 57 |
+
or os.environ.get("API_KEY", "")
|
| 58 |
+
).strip()
|
| 59 |
+
|
| 60 |
+
print(f"[Gemini] Using key: {api_key[:6]}...{api_key[-4:]} (len={len(api_key)})")
|
| 61 |
+
|
| 62 |
if not api_key:
|
| 63 |
+
raise ValueError(
|
| 64 |
+
"No Gemini API key found! Tried: GEMINI_API_KEY, GEMINI_KEY, "
|
| 65 |
+
"GOOGLE_API_KEY, API_KEY. Please set one in Space Secrets."
|
| 66 |
+
)
|
| 67 |
+
|
| 68 |
+
if len(api_key) < 20:
|
| 69 |
+
raise ValueError(f"Key looks too short (len={len(api_key)}). Check for copy-paste errors.")
|
| 70 |
|
| 71 |
client = genai.Client(api_key=api_key)
|
| 72 |
|
|
|
|
| 87 |
Language rule : {lang_map.get(language, lang_map['English'])}
|
| 88 |
Style rule : {style_map.get(style, style_map['Fun'])}
|
| 89 |
|
| 90 |
+
CRITICAL: Return ONLY a raw JSON object. No markdown. No ```json. No explanation. Pure JSON only.
|
| 91 |
{{
|
| 92 |
"hook": "attention-grabbing opening line (1-2 sentences)",
|
| 93 |
"script": "full 15-20 second voiceover script",
|
|
|
|
| 100 |
image_bytes = buf.getvalue()
|
| 101 |
|
| 102 |
response = client.models.generate_content(
|
| 103 |
+
model="gemini-2.5-flash",
|
| 104 |
contents=[
|
| 105 |
types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg"),
|
| 106 |
types.Part.from_text(text=prompt),
|
|
|
|
| 108 |
)
|
| 109 |
|
| 110 |
raw = response.text.strip()
|
|
|
|
|
|
|
| 111 |
if "```" in raw:
|
| 112 |
raw = raw.split("```")[1]
|
| 113 |
if raw.lower().startswith("json"):
|
|
|
|
| 120 |
# ── VIDEO GENERATION ──────────────────────────────────────────────────────────
|
| 121 |
def generate_video(pil_image: Image.Image) -> str:
|
| 122 |
img = pil_image.convert("RGB").resize((1024, 576))
|
|
|
|
| 123 |
frames = svd_pipe(
|
| 124 |
image=img,
|
| 125 |
num_frames=14,
|
|
|
|
| 127 |
decode_chunk_size=4,
|
| 128 |
generator=torch.manual_seed(42),
|
| 129 |
).frames[0]
|
|
|
|
| 130 |
tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
|
| 131 |
export_to_video(frames, tmp.name, fps=7)
|
| 132 |
return tmp.name
|
|
|
|
| 139 |
|
| 140 |
pil_image = image if isinstance(image, Image.Image) else Image.fromarray(image)
|
| 141 |
|
|
|
|
| 142 |
try:
|
| 143 |
ad_data = call_gemini(pil_image, user_desc or "", language, style)
|
| 144 |
except Exception as e:
|
|
|
|
| 148 |
script = ad_data.get("script", "")
|
| 149 |
cta = ad_data.get("cta", "")
|
| 150 |
|
|
|
|
| 151 |
try:
|
| 152 |
video_path = generate_video(pil_image)
|
| 153 |
except Exception as e:
|
|
|
|
| 169 |
|
| 170 |
with gr.Row():
|
| 171 |
with gr.Column(scale=1):
|
| 172 |
+
image_input = gr.Image(label="πΈ Upload Product Image", type="pil", height=300)
|
| 173 |
+
desc_input = gr.Textbox(
|
|
|
|
|
|
|
| 174 |
label="π Describe your product (optional)",
|
| 175 |
placeholder="e.g. Organic honey sourced from Himalayan farms …",
|
| 176 |
lines=3,
|
|
|
|
| 200 |
|
| 201 |
gr.Markdown(
|
| 202 |
"---\n**How it works:** "
|
| 203 |
+
"1️⃣ Gemini 2.5 Flash reads your image → hook, script, CTA. "
|
| 204 |
+
"2️⃣ Stable Video Diffusion → short cinematic video clip. "
|
| 205 |
"3οΈβ£ Ready-to-post reel! π"
|
| 206 |
)
|
| 207 |
|