GLAkavya committed on
Commit
7dfb42b
·
verified ·
1 Parent(s): 6595df8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -46
app.py CHANGED
@@ -12,63 +12,45 @@ from diffusers import StableVideoDiffusionPipeline
12
  from diffusers.utils import export_to_video
13
 
14
  # ── ENV SETUP ────────────────────────────────────────────────────────────────
15
- HF_TOKEN = (
16
- os.environ.get("HF_TOKEN", "")
17
- or os.environ.get("HF_KEY", "")
 
18
  ).strip()
19
-
20
- if HF_TOKEN:
21
- login(token=HF_TOKEN)
22
- print(f"βœ… HF login OK β€” token starts: {HF_TOKEN[:8]}...")
23
  else:
24
- print("⚠️ No HF token found.")
25
 
26
- # ── DEBUG: Print ALL env vars that contain KEY or TOKEN ──────────────────────
27
- print("\n=== ENV DEBUG (secrets check) ===")
28
- for k, v in os.environ.items():
29
- if any(word in k.upper() for word in ["KEY", "TOKEN", "GEMINI", "API"]):
30
- masked = v[:6] + "..." + v[-4:] if len(v) > 10 else "TOO_SHORT"
31
- print(f" {k} = {masked} (len={len(v)})")
32
- print("=================================\n")
33
 
34
- # ── DEVICE SETUP ─────────────────────────────────────────────────────────────
35
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
36
  DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
37
- print(f"πŸ–₯️ Using device: {DEVICE}")
38
 
39
- # ── LOAD SVD MODEL ONCE ──────────────────────────────────────────────────────
40
- print("⏳ Loading Stable Video Diffusion …")
41
  svd_pipe = StableVideoDiffusionPipeline.from_pretrained(
42
  "stabilityai/stable-video-diffusion-img2vid-xt",
43
  torch_dtype=DTYPE,
44
  variant="fp16" if DEVICE == "cuda" else None,
45
  )
46
  svd_pipe = svd_pipe.to(DEVICE)
47
- print("βœ… SVD model ready.")
48
 
49
 
50
- # ── GEMINI HELPER ─────────────────────────────────────────────────────────────
51
  def call_gemini(pil_image: Image.Image, user_desc: str, language: str, style: str) -> dict:
52
- # Try every possible env var name user might have used
53
- api_key = (
54
- os.environ.get("GEMINI_API_KEY", "")
55
- or os.environ.get("GEMINI_KEY", "")
56
- or os.environ.get("GOOGLE_API_KEY", "")
57
- or os.environ.get("API_KEY", "")
58
- ).strip()
59
-
60
- print(f"[Gemini] Using key: {api_key[:6]}...{api_key[-4:]} (len={len(api_key)})")
61
-
62
- if not api_key:
63
- raise ValueError(
64
- "No Gemini API key found! Tried: GEMINI_API_KEY, GEMINI_KEY, "
65
- "GOOGLE_API_KEY, API_KEY. Please set one in Space Secrets."
66
- )
67
-
68
- if len(api_key) < 20:
69
- raise ValueError(f"Key looks too short (len={len(api_key)}). Check for copy-paste errors.")
70
-
71
- client = genai.Client(api_key=api_key)
72
 
73
  lang_map = {
74
  "English": "Write everything in English.",
@@ -87,7 +69,7 @@ def call_gemini(pil_image: Image.Image, user_desc: str, language: str, style: st
87
  Language rule : {lang_map.get(language, lang_map['English'])}
88
  Style rule : {style_map.get(style, style_map['Fun'])}
89
 
90
- CRITICAL: Return ONLY a raw JSON object. No markdown. No ```json. No explanation. Pure JSON only.
91
  {{
92
  "hook": "attention-grabbing opening line (1-2 sentences)",
93
  "script": "full 15-20 second voiceover script",
@@ -117,7 +99,7 @@ CRITICAL: Return ONLY a raw JSON object. No markdown. No ```json. No explanation
117
  return json.loads(raw)
118
 
119
 
120
- # ── VIDEO GENERATION ──────────────────────────────────────────────────────────
121
  def generate_video(pil_image: Image.Image) -> str:
122
  img = pil_image.convert("RGB").resize((1024, 576))
123
  frames = svd_pipe(
@@ -132,7 +114,7 @@ def generate_video(pil_image: Image.Image) -> str:
132
  return tmp.name
133
 
134
 
135
- # ── MAIN PIPELINE ─────────────────────────────────────────────────────────────
136
  def generate_ad(image, user_desc, language, style):
137
  if image is None:
138
  return None, "⚠️ Please upload a product image.", "", ""
@@ -156,7 +138,7 @@ def generate_ad(image, user_desc, language, style):
156
  return video_path, hook, script, cta
157
 
158
 
159
- # ── GRADIO UI ─────────────────────────────────────────────────────────────────
160
  css = """
161
  #title { text-align:center; font-size:2.2rem; font-weight:800; margin-bottom:.2rem; }
162
  #sub { text-align:center; color:#888; margin-bottom:1.5rem; }
@@ -201,7 +183,7 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="violet")) as demo:
201
  gr.Markdown(
202
  "---\n**How it works:** "
203
  "1️⃣ Gemini 2.5 Flash reads your image β†’ hook, script, CTA. "
204
- "2️⃣ Stable Video Diffusion β†’ short cinematic video clip. "
205
  "3️⃣ Ready-to-post reel! πŸŽ‰"
206
  )
207
 
 
12
  from diffusers.utils import export_to_video
13
 
14
  # ── ENV SETUP ────────────────────────────────────────────────────────────────
15
+ # genai.Client() auto-reads GOOGLE_API_KEY — so we map our secret to it
16
+ gemini_key = (
17
+ os.environ.get("GEMINI_API_KEY", "")
18
+ or os.environ.get("GOOGLE_API_KEY", "")
19
  ).strip()
20
+ if gemini_key:
21
+ os.environ["GOOGLE_API_KEY"] = gemini_key # ensure Client() finds it
22
+ print(f"βœ… Gemini key loaded: {gemini_key[:6]}...{gemini_key[-4:]} (len={len(gemini_key)})")
 
23
  else:
24
+ print("❌ No Gemini key found!")
25
 
26
+ hf_token = (
27
+ os.environ.get("HF_TOKEN", "")
28
+ or os.environ.get("HF_KEY", "")
29
+ ).strip()
30
+ if hf_token:
31
+ login(token=hf_token)
32
+ print("βœ… HuggingFace login OK")
33
 
34
+ # ── DEVICE ───────────────────────────────────────────────────────────────────
35
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
36
  DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
37
+ print(f"πŸ–₯️ Device: {DEVICE}")
38
 
39
+ # ── LOAD SVD ONCE ─────────────────────────────────────────────────────────────
40
+ print("⏳ Loading Stable Video Diffusion …")
41
  svd_pipe = StableVideoDiffusionPipeline.from_pretrained(
42
  "stabilityai/stable-video-diffusion-img2vid-xt",
43
  torch_dtype=DTYPE,
44
  variant="fp16" if DEVICE == "cuda" else None,
45
  )
46
  svd_pipe = svd_pipe.to(DEVICE)
47
+ print("βœ… SVD ready.")
48
 
49
 
50
+ # ── GEMINI ────────────────────────────────────────────────────────────────────
51
  def call_gemini(pil_image: Image.Image, user_desc: str, language: str, style: str) -> dict:
52
+ # genai.Client() reads GOOGLE_API_KEY automatically — no manual key passing!
53
+ client = genai.Client()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  lang_map = {
56
  "English": "Write everything in English.",
 
69
  Language rule : {lang_map.get(language, lang_map['English'])}
70
  Style rule : {style_map.get(style, style_map['Fun'])}
71
 
72
+ CRITICAL: Return ONLY raw JSON. No markdown. No ```json. No explanation. Pure JSON only.
73
  {{
74
  "hook": "attention-grabbing opening line (1-2 sentences)",
75
  "script": "full 15-20 second voiceover script",
 
99
  return json.loads(raw)
100
 
101
 
102
+ # ── VIDEO ─────────────────────────────────────────────────────────────────────
103
  def generate_video(pil_image: Image.Image) -> str:
104
  img = pil_image.convert("RGB").resize((1024, 576))
105
  frames = svd_pipe(
 
114
  return tmp.name
115
 
116
 
117
+ # ── PIPELINE ──────────────────────────────────────────────────────────────────
118
  def generate_ad(image, user_desc, language, style):
119
  if image is None:
120
  return None, "⚠️ Please upload a product image.", "", ""
 
138
  return video_path, hook, script, cta
139
 
140
 
141
+ # ── UI ────────────────────────────────────────────────────────────────────────
142
  css = """
143
  #title { text-align:center; font-size:2.2rem; font-weight:800; margin-bottom:.2rem; }
144
  #sub { text-align:center; color:#888; margin-bottom:1.5rem; }
 
183
  gr.Markdown(
184
  "---\n**How it works:** "
185
  "1️⃣ Gemini 2.5 Flash reads your image β†’ hook, script, CTA. "
186
+ "2️⃣ Stable Video Diffusion β†’ short cinematic clip. "
187
  "3️⃣ Ready-to-post reel! πŸŽ‰"
188
  )
189