Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,63 +12,45 @@ from diffusers import StableVideoDiffusionPipeline
|
|
| 12 |
from diffusers.utils import export_to_video
|
| 13 |
|
| 14 |
# ── ENV SETUP ────────────────────────────────────────────────────────────────
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
| 18 |
).strip()
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
print(f"β
HF login OK β token starts: {HF_TOKEN[:8]}...")
|
| 23 |
else:
|
| 24 |
-
print("
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
print("
|
| 33 |
|
| 34 |
-
# ββ DEVICE
|
| 35 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 36 |
DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
|
| 37 |
-
print(f"π₯οΈ
|
| 38 |
|
| 39 |
-
# ββ LOAD SVD
|
| 40 |
-
print("β³
|
| 41 |
svd_pipe = StableVideoDiffusionPipeline.from_pretrained(
|
| 42 |
"stabilityai/stable-video-diffusion-img2vid-xt",
|
| 43 |
torch_dtype=DTYPE,
|
| 44 |
variant="fp16" if DEVICE == "cuda" else None,
|
| 45 |
)
|
| 46 |
svd_pipe = svd_pipe.to(DEVICE)
|
| 47 |
-
print("β
|
| 48 |
|
| 49 |
|
| 50 |
-
# ββ GEMINI
|
| 51 |
def call_gemini(pil_image: Image.Image, user_desc: str, language: str, style: str) -> dict:
|
| 52 |
-
#
|
| 53 |
-
|
| 54 |
-
os.environ.get("GEMINI_API_KEY", "")
|
| 55 |
-
or os.environ.get("GEMINI_KEY", "")
|
| 56 |
-
or os.environ.get("GOOGLE_API_KEY", "")
|
| 57 |
-
or os.environ.get("API_KEY", "")
|
| 58 |
-
).strip()
|
| 59 |
-
|
| 60 |
-
print(f"[Gemini] Using key: {api_key[:6]}...{api_key[-4:]} (len={len(api_key)})")
|
| 61 |
-
|
| 62 |
-
if not api_key:
|
| 63 |
-
raise ValueError(
|
| 64 |
-
"No Gemini API key found! Tried: GEMINI_API_KEY, GEMINI_KEY, "
|
| 65 |
-
"GOOGLE_API_KEY, API_KEY. Please set one in Space Secrets."
|
| 66 |
-
)
|
| 67 |
-
|
| 68 |
-
if len(api_key) < 20:
|
| 69 |
-
raise ValueError(f"Key looks too short (len={len(api_key)}). Check for copy-paste errors.")
|
| 70 |
-
|
| 71 |
-
client = genai.Client(api_key=api_key)
|
| 72 |
|
| 73 |
lang_map = {
|
| 74 |
"English": "Write everything in English.",
|
|
@@ -87,7 +69,7 @@ def call_gemini(pil_image: Image.Image, user_desc: str, language: str, style: st
|
|
| 87 |
Language rule : {lang_map.get(language, lang_map['English'])}
|
| 88 |
Style rule : {style_map.get(style, style_map['Fun'])}
|
| 89 |
|
| 90 |
-
CRITICAL: Return ONLY
|
| 91 |
{{
|
| 92 |
"hook": "attention-grabbing opening line (1-2 sentences)",
|
| 93 |
"script": "full 15-20 second voiceover script",
|
|
@@ -117,7 +99,7 @@ CRITICAL: Return ONLY a raw JSON object. No markdown. No ```json. No explanation
|
|
| 117 |
return json.loads(raw)
|
| 118 |
|
| 119 |
|
| 120 |
-
# ββ VIDEO
|
| 121 |
def generate_video(pil_image: Image.Image) -> str:
|
| 122 |
img = pil_image.convert("RGB").resize((1024, 576))
|
| 123 |
frames = svd_pipe(
|
|
@@ -132,7 +114,7 @@ def generate_video(pil_image: Image.Image) -> str:
|
|
| 132 |
return tmp.name
|
| 133 |
|
| 134 |
|
| 135 |
-
# ββ
|
| 136 |
def generate_ad(image, user_desc, language, style):
|
| 137 |
if image is None:
|
| 138 |
return None, "⚠️ Please upload a product image.", "", ""
|
|
@@ -156,7 +138,7 @@ def generate_ad(image, user_desc, language, style):
|
|
| 156 |
return video_path, hook, script, cta
|
| 157 |
|
| 158 |
|
| 159 |
-
# ββ
|
| 160 |
css = """
|
| 161 |
#title { text-align:center; font-size:2.2rem; font-weight:800; margin-bottom:.2rem; }
|
| 162 |
#sub { text-align:center; color:#888; margin-bottom:1.5rem; }
|
|
@@ -201,7 +183,7 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="violet")) as demo:
|
|
| 201 |
gr.Markdown(
|
| 202 |
"---\n**How it works:** "
|
| 203 |
"1️⃣ Gemini 2.5 Flash reads your image → hook, script, CTA. "
|
| 204 |
-
"2οΈβ£ Stable Video Diffusion β short cinematic
|
| 205 |
"3οΈβ£ Ready-to-post reel! π"
|
| 206 |
)
|
| 207 |
|
|
|
|
| 12 |
from diffusers.utils import export_to_video
|
| 13 |
|
| 14 |
# ── ENV SETUP ────────────────────────────────────────────────────────────────
|
| 15 |
+
# genai.Client() auto-reads GOOGLE_API_KEY — so we map our secret to it
|
| 16 |
+
gemini_key = (
|
| 17 |
+
os.environ.get("GEMINI_API_KEY", "")
|
| 18 |
+
or os.environ.get("GOOGLE_API_KEY", "")
|
| 19 |
).strip()
|
| 20 |
+
if gemini_key:
|
| 21 |
+
os.environ["GOOGLE_API_KEY"] = gemini_key # ensure Client() finds it
|
| 22 |
+
print(f"✅ Gemini key loaded: {gemini_key[:6]}...{gemini_key[-4:]} (len={len(gemini_key)})")
|
|
|
|
| 23 |
else:
|
| 24 |
+
print("❌ No Gemini key found!")
|
| 25 |
|
| 26 |
+
hf_token = (
|
| 27 |
+
os.environ.get("HF_TOKEN", "")
|
| 28 |
+
or os.environ.get("HF_KEY", "")
|
| 29 |
+
).strip()
|
| 30 |
+
if hf_token:
|
| 31 |
+
login(token=hf_token)
|
| 32 |
+
print("✅ HuggingFace login OK")
|
| 33 |
|
| 34 |
+
# ── DEVICE ───────────────────────────────────────────────────────────────────
|
| 35 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 36 |
DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
|
| 37 |
+
print(f"🖥️ Device: {DEVICE}")
|
| 38 |
|
| 39 |
+
# ── LOAD SVD ONCE ─────────────────────────────────────────────────────────────
|
| 40 |
+
print("⏳ Loading Stable Video Diffusion …")
|
| 41 |
svd_pipe = StableVideoDiffusionPipeline.from_pretrained(
|
| 42 |
"stabilityai/stable-video-diffusion-img2vid-xt",
|
| 43 |
torch_dtype=DTYPE,
|
| 44 |
variant="fp16" if DEVICE == "cuda" else None,
|
| 45 |
)
|
| 46 |
svd_pipe = svd_pipe.to(DEVICE)
|
| 47 |
+
print("✅ SVD ready.")
|
| 48 |
|
| 49 |
|
| 50 |
+
# ── GEMINI ────────────────────────────────────────────────────────────────────
|
| 51 |
def call_gemini(pil_image: Image.Image, user_desc: str, language: str, style: str) -> dict:
|
| 52 |
+
# genai.Client() reads GOOGLE_API_KEY automatically — no manual key passing!
|
| 53 |
+
client = genai.Client()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
lang_map = {
|
| 56 |
"English": "Write everything in English.",
|
|
|
|
| 69 |
Language rule : {lang_map.get(language, lang_map['English'])}
|
| 70 |
Style rule : {style_map.get(style, style_map['Fun'])}
|
| 71 |
|
| 72 |
+
CRITICAL: Return ONLY raw JSON. No markdown. No ```json. No explanation. Pure JSON only.
|
| 73 |
{{
|
| 74 |
"hook": "attention-grabbing opening line (1-2 sentences)",
|
| 75 |
"script": "full 15-20 second voiceover script",
|
|
|
|
| 99 |
return json.loads(raw)
|
| 100 |
|
| 101 |
|
| 102 |
+
# ── VIDEO ─────────────────────────────────────────────────────────────────────
|
| 103 |
def generate_video(pil_image: Image.Image) -> str:
|
| 104 |
img = pil_image.convert("RGB").resize((1024, 576))
|
| 105 |
frames = svd_pipe(
|
|
|
|
| 114 |
return tmp.name
|
| 115 |
|
| 116 |
|
| 117 |
+
# ── PIPELINE ──────────────────────────────────────────────────────────────────
|
| 118 |
def generate_ad(image, user_desc, language, style):
|
| 119 |
if image is None:
|
| 120 |
return None, "⚠️ Please upload a product image.", "", ""
|
|
|
|
| 138 |
return video_path, hook, script, cta
|
| 139 |
|
| 140 |
|
| 141 |
+
# ── UI ────────────────────────────────────────────────────────────────────────
|
| 142 |
css = """
|
| 143 |
#title { text-align:center; font-size:2.2rem; font-weight:800; margin-bottom:.2rem; }
|
| 144 |
#sub { text-align:center; color:#888; margin-bottom:1.5rem; }
|
|
|
|
| 183 |
gr.Markdown(
|
| 184 |
"---\n**How it works:** "
|
| 185 |
"1️⃣ Gemini 2.5 Flash reads your image → hook, script, CTA. "
|
| 186 |
+
"2️⃣ Stable Video Diffusion → short cinematic clip. "
|
| 187 |
"3οΈβ£ Ready-to-post reel! π"
|
| 188 |
)
|
| 189 |
|