Spaces:

GLAkavya
/

ADDENEATOR

Running

App Files Files Community

GLAkavya committed 9 days ago

Commit

7112db6

verified ·

1 Parent(s): fef93c0

Create app.py

Browse files

Files changed (1) hide show

app.py +206 -0

app.py ADDED Viewed

	@@ -0,0 +1,206 @@

+import os
+import json
+import tempfile
+import torch
+import gradio as gr
+import google.generativeai as genai
+from PIL import Image
+from huggingface_hub import login
+from diffusers import StableVideoDiffusionPipeline
+from diffusers.utils import export_to_video
+# ── ENV SETUP ────────────────────────────────────────────────────────────────
+GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
+HF_TOKEN       = os.environ.get("HF_TOKEN", "")
+if HF_TOKEN:
+    login(token=HF_TOKEN)
+if GEMINI_API_KEY:
+    genai.configure(api_key=GEMINI_API_KEY)
+# ── LOAD SVD MODEL ONCE ──────────────────────────────────────────────────────
+# The pipeline is built at import time, so one instance is shared by every
+# call to generate_video() instead of being reloaded per request.
+print("⏳  Loading Stable Video Diffusion …")
+svd_pipe = StableVideoDiffusionPipeline.from_pretrained(
+    "stabilityai/stable-video-diffusion-img2vid-xt",
+    # Request the half-precision ("fp16") weight files and keep them in float16.
+    torch_dtype=torch.float16,
+    variant="fp16",
+)
+# NOTE(review): presumably needs an accelerator device and the `accelerate`
+# package to be installed — confirm for the target Space hardware.
+svd_pipe.enable_model_cpu_offload()   # saves VRAM
+print("✅  SVD model ready.")
+# ── GEMINI HELPER ─────────────────────────────────────────────────────────────
+GEMINI_SYSTEM = (
+    "You are an expert ad copywriter. "
+    "Always respond with ONLY valid JSON — no markdown, no extra text. "
+    "Keys required: hook, script, cta, video_prompt."
+)
+def call_gemini(pil_image: Image.Image, user_desc: str, language: str, style: str) -> dict:
+    """Send image + context to Gemini 2.5 Flash and get structured ad copy."""
+    model = genai.GenerativeModel(
+        model_name="gemini-2.5-flash",
+        system_instruction=GEMINI_SYSTEM,
+    )
+    lang_map = {
+        "English":  "Write everything in English.",
+        "Hindi":    "सब कुछ हिंदी में लिखें।",
+        "Hinglish": "Write in Hinglish (mix of Hindi and English).",
+    }
+    style_map = {
+        "Fun":       "tone: playful, witty, youthful",
+        "Premium":   "tone: luxurious, sophisticated, aspirational",
+        "Energetic": "tone: high-energy, bold, action-packed",
+    }
+    prompt = f"""
+Analyze this product image and create a compelling social-media video ad.
+{f'Product description: {user_desc}' if user_desc.strip() else ''}
+Language rule : {lang_map.get(language, lang_map['English'])}
+Style rule     : {style_map.get(style, style_map['Fun'])}
+Return ONLY this JSON structure:
+{{
+  "hook":         "attention-grabbing opening line (1–2 sentences)",
+  "script":       "full 15–20 second voiceover script",
+  "cta":          "call-to-action phrase",
+  "video_prompt": "detailed cinematic advertising scene description for video generation"
+}}
+"""
+    # Convert PIL → bytes for Gemini
+    import io
+    buf = io.BytesIO()
+    pil_image.save(buf, format="JPEG")
+    image_bytes = buf.getvalue()
+    response = model.generate_content(
+        [
+            {"mime_type": "image/jpeg", "data": image_bytes},
+            prompt,
+        ]
+    )
+    raw = response.text.strip()
+    # Strip possible markdown fences
+    if raw.startswith("```"):
+        raw = raw.split("```")[1]
+        if raw.startswith("json"):
+            raw = raw[4:]
+    raw = raw.strip()
+    return json.loads(raw)
+# ── VIDEO GENERATION ──────────────────────────────────────────────────────────
+def generate_video(pil_image: Image.Image) -> str:
+    """Run SVD on the product image and return path to .mp4 file."""
+    # SVD works best with 1024×576
+    img = pil_image.convert("RGB").resize((1024, 576))
+    frames = svd_pipe(
+        image=img,
+        num_frames=14,
+        fps=7,
+        decode_chunk_size=4,
+        generator=torch.manual_seed(42),
+    ).frames[0]
+    tmp_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
+    export_to_video(frames, tmp_file.name, fps=7)
+    return tmp_file.name
+# ── MAIN PIPELINE ─────────────────────────────────────────────────────────────
+def generate_ad(image, user_desc, language, style):
+    if image is None:
+        return None, "⚠️ Please upload a product image.", "", ""
+    try:
+        pil_image = Image.fromarray(image) if not isinstance(image, Image.Image) else image
+    except Exception:
+        pil_image = image  # already PIL
+    # Step 1 — Gemini ad copy
+    try:
+        ad_data = call_gemini(pil_image, user_desc, language, style)
+    except Exception as e:
+        return None, f"❌ Gemini error: {e}", "", ""
+    hook         = ad_data.get("hook", "")
+    script       = ad_data.get("script", "")
+    cta          = ad_data.get("cta", "")
+    video_prompt = ad_data.get("video_prompt", "")
+    # Step 2 — SVD video
+    try:
+        video_path = generate_video(pil_image)
+    except Exception as e:
+        return None, hook, f"❌ Video error: {e}\n\n{script}", cta
+    # Step 3 — Return everything
+    return video_path, hook, script, cta
+# ── GRADIO UI ─────────────────────────────────────────────────────────────────
+# Custom stylesheet passed to gr.Blocks. "#title" and "#sub" match the elem_id
+# values of the two header Markdown components below; ".card" is not attached
+# to any component in the code visible here.
+css = """
+#title  { text-align: center; font-size: 2.2rem; font-weight: 800; margin-bottom: 0.2rem; }
+#sub    { text-align: center; color: #888; margin-bottom: 1.5rem; }
+.card   { border-radius: 12px; padding: 1rem; background: #1a1a2e; }
+"""
+# Page layout: header, then one row with an input column and an output column,
+# then a short "how it works" footer.
+with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="violet")) as demo:
+    gr.Markdown("# 🎬 AI Reel Generator", elem_id="title")
+    gr.Markdown("Upload a product image → get a cinematic ad video + copy in seconds.", elem_id="sub")
+    with gr.Row():
+        # ── LEFT COLUMN — inputs ──────────────────────────────────────────────
+        with gr.Column(scale=1):
+            # type="pil" asks Gradio to hand the callback a PIL image.
+            image_input = gr.Image(
+                label="📸 Upload Product Image",
+                type="pil",
+                height=300,
+            )
+            desc_input = gr.Textbox(
+                label="📝 Describe your product (optional)",
+                placeholder="e.g. Organic honey sourced from Himalayan farms …",
+                lines=3,
+            )
+            with gr.Row():
+                # Choices mirror the keys of lang_map in call_gemini().
+                lang_dropdown = gr.Dropdown(
+                    choices=["English", "Hindi", "Hinglish"],
+                    value="English",
+                    label="🌐 Language",
+                )
+                # Choices mirror the keys of style_map in call_gemini().
+                style_dropdown = gr.Dropdown(
+                    choices=["Fun", "Premium", "Energetic"],
+                    value="Fun",
+                    label="🎨 Style",
+                )
+            gen_btn = gr.Button("🚀 Generate Ad", variant="primary", size="lg")
+        # ── RIGHT COLUMN — outputs ────────────────────────────────────────────
+        with gr.Column(scale=1):
+            video_out  = gr.Video(label="🎥 Generated Video", height=300)
+            hook_out   = gr.Textbox(label="⚡ Hook",   lines=2, interactive=False)
+            script_out = gr.Textbox(label="📄 Script", lines=5, interactive=False)
+            cta_out    = gr.Textbox(label="🎯 CTA",    lines=1, interactive=False)
+    # The inputs order matches generate_ad's parameters and the outputs order
+    # matches its returned 4-tuple (video, hook, script, cta).
+    gen_btn.click(
+        fn=generate_ad,
+        inputs=[image_input, desc_input, lang_dropdown, style_dropdown],
+        outputs=[video_out, hook_out, script_out, cta_out],
+    )
+    gr.Markdown(
+        "---\n"
+        "**How it works:** "
+        "1️⃣ Gemini 2.5 Flash reads your image and writes ad copy + a cinematic prompt. "
+        "2️⃣ Stable Video Diffusion turns your image into a short video. "
+        "3️⃣ You get a ready-to-post reel!"
+    )
+# Start the Gradio app only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()