Spaces:

GLAkavya
/

ADgeineAI

Sleeping

App Files Files Community

GLAkavya committed 8 days ago

Commit

4d9294e

verified ·

1 Parent(s): 4e38ad3

Create app.py

Browse files

Files changed (1) hide show

app.py +451 -0

app.py ADDED Viewed

	@@ -0,0 +1,451 @@

+import os
+import json
+import tempfile
+import io
+import math
+import time
+import numpy as np
+import cv2
+import gradio as gr
+from google import genai
+from google.genai import types
+from PIL import Image
+# ── ENV SETUP ────────────────────────────────────────────────────────────────
+# Resolve the Gemini key: GEMINI_API_KEY is preferred, GOOGLE_API_KEY is the fallback.
+gemini_key = (os.environ.get("GEMINI_API_KEY", "") or os.environ.get("GOOGLE_API_KEY", "")).strip()
+if not gemini_key:
+    print("❌ No Gemini key found!")
+else:
+    # Re-export under GOOGLE_API_KEY; presumably the variable the no-argument
+    # genai.Client() call reads — TODO confirm against the SDK docs.
+    os.environ["GOOGLE_API_KEY"] = gemini_key
+    print(f"✅ Gemini key loaded (len={len(gemini_key)})")
+# Resolve the Hugging Face token the same way (HF_TOKEN first, then HF_KEY).
+hf_token = (os.environ.get("HF_TOKEN", "") or os.environ.get("HF_KEY", "")).strip()
+# hf_client stays None unless login and client construction both succeed.
+hf_client = None
+if not hf_token:
+    print("⚠️ No HF token — will use Ken Burns fallback")
+else:
+    try:
+        # Imported lazily so the app still starts when huggingface_hub is unavailable.
+        from huggingface_hub import login, InferenceClient
+        login(token=hf_token)
+        hf_client = InferenceClient(token=hf_token)
+        print("✅ HF login OK")
+    except Exception as e:
+        hf_client = None
+        print(f"⚠️ HF login skipped: {e}")
+print("✅ App ready!")
+# ── HF MODEL FALLBACK CHAIN ──────────────────────────────────────────────────
+# Models tried in order — first success wins, last is Ken Burns (always works)
+# One row per model: (model id, display name, short note shown in the progress log).
+_HF_MODEL_ROWS = (
+    ("Lightricks/LTX-2", "LTX-2 (Lightricks)", "Best quality, fastest inference available ⚡"),
+    ("Wan-AI/Wan2.2-I2V-A14B", "Wan 2.2 14B", "High quality, slightly slower"),
+    ("stabilityai/stable-video-diffusion-img2vid-xt", "Stable Video Diffusion XT", "136k downloads, reliable classic"),
+    ("KlingTeam/LivePortrait", "KlingTeam LivePortrait", "Great for portraits / faces"),
+    ("Lightricks/LTX-Video", "LTX-Video (older)", "248k downloads, solid fallback"),
+    # Final fallback — the "__ken_burns__" sentinel selects the local OpenCV renderer
+    ("__ken_burns__", "Ken Burns (local, no API)", "Always works — cinematic zoom/pan effect"),
+)
+HF_MODELS = [
+    {"id": model_id, "name": display_name, "note": remark}
+    for model_id, display_name, remark in _HF_MODEL_ROWS
+]
+def try_hf_model(model_id: str, pil_image: Image.Image, prompt: str) -> bytes | None:
+    """Ask one Hugging Face model to turn the image into a video.
+
+    Args:
+        model_id: Hub model id passed to ``hf_client.image_to_video``.
+        pil_image: Source still image; any PIL mode is accepted.
+        prompt: Text prompt forwarded to the model.
+
+    Returns:
+        The raw video bytes, or ``None`` when no client is configured, the
+        call raises, or the result is of an unsupported type. Never raises:
+        every failure is printed and reported as ``None`` so the caller can
+        move on to the next model.
+    """
+    if hf_client is None:
+        return None
+    try:
+        buf = io.BytesIO()
+        # JPEG cannot store alpha or palette data; without this conversion an
+        # RGBA/P/LA upload raises in save() and every model appears to fail.
+        pil_image.convert("RGB").save(buf, format="JPEG")
+        image_bytes = buf.getvalue()
+        print(f"   🤖 Trying {model_id} ...")
+        result = hf_client.image_to_video(
+            image=image_bytes,
+            model=model_id,
+            prompt=prompt,
+        )
+        if isinstance(result, (bytes, bytearray)):
+            return bytes(result)
+        if hasattr(result, "read"):
+            # File-like result: drain it into bytes
+            return result.read()
+        print(f"   ❌ {model_id} returned unsupported result type: {type(result).__name__}")
+        return None
+    except Exception as e:
+        # Deliberate best-effort boundary: any failure means "try the next model"
+        print(f"   ❌ {model_id} failed: {e}")
+        return None
+def generate_video_with_fallback(
+    pil_image: Image.Image,
+    prompt: str,
+    style: str,
+    progress_callback=None,
+) -> tuple[str, str]:
+    """Produce a video by walking ``HF_MODELS`` in order until one succeeds.
+
+    Remote entries are attempted through ``try_hf_model``; the
+    ``"__ken_burns__"`` entry renders locally via ``generate_video_ken_burns``.
+    When no Hugging Face client is configured the remote entries are skipped
+    outright, so the log does not claim attempts that never happened and no
+    time is spent sleeping between them.
+
+    Args:
+        pil_image: Source still image.
+        prompt: Text prompt forwarded to the remote models.
+        style: Style name; lower-cased before being handed to the local renderer.
+        progress_callback: Optional callable receiving one Markdown status
+            string per model that is actually attempted.
+
+    Returns:
+        ``(video_path, model_used_name)`` where ``video_path`` is an ``.mp4``
+        temp file that the caller owns (it is not deleted here).
+    """
+    remote_enabled = hf_client is not None
+    attempted_remote = False
+    for model_info in HF_MODELS:
+        model_id   = model_info["id"]
+        model_name = model_info["name"]
+        is_local   = model_id == "__ken_burns__"
+        if not is_local and not remote_enabled:
+            # try_hf_model would return None immediately; skip the pointless round trip
+            continue
+        if not is_local and attempted_remote:
+            # Small pause between consecutive remote attempts to avoid hammering the API
+            time.sleep(1)
+        if progress_callback:
+            progress_callback(f"⏳ Trying: **{model_name}** — {model_info['note']}")
+        if is_local:
+            print("   🎬 Using Ken Burns (local fallback)")
+            path = generate_video_ken_burns(pil_image, duration_sec=5, fps=24, style=style.lower())
+            return path, f"🎨 {model_name}"
+        attempted_remote = True
+        video_bytes = try_hf_model(model_id, pil_image, prompt)
+        if video_bytes:
+            # delete=False keeps the file for the caller; the with-block closes the
+            # handle so the descriptor is not leaked and the data is flushed to disk
+            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
+                tmp.write(video_bytes)
+            print(f"   ✅ SUCCESS with {model_name}")
+            return tmp.name, f"🤖 {model_name}"
+    # Only reached if HF_MODELS has no "__ken_burns__" entry
+    path = generate_video_ken_burns(pil_image, duration_sec=5, fps=24, style=style.lower())
+    return path, "🎨 Ken Burns (local)"
+# ── GEMINI ────────────────────────────────────────────────────────────────────
+def _parse_ad_json(raw: str) -> dict:
+    """Extract the JSON object from a model reply, tolerating code fences and stray prose."""
+    text = raw.strip()
+    if "```" in text:
+        # Keep only the contents of the first fenced block and drop a "json" language tag
+        text = text.split("```")[1]
+        if text.lower().startswith("json"):
+            text = text[4:]
+        text = text.strip()
+    try:
+        data = json.loads(text)
+    except json.JSONDecodeError:
+        # Second chance: the reply may wrap the object in explanatory text
+        start, end = text.find("{"), text.rfind("}")
+        if start == -1 or end <= start:
+            raise
+        data = json.loads(text[start:end + 1])
+    if not isinstance(data, dict):
+        raise ValueError(f"expected a JSON object, got {type(data).__name__}")
+    return data
+def call_gemini(pil_image: Image.Image, user_desc: str, language: str, style: str) -> dict:
+    """Ask Gemini for ad copy describing the product image.
+
+    Args:
+        pil_image: Product image; any PIL mode is accepted.
+        user_desc: Optional free-text product description (may be empty).
+        language: Key into the language rules; unknown values fall back to English.
+        style: Key into the tone rules; unknown values fall back to Fun.
+
+    Returns:
+        The parsed JSON object. The prompt asks for the keys ``hook``,
+        ``script``, ``cta`` and ``video_prompt``, but their presence is not
+        checked here.
+
+    Raises:
+        ValueError: If the reply is empty or is not a JSON object
+            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
+        Exception: Whatever the Gemini client raises for API failures.
+    """
+    client = genai.Client()
+    lang_map = {
+        "English":  "Write everything in English.",
+        "Hindi":    "सब कुछ हिंदी में लिखें।",
+        "Hinglish": "Write in Hinglish (mix of Hindi and English).",
+    }
+    style_map = {
+        "Fun":       "tone: playful, witty, youthful",
+        "Premium":   "tone: luxurious, sophisticated, aspirational",
+        "Energetic": "tone: high-energy, bold, action-packed",
+    }
+    prompt = f"""You are an expert ad copywriter. Analyze this product image and create a compelling social-media video ad.
+{f'Product description: {user_desc}' if user_desc.strip() else ''}
+Language rule : {lang_map.get(language, lang_map['English'])}
+Style rule     : {style_map.get(style, style_map['Fun'])}
+CRITICAL: Return ONLY raw JSON. No markdown. No ```json. No explanation. Pure JSON only.
+{{
+  "hook":         "attention-grabbing opening line (1-2 sentences)",
+  "script":       "full 15-20 second voiceover script",
+  "cta":          "call-to-action phrase",
+  "video_prompt": "detailed cinematic advertising scene description for image-to-video AI"
+}}"""
+    buf = io.BytesIO()
+    # JPEG cannot store alpha or palette data, so normalise the mode before encoding
+    pil_image.convert("RGB").save(buf, format="JPEG")
+    image_bytes = buf.getvalue()
+    response = client.models.generate_content(
+        model="gemini-2.5-flash",
+        contents=[
+            types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg"),
+            types.Part.from_text(text=prompt),
+        ],
+    )
+    raw = response.text
+    if not raw or not raw.strip():
+        # Guard against a missing text payload instead of failing on None.strip()
+        raise ValueError("Gemini returned an empty response")
+    return _parse_ad_json(raw)
+# ── KEN BURNS VIDEO (local fallback) ─────────────────────────────────────────
+def ease_in_out(t):
+    """Smoothstep easing 3t² − 2t³: returns 0 at t=0, 1 at t=1, with no clamping of t."""
+    squared = t * t
+    return squared * (3 - 2 * t)
+def ease_out_bounce(t):
+    """Piecewise-parabolic "bounce" easing.
+
+    The input is split into four ranges at 1/2.75, 2/2.75 and 2.5/2.75; each
+    range is a parabola with the same gain, shifted and lifted by its own
+    constants. No clamping is applied to ``t``.
+    """
+    gain = 7.5625
+    span = 2.75
+    if t < 1 / span:
+        # First arc starts at the origin and needs no shift
+        return gain * t * t
+    if t < 2 / span:
+        shift, lift = 1.5 / span, 0.75
+    elif t < 2.5 / span:
+        shift, lift = 2.25 / span, 0.9375
+    else:
+        shift, lift = 2.625 / span, 0.984375
+    centred = t - shift
+    return gain * centred * centred + lift
+def apply_vignette(frame, strength=0.6):
+    """Darken a frame towards its edges and return the result as uint8.
+
+    Each pixel is scaled by ``clip(1 - strength * dist**1.5, 0, 1)``, where
+    ``dist`` is the distance from (w/2, h/2) measured in half-widths and
+    half-heights. The mask is broadcast over the last axis, so ``frame`` is
+    expected to be a 3-D array (rows, columns, channels).
+    """
+    height, width = frame.shape[:2]
+    half_w, half_h = width / 2, height / 2
+    rows, cols = np.ogrid[:height, :width]
+    # Normalised offsets from the centre: ±1 is reached one half-extent away
+    norm_x = (cols - half_w) / half_w
+    norm_y = (rows - half_h) / half_h
+    radius = np.sqrt(norm_x ** 2 + norm_y ** 2)
+    falloff = np.clip(1.0 - strength * (radius ** 1.5), 0, 1)
+    shaded = frame * falloff[:, :, np.newaxis]
+    return shaded.astype(np.uint8)
+def apply_color_grade(frame, style="premium"):
+    """Apply a per-style colour adjustment and return a new uint8 frame.
+
+    "premium" boosts channels 0 and 2 and then the overall brightness,
+    "energetic" raises saturation around the per-pixel channel mean and then
+    brightness, "fun" boosts channels 0 and 1. Any other style returns the
+    frame unchanged apart from the float32 round trip. The input is not
+    modified.
+    """
+    graded = frame.astype(np.float32)
+    if style == "premium":
+        for channel, gain in ((0, 1.05), (2, 1.08)):
+            graded[:, :, channel] = np.clip(graded[:, :, channel] * gain, 0, 255)
+        graded = np.clip(graded * 1.05, 0, 255)
+    elif style == "energetic":
+        # Push every channel away from the pixel's mean to increase saturation
+        mean_level = np.mean(graded, axis=2, keepdims=True)
+        graded = np.clip(mean_level + 1.4 * (graded - mean_level), 0, 255)
+        graded = np.clip(graded * 1.1, 0, 255)
+    elif style == "fun":
+        for channel, gain in ((0, 1.1), (1, 1.05)):
+            graded[:, :, channel] = np.clip(graded[:, :, channel] * gain, 0, 255)
+    return graded.astype(np.uint8)
+def generate_video_ken_burns(pil_image: Image.Image, duration_sec: int = 5, fps: int = 24, style: str = "premium") -> str:
+    """Render a local zoom/pan ("Ken Burns") clip from a still image.
+
+    The image is stretched to a padded 9:16 canvas and a moving crop window is
+    animated through four segments (bounce-in zoom, drift with slight shake,
+    settle, slow push-in). Each frame is colour graded, vignetted, optionally
+    flashed at the first two segment boundaries, and faded in/out.
+
+    Args:
+        pil_image: Source image; any PIL mode (converted to RGB here).
+        duration_sec: Clip length in seconds. Segment and fade boundaries are
+            defined for a 5-second clip and scaled proportionally.
+        fps: Frames per second of the output.
+        style: Lower-case style name passed to ``apply_color_grade``.
+
+    Returns:
+        Path of the written ``.mp4`` temp file; the caller owns the file.
+
+    Raises:
+        ValueError: If ``duration_sec * fps`` yields no frames.
+        RuntimeError: If OpenCV cannot open the output file for writing.
+    """
+    total_frames = int(duration_sec * fps)
+    if total_frames <= 0:
+        raise ValueError("duration_sec and fps must both be positive")
+    target_w, target_h = 720, 1280
+    pad = 160
+    big_h, big_w = target_h + pad * 2, target_w + pad * 2
+    # Resample once, straight to the padded canvas; shrinking to the target size
+    # first and enlarging afterwards would throw away detail.
+    big_img = np.array(pil_image.convert("RGB").resize((big_w, big_h), Image.LANCZOS))
+    # Only the path is needed: close the handle right away so it is not leaked
+    # and OpenCV is free to open the file itself.
+    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
+        video_path = tmp.name
+    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+    out = cv2.VideoWriter(video_path, fourcc, fps, (target_w, target_h))
+    if not out.isOpened():
+        out.release()
+        raise RuntimeError("OpenCV could not open the video writer (mp4v codec unavailable?)")
+    # The timeline is authored for 5 seconds; scale it so other durations keep
+    # the same proportions (a factor of exactly 1.0 for the default).
+    timeline = duration_sec / 5.0
+    def at(seconds):
+        return int(fps * seconds * timeline)
+    s1_end = at(1.5)
+    s2_end = at(3.0)
+    s3_end = at(4.2)
+    s4_end = total_frames
+    # Loop invariants, hoisted out of the per-frame work
+    fade_in_end  = at(0.4)
+    fade_out_sta = at(4.4)
+    strong_flash = {s1_end, s2_end}
+    flash_frames = strong_flash | {s1_end + 1, s2_end + 1}
+    try:
+        for i in range(total_frames):
+            if i < s1_end:
+                # Segment 1: zoom out from 1.35x with a bounce
+                t = i / s1_end
+                te = ease_out_bounce(min(t * 1.1, 1.0))
+                zoom = 1.35 - 0.25 * te
+                pan_x = int(pad * 0.1 * t)
+                pan_y = int(-pad * 0.15 * t)
+            elif i < s2_end:
+                # Segment 2: drift upwards with a small sinusoidal shake
+                t = (i - s1_end) / (s2_end - s1_end)
+                te = ease_in_out(t)
+                zoom = 1.10 - 0.05 * te
+                shake_x = int(3 * math.sin(i * 0.8))
+                shake_y = int(2 * math.cos(i * 1.1))
+                pan_x = int(pad * 0.1 + shake_x)
+                pan_y = int(-pad * 0.15 - pad * 0.20 * te + shake_y)
+            elif i < s3_end:
+                # Segment 3: ease the pan back towards the centre
+                t = (i - s2_end) / (s3_end - s2_end)
+                te = ease_in_out(t)
+                zoom = 1.05 - 0.04 * te
+                pan_x = int(pad * 0.1 * (1 - te))
+                pan_y = int(-pad * 0.35 * (1 - te))
+            else:
+                # Segment 4: centred slow push-in until the end
+                t = (i - s3_end) / (s4_end - s3_end)
+                te = ease_in_out(t)
+                zoom = 1.01 + 0.03 * te
+                pan_x = 0
+                pan_y = 0
+            crop_w = int(target_w / zoom)
+            crop_h = int(target_h / zoom)
+            cx = big_w // 2 + pan_x
+            cy = big_h // 2 + pan_y
+            x1 = max(0, cx - crop_w // 2)
+            y1 = max(0, cy - crop_h // 2)
+            x2 = min(big_w, x1 + crop_w)
+            y2 = min(big_h, y1 + crop_h)
+            if x2 - x1 < 10 or y2 - y1 < 10:
+                # Degenerate window: fall back to an unzoomed top-left crop
+                x1, y1, x2, y2 = 0, 0, target_w, target_h
+            cropped = big_img[y1:y2, x1:x2]
+            frame = cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
+            frame = apply_color_grade(frame, style)
+            frame = apply_vignette(frame, strength=0.55)
+            if i < fade_in_end:
+                alpha = ease_in_out(i / fade_in_end)
+            elif i >= fade_out_sta:
+                alpha = ease_in_out(1.0 - (i - fade_out_sta) / (total_frames - fade_out_sta))
+            else:
+                alpha = 1.0
+            if i in flash_frames:
+                # Two-frame white flash at a cut: strong on the boundary, weaker on the next frame
+                flash_strength = 0.35 if i in strong_flash else 0.15
+                white = np.full_like(frame, 255)
+                frame = cv2.addWeighted(frame, 1 - flash_strength, white, flash_strength, 0)
+            frame = np.clip(frame.astype(np.float32) * alpha, 0, 255).astype(np.uint8)
+            # Frames are RGB (from PIL); OpenCV's writer expects BGR
+            frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+            out.write(frame_bgr)
+    finally:
+        # Always finalise the container, even if a frame fails midway
+        out.release()
+    return video_path
+# ── MAIN PIPELINE ─────────────────────────────────────────────────────────────
+# Module-level log that is only ever reset, never read, in this file; kept so
+# the module's public names are unchanged.
+_status_log = []
+def generate_ad(image, user_desc, language, style, progress=gr.Progress()):
+    """Gradio handler: write ad copy with Gemini, then render a video for it.
+
+    Args:
+        image: Uploaded image as a PIL image or a NumPy array, or ``None``.
+        user_desc: Optional product description (``None`` is treated as empty).
+        language: Language choice forwarded to ``call_gemini``.
+        style: Style choice forwarded to ``call_gemini`` and the video step.
+        progress: Gradio progress tracker (injected by Gradio via the default).
+
+    Returns:
+        ``(video_path, hook, script, cta, model_log)``. Failures are reported
+        inside the tuple (``video_path`` is ``None`` and the text fields carry
+        the message) rather than raised, so the UI always gets five values.
+    """
+    global _status_log
+    _status_log = []
+    if image is None:
+        return None, "⚠️ Please upload a product image.", "", "", "❌ No image"
+    pil_image = image if isinstance(image, Image.Image) else Image.fromarray(image)
+    # STEP 1 — Gemini ad copy
+    progress(0.1, desc="🧠 Gemini generating ad copy...")
+    try:
+        ad_data = call_gemini(pil_image, user_desc or "", language, style)
+        if not isinstance(ad_data, dict):
+            # Validate inside the try so a malformed reply is reported, not raised
+            raise ValueError(f"expected a JSON object, got {type(ad_data).__name__}")
+    except Exception as e:
+        return None, f"❌ Gemini error: {e}", "", "", "❌ Gemini failed"
+    # Coerce to text: the model may omit a key or send null / a non-string value
+    hook   = str(ad_data.get("hook") or "")
+    script = str(ad_data.get("script") or "")
+    cta    = str(ad_data.get("cta") or "")
+    # Fall back to the hook when the prompt is missing OR present but empty
+    video_prompt = str(ad_data.get("video_prompt") or hook)
+    # STEP 2 — Video with fallback chain
+    progress(0.3, desc="🎬 Generating video (trying AI models)...")
+    status_lines = []
+    def log_progress(msg):
+        status_lines.append(msg)
+        # Cap below 1.0 so a longer model chain cannot push the bar past "done"
+        fraction = min(0.3 + len(status_lines) * 0.1, 0.95)
+        progress(fraction, desc=msg.replace("**", "").replace("*", ""))
+    try:
+        video_path, model_used = generate_video_with_fallback(
+            pil_image,
+            prompt=video_prompt,
+            style=style,
+            progress_callback=log_progress,
+        )
+    except Exception as e:
+        return None, hook, f"❌ Video error: {e}\n\n{script}", cta, "❌ All models failed"
+    progress(1.0, desc="✅ Done!")
+    model_log = "\n".join(status_lines) + f"\n\n✅ **Used:** {model_used}"
+    return video_path, hook, script, cta, model_log
+# ── GRADIO UI ─────────────────────────────────────────────────────────────────
+css = """
+#title  { text-align:center; font-size:2.2rem; font-weight:800; margin-bottom:.2rem; }
+#sub    { text-align:center; color:#888; margin-bottom:1.5rem; }
+.model-chain { font-size:.85rem; line-height:1.7; }
+"""
+# Static page copy, kept apart from the layout code below.
+_SUBTITLE_MD = (
+    "Upload a product image → Gemini writes ad copy → "
+    "AI generates cinematic 5-sec reel (5-model fallback chain)."
+)
+_MODEL_CHAIN_MD = "\n".join([
+    "**🔗 Model Fallback Chain:**",
+    "1. 🤖 Lightricks/LTX-2 ⚡",
+    "2. 🤖 Wan 2.2 I2V-A14B",
+    "3. 🤖 Stable Video Diffusion XT",
+    "4. 🤖 KlingTeam/LivePortrait",
+    "5. 🤖 Lightricks/LTX-Video",
+    "6. 🎨 Ken Burns (local, always works)",
+])
+_HOW_IT_WORKS_MD = " ".join([
+    "---\n**How it works:**",
+    "1️⃣ Gemini 2.5 Flash → hook + script + CTA + video prompt.",
+    "2️⃣ Tries 5 HuggingFace image-to-video models in order.",
+    "3️⃣ First success wins → downloads video.",
+    "4️⃣ If all API calls fail → Ken Burns cinematic effect (local, always works).",
+    "⚡ With HF token + inference-available model: ~10-30 seconds total!",
+])
+def _readonly_textbox(label, lines):
+    """Create a non-interactive Textbox used for one of the result fields."""
+    return gr.Textbox(label=label, lines=lines, interactive=False)
+# Components are created in display order; the nesting of the context managers
+# defines the layout, so statement order here is significant.
+with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="violet")) as demo:
+    gr.Markdown("# 🎬 AI Reel Generator", elem_id="title")
+    gr.Markdown(_SUBTITLE_MD, elem_id="sub")
+    with gr.Row():
+        # Left column: inputs, the generate button and the model-chain info box
+        with gr.Column(scale=1):
+            image_input = gr.Image(label="📸 Upload Product Image", type="pil", height=300)
+            desc_input = gr.Textbox(
+                label="📝 Describe your product (optional)",
+                placeholder="e.g. Premium sneakers with star design …",
+                lines=3,
+            )
+            with gr.Row():
+                lang_dropdown = gr.Dropdown(
+                    choices=["English", "Hindi", "Hinglish"],
+                    value="English",
+                    label="🌐 Language",
+                )
+                style_dropdown = gr.Dropdown(
+                    choices=["Fun", "Premium", "Energetic"],
+                    value="Fun",
+                    label="🎨 Style",
+                )
+            gen_btn = gr.Button("🚀 Generate Ad", variant="primary", size="lg")
+            gr.Markdown(_MODEL_CHAIN_MD, elem_classes="model-chain")
+        # Right column: the rendered video and the generated copy
+        with gr.Column(scale=1):
+            video_out  = gr.Video(label="🎥 5-Second Ad Reel", height=400)
+            hook_out   = _readonly_textbox("⚡ Hook", 2)
+            script_out = _readonly_textbox("📄 Script", 5)
+            cta_out    = _readonly_textbox("🎯 CTA", 1)
+            status_out = _readonly_textbox("📊 Model Log", 6)
+    # Output order must match the 5-tuple returned by generate_ad
+    gen_btn.click(
+        fn=generate_ad,
+        inputs=[image_input, desc_input, lang_dropdown, style_dropdown],
+        outputs=[video_out, hook_out, script_out, cta_out, status_out],
+    )
+    gr.Markdown(_HOW_IT_WORKS_MD)
+if __name__ == "__main__":
+    demo.launch()