GLAkavya committed on
Commit
c980c4a
ยท
verified ยท
1 Parent(s): cbcf1e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +441 -312
app.py CHANGED
@@ -1,333 +1,462 @@
1
- import os
2
- import json
3
- import tempfile
4
- import io
5
- import math
6
  import numpy as np
7
  import cv2
8
  import gradio as gr
9
- from google import genai
10
- from google.genai import types
11
- from PIL import Image
12
-
13
- # โ”€โ”€ ENV SETUP โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
14
- gemini_key = (
15
- os.environ.get("GEMINI_API_KEY", "")
16
- or os.environ.get("GOOGLE_API_KEY", "")
17
- ).strip()
18
- if gemini_key:
19
- os.environ["GOOGLE_API_KEY"] = gemini_key
20
- print(f"โœ… Gemini key loaded (len={len(gemini_key)})")
21
- else:
22
- print("โŒ No Gemini key found!")
23
-
24
- hf_token = (
25
- os.environ.get("HF_TOKEN", "")
26
- or os.environ.get("HF_KEY", "")
27
- ).strip()
28
  if hf_token:
29
  try:
30
- from huggingface_hub import login
31
- login(token=hf_token)
32
- print("โœ… HF login OK")
33
- except Exception as e:
34
- print(f"โš ๏ธ HF login skipped: {e}")
35
-
36
- print("โœ… App ready โ€” using fast OpenCV video generation (no heavy models!)")
37
-
38
-
39
- # โ”€โ”€ GEMINI โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
40
- def call_gemini(pil_image: Image.Image, user_desc: str, language: str, style: str) -> dict:
41
- client = genai.Client()
42
-
43
- lang_map = {
44
- "English": "Write everything in English.",
45
- "Hindi": "เคธเคฌ เค•เฅเค› เคนเคฟเค‚เคฆเฅ€ เคฎเฅ‡เค‚ เคฒเคฟเค–เฅ‡เค‚เฅค",
46
- "Hinglish": "Write in Hinglish (mix of Hindi and English).",
47
- }
48
- style_map = {
49
- "Fun": "tone: playful, witty, youthful",
50
- "Premium": "tone: luxurious, sophisticated, aspirational",
51
- "Energetic": "tone: high-energy, bold, action-packed",
52
- }
53
-
54
- prompt = f"""You are an expert ad copywriter. Analyze this product image and create a compelling social-media video ad.
55
-
56
- {f'Product description: {user_desc}' if user_desc.strip() else ''}
57
- Language rule : {lang_map.get(language, lang_map['English'])}
58
- Style rule : {style_map.get(style, style_map['Fun'])}
59
-
60
- CRITICAL: Return ONLY raw JSON. No markdown. No ```json. No explanation. Pure JSON only.
61
- {{
62
- "hook": "attention-grabbing opening line (1-2 sentences)",
63
- "script": "full 15-20 second voiceover script",
64
- "cta": "call-to-action phrase",
65
- "video_prompt": "detailed cinematic advertising scene description"
66
- }}"""
67
-
68
- buf = io.BytesIO()
69
- pil_image.save(buf, format="JPEG")
70
- image_bytes = buf.getvalue()
71
-
72
- response = client.models.generate_content(
73
- model="gemini-2.5-flash",
74
- contents=[
75
- types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg"),
76
- types.Part.from_text(text=prompt),
77
- ],
78
- )
79
-
80
- raw = response.text.strip()
81
- if "```" in raw:
82
- raw = raw.split("```")[1]
83
- if raw.lower().startswith("json"):
84
- raw = raw[4:]
85
- raw = raw.strip()
86
-
87
- return json.loads(raw)
88
-
89
-
90
- # โ”€โ”€ FAST VIDEO: Ken Burns effect (zoom + pan) โ€” NO heavy model needed โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
91
- def ease_in_out(t):
92
- """Smooth easing โ€” no jerky motion."""
93
- return t * t * (3 - 2 * t)
94
-
95
- def ease_out_bounce(t):
96
- """Bouncy pop effect."""
97
- if t < 1/2.75:
98
- return 7.5625 * t * t
99
- elif t < 2/2.75:
100
- t -= 1.5/2.75
101
- return 7.5625 * t * t + 0.75
102
- elif t < 2.5/2.75:
103
- t -= 2.25/2.75
104
- return 7.5625 * t * t + 0.9375
105
- else:
106
- t -= 2.625/2.75
107
- return 7.5625 * t * t + 0.984375
108
-
109
- def apply_vignette(frame, strength=0.6):
110
- """Dark edges โ€” cinematic look."""
111
- h, w = frame.shape[:2]
112
- Y, X = np.ogrid[:h, :w]
113
- cx, cy = w / 2, h / 2
114
- dist = np.sqrt(((X - cx) / cx) ** 2 + ((Y - cy) / cy) ** 2)
115
- mask = np.clip(1.0 - strength * (dist ** 1.5), 0, 1)
116
- return (frame * mask[:, :, np.newaxis]).astype(np.uint8)
117
-
118
- def apply_color_grade(frame, style="premium"):
119
- """Color grading per style."""
120
- f = frame.astype(np.float32)
121
- if style == "premium":
122
- # Teal-orange grade: boost blues in shadows, warm highlights
123
- f[:,:,0] = np.clip(f[:,:,0] * 1.05, 0, 255) # R boost
124
- f[:,:,2] = np.clip(f[:,:,2] * 1.08, 0, 255) # B boost
125
- f = np.clip(f * 1.05, 0, 255) # slight brightness
126
- elif style == "energetic":
127
- # Saturated vivid
128
- gray = np.mean(f, axis=2, keepdims=True)
129
- f = np.clip(gray + 1.4 * (f - gray), 0, 255)
130
- f = np.clip(f * 1.1, 0, 255)
131
- elif style == "fun":
132
- # Warm, bright, punchy
133
- f[:,:,0] = np.clip(f[:,:,0] * 1.1, 0, 255) # R
134
- f[:,:,1] = np.clip(f[:,:,1] * 1.05, 0, 255) # G
135
- return f.astype(np.uint8)
136
-
137
- def generate_video(pil_image: Image.Image, duration_sec: int = 5, fps: int = 24, style: str = "premium") -> str:
138
- """
139
- Cinematic 5-second video with:
140
- - Segment 1 (0-1.5s): ZOOM IN burst + bounce pop
141
- - Segment 2 (1.5-3s): Slow upward pan + subtle shake
142
- - Segment 3 (3-4.2s): ZOOM OUT pull-back
143
- - Segment 4 (4.2-5s): Fade out with color flash
144
- - Vignette overlay
145
- - Color grading
146
- - Fade in/out
147
- """
148
- total_frames = duration_sec * fps # 120 frames
149
-
150
- img = pil_image.convert("RGB")
151
- target_w, target_h = 720, 1280
152
- img = img.resize((target_w, target_h), Image.LANCZOS)
153
-
154
- tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
155
- fourcc = cv2.VideoWriter_fourcc(*"mp4v")
156
- out = cv2.VideoWriter(tmp.name, fourcc, fps, (target_w, target_h))
157
-
158
- # Large canvas to allow all movements without black borders
159
- pad = 160
160
- big_h, big_w = target_h + pad * 2, target_w + pad * 2
161
- big_img = np.array(img.resize((big_w, big_h), Image.LANCZOS))
162
-
163
- # Segment boundaries (in frames)
164
- s1_end = int(fps * 1.5) # 36
165
- s2_end = int(fps * 3.0) # 72
166
- s3_end = int(fps * 4.2) # 100
167
- s4_end = total_frames # 120
168
-
169
- for i in range(total_frames):
170
- t_global = i / (total_frames - 1)
171
-
172
- # โ”€โ”€ SEGMENT 1: Zoom-in bounce pop (0 โ†’ 1.5s) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
173
- if i < s1_end:
174
- t = i / s1_end
175
- te = ease_out_bounce(min(t * 1.1, 1.0))
176
- zoom = 1.35 - 0.25 * te # 1.35 โ†’ 1.10 with bounce
177
- pan_x = int(pad * 0.1 * t)
178
- pan_y = int(-pad * 0.15 * t) # slight upward
179
-
180
- # โ”€โ”€ SEGMENT 2: Slow pan upward + micro shake (1.5s โ†’ 3s) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
181
- elif i < s2_end:
182
- t = (i - s1_end) / (s2_end - s1_end)
183
- te = ease_in_out(t)
184
- zoom = 1.10 - 0.05 * te # gentle zoom out
185
- shake_x = int(3 * math.sin(i * 0.8)) # micro horizontal shake
186
- shake_y = int(2 * math.cos(i * 1.1))
187
- pan_x = int(pad * 0.1 + shake_x)
188
- pan_y = int(-pad * 0.15 - pad * 0.20 * te + shake_y)
189
-
190
- # โ”€โ”€ SEGMENT 3: Zoom out pull-back (3s โ†’ 4.2s) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
191
- elif i < s3_end:
192
- t = (i - s2_end) / (s3_end - s2_end)
193
- te = ease_in_out(t)
194
- zoom = 1.05 - 0.04 * te # zoom out to near 1.0
195
- pan_x = int(pad * 0.1 * (1 - te))
196
- pan_y = int(-pad * 0.35 * (1 - te))
197
-
198
- # โ”€โ”€ SEGMENT 4: Final fade out (4.2s โ†’ 5s) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
199
- else:
200
- t = (i - s3_end) / (s4_end - s3_end)
201
- te = ease_in_out(t)
202
- zoom = 1.01 + 0.03 * te # subtle zoom in at end
203
- pan_x = 0
204
- pan_y = 0
205
-
206
- # Crop from big canvas
207
- crop_w = int(target_w / zoom)
208
- crop_h = int(target_h / zoom)
209
- cx = big_w // 2 + pan_x
210
- cy = big_h // 2 + pan_y
211
-
212
- x1 = max(0, cx - crop_w // 2)
213
- y1 = max(0, cy - crop_h // 2)
214
- x2 = min(big_w, x1 + crop_w)
215
- y2 = min(big_h, y1 + crop_h)
216
-
217
- if x2 - x1 < 10 or y2 - y1 < 10:
218
- x1, y1, x2, y2 = 0, 0, target_w, target_h
219
-
220
- cropped = big_img[y1:y2, x1:x2]
221
- frame = cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
222
-
223
- # โ”€โ”€ COLOR GRADE โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
224
- frame = apply_color_grade(frame, style)
225
-
226
- # โ”€โ”€ VIGNETTE โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
227
- frame = apply_vignette(frame, strength=0.55)
228
-
229
- # โ”€โ”€ FADE IN (first 0.4s) + FADE OUT (last 0.6s) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
230
- fade_in_end = int(fps * 0.4)
231
- fade_out_sta = int(fps * 4.4)
232
- if i < fade_in_end:
233
- alpha = ease_in_out(i / fade_in_end)
234
- elif i >= fade_out_sta:
235
- alpha = ease_in_out(1.0 - (i - fade_out_sta) / (total_frames - fade_out_sta))
236
- else:
237
- alpha = 1.0
238
-
239
- # โ”€โ”€ WHITE FLASH at segment transitions (frame 36, 72) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
240
- flash_frames = {s1_end, s1_end+1, s2_end, s2_end+1}
241
- if i in flash_frames:
242
- flash_strength = 0.35 if i in {s1_end, s2_end} else 0.15
243
- white = np.ones_like(frame) * 255
244
- frame = cv2.addWeighted(frame, 1 - flash_strength, white.astype(np.uint8), flash_strength, 0)
245
-
246
- frame = np.clip(frame.astype(np.float32) * alpha, 0, 255).astype(np.uint8)
247
-
248
- # RGB โ†’ BGR for OpenCV
249
- frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
250
- out.write(frame_bgr)
251
-
252
- out.release()
253
- return tmp.name
254
-
255
-
256
-
257
- # โ”€โ”€ MAIN PIPELINE โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
258
- def generate_ad(image, user_desc, language, style):
259
- if image is None:
260
- return None, "โš ๏ธ Please upload a product image.", "", ""
261
-
262
- pil_image = image if isinstance(image, Image.Image) else Image.fromarray(image)
263
-
264
- # STEP 1 โ€” Gemini ad copy
265
  try:
266
- ad_data = call_gemini(pil_image, user_desc or "", language, style)
267
- except Exception as e:
268
- return None, f"โŒ Gemini error: {e}", "", ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
 
270
- hook = ad_data.get("hook", "")
271
- script = ad_data.get("script", "")
272
- cta = ad_data.get("cta", "")
273
 
274
- # STEP 2 โ€” Fast video (2-3 sec)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
  try:
276
- video_path = generate_video(pil_image, duration_sec=5, fps=24, style=style.lower())
277
- except Exception as e:
278
- return None, hook, f"โŒ Video error: {e}\n\n{script}", cta
279
-
280
- return video_path, hook, script, cta
281
-
282
-
283
- # โ”€โ”€ GRADIO UI โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
284
- css = """
285
- #title { text-align:center; font-size:2.2rem; font-weight:800; margin-bottom:.2rem; }
286
- #sub { text-align:center; color:#888; margin-bottom:1.5rem; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  """
288
-
289
- with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="violet")) as demo:
290
-
291
- gr.Markdown("# ๐ŸŽฌ AI Reel Generator", elem_id="title")
292
- gr.Markdown("Upload a product image โ†’ cinematic 5-sec ad reel + copy in seconds.", elem_id="sub")
293
 
294
  with gr.Row():
 
295
  with gr.Column(scale=1):
296
- image_input = gr.Image(label="๐Ÿ“ธ Upload Product Image", type="pil", height=300)
297
- desc_input = gr.Textbox(
298
- label="๐Ÿ“ Describe your product (optional)",
299
- placeholder="e.g. Premium sneakers with star design โ€ฆ",
300
- lines=3,
301
- )
302
  with gr.Row():
303
- lang_dropdown = gr.Dropdown(
304
- choices=["English", "Hindi", "Hinglish"],
305
- value="English", label="๐ŸŒ Language",
306
- )
307
- style_dropdown = gr.Dropdown(
308
- choices=["Fun", "Premium", "Energetic"],
309
- value="Fun", label="๐ŸŽจ Style",
310
- )
311
- gen_btn = gr.Button("๐Ÿš€ Generate Ad", variant="primary", size="lg")
312
 
 
313
  with gr.Column(scale=1):
314
- video_out = gr.Video(label="๐ŸŽฅ 5-Second Ad Reel", height=400)
315
- hook_out = gr.Textbox(label="โšก Hook", lines=2, interactive=False)
316
- script_out = gr.Textbox(label="๐Ÿ“„ Script", lines=5, interactive=False)
317
- cta_out = gr.Textbox(label="๐ŸŽฏ CTA", lines=1, interactive=False)
318
 
319
  gen_btn.click(
320
- fn=generate_ad,
321
- inputs=[image_input, desc_input, lang_dropdown, style_dropdown],
322
- outputs=[video_out, hook_out, script_out, cta_out],
323
- )
324
-
325
- gr.Markdown(
326
- "---\n**How it works:** "
327
- "1๏ธโƒฃ Gemini 2.5 Flash โ†’ hook, script, CTA. "
328
- "2๏ธโƒฃ Ken Burns cinematic effect โ†’ smooth 5-sec reel (no heavy AI model!). "
329
- "โšก Total time: ~5-10 seconds!"
330
  )
331
 
332
- if __name__ == "__main__":
333
  demo.launch()
 
1
+ import os, tempfile, io, math, time, threading
 
 
 
 
2
  import numpy as np
3
  import cv2
4
  import gradio as gr
5
+ from PIL import Image, ImageDraw, ImageFont, ImageFilter, ImageEnhance
6
+
7
+ # โ”€โ”€ TOKENS โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
8
# Hugging Face auth: accept the token under either env-var name.
hf_token = (os.environ.get("HF_TOKEN","") or os.environ.get("HF_KEY","")).strip()
hf_client = None  # stays None unless login below succeeds; checked by try_hf()
if hf_token:
    try:
        from huggingface_hub import login, InferenceClient
        login(token=hf_token); hf_client = InferenceClient(token=hf_token)
        print("โœ… HF ready")
    except Exception as e: print(f"โš ๏ธ HF: {e}")

# Image-to-video model cascade, tried in order by get_video().
# "__local__" is the guaranteed OpenCV Ken Burns fallback and must stay last.
HF_MODELS = [
    {"id": "Lightricks/LTX-2", "name": "LTX-2 โšก"},
    {"id": "Wan-AI/Wan2.2-I2V-A14B", "name": "Wan 2.2"},
    {"id": "stabilityai/stable-video-diffusion-img2vid-xt", "name": "SVD-XT"},
    {"id": "KlingTeam/LivePortrait", "name": "Kling LivePortrait"},
    {"id": "Lightricks/LTX-Video", "name": "LTX-Video"},
    {"id": "__local__", "name": "Ken Burns โœ…"},
]
26
+
27
def pil_to_bytes(img):
    """Encode a PIL image as JPEG (quality 92) and return the raw bytes."""
    buffer = io.BytesIO()
    img.save(buffer, format="JPEG", quality=92)
    return buffer.getvalue()
29
+
30
def run_timeout(fn, sec, *a, **kw):
    """Run fn(*a, **kw) in a daemon thread and wait at most `sec` seconds.

    Returns the function's result, or None on timeout or exception.
    Failures are printed, never raised; on timeout the daemon thread is
    simply abandoned.
    """
    box = [None]
    err = [None]

    def _worker():
        try:
            box[0] = fn(*a, **kw)
        except Exception as e:
            err[0] = str(e)

    worker = threading.Thread(target=_worker, daemon=True)
    worker.start()
    worker.join(timeout=sec)
    if worker.is_alive():
        print(f" โฑ timeout")
        return None
    if err[0]:
        print(f" โŒ {err[0][:80]}")
    return box[0]
39
+
40
def try_hf(model_id, pil, prompt):
    """Request an image-to-video render for `pil` from one HF model.

    Returns raw video bytes, or None when no client is configured or
    when the model call (or reading its response) fails.
    """
    if not hf_client:
        return None
    try:
        resp = hf_client.image_to_video(
            image=pil_to_bytes(pil), model=model_id, prompt=prompt
        )
        # Some client versions return a file-like object, others bytes.
        if hasattr(resp, "read"):
            return resp.read()
        return resp
    except Exception as e:
        print(f" โŒ {model_id}: {e}")
        return None
46
+
47
def get_video(pil, prompt, cb=None):
    """Produce a video for the image, trying each HF model in order.

    The "__local__" sentinel (last in HF_MODELS) renders with the local
    Ken Burns effect, so a result is always produced.
    Returns (video_path, model_name); `cb` receives progress strings.
    """
    for model in HF_MODELS:
        mid, mname = model["id"], model["name"]
        if cb:
            cb(f"โณ Trying: {mname}")
        if mid == "__local__":
            return ken_burns(pil), mname
        payload = run_timeout(try_hf, 50, mid, pil, prompt)
        if payload:
            clip = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
            clip.write(payload)
            clip.flush()
            return clip.name, mname
        time.sleep(1)  # brief pause before hitting the next model
    # Unreachable while "__local__" stays in the list, but kept as a guard.
    return ken_burns(pil), "Ken Burns"
60
+
61
+
62
+ # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
63
+ # KEN BURNS (working, image always shows)
64
+ # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
65
def ease(t):
    """Smoothstep easing: clamp t to [0, 1], then return 3t^2 - 2t^3."""
    t = min(1.0, max(0.0, t))
    return (3 - 2 * t) * t * t
66
def ease_cubic(t):
    """Cubic ease-in-out on t clamped to [0, 1]: slow ends, fast middle."""
    t = min(1.0, max(0.0, t))
    if t < 0.5:
        return 4 * t * t * t
    return 1 - math.pow(2 - 2 * t, 3) / 2
67
def ease_expo(t):
    """Exponential ease-out on t clamped to [0, 1].

    FIX: clamp t like the sibling easings (`ease`, `ease_cubic`) do.
    Previously a negative t produced a negative output. Behavior for
    t in [0, 1] is unchanged: 0.0 at t=0, 1 - 2^(-10t) in between,
    exactly 1.0 at t >= 1.
    """
    t = max(0.0, min(1.0, t))
    return 1.0 if t >= 1 else 1 - math.pow(2, -10 * t)
68
def ease_bounce(t):
    """Classic Penner bounce ease-out: four parabolic arcs of
    decreasing height over [0, 1]."""
    k = 7.5625
    d = 2.75
    if t < 1 / d:
        return k * t * t
    if t < 2 / d:
        u = t - 1.5 / d
        return k * u * u + 0.75
    if t < 2.5 / d:
        u = t - 2.25 / d
        return k * u * u + 0.9375
    u = t - 2.625 / d
    return k * u * u + 0.984375
73
+
74
def ken_burns(pil, duration_sec=6, fps=30, style="premium"):
    """Render a vertical 720x1280 Ken Burns clip from a single still.

    Pipeline per frame: crop a zoom/pan window from an oversized canvas,
    apply a subtle style-dependent color grade, a pre-baked vignette,
    film grain, letterbox bars, and fade in/out. Writes an mp4v-encoded
    temp file and returns its path.
    """
    TW,TH=720,1280  # output frame size (portrait)
    # Small pad: headroom for pan/zoom without ever showing black borders.
    pad=60; BW,BH=TW+pad*2,TH+pad*2
    total=duration_sec*fps  # total frame count

    # Fit the whole source image into the frame (letterbox if needed).
    img=pil.convert("RGB"); sw,sh=img.size
    scale=TH/sh; nw=int(sw*scale); nh=TH
    if nw>TW: scale=TW/sw; nw=TW; nh=int(sh*scale)
    img_resized=img.resize((nw,nh),Image.LANCZOS)
    # Letterbox areas are filled with a darkened, heavily blurred copy.
    bg=img.resize((TW,TH),Image.LANCZOS)
    bg=bg.filter(ImageFilter.GaussianBlur(radius=20))
    bg_arr=np.array(ImageEnhance.Brightness(bg).enhance(0.5))
    canvas=Image.fromarray(bg_arr)
    # Paste the sharp image centered, then lightly sharpen and punch up.
    px=(TW-nw)//2; py=(TH-nh)//2
    canvas.paste(img_resized,(px,py))
    canvas=canvas.filter(ImageFilter.UnsharpMask(radius=0.8,percent=110,threshold=2))
    canvas=ImageEnhance.Contrast(canvas).enhance(1.05)
    canvas=ImageEnhance.Color(canvas).enhance(1.08)
    base=np.array(canvas.resize((BW,BH),Image.LANCZOS))

    # Vignette mask computed once; only darkens beyond 85% radius.
    Y,X=np.ogrid[:TH,:TW]
    dist=np.sqrt(((X-TW/2)/(TW/2))**2+((Y-TH/2)/(TH/2))**2)
    vmask=np.clip(1.-0.22*np.maximum(dist-0.85,0)**2,0,1).astype(np.float32)

    # Motion segments: (t_start, t_end, zoom_start, zoom_end,
    #                   pan_x_start, pan_x_end, pan_y_start, pan_y_end),
    # times as fractions of the clip. Zoom stays within 1.00-1.06 so the
    # full image remains visible throughout.
    SEG=[
        (0.00,0.30, 1.00,1.04, 0, -int(pad*.40), 0, -int(pad*.40)),
        (0.30,0.60, 1.04,1.06, -int(pad*.30), int(pad*.30), -int(pad*.40),-int(pad*.70)),
        (0.60,0.80, 1.06,1.04, int(pad*.30), int(pad*.50), -int(pad*.70),-int(pad*.40)),
        (0.80,1.00, 1.04,1.00, int(pad*.50), 0, -int(pad*.40), 0),
    ]

    tmp=tempfile.NamedTemporaryFile(suffix=".mp4",delete=False)
    writer=cv2.VideoWriter(tmp.name,cv2.VideoWriter_fourcc(*"mp4v"),fps,(TW,TH))

    for i in range(total):
        tg=i/max(total-1,1)  # global progress in [0, 1]
        # Interpolate zoom/pan within whichever segment contains tg.
        zoom=pan_x=pan_y=None
        for t0,t1,z0,z1,px0,px1,py0,py1 in SEG:
            if t0<=tg<=t1:
                te=ease_cubic((tg-t0)/(t1-t0))
                zoom=z0+(z1-z0)*te; pan_x=int(px0+(px1-px0)*te); pan_y=int(py0+(py1-py0)*te); break
        if zoom is None: zoom,pan_x,pan_y=1.,0,0
        # No shake: keeps the image stable and well-framed.

        # Crop window centered on the (panned) canvas center.
        cw,ch=int(TW/zoom),int(TH/zoom)
        ox,oy=BW//2+pan_x,BH//2+pan_y
        x1,y1=max(0,ox-cw//2),max(0,oy-ch//2)
        x2,y2=min(BW,x1+cw),min(BH,y1+ch)
        # Degenerate crop guard: fall back to the top-left full frame.
        if (x2-x1)<10 or (y2-y1)<10: x1,y1,x2,y2=0,0,TW,TH

        frame=cv2.resize(base[y1:y2,x1:x2],(TW,TH),interpolation=cv2.INTER_LINEAR)

        # Very subtle per-style color grade (in normalized float space).
        f=frame.astype(np.float32)/255.
        if style=="premium":
            f[:,:,0]=np.clip(f[:,:,0]*1.03+.01,0,1)
            f[:,:,2]=np.clip(f[:,:,2]*1.02,0,1)
        elif style=="energetic":
            # Saturation boost around the luma, then slight brighten.
            gray=0.299*f[:,:,0:1]+0.587*f[:,:,1:2]+0.114*f[:,:,2:3]
            f=np.clip(gray+1.2*(f-gray),0,1); f=np.clip(f*1.04,0,1)
        elif style=="fun":
            f[:,:,0]=np.clip(f[:,:,0]*1.05,0,1)
            f[:,:,1]=np.clip(f[:,:,1]*1.03,0,1)
        frame=np.clip(f*255,0,255).astype(np.uint8)

        # Vignette (darkens edges only).
        frame=np.clip(frame.astype(np.float32)*vmask[:,:,None],0,255).astype(np.uint8)

        # Film grain: gaussian noise, sigma 3.
        frame=np.clip(frame.astype(np.float32)+np.random.normal(0,3,frame.shape),0,255).astype(np.uint8)

        # Cinematic letterbox bars, 36 px top and bottom.
        frame[:36,:]=0; frame[-36:,:]=0

        # Fade in over the first 2% of the clip, out over the last 5%.
        if tg<0.02: alpha=ease_expo(tg/0.02)
        elif tg>0.95: alpha=ease(1-(tg-0.95)/0.05)
        else: alpha=1.
        if alpha<1.: frame=np.clip(frame.astype(np.float32)*alpha,0,255).astype(np.uint8)

        # PIL/numpy frames are RGB; OpenCV expects BGR.
        writer.write(cv2.cvtColor(frame,cv2.COLOR_RGB2BGR))
    writer.release()
    return tmp.name
164
 
 
 
 
165
 
166
+ # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
167
+ # CAPTIONS โ€” burn into existing video via ffmpeg
168
+ # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
169
def add_captions_ffmpeg(video_path, caption, duration_sec, style):
    """Burn animated captions, a hashtag and a "Shop Now" CTA into the video.

    Uses ffmpeg's drawtext filter. Best-effort: on any ffmpeg failure
    (including a missing ffmpeg binary) the original video path is
    returned unchanged.

    FIXES vs. previous version: removed the unused `font_reg` local, and
    replaced the shell-string `os.system` call with an argument-list
    `subprocess.run` so odd characters in paths cannot break quoting.
    """
    import re
    import subprocess

    def clean(t):
        # drawtext chokes on quotes/colons, so keep a safe charset only.
        return re.sub(r"[^A-Za-z0-9 !.,-]", "", t).strip()

    # Split the caption roughly in half into two sequentially timed lines.
    words = caption.strip().split()
    mid = max(1, len(words) // 2)
    line1 = clean(" ".join(words[:mid]))
    line2 = clean(" ".join(words[mid:])) if len(words) > 1 else line1

    # Per-style accent color for the main caption text (hex RGB).
    colors = {"premium": "FFD232", "energetic": "3CC8FF", "fun": "FF78C8"}
    col = colors.get(style, "FFFFFF")
    out = video_path.replace(".mp4", "_cap.mp4")

    # First available bold system font; empty string lets ffmpeg default.
    font_paths = [
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
        "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf",
    ]
    font = ""
    for p in font_paths:
        if os.path.exists(p):
            font = f":fontfile='{p}'"
            break

    def dt(text, start, end, y, size=42, color=None, box_alpha="0.60"):
        """Build one drawtext filter: centered boxed text, 0.4 s fades."""
        c = color or col
        fd = 0.4  # fade duration in seconds
        return (
            f"drawtext=text='{text}'{font}"
            f":fontsize={size}:fontcolor=#{c}"
            f":x=(w-text_w)/2:y={y}"
            f":box=1:boxcolor=black@{box_alpha}:boxborderw=14"
            f":enable='between(t,{start},{end})'"
            f":alpha='if(lt(t,{start+fd}),(t-{start})/{fd},if(gt(t,{end-fd}),({end}-t)/{fd},1))'"
        )

    # Second line / CTA end time, capped to stay inside the clip.
    end2 = min(duration_sec - 0.2, 6.5)

    # 1. Main captions -- inside the frame, above the letterbox bars.
    cap1 = dt(line1, 1.0, 3.5, "h-190")
    cap2 = dt(line2, 3.8, end2, "h-190")

    # 2. "Shop Now" CTA -- appears at 4.5 s, smaller, bottom center.
    cta_colors = {"premium": "FF9900", "energetic": "FF4444", "fun": "AA44FF"}
    cta = dt("Shop Now >", 4.5, end2, "h-130", size=32,
             color=cta_colors.get(style, "FF9900"), box_alpha="0.70")

    # 3. Hashtag near the top -- appears early, fades by 3 s.
    tag = dt("#NewCollection", 0.5, 3.0, "60", size=28, color="FFFFFF", box_alpha="0.40")

    vf = ",".join([cap1, cap2, cta, tag])

    try:
        ret = subprocess.run(
            ["ffmpeg", "-y", "-i", video_path, "-vf", vf,
             "-c:a", "copy", "-loglevel", "error", out],
            check=False,
        ).returncode
    except OSError:  # ffmpeg binary not installed
        ret = 1
    return out if (ret == 0 and os.path.exists(out)) else video_path
221
+
222
+
223
+ # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
224
+ # AUDIO โ€” BGM + optional TTS
225
+ # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
226
def make_bgm(duration_sec, out_path, style="premium"):
    """Synthesize a simple style-matched background track as a mono WAV.

    Layers: a pitch-swept kick on each beat, a slowly pulsing bass, a
    three-note arpeggio pad, and noise-burst hi-hats. Tempo and pitches
    are chosen per style. Writes 16-bit 44.1 kHz mono PCM to `out_path`.

    FIX: the half-second fade in/out is now clamped to half the clip
    length -- previously any clip shorter than 1 s crashed with a numpy
    broadcast error (`fade` exceeded the sample count).
    """
    import wave
    sr = 44100
    n = int(sr * duration_sec)
    t = np.linspace(0, duration_sec, n, endpoint=False)
    bpm = {"premium": 88, "energetic": 126, "fun": 104}.get(style, 88)
    beat = 60. / bpm  # seconds per beat

    # Kick: exponentially decaying, pitch-swept sine on every beat.
    kick = np.zeros(n, np.float32)
    for i in range(int(duration_sec / beat) + 2):
        s = int(i * beat * sr)
        if s >= n: break
        l = min(int(sr * .10), n - s)
        env = np.exp(-20 * np.arange(l) / sr)
        kick[s:s+l] += env * np.sin(2 * math.pi * 55 * np.exp(-25 * np.arange(l) / sr) * np.arange(l) / sr) * 0.55

    # Bass: low sine with a slow amplitude pulse at bpm/4.
    bass_f = {"premium": 55, "energetic": 80, "fun": 65}.get(style, 55)
    bass = np.sin(2 * math.pi * bass_f * t) * 0.10 * (0.5 + 0.5 * np.sin(2 * math.pi * (bpm / 60 / 4) * t))

    # Pad: three phase-offset notes swelling in and out.
    mf = {"premium": [261, 329, 392], "energetic": [330, 415, 494], "fun": [392, 494, 587]}.get(style, [261, 329, 392])
    mel = np.zeros(n, np.float32)
    for j, f in enumerate(mf):
        env = np.clip(0.5 + 0.5 * np.sin(2 * math.pi * 1.5 * t - j * 2.1), 0, 1)
        mel += np.sin(2 * math.pi * f * t) * env * 0.045

    # Hi-hats: short filtered-noise bursts on eighth notes.
    hat = np.zeros(n, np.float32)
    hs = beat / 2
    for i in range(int(duration_sec / hs) + 2):
        s = int(i * hs * sr)
        if s >= n: break
        l = min(int(sr * .03), n - s)
        hat[s:s+l] += np.random.randn(l) * np.exp(-80 * np.arange(l) / sr) * 0.06

    mix = np.clip((kick + bass + mel + hat) * 0.18, -1, 1)
    # Clamp the fade so short clips never exceed the sample count.
    fade = min(int(sr * .5), n // 2)
    if fade > 0:
        mix[:fade] *= np.linspace(0, 1, fade)
        mix[-fade:] *= np.linspace(1, 0, fade)

    with wave.open(out_path, "w") as wf:
        wf.setnchannels(1); wf.setsampwidth(2); wf.setframerate(sr)
        wf.writeframes((mix * 32767).astype(np.int16).tobytes())
264
+
265
def add_audio(video_path, caption, duration_sec, style):
    """Mux an audio track onto the video: synthesized BGM, plus a gTTS
    voiceover ducked over it when gTTS and ffmpeg are available.

    Best-effort throughout: any TTS or ffmpeg failure falls back to the
    BGM-only track, and if the final mux fails the original video path
    is returned unchanged.

    FIX: removed the unused `tts_wav` local.
    """
    bgm = video_path.replace(".mp4", "_bgm.wav")
    final = video_path.replace(".mp4", "_final.mp4")
    make_bgm(duration_sec, bgm, style)

    # Try a TTS voiceover; on any failure keep the plain BGM.
    audio = bgm
    try:
        from gtts import gTTS
        tts_mp3 = video_path.replace(".mp4", "_tts.mp3")
        gTTS(text=caption[:200], lang="en", slow=False).save(tts_mp3)
        mixed = video_path.replace(".mp4", "_mix.wav")
        # Duck the BGM (20%) under the voice (95%), trim to clip length.
        os.system(f'ffmpeg -y -i "{bgm}" -i "{tts_mp3}" '
                  f'-filter_complex "[0]volume=0.20[a];[1]volume=0.95[b];[a][b]amix=inputs=2:duration=first" '
                  f'-t {duration_sec} "{mixed}" -loglevel error')
        if os.path.exists(mixed): audio = mixed
    except Exception as e: print(f" TTS skip: {e}")

    # Mux: copy the video stream, encode audio as AAC 128k.
    os.system(f'ffmpeg -y -i "{video_path}" -i "{audio}" '
              f'-c:v copy -c:a aac -b:a 128k -shortest "{final}" -loglevel error')
    return final if os.path.exists(final) else video_path
287
+
288
+
289
+ # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
290
+ # AI BRAIN โ€” Captions, Posting Time, Target Audience
291
+ # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
292
+
293
# Static per-category posting heuristics used by get_smart_insights():
# candidate time slots, the single best slot, and the best weekdays.
POSTING_TIMES = {
    "Fashion": {"slots":["7:00 AM","12:00 PM","6:00 PM","9:00 PM"],"best":"9:00 PM","days":"Tue, Thu, Fri"},
    "Food": {"slots":["11:00 AM","1:00 PM","7:00 PM"],"best":"12:00 PM","days":"Mon, Wed, Sat"},
    "Tech": {"slots":["8:00 AM","12:00 PM","5:00 PM"],"best":"8:00 AM","days":"Mon, Tue, Wed"},
    "Beauty": {"slots":["8:00 AM","1:00 PM","8:00 PM"],"best":"8:00 PM","days":"Wed, Fri, Sun"},
    "Fitness": {"slots":["6:00 AM","12:00 PM","7:00 PM"],"best":"6:00 AM","days":"Mon, Wed, Fri"},
    "Lifestyle": {"slots":["9:00 AM","2:00 PM","7:00 PM"],"best":"7:00 PM","days":"Thu, Fri, Sat"},
    "Product/Other":{"slots":["10:00 AM","3:00 PM","8:00 PM"],"best":"8:00 PM","days":"Tue, Thu, Sat"},
}

# One-line target-audience description per category (same key set as
# POSTING_TIMES; keep the two dicts in sync).
AUDIENCES = {
    "Fashion": "๐Ÿ‘— 18-35 yo females, fashion lovers, Instagram scrollers, trend followers",
    "Food": "๐Ÿ• 18-45 yo foodies, home cooks, restaurant goers, food bloggers",
    "Tech": "๐Ÿ’ป 20-40 yo tech enthusiasts, early adopters, gadget buyers, professionals",
    "Beauty": "๐Ÿ’„ 16-35 yo beauty lovers, skincare fans, makeup artists, self-care community",
    "Fitness": "๐Ÿ’ช 18-40 yo gym goers, health-conscious buyers, athletes, wellness seekers",
    "Lifestyle": "๐ŸŒฟ 22-40 yo aspirational buyers, aesthetic lovers, home decor fans",
    "Product/Other":"๐Ÿ›๏ธ 18-45 yo online shoppers, deal hunters, value-conscious buyers",
}

# Caption templates keyed by language -> style; "{cap}" is replaced with
# the cleaned user caption in get_smart_insights().
CAPTION_TEMPLATES = {
    "English": {
        "Premium": ["{cap} โœจ Quality that speaks for itself. ๐Ÿ›’ Shop Now โ†’ Link in bio",
                    "Elevate your style. {cap} ๐Ÿ’ซ DM us to order!"],
        "Energetic": ["๐Ÿ”ฅ {cap} Hit different. Grab yours NOW ๐Ÿ‘† Limited stock!",
                      "โšก Game changer alert! {cap} Don't sleep on this ๐Ÿš€"],
        "Fun": ["Obsessed with this!! ๐Ÿ˜ {cap} Tag someone who needs it ๐Ÿ‘‡",
                "POV: You just found your new fav ๐ŸŽ‰ {cap} Link in bio!"],
    },
    "Hindi": {
        "Premium": ["{cap} โœจ เค•เฅเคตเคพเคฒเคฟเคŸเฅ€ เคœเฅ‹ เคฌเฅ‹เคฒเคคเฅ€ เคนเฅˆเฅค ๐Ÿ›’ เค…เคญเฅ€ เค–เคฐเฅ€เคฆเฅ‡เค‚ โ†’ Bio เคฎเฅ‡เค‚ link",
                    "เค…เคชเคจเคพ เคธเฅเคŸเคพเค‡เคฒ เคฌเคขเคผเคพเคเค‚เฅค {cap} ๐Ÿ’ซ Order เค•เฅ‡ เคฒเคฟเค DM เค•เคฐเฅ‡เค‚!"],
        "Energetic": ["๐Ÿ”ฅ {cap} เคเค•เคฆเคฎ เค…เคฒเค— เคนเฅˆ! เค…เคญเฅ€ grab เค•เคฐเฅ‹ ๐Ÿ‘† Limited stock!",
                      "โšก Game changer! {cap} เคฎเคค เคธเฅ‹เคšเฅ‹, order เค•เคฐเฅ‹ ๐Ÿš€"],
        "Fun": ["เค‡เคธเค•เฅ‡ เคธเคพเคฅ เคคเฅ‹ เคฆเฅ€เคตเคพเคจเฅ‡ เคนเฅ‹ เคœเคพเค“เค—เฅ‡!! ๐Ÿ˜ {cap} เค•เคฟเคธเฅ€ เค•เฅ‹ tag เค•เคฐเฅ‹ ๐Ÿ‘‡",
                "POV: เคจเคฏเคพ favourite เคฎเคฟเคฒ เค—เคฏเคพ ๐ŸŽ‰ {cap} Bio เคฎเฅ‡เค‚ link เคนเฅˆ!"],
    },
    "Hinglish": {
        "Premium": ["{cap} โœจ Quality toh dekho yaar! ๐Ÿ›’ Shop karo โ†’ Bio mein link",
                    "Style upgrade time! {cap} ๐Ÿ’ซ DM karo order ke liye!"],
        "Energetic": ["๐Ÿ”ฅ {cap} Bilkul alag hai bhai! Abhi lo ๐Ÿ‘† Limited stock!",
                      "โšก Ek dum fire hai! {cap} Mat ruko, order karo ๐Ÿš€"],
        "Fun": ["Yaar yeh toh kamaal hai!! ๐Ÿ˜ {cap} Kisi ko tag karo ๐Ÿ‘‡",
                "POV: Naya fav mil gaya ๐ŸŽ‰ {cap} Bio mein link hai!"],
    },
}
339
+
340
def detect_category(caption):
    """Map a caption to a product category via ordered keyword matching.

    Categories are checked in a fixed order and the first hit wins
    (e.g. "outfit" matches Fashion before "fit" could match Fitness);
    anything with no keyword hit falls through to "Product/Other".
    """
    text = caption.lower()
    rules = [
        ("Fashion", ("shoe", "sneaker", "dress", "outfit", "wear", "fashion",
                     "style", "cloth", "jeans", "kurta")),
        ("Food", ("food", "eat", "recipe", "cook", "restaurant", "cafe",
                  "pizza", "biryani")),
        ("Tech", ("phone", "laptop", "tech", "gadget", "device", "app",
                  "software", "camera")),
        ("Beauty", ("skin", "beauty", "makeup", "lipstick", "cream", "hair",
                    "glow", "face")),
        ("Fitness", ("gym", "fit", "workout", "protein", "yoga", "health",
                     "run", "sport")),
        ("Lifestyle", ("home", "decor", "interior", "lifestyle", "aesthetic",
                       "plant", "candle")),
    ]
    for category, keywords in rules:
        if any(k in text for k in keywords):
            return category
    return "Product/Other"
355
+
356
def get_smart_insights(caption, style, language):
    """Build the "smart insights" card plus a ready-to-post caption.

    Detects the product category from the caption, looks up the static
    posting-time and audience tables, and fills a random caption
    template for the chosen language/style. Returns (insight_text,
    generated_caption). Note: random.choice makes the caption
    non-deterministic across calls.
    """
    import random, re
    category = detect_category(caption)
    pt = POSTING_TIMES[category]
    audience = AUDIENCES[category]

    # Pick a template for the requested language/style, falling back to
    # English / Premium, and substitute the sanitized user caption.
    templates = CAPTION_TEMPLATES.get(language, CAPTION_TEMPLATES["English"])
    style_templates = templates.get(style, templates["Premium"])
    # Keep ASCII word characters, basic punctuation and the Devanagari range.
    clean_cap = re.sub(r"[^A-Za-z0-9 !.,'-เค€-เฅฟ]","",caption).strip()
    generated_cap = random.choice(style_templates).replace("{cap}", clean_cap)

    # Assemble the plain-text insight card shown in the UI.
    insight = f"""๐Ÿ“Š SMART INSIGHTS
โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”
๐ŸŽฏ Category Detected: {category}

๐Ÿ‘ฅ Target Audience:
{audience}

โฐ Best Time to Post:
๐Ÿ† Prime Slot: {pt['best']}
๐Ÿ“… Best Days: {pt['days']}
๐Ÿ• All Good Times: {', '.join(pt['slots'])}

๐Ÿ’ฌ AI Caption ({language}):
{generated_cap}

#๏ธโƒฃ Suggested Hashtags:
#{category.replace('/','').replace(' ','')} #Trending #NewCollection #MustHave #ShopNow #Viral #Reels #ForYou
โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”"""
    return insight, generated_cap
388
+
389
+
390
+ # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
391
+ # MAIN
392
+ # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
393
def generate(image, caption, style, language, add_aud, add_cap, progress=gr.Progress()):
    """Full pipeline: image -> AI video -> optional captions/audio -> insights.

    Returns a 3-tuple (video_path, log_text, insight_text) matching the
    three UI outputs; when no image is supplied it returns placeholder
    messages instead of raising.
    """
    if image is None:
        return None, "⚠️ Upload an image!", "Upload image first!"

    img = image if isinstance(image, Image.Image) else Image.fromarray(image)
    cap = caption.strip() or "Premium Quality. Shop Now."
    prompt = f"cinematic product ad, {cap}, smooth motion, dramatic lighting"

    messages = []

    def log(msg):
        # Each log message also nudges the progress bar forward (capped at 80%).
        messages.append(msg)
        progress(min(.1 + len(messages) * .10, .80), desc=msg)

    # Smart insights are computed locally, so they come back instantly.
    insight, ai_caption = get_smart_insights(cap, style, language)

    progress(.05, desc="🚀 Generating video...")
    video_path, model_used = get_video(img, prompt, cb=log)
    dur = 6

    # Non-English runs burn the localized AI caption into the video.
    video_caption = cap if language == "English" else ai_caption

    if add_cap:
        log("💬 Adding captions...")
        video_path = add_captions_ffmpeg(video_path, video_caption, dur, style.lower())

    if add_aud:
        log("🎵 Adding music + voice...")
        video_path = add_audio(video_path, cap, dur, style.lower())

    progress(1.0, desc="✅ Done!")
    return video_path, "\n".join(messages) + f"\n\n✅ Used: {model_used}", insight
421
+
422
+
423
+ # โ”€โ”€ UI โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
424
# Custom CSS injected into the Gradio Blocks app: centered hero title,
# muted subtitle, and a monospace face for the insights card.
css="""
#title{text-align:center;font-size:2.3rem;font-weight:900}
#sub{text-align:center;color:#888;margin-bottom:1.5rem}
.insight{font-family:monospace;font-size:.88rem;line-height:1.7}
"""
429
# Gradio UI: two-column layout — inputs on the left, video + insights on
# the right.  Component creation order defines on-screen order.
with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="violet")) as demo:
    gr.Markdown("# 🎬 AI Reel Generator", elem_id="title")
    gr.Markdown("Image → AI video + smart captions + posting strategy", elem_id="sub")

    with gr.Row():
        # ── LEFT: user inputs ────────────────────────────────────────
        with gr.Column(scale=1):
            img_in = gr.Image(label="📸 Upload Image", type="pil", height=280)
            cap_in = gr.Textbox(label="✍️ Your Caption / Product Description",
                                value="Step into style. Own the moment.", lines=2)
            with gr.Row():
                sty_dd = gr.Dropdown(["Premium", "Energetic", "Fun"], value="Premium", label="🎨 Style")
                lang_dd = gr.Dropdown(["English", "Hindi", "Hinglish"], value="English", label="🌐 Language")
            with gr.Row():
                # Post-processing toggles consumed by generate().
                aud_cb = gr.Checkbox(label="🎵 Music + Voice", value=True)
                cap_cb = gr.Checkbox(label="💬 Captions", value=True)
            gen_btn = gr.Button("🚀 Generate Reel + Insights", variant="primary", size="lg")
            # Informational note about the video-backend fallback chain.
            gr.Markdown("**🔗 Chain:** LTX-2 ⚡ → Wan 2.2 → SVD-XT → Kling → LTX-Video → Ken Burns ✅")

        # ── RIGHT: generated outputs ─────────────────────────────────
        with gr.Column(scale=1):
            vid_out = gr.Video(label="🎥 Reel", height=420)
            insight_out = gr.Textbox(label="📊 Smart Insights — Audience + Posting Time + AI Caption",
                                     lines=18, interactive=False, elem_classes="insight")
            log_out = gr.Textbox(label="🔧 Log", lines=3, interactive=False)

    # Wire the button: input order must match generate()'s signature,
    # output order matches its (video, log, insight) return tuple.
    gen_btn.click(
        fn=generate,
        inputs=[img_in, cap_in, sty_dd, lang_dd, aud_cb, cap_cb],
        outputs=[vid_out, log_out, insight_out],
    )
460
 
461
+ if __name__=="__main__":
462
  demo.launch()