GLAkavya committed on
Commit
0f46b8d
Β·
verified Β·
1 Parent(s): 7aad255

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +246 -386
app.py CHANGED
@@ -1,29 +1,31 @@
1
- import os, tempfile, io, math, time, threading, base64, requests
2
  import numpy as np
3
  import cv2
4
  import gradio as gr
5
- from PIL import Image, ImageFilter, ImageEnhance, ImageDraw, ImageFont
6
 
7
  # ── TOKENS ────────────────────────────────────────────────────────
8
- FAL_KEY = (os.environ.get("FAL_KEY","") or os.environ.get("FAL_API_KEY","")).strip()
9
- HF_TOKEN = (os.environ.get("HF_TOKEN","") or os.environ.get("HF_KEY","")).strip()
10
-
11
  hf_client = None
12
- if HF_TOKEN:
13
  try:
14
  from huggingface_hub import login, InferenceClient
15
- login(token=HF_TOKEN); hf_client = InferenceClient(token=HF_TOKEN)
16
  print("βœ… HF ready")
17
  except Exception as e: print(f"⚠️ HF: {e}")
18
- if FAL_KEY: os.environ["FAL_KEY"] = FAL_KEY; print("βœ… fal.ai ready")
19
 
20
- # ── HELPERS ───────────────────────────────────────────────────────
21
- def pil_to_bytes(img, q=92):
22
- b=io.BytesIO(); img.save(b,format="JPEG",quality=q); return b.getvalue()
 
 
 
 
 
 
23
 
24
- def save_bytes(data):
25
- t=tempfile.NamedTemporaryFile(suffix=".mp4",delete=False)
26
- t.write(data); t.flush(); return t.name
27
 
28
  def run_timeout(fn, sec, *a, **kw):
29
  box=[None]; err=[None]
@@ -31,424 +33,282 @@ def run_timeout(fn, sec, *a, **kw):
31
  try: box[0]=fn(*a,**kw)
32
  except Exception as e: err[0]=str(e)
33
  t=threading.Thread(target=r,daemon=True); t.start(); t.join(timeout=sec)
34
- if t.is_alive(): print(f" ⏱ timeout {sec}s"); return None
35
  if err[0]: print(f" ❌ {err[0][:80]}")
36
  return box[0]
37
 
38
- def ease(t):
39
- t=max(0.,min(1.,t)); return t*t*(3-2*t)
40
-
41
- def ease_cubic(t):
42
- t=max(0.,min(1.,t)); return 4*t*t*t if t<.5 else 1-math.pow(-2*t+2,3)/2
43
-
44
- def ease_expo(t):
45
- return 1-math.pow(2,-10*t) if t<1 else 1.
46
-
47
- # ── FAL + HF CHAIN ────────────────────────────────────────────────
48
- def try_fal(pil_image, prompt):
49
- if not FAL_KEY: return None
50
- try:
51
- import fal_client
52
- url = fal_client.upload_image(pil_image)
53
- r = fal_client.run("fal-ai/ltx-video/image-to-video", arguments={
54
- "image_url": url, "prompt": prompt,
55
- "num_frames": 121, "fps": 24, "guidance_scale": 3.5,
56
- "num_inference_steps": 30,
57
- })
58
- vurl = r.get("video",{}).get("url") or r.get("video_url")
59
- if vurl:
60
- resp = requests.get(vurl, timeout=60)
61
- if resp.status_code==200: return resp.content
62
- except Exception as e: print(f" ❌ fal: {e}")
63
- return None
64
-
65
- def try_hf(pil_image, prompt):
66
  if not hf_client: return None
67
  try:
68
- r = hf_client.image_to_video(image=pil_to_bytes(pil_image),
69
- model="Lightricks/LTX-2", prompt=prompt)
70
  return r.read() if hasattr(r,"read") else r
71
- except Exception as e: print(f" ❌ HF: {e}")
72
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
- def get_ai_video(pil, prompt, cb=None):
75
- for name, fn, sec in [
76
- ("πŸ€– fal.ai LTX", try_fal, 90),
77
- ("πŸ€– HF LTX-2", try_hf, 60),
78
- ]:
79
- if cb: cb(f"⏳ {name}...")
80
- r = run_timeout(fn, sec, pil, prompt)
81
- if r: return save_bytes(r), name
82
- return None, "local"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
 
85
  # ══════════════════════════════════════════════════════════════════
86
- # CINEMATIC ENGINE β€” image ALWAYS visible
87
  # ══════════════════════════════════════════════════════════════════
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
- def prep_image(pil, W, H):
90
- """Resize with smart crop β€” NO quality loss, NO black bars."""
91
- img = pil.convert("RGB")
92
- sw, sh = img.size
93
- # Crop to target ratio
94
- tr = W/H
95
- if sw/sh > tr:
96
- nw = int(sh*tr); img = img.crop(((sw-nw)//2, 0, (sw-nw)//2+nw, sh))
97
- else:
98
- nh = int(sw/tr); img = img.crop((0, (sh-nh)//2, sw, (sh-nh)//2+nh))
99
- img = img.resize((W, H), Image.LANCZOS)
100
- # Gentle sharpening only
101
- img = img.filter(ImageFilter.UnsharpMask(radius=0.8, percent=110, threshold=3))
102
- img = ImageEnhance.Contrast(img).enhance(1.05)
103
- img = ImageEnhance.Color(img).enhance(1.08)
104
- return np.array(img)
105
-
106
-
107
- def grade(frame, style):
108
- """Subtle color grade β€” won't darken image."""
109
- f = frame.astype(np.float32) / 255.0
110
- if style == "premium":
111
- # Slight warm highlights, cool shadows β€” VERY subtle
112
- f[:,:,0] = np.clip(f[:,:,0] * 1.03 + 0.01, 0, 1)
113
- f[:,:,2] = np.clip(f[:,:,2] * 1.02, 0, 1)
114
- elif style == "energetic":
115
- # Slight saturation boost
116
- gray = 0.299*f[:,:,0:1] + 0.587*f[:,:,1:2] + 0.114*f[:,:,2:3]
117
- f = np.clip(gray + 1.25*(f-gray), 0, 1)
118
- f = np.clip(f * 1.05, 0, 1)
119
- elif style == "fun":
120
- f[:,:,0] = np.clip(f[:,:,0] * 1.06, 0, 1)
121
- f[:,:,1] = np.clip(f[:,:,1] * 1.03, 0, 1)
122
- return np.clip(f*255, 0, 255).astype(np.uint8)
123
-
124
-
125
- def soft_vignette(frame):
126
- """Very subtle vignette β€” only darkens extreme edges."""
127
- h, w = frame.shape[:2]
128
- Y, X = np.ogrid[:h, :w]
129
- dist = np.sqrt(((X-w/2)/(w/2))**2 + ((Y-h/2)/(h/2))**2)
130
- # Only kicks in after 0.85 from center β€” very gentle
131
- mask = np.clip(1.0 - 0.30 * np.maximum(dist - 0.85, 0)**2, 0, 1)
132
- return np.clip(frame.astype(np.float32)*mask[:,:,None], 0, 255).astype(np.uint8)
133
-
134
-
135
- class Bokeh:
136
- def __init__(self, W, H, style):
137
- self.W, self.H = W, H
138
- cols = {"premium":[(255,220,100),(180,160,255)],
139
- "energetic":[(80,180,255),(255,80,80)],
140
- "fun":[(255,150,200),(150,255,180)]}
141
- c = cols.get(style, cols["premium"])
142
- self.p = [{
143
- "x": np.random.uniform(0,W), "y": np.random.uniform(0,H),
144
- "r": np.random.uniform(5,20),
145
- "a": np.random.uniform(0.06, 0.20), # very transparent
146
- "vx": np.random.uniform(-0.2,0.2),
147
- "vy": np.random.uniform(-0.5,-0.05),
148
- "col": c[np.random.randint(len(c))],
149
- "ph": np.random.uniform(0, math.pi*2),
150
- } for _ in range(18)]
151
-
152
- def draw(self, frame, t_sec):
153
- ov = frame.astype(np.float32)
154
- for p in self.p:
155
- px = int(p["x"] + p["vx"]*t_sec*50 + math.sin(t_sec*1.5+p["ph"])*6) % self.W
156
- py = int((p["y"] + p["vy"]*t_sec*50) % self.H)
157
- r = max(3, int(p["r"] * (0.8+0.2*math.sin(t_sec*2+p["ph"]))))
158
- a = p["a"] * (0.7+0.3*math.sin(t_sec*2+p["ph"]))
159
- tmp = np.zeros_like(ov)
160
- cv2.circle(tmp, (px,py), r, p["col"], -1)
161
- # soft glow: blur the circle
162
- tmp_blurred = cv2.GaussianBlur(tmp, (r|1, r|1), r/2)
163
- ov = ov*(1-a) + tmp_blurred.astype(np.float32)*a
164
- return np.clip(ov, 0, 255).astype(np.uint8)
165
-
166
-
167
- def get_font(size):
168
- for p in ["/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
169
- "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
170
- "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf"]:
171
- if os.path.exists(p):
172
- try: return ImageFont.truetype(p, size)
173
- except: pass
174
- return ImageFont.load_default()
175
-
176
-
177
- def draw_caption(frame_np, text, anim_t, W, H, style):
178
- """Animated caption β€” slide up from bottom."""
179
- if not text.strip(): return frame_np
180
- pil = Image.fromarray(frame_np).convert("RGBA")
181
- overlay = Image.new("RGBA", pil.size, (0,0,0,0))
182
- draw = ImageDraw.Draw(overlay)
183
-
184
- font = get_font(max(30, W//20))
185
- # Word wrap
186
- words = text.split(); lines = []; line = ""
187
- for w in words:
188
- test = (line+" "+w).strip()
189
- try: bbox = font.getbbox(test)
190
- except: bbox = (0,0,len(test)*18,30)
191
- if bbox[2] > W*0.82 and line:
192
- lines.append(line); line = w
193
- else: line = test
194
- if line: lines.append(line)
195
-
196
- lh = max(36, W//18)
197
- total_h = len(lines)*lh + 24
198
- base_y = H - total_h - 80
199
-
200
- # Slide up animation
201
- slide = ease_expo(min(anim_t/0.5, 1.0))
202
- offset = int((1-slide)*50)
203
- alpha = int(min(anim_t/0.4, 1.0) * 255)
204
-
205
- txt_colors = {"premium":(255,210,60),"energetic":(60,200,255),"fun":(255,100,180)}
206
- txt_col = txt_colors.get(style, (255,255,255))
207
-
208
- for i, ln in enumerate(lines):
209
- try: bbox = font.getbbox(ln); tw = bbox[2]-bbox[0]
210
- except: tw = len(ln)*18
211
- tx = (W-tw)//2; ty = base_y + i*lh + offset
212
-
213
- # Background pill
214
- pad = 14
215
- draw.rounded_rectangle([tx-pad, ty-6, tx+tw+pad, ty+lh+4],
216
- radius=12, fill=(0,0,0,min(170,alpha)))
217
- # Shadow
218
- draw.text((tx+2, ty+2), ln, font=font, fill=(0,0,0,min(200,alpha)))
219
- # Text
220
- r,g,b = txt_col
221
- draw.text((tx, ty), ln, font=font, fill=(r,g,b,alpha))
222
-
223
- combined = Image.alpha_composite(pil, overlay)
224
- return np.array(combined.convert("RGB"))
225
 
 
 
226
 
 
 
 
 
227
  def make_bgm(duration_sec, out_path, style="premium"):
228
  import wave
229
- sr = 44100; n = int(sr*duration_sec)
230
- t = np.linspace(0, duration_sec, n, endpoint=False)
231
- bpm = {"premium":90, "energetic":128, "fun":105}.get(style, 90)
232
- beat = 60./bpm
233
 
234
- # Kick
235
- kick = np.zeros(n, np.float32)
236
  for i in range(int(duration_sec/beat)+2):
237
- s = int(i*beat*sr)
238
- if s >= n: break
239
- l = min(int(sr*.10), n-s)
240
- env = np.exp(-20*np.arange(l)/sr)
241
- kick[s:s+l] += env * np.sin(2*math.pi*55*np.exp(-30*np.arange(l)/sr)*np.arange(l)/sr) * 0.6
242
-
243
- # Bassline
244
- bass_f = {"premium":55,"energetic":80,"fun":65}.get(style,55)
245
- bass = np.sin(2*math.pi*bass_f*t)*0.12*(0.5+0.5*np.sin(2*math.pi*(bpm/60/4)*t))
246
-
247
- # Melody
248
- mel_freqs = {"premium":[261,329,392],"energetic":[330,415,494],"fun":[392,494,587]}.get(style,[261,329,392])
249
- mel = np.zeros(n, np.float32)
250
- for j,f in enumerate(mel_freqs):
251
- env = np.clip(0.5+0.5*np.sin(2*math.pi*1.5*t - j*2.1), 0, 1)
252
- mel += np.sin(2*math.pi*f*t)*env*0.05
253
-
254
- # Hi-hat
255
- hat = np.zeros(n, np.float32)
256
- hs = beat/2
257
  for i in range(int(duration_sec/hs)+2):
258
- s = int(i*hs*sr)
259
- if s >= n: break
260
- l = min(int(sr*.03), n-s)
261
- hat[s:s+l] += np.random.randn(l)*np.exp(-80*np.arange(l)/sr)*0.07
262
 
263
- mix = np.clip((kick+bass+mel+hat)*0.20, -1, 1)
264
- fade = int(sr*.4)
265
- mix[:fade] *= np.linspace(0,1,fade)
266
- mix[-fade:] *= np.linspace(1,0,fade)
267
 
268
  with wave.open(out_path,"w") as wf:
269
  wf.setnchannels(1); wf.setsampwidth(2); wf.setframerate(sr)
270
  wf.writeframes((mix*32767).astype(np.int16).tobytes())
271
- return True
272
 
273
-
274
- def add_audio_to_video(video_path, style, duration_sec, caption):
275
- bgm = video_path.replace(".mp4","_bgm.wav")
276
- final= video_path.replace(".mp4","_final.mp4")
277
  make_bgm(duration_sec, bgm, style)
278
 
279
- # Try TTS
280
- tts_ok = False
281
- tts = video_path.replace(".mp4","_tts.mp3")
282
  try:
283
  from gtts import gTTS
284
- gTTS(text=caption[:180], lang="en", slow=False).save(tts)
285
- # Mix tts(loud) + bgm(soft)
286
- mixed = video_path.replace(".mp4","_mix.wav")
287
- os.system(f'ffmpeg -y -i "{bgm}" -i "{tts}" '
288
- f'-filter_complex "[0]volume=0.22[a];[1]volume=1.0[b];[a][b]amix=inputs=2:duration=first" '
 
289
  f'-t {duration_sec} "{mixed}" -loglevel error')
290
- if os.path.exists(mixed): bgm = mixed; tts_ok = True
291
- except: pass
292
 
293
- os.system(f'ffmpeg -y -i "{video_path}" -i "{bgm}" '
294
  f'-c:v copy -c:a aac -b:a 128k -shortest "{final}" -loglevel error')
295
  return final if os.path.exists(final) else video_path
296
 
297
 
298
- # ── MAIN RENDER ───────────────────────────────────────────────────
299
- def render(pil, caption="Step into style.", style="premium",
300
- duration_sec=7, fps=30, add_audio=True, add_caption=True, add_bokeh=True):
301
-
302
- TW, TH = 720, 1280
303
- PAD = 160 # extra canvas for zoom
304
- BW, BH = TW+PAD*2, TH+PAD*2
305
-
306
- base = prep_image(pil, BW, BH) # large canvas β€” FULL COLOR image
307
- total = duration_sec * fps
308
-
309
- bokeh = Bokeh(TW, TH, style) if add_bokeh else None
310
-
311
- # Motion: gentle zoom + pan β€” no 3D, no warp
312
- SEGS = [
313
- (0.00, 0.22, 1.30, 1.12, 0, -int(PAD*.07), 0, -int(PAD*.08)),
314
- (0.22, 0.52, 1.12, 1.07, -int(PAD*.04), int(PAD*.06), -int(PAD*.08),-int(PAD*.22)),
315
- (0.52, 0.78, 1.07, 1.03, int(PAD*.06), int(PAD*.13), -int(PAD*.22),-int(PAD*.12)),
316
- (0.78, 1.00, 1.03, 1.00, int(PAD*.13), 0, -int(PAD*.12), 0),
317
- ]
318
-
319
- tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
320
- writer = cv2.VideoWriter(tmp.name, cv2.VideoWriter_fourcc(*"mp4v"), fps, (TW,TH))
321
-
322
- # Caption segments
323
- cap_words = caption.strip().split()
324
- mid = max(1, len(cap_words)//2)
325
- cap_segs = [
326
- (1.0, 3.5, " ".join(cap_words[:mid])),
327
- (3.8, 6.5, " ".join(cap_words[mid:]) or " ".join(cap_words)),
328
- ]
329
-
330
- for i in range(total):
331
- tg = i / max(total-1, 1)
332
-
333
- # Get motion params
334
- zoom = pan_x = pan_y = None
335
- for (t0,t1,z0,z1,px0,px1,py0,py1) in SEGS:
336
- if t0 <= tg <= t1:
337
- te = ease_cubic((tg-t0)/(t1-t0))
338
- zoom = z0+(z1-z0)*te
339
- pan_x = int(px0+(px1-px0)*te)
340
- pan_y = int(py0+(py1-py0)*te)
341
- break
342
- if zoom is None: zoom,pan_x,pan_y = 1.0,0,0
343
-
344
- # Micro camera shake (first 15%)
345
- if tg < 0.15:
346
- s = (0.15-tg)/0.15 * 1.8
347
- pan_x += int(s*math.sin(i*1.5))
348
- pan_y += int(s*math.cos(i*1.1))
349
-
350
- # Crop from big canvas
351
- cw, ch = int(TW/zoom), int(TH/zoom)
352
- cx, cy = BW//2+pan_x, BH//2+pan_y
353
- x1 = max(0, cx-cw//2); y1 = max(0, cy-ch//2)
354
- x2 = min(BW, x1+cw); y2 = min(BH, y1+ch)
355
- if (x2-x1)<20 or (y2-y1)<20: x1,y1,x2,y2=0,0,TW,TH
356
-
357
- frame = cv2.resize(base[y1:y2,x1:x2], (TW,TH), interpolation=cv2.INTER_LINEAR)
358
-
359
- # Subtle color grade (won't darken)
360
- frame = grade(frame, style)
361
-
362
- # Soft vignette (barely noticeable)
363
- frame = soft_vignette(frame)
364
-
365
- # Film grain β€” very light
366
- frame = np.clip(frame.astype(np.float32) +
367
- np.random.normal(0, 3.0, frame.shape), 0, 255).astype(np.uint8)
368
-
369
- # Bokeh on top
370
- if bokeh: frame = bokeh.draw(frame, tg*duration_sec)
371
-
372
- # Cinematic bars β€” thin
373
- frame[:36, :] = 0; frame[-36:, :] = 0
374
-
375
- # Fade in (first 2%) / out (last 5%)
376
- if tg < 0.02: alpha = ease_expo(tg/0.02)
377
- elif tg > 0.95: alpha = ease(1-(tg-0.95)/0.05)
378
- else: alpha = 1.0
379
- if alpha < 1.0:
380
- frame = np.clip(frame.astype(np.float32)*alpha, 0, 255).astype(np.uint8)
381
-
382
- # Captions
383
- if add_caption:
384
- t_sec = tg*duration_sec
385
- for (cs,ce,ct) in cap_segs:
386
- if cs <= t_sec <= ce:
387
- frame = draw_caption(frame, ct, t_sec-cs, TW, TH, style)
388
-
389
- writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
390
-
391
- writer.release()
392
-
393
- if add_audio:
394
- return add_audio_to_video(tmp.name, style, duration_sec, caption)
395
- return tmp.name
396
-
397
-
398
- # ── PIPELINE ──────────────────────────────────────────────────────
399
- def generate(image, caption, style, add_audio, add_caption, add_bokeh, progress=gr.Progress()):
400
- if image is None: return None, "⚠️ Upload an image first!"
401
- pil = image if isinstance(image,Image.Image) else Image.fromarray(image)
402
- cap = caption.strip() or "Premium Quality. Shop Now."
403
- prompt = f"cinematic product ad, {cap}, smooth motion, dramatic lighting"
404
- lines = []
405
 
406
- def log(msg): lines.append(msg); progress(min(.1+len(lines)*.12,.80),desc=msg)
 
 
407
 
408
- progress(.05, desc="πŸš€ Starting...")
409
- ai_path, model = get_ai_video(pil, prompt, cb=log)
 
 
410
 
411
- if ai_path:
412
- log(f"βœ… AI video: {model}")
413
- if add_audio:
414
- progress(.85, desc="🎡 Adding music...")
415
- ai_path = add_audio_to_video(ai_path, style.lower(), 6, cap)
416
- progress(1.0, desc="βœ… Done!")
417
- return ai_path, "\n".join(lines)+f"\n\nβœ… {model}"
418
 
419
- log("🎬 Cinematic Engine...")
420
- progress(.60, desc="🎬 Rendering...")
421
- out = render(pil, caption=cap, style=style.lower(),
422
- add_audio=add_audio, add_caption=add_caption, add_bokeh=add_bokeh)
423
- progress(1.0, desc="βœ… Done!")
424
- return out, "\n".join(lines)+"\n\nβœ… 🎬 Cinematic Engine"
425
 
426
 
427
  # ── UI ────────────────────────────────────────────────────────────
428
  css="#title{text-align:center;font-size:2.3rem;font-weight:900}#sub{text-align:center;color:#888;margin-bottom:1.5rem}"
429
- with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="violet")) as demo:
430
- gr.Markdown("# 🎬 AI Reel Generator", elem_id="title")
431
- gr.Markdown("Image + caption β†’ cinematic reel with music & captions", elem_id="sub")
432
-
433
  with gr.Row():
434
  with gr.Column(scale=1):
435
- img_in = gr.Image(label="πŸ“Έ Upload Image", type="pil", height=280)
436
- cap_in = gr.Textbox(label="✏️ Caption", value="Step into style. Own the moment.", lines=2)
437
- sty_dd = gr.Dropdown(["Premium","Energetic","Fun"], value="Premium", label="🎨 Style")
438
  with gr.Row():
439
- audio_cb = gr.Checkbox(label="🎡 Music + Voice", value=True)
440
- caption_cb= gr.Checkbox(label="πŸ’¬ Captions", value=True)
441
- bokeh_cb = gr.Checkbox(label="✨ Bokeh", value=True)
442
- gen_btn = gr.Button("πŸš€ Generate Reel", variant="primary", size="lg")
443
- gr.Markdown("**Chain:** fal.ai LTX β†’ HF LTX-2 β†’ 🎬 Cinematic Engine")
444
-
445
  with gr.Column(scale=1):
446
- vid_out = gr.Video(label="πŸŽ₯ Cinematic Reel", height=500)
447
- log_out = gr.Textbox(label="πŸ“Š Log", lines=5, interactive=False)
448
-
449
- gen_btn.click(fn=generate,
450
- inputs=[img_in,cap_in,sty_dd,audio_cb,caption_cb,bokeh_cb],
451
- outputs=[vid_out,log_out])
452
 
453
- if __name__ == "__main__":
454
  demo.launch()
 
1
+ import os, tempfile, io, math, time, threading
2
  import numpy as np
3
  import cv2
4
  import gradio as gr
5
+ from PIL import Image, ImageDraw, ImageFont, ImageFilter, ImageEnhance
6
 
7
# ── TOKENS ────────────────────────────────────────────────────────
# Accept the credential under either env var name; strip stray whitespace.
hf_token = (os.environ.get("HF_TOKEN", "") or os.environ.get("HF_KEY", "")).strip()

hf_client = None
if hf_token:
    try:
        from huggingface_hub import login, InferenceClient
        # NOTE: login() talks to the HF hub at import time; failures are
        # swallowed so the app still starts with the local fallback only.
        login(token=hf_token)
        hf_client = InferenceClient(token=hf_token)
        print("βœ… HF ready")
    except Exception as e:
        print(f"⚠️ HF: {e}")

# ── HF MODELS ─────────────────────────────────────────────────────
# Image-to-video backends, tried in order by get_video(). The
# "__local__" sentinel is the guaranteed Ken Burns fallback and must
# remain the last entry.
HF_MODELS = [
    {"id": "Lightricks/LTX-2", "name": "LTX-2 ⚑"},
    {"id": "Wan-AI/Wan2.2-I2V-A14B", "name": "Wan 2.2"},
    {"id": "stabilityai/stable-video-diffusion-img2vid-xt", "name": "SVD-XT"},
    {"id": "KlingTeam/LivePortrait", "name": "Kling LivePortrait"},
    {"id": "Lightricks/LTX-Video", "name": "LTX-Video"},
    {"id": "__local__", "name": "Ken Burns βœ…"},
]
26
 
27
def pil_to_bytes(img):
    """Encode a PIL image as JPEG (quality 92) and return the raw bytes."""
    buf = io.BytesIO()
    img.save(buf, format="JPEG", quality=92)
    return buf.getvalue()
 
29
 
30
def run_timeout(fn, sec, *a, **kw):
    """Run fn(*a, **kw) on a daemon thread, giving up after `sec` seconds.

    Returns the function's result, or None on timeout or exception;
    errors are printed (truncated), never raised to the caller.
    """
    result = [None]
    error = [None]

    def worker():
        try:
            result[0] = fn(*a, **kw)
        except Exception as exc:
            error[0] = str(exc)

    th = threading.Thread(target=worker, daemon=True)
    th.start()
    th.join(timeout=sec)
    if th.is_alive():
        # The daemon thread keeps running, but we stop waiting for it.
        print(" ⏱ timeout")
        return None
    if error[0]:
        print(f" ❌ {error[0][:80]}")
    return result[0]
39
 
40
def try_hf(model_id, pil, prompt):
    """Request a clip from one HF image-to-video model.

    Returns the raw video bytes, or None when no client is configured
    or the call fails (the error is printed, not raised).
    """
    if not hf_client:
        return None
    try:
        resp = hf_client.image_to_video(
            image=pil_to_bytes(pil), model=model_id, prompt=prompt)
        # The client may return a file-like stream or plain bytes.
        return resp.read() if hasattr(resp, "read") else resp
    except Exception as e:
        print(f" ❌ {model_id}: {e}")
        return None
46
+
47
def get_video(pil, prompt, cb=None):
    """Try each backend in HF_MODELS until one yields a video.

    Parameters:
        pil: source PIL image.
        prompt: text prompt forwarded to the hosted models.
        cb: optional progress callback taking a status string.

    Returns (video_path, model_name). The "__local__" entry — and the
    final fallback — render locally via ken_burns, so a playable file
    is always returned.
    """
    for m in HF_MODELS:
        mid, mname = m["id"], m["name"]
        if cb:
            cb(f"⏳ Trying: {mname}")
        if mid == "__local__":
            return ken_burns(pil), mname
        data = run_timeout(try_hf, 50, mid, pil, prompt)
        if data:
            t = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
            t.write(data)
            t.flush()
            # Fix: close the handle so ffmpeg/gradio can reopen the file
            # (the original leaked the descriptor for the app's lifetime).
            t.close()
            return t.name, mname
        time.sleep(1)  # brief pause before hitting the next backend
    return ken_burns(pil), "Ken Burns"
60
 
61
+
62
+ # ══════════════════════════════════════════════════════════════════
63
+ # KEN BURNS (working, image always shows)
64
+ # ══════════════════════════════════════════════════════════════════
65
def ease(t):
    """Smoothstep easing, with input clamped to [0, 1]."""
    t = min(1.0, max(0.0, t))
    return t * t * (3 - 2 * t)
66
def ease_cubic(t):
    """Cubic ease-in-out, with input clamped to [0, 1]."""
    t = min(1.0, max(0.0, t))
    if t < 0.5:
        return 4 * t * t * t
    return 1 - math.pow(-2 * t + 2, 3) / 2
67
def ease_expo(t):
    """Exponential ease-out; exactly 1.0 once t reaches 1."""
    if t >= 1:
        return 1.0
    return 1 - math.pow(2, -10 * t)
68
def ease_bounce(t):
    """Bounce ease-out: the classic Penner piecewise-parabola curve."""
    if t < 1 / 2.75:
        return 7.5625 * t * t
    if t < 2 / 2.75:
        t -= 1.5 / 2.75
        return 7.5625 * t * t + 0.75
    if t < 2.5 / 2.75:
        t -= 2.25 / 2.75
        return 7.5625 * t * t + 0.9375
    t -= 2.625 / 2.75
    return 7.5625 * t * t + 0.984375
73
+
74
def ken_burns(pil, duration_sec=6, fps=30, style="premium"):
    """Render a local Ken Burns clip (720x1280 portrait) from a still image.

    Center-crops/sharpens the source onto an oversized canvas, then sweeps
    a zooming-and-panning window over it, applying a per-style color grade,
    vignette, film grain, letterbox bars and fade in/out.

    Parameters:
        pil: source PIL image.
        duration_sec, fps: clip length and frame rate.
        style: "premium" | "energetic" | "fun" — picks the color grade.

    Returns the path of the written mp4 file.
    """
    TW, TH = 720, 1280                      # output frame size
    pad = 160                               # extra canvas so the window can roam
    BW, BH = TW + pad * 2, TH + pad * 2
    total = duration_sec * fps

    # Prepare image: crop to the target aspect ratio, gentle sharpen/grade.
    img = pil.convert("RGB")
    sw, sh = img.size
    if sw / sh > TW / TH:
        nw = int(sh * TW / TH)
        img = img.crop(((sw - nw) // 2, 0, (sw - nw) // 2 + nw, sh))
    else:
        nh = int(sw * TH / TW)
        img = img.crop((0, (sh - nh) // 2, sw, (sh - nh) // 2 + nh))
    img = img.filter(ImageFilter.UnsharpMask(radius=1.0, percent=120, threshold=2))
    img = ImageEnhance.Contrast(img).enhance(1.06)
    img = ImageEnhance.Color(img).enhance(1.10)
    base = np.array(img.resize((BW, BH), Image.LANCZOS))

    # Pre-baked vignette mask — only darkens beyond 0.80 from center.
    Y, X = np.ogrid[:TH, :TW]
    dist = np.sqrt(((X - TW / 2) / (TW / 2)) ** 2 + ((Y - TH / 2) / (TH / 2)) ** 2)
    vmask = np.clip(1. - 0.28 * np.maximum(dist - 0.80, 0) ** 2, 0, 1).astype(np.float32)

    # Motion segments: (t0, t1, zoom0, zoom1, panx0, panx1, pany0, pany1).
    SEG = [
        (0.00, 0.25, 1.35, 1.12, 0, -int(pad * .10), 0, -int(pad * .12)),
        (0.25, 0.55, 1.12, 1.07, -int(pad * .05), int(pad * .07), -int(pad * .12), -int(pad * .28)),
        (0.55, 0.78, 1.07, 1.04, int(pad * .07), int(pad * .16), -int(pad * .28), -int(pad * .16)),
        (0.78, 1.00, 1.04, 1.00, int(pad * .16), 0, -int(pad * .16), 0),
    ]

    tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    tmp.close()  # fix: cv2 writes to the path; don't leak the open handle
    writer = cv2.VideoWriter(tmp.name, cv2.VideoWriter_fourcc(*"mp4v"), fps, (TW, TH))
    try:
        for i in range(total):
            tg = i / max(total - 1, 1)      # normalized time in [0, 1]

            # Interpolate zoom/pan within the active motion segment.
            zoom = pan_x = pan_y = None
            for t0, t1, z0, z1, px0, px1, py0, py1 in SEG:
                if t0 <= tg <= t1:
                    te = ease_cubic((tg - t0) / (t1 - t0))
                    zoom = z0 + (z1 - z0) * te
                    pan_x = int(px0 + (px1 - px0) * te)
                    pan_y = int(py0 + (py1 - py0) * te)
                    break
            if zoom is None:
                zoom, pan_x, pan_y = 1., 0, 0

            # Micro camera shake, decaying over the first 20% of the clip.
            if tg < 0.20:
                s = (0.20 - tg) / 0.20 * 1.8
                pan_x += int(s * math.sin(i * 1.4))
                pan_y += int(s * math.cos(i * 1.0))

            # Crop the moving window out of the big canvas.
            cw, ch = int(TW / zoom), int(TH / zoom)
            ox, oy = BW // 2 + pan_x, BH // 2 + pan_y
            x1, y1 = max(0, ox - cw // 2), max(0, oy - ch // 2)
            x2, y2 = min(BW, x1 + cw), min(BH, y1 + ch)
            if (x2 - x1) < 10 or (y2 - y1) < 10:
                x1, y1, x2, y2 = 0, 0, TW, TH

            frame = cv2.resize(base[y1:y2, x1:x2], (TW, TH), interpolation=cv2.INTER_LINEAR)

            # Very subtle per-style color grade.
            f = frame.astype(np.float32) / 255.
            if style == "premium":
                f[:, :, 0] = np.clip(f[:, :, 0] * 1.03 + .01, 0, 1)
                f[:, :, 2] = np.clip(f[:, :, 2] * 1.02, 0, 1)
            elif style == "energetic":
                gray = 0.299 * f[:, :, 0:1] + 0.587 * f[:, :, 1:2] + 0.114 * f[:, :, 2:3]
                f = np.clip(gray + 1.2 * (f - gray), 0, 1)
                f = np.clip(f * 1.04, 0, 1)
            elif style == "fun":
                f[:, :, 0] = np.clip(f[:, :, 0] * 1.05, 0, 1)
                f[:, :, 1] = np.clip(f[:, :, 1] * 1.03, 0, 1)
            frame = np.clip(f * 255, 0, 255).astype(np.uint8)

            # Vignette, light film grain, thin letterbox bars.
            frame = np.clip(frame.astype(np.float32) * vmask[:, :, None], 0, 255).astype(np.uint8)
            frame = np.clip(frame.astype(np.float32) + np.random.normal(0, 3, frame.shape), 0, 255).astype(np.uint8)
            frame[:36, :] = 0
            frame[-36:, :] = 0

            # Fade in over the first 2%, fade out over the last 5%.
            if tg < 0.02:
                alpha = ease_expo(tg / 0.02)
            elif tg > 0.95:
                alpha = ease(1 - (tg - 0.95) / 0.05)
            else:
                alpha = 1.
            if alpha < 1.:
                frame = np.clip(frame.astype(np.float32) * alpha, 0, 255).astype(np.uint8)

            writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    finally:
        # Fix: always finalize the container, even if a frame op raises —
        # the original would leave a corrupt/locked file on error.
        writer.release()
    return tmp.name
153
 
154
 
155
  # ══════════════════════════════════════════════════════════════════
156
+ # CAPTIONS β€” burn into existing video via ffmpeg
157
  # ══════════════════════════════════════════════════════════════════
158
def add_captions_ffmpeg(video_path, caption, duration_sec, style):
    """Burn two animated caption lines into the video using ffmpeg drawtext.

    The caption is split roughly in half: line 1 shows 1.0–3.5s, line 2
    from 3.8s until min(6.5, duration_sec - 0.3), each with a 0.4s
    alpha fade in/out. Returns the captioned file path, or the original
    path unchanged when ffmpeg fails.
    """
    words = caption.strip().split()
    mid = max(1, len(words) // 2)
    line1 = " ".join(words[:mid])
    line2 = " ".join(words[mid:]) if len(words) > 1 else line1

    colors = {"premium": "FFD232", "energetic": "3CC8FF", "fun": "FF78C8"}
    col = colors.get(style, "FFFFFF")

    out = video_path.replace(".mp4", "_cap.mp4")
    font_paths = [
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
        "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf",
    ]
    font = ""
    for p in font_paths:
        if os.path.exists(p):
            font = f":fontfile='{p}'"
            break

    def _safe_text(text):
        # Fix: user captions are interpolated into the filter string. A raw
        # apostrophe would terminate the drawtext quote and break the whole
        # filter graph (the command runs via os.system), so swap it for a
        # typographic quote; "%{" would trigger drawtext text expansion.
        return text.replace("'", "\u2019").replace("%{", "%%{")

    def drawtext(text, start, end):
        fade_dur = 0.4
        return (
            f"drawtext=text='{_safe_text(text)}'{font}"
            f":fontsize=44:fontcolor=#{col}"
            f":x=(w-text_w)/2:y=h-130"
            f":box=1:boxcolor=black@0.55:boxborderw=14"
            f":enable='between(t,{start},{end})'"
            f":alpha='if(lt(t,{start + fade_dur}),(t-{start})/{fade_dur},"
            f"if(gt(t,{end - fade_dur}),({end}-t)/{fade_dur},1))'"
        )

    vf = f"{drawtext(line1, 1.0, 3.5)},{drawtext(line2, 3.8, min(6.5, duration_sec - 0.3))}"
    ret = os.system(f'ffmpeg -y -i "{video_path}" -vf "{vf}" -c:a copy "{out}" -loglevel error')
    return out if (ret == 0 and os.path.exists(out)) else video_path
196
 
197
+
198
+ # ══════════════════════════════════════════════════════════════════
199
+ # AUDIO β€” BGM + optional TTS
200
+ # ══════════════════════════════════════════════════════════════════
201
def make_bgm(duration_sec, out_path, style="premium"):
    """Synthesize a simple backing track and write it as 16-bit mono WAV.

    Layers a kick on every beat, a slowly pulsing bass, a three-note pad
    melody and eighth-note hi-hats; tempo and pitches depend on `style`
    ("premium" | "energetic" | "fun"). The mix gets a 0.5s fade in/out.
    """
    import wave
    sr = 44100
    n = int(sr * duration_sec)
    t = np.linspace(0, duration_sec, n, endpoint=False)
    bpm = {"premium": 88, "energetic": 126, "fun": 104}.get(style, 88)
    beat = 60. / bpm

    # Kick: exponentially decaying sine with a falling pitch, once per beat.
    kick = np.zeros(n, np.float32)
    for beat_i in range(int(duration_sec / beat) + 2):
        start = int(beat_i * beat * sr)
        if start >= n:
            break
        seg = min(int(sr * .10), n - start)
        decay = np.exp(-20 * np.arange(seg) / sr)
        kick[start:start + seg] += decay * np.sin(
            2 * math.pi * 55 * np.exp(-25 * np.arange(seg) / sr) * np.arange(seg) / sr) * 0.55

    # Bass: sine amplitude-modulated at a quarter of the beat rate.
    bass_f = {"premium": 55, "energetic": 80, "fun": 65}.get(style, 55)
    bass = np.sin(2 * math.pi * bass_f * t) * 0.10 * (0.5 + 0.5 * np.sin(2 * math.pi * (bpm / 60 / 4) * t))

    # Melody: three tones cross-fading on phase-shifted envelopes.
    mf = {"premium": [261, 329, 392], "energetic": [330, 415, 494], "fun": [392, 494, 587]}.get(style, [261, 329, 392])
    mel = np.zeros(n, np.float32)
    for j, freq in enumerate(mf):
        env = np.clip(0.5 + 0.5 * np.sin(2 * math.pi * 1.5 * t - j * 2.1), 0, 1)
        mel += np.sin(2 * math.pi * freq * t) * env * 0.045

    # Hi-hat: short bursts of decaying white noise every half beat.
    hat = np.zeros(n, np.float32)
    hs = beat / 2
    for hat_i in range(int(duration_sec / hs) + 2):
        start = int(hat_i * hs * sr)
        if start >= n:
            break
        seg = min(int(sr * .03), n - start)
        hat[start:start + seg] += np.random.randn(seg) * np.exp(-80 * np.arange(seg) / sr) * 0.06

    # Mix down, then apply the half-second fades.
    mix = np.clip((kick + bass + mel + hat) * 0.18, -1, 1)
    fade = int(sr * .5)
    mix[:fade] *= np.linspace(0, 1, fade)
    mix[-fade:] *= np.linspace(1, 0, fade)

    with wave.open(out_path, "w") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(sr)
        wf.writeframes((mix * 32767).astype(np.int16).tobytes())
 
239
 
240
def add_audio(video_path, caption, duration_sec, style):
    """Mux a synthesized BGM track — plus a gTTS voiceover when available —
    into the video.

    Parameters:
        video_path: source mp4 (the video stream is copied, not re-encoded).
        caption: text spoken by the TTS voice (truncated to 200 chars).
        duration_sec: length of the generated audio bed.
        style: forwarded to make_bgm for tempo/pitch selection.

    Returns the path of the new "_final.mp4", or the original `video_path`
    if the final ffmpeg mux failed.
    """
    bgm = video_path.replace(".mp4", "_bgm.wav")
    final = video_path.replace(".mp4", "_final.mp4")
    make_bgm(duration_sec, bgm, style)

    # Best effort: layer a spoken caption (loud) over the music (soft).
    # Any failure — gtts missing, no network, ffmpeg error — falls back
    # to the plain BGM track.
    audio = bgm
    try:
        from gtts import gTTS
        tts_mp3 = video_path.replace(".mp4", "_tts.mp3")
        # (fix: the original also computed an unused "_tts.wav" path)
        gTTS(text=caption[:200], lang="en", slow=False).save(tts_mp3)
        mixed = video_path.replace(".mp4", "_mix.wav")
        os.system(f'ffmpeg -y -i "{bgm}" -i "{tts_mp3}" '
                  f'-filter_complex "[0]volume=0.20[a];[1]volume=0.95[b];[a][b]amix=inputs=2:duration=first" '
                  f'-t {duration_sec} "{mixed}" -loglevel error')
        if os.path.exists(mixed):
            audio = mixed
    except Exception as e:
        print(f" TTS skip: {e}")

    os.system(f'ffmpeg -y -i "{video_path}" -i "{audio}" '
              f'-c:v copy -c:a aac -b:a 128k -shortest "{final}" -loglevel error')
    return final if os.path.exists(final) else video_path
262
 
263
 
264
+ # ══════════════════════════════════════════════════════════════════
265
+ # MAIN
266
+ # ══════════════════════════════════════════════════════════════════
267
def generate(image, caption, style, add_aud, add_cap, progress=gr.Progress()):
    """Gradio callback: turn an uploaded image into a finished reel.

    Pipeline: get_video (hosted models with local fallback), then
    optional burned-in captions, then optional music + voiceover.
    Returns (video_path, log_text).
    """
    if image is None:
        return None, "⚠️ Upload an image!"
    pil = image if isinstance(image, Image.Image) else Image.fromarray(image)
    cap = caption.strip() or "Premium Quality. Shop Now."
    prompt = f"cinematic product ad, {cap}, smooth motion, dramatic lighting"
    lines = []

    def log(msg):
        lines.append(msg)
        progress(min(.1 + len(lines) * .10, .80), desc=msg)

    progress(.05, desc="πŸš€ Starting...")
    video_path, model_used = get_video(pil, prompt, cb=log)
    # NOTE(review): clip length is assumed to be 6s regardless of which
    # backend produced the video — confirm against the hosted models.
    dur = 6

    if add_cap:
        log("πŸ’¬ Adding captions...")
        video_path = add_captions_ffmpeg(video_path, cap, dur, style.lower())

    if add_aud:
        log("🎡 Adding music + voice...")
        video_path = add_audio(video_path, cap, dur, style.lower())

    progress(1.0, desc="βœ… Done!")
    return video_path, "\n".join(lines) + f"\n\nβœ… Used: {model_used}"
 
 
 
 
291
 
292
 
293
# ── UI ────────────────────────────────────────────────────────────
css="#title{text-align:center;font-size:2.3rem;font-weight:900}#sub{text-align:center;color:#888;margin-bottom:1.5rem}"
with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="violet")) as demo:
    gr.Markdown("# 🎬 AI Reel Generator", elem_id="title")
    gr.Markdown("Image β†’ AI video + captions + music", elem_id="sub")

    with gr.Row():
        # Left column: inputs and options.
        with gr.Column(scale=1):
            img_in = gr.Image(label="πŸ“Έ Upload Image", type="pil", height=300)
            cap_in = gr.Textbox(label="✏️ Caption", value="Step into style. Own the moment.", lines=2)
            sty_dd = gr.Dropdown(["Premium", "Energetic", "Fun"], value="Premium", label="🎨 Style")
            with gr.Row():
                aud_cb = gr.Checkbox(label="🎡 Music + Voice", value=True)
                cap_cb = gr.Checkbox(label="πŸ’¬ Captions", value=True)
            gen_btn = gr.Button("πŸš€ Generate Reel", variant="primary", size="lg")
            gr.Markdown("**πŸ”— Chain:** LTX-2 ⚑ β†’ Wan 2.2 β†’ SVD-XT β†’ Kling β†’ LTX-Video β†’ Ken Burns βœ…")

        # Right column: output video and the pipeline log.
        with gr.Column(scale=1):
            vid_out = gr.Video(label="πŸŽ₯ Reel", height=500)
            log_out = gr.Textbox(label="πŸ“Š Log", lines=6, interactive=False)

    gen_btn.click(fn=generate, inputs=[img_in, cap_in, sty_dd, aud_cb, cap_cb], outputs=[vid_out, log_out])

if __name__ == "__main__":
    demo.launch()