GLAkavya committed on
Commit
0f46b8d
Β·
verified Β·
1 Parent(s): 7aad255

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +246 -386
app.py CHANGED
@@ -1,29 +1,31 @@
1
- import os, tempfile, io, math, time, threading, base64, requests
2
  import numpy as np
3
  import cv2
4
  import gradio as gr
5
- from PIL import Image, ImageFilter, ImageEnhance, ImageDraw, ImageFont
6
 
7
  # ── TOKENS ────────────────────────────────────────────────────────
8
- FAL_KEY = (os.environ.get("FAL_KEY","") or os.environ.get("FAL_API_KEY","")).strip()
9
- HF_TOKEN = (os.environ.get("HF_TOKEN","") or os.environ.get("HF_KEY","")).strip()
10
-
11
  hf_client = None
12
- if HF_TOKEN:
13
  try:
14
  from huggingface_hub import login, InferenceClient
15
- login(token=HF_TOKEN); hf_client = InferenceClient(token=HF_TOKEN)
16
  print("βœ… HF ready")
17
  except Exception as e: print(f"⚠️ HF: {e}")
18
- if FAL_KEY: os.environ["FAL_KEY"] = FAL_KEY; print("βœ… fal.ai ready")
19
 
20
- # ── HELPERS ───────────────────────────────────────────────────────
21
- def pil_to_bytes(img, q=92):
22
- b=io.BytesIO(); img.save(b,format="JPEG",quality=q); return b.getvalue()
 
 
 
 
 
 
23
 
24
- def save_bytes(data):
25
- t=tempfile.NamedTemporaryFile(suffix=".mp4",delete=False)
26
- t.write(data); t.flush(); return t.name
27
 
28
  def run_timeout(fn, sec, *a, **kw):
29
  box=[None]; err=[None]
@@ -31,424 +33,282 @@ def run_timeout(fn, sec, *a, **kw):
31
  try: box[0]=fn(*a,**kw)
32
  except Exception as e: err[0]=str(e)
33
  t=threading.Thread(target=r,daemon=True); t.start(); t.join(timeout=sec)
34
- if t.is_alive(): print(f" ⏱ timeout {sec}s"); return None
35
  if err[0]: print(f" ❌ {err[0][:80]}")
36
  return box[0]
37
 
38
- def ease(t):
39
- t=max(0.,min(1.,t)); return t*t*(3-2*t)
40
-
41
- def ease_cubic(t):
42
- t=max(0.,min(1.,t)); return 4*t*t*t if t<.5 else 1-math.pow(-2*t+2,3)/2
43
-
44
- def ease_expo(t):
45
- return 1-math.pow(2,-10*t) if t<1 else 1.
46
-
47
- # ── FAL + HF CHAIN ────────────────────────────────────────────────
48
- def try_fal(pil_image, prompt):
49
- if not FAL_KEY: return None
50
- try:
51
- import fal_client
52
- url = fal_client.upload_image(pil_image)
53
- r = fal_client.run("fal-ai/ltx-video/image-to-video", arguments={
54
- "image_url": url, "prompt": prompt,
55
- "num_frames": 121, "fps": 24, "guidance_scale": 3.5,
56
- "num_inference_steps": 30,
57
- })
58
- vurl = r.get("video",{}).get("url") or r.get("video_url")
59
- if vurl:
60
- resp = requests.get(vurl, timeout=60)
61
- if resp.status_code==200: return resp.content
62
- except Exception as e: print(f" ❌ fal: {e}")
63
- return None
64
-
65
- def try_hf(pil_image, prompt):
66
  if not hf_client: return None
67
  try:
68
- r = hf_client.image_to_video(image=pil_to_bytes(pil_image),
69
- model="Lightricks/LTX-2", prompt=prompt)
70
  return r.read() if hasattr(r,"read") else r
71
- except Exception as e: print(f" ❌ HF: {e}")
72
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
- def get_ai_video(pil, prompt, cb=None):
75
- for name, fn, sec in [
76
- ("πŸ€– fal.ai LTX", try_fal, 90),
77
- ("πŸ€– HF LTX-2", try_hf, 60),
78
- ]:
79
- if cb: cb(f"⏳ {name}...")
80
- r = run_timeout(fn, sec, pil, prompt)
81
- if r: return save_bytes(r), name
82
- return None, "local"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
 
85
  # ══════════════════════════════════════════════════════════════════
86
- # CINEMATIC ENGINE β€” image ALWAYS visible
87
  # ══════════════════════════════════════════════════════════════════
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
- def prep_image(pil, W, H):
90
- """Resize with smart crop β€” NO quality loss, NO black bars."""
91
- img = pil.convert("RGB")
92
- sw, sh = img.size
93
- # Crop to target ratio
94
- tr = W/H
95
- if sw/sh > tr:
96
- nw = int(sh*tr); img = img.crop(((sw-nw)//2, 0, (sw-nw)//2+nw, sh))
97
- else:
98
- nh = int(sw/tr); img = img.crop((0, (sh-nh)//2, sw, (sh-nh)//2+nh))
99
- img = img.resize((W, H), Image.LANCZOS)
100
- # Gentle sharpening only
101
- img = img.filter(ImageFilter.UnsharpMask(radius=0.8, percent=110, threshold=3))
102
- img = ImageEnhance.Contrast(img).enhance(1.05)
103
- img = ImageEnhance.Color(img).enhance(1.08)
104
- return np.array(img)
105
-
106
-
107
- def grade(frame, style):
108
- """Subtle color grade β€” won't darken image."""
109
- f = frame.astype(np.float32) / 255.0
110
- if style == "premium":
111
- # Slight warm highlights, cool shadows β€” VERY subtle
112
- f[:,:,0] = np.clip(f[:,:,0] * 1.03 + 0.01, 0, 1)
113
- f[:,:,2] = np.clip(f[:,:,2] * 1.02, 0, 1)
114
- elif style == "energetic":
115
- # Slight saturation boost
116
- gray = 0.299*f[:,:,0:1] + 0.587*f[:,:,1:2] + 0.114*f[:,:,2:3]
117
- f = np.clip(gray + 1.25*(f-gray), 0, 1)
118
- f = np.clip(f * 1.05, 0, 1)
119
- elif style == "fun":
120
- f[:,:,0] = np.clip(f[:,:,0] * 1.06, 0, 1)
121
- f[:,:,1] = np.clip(f[:,:,1] * 1.03, 0, 1)
122
- return np.clip(f*255, 0, 255).astype(np.uint8)
123
-
124
-
125
- def soft_vignette(frame):
126
- """Very subtle vignette β€” only darkens extreme edges."""
127
- h, w = frame.shape[:2]
128
- Y, X = np.ogrid[:h, :w]
129
- dist = np.sqrt(((X-w/2)/(w/2))**2 + ((Y-h/2)/(h/2))**2)
130
- # Only kicks in after 0.85 from center β€” very gentle
131
- mask = np.clip(1.0 - 0.30 * np.maximum(dist - 0.85, 0)**2, 0, 1)
132
- return np.clip(frame.astype(np.float32)*mask[:,:,None], 0, 255).astype(np.uint8)
133
-
134
-
135
- class Bokeh:
136
- def __init__(self, W, H, style):
137
- self.W, self.H = W, H
138
- cols = {"premium":[(255,220,100),(180,160,255)],
139
- "energetic":[(80,180,255),(255,80,80)],
140
- "fun":[(255,150,200),(150,255,180)]}
141
- c = cols.get(style, cols["premium"])
142
- self.p = [{
143
- "x": np.random.uniform(0,W), "y": np.random.uniform(0,H),
144
- "r": np.random.uniform(5,20),
145
- "a": np.random.uniform(0.06, 0.20), # very transparent
146
- "vx": np.random.uniform(-0.2,0.2),
147
- "vy": np.random.uniform(-0.5,-0.05),
148
- "col": c[np.random.randint(len(c))],
149
- "ph": np.random.uniform(0, math.pi*2),
150
- } for _ in range(18)]
151
-
152
- def draw(self, frame, t_sec):
153
- ov = frame.astype(np.float32)
154
- for p in self.p:
155
- px = int(p["x"] + p["vx"]*t_sec*50 + math.sin(t_sec*1.5+p["ph"])*6) % self.W
156
- py = int((p["y"] + p["vy"]*t_sec*50) % self.H)
157
- r = max(3, int(p["r"] * (0.8+0.2*math.sin(t_sec*2+p["ph"]))))
158
- a = p["a"] * (0.7+0.3*math.sin(t_sec*2+p["ph"]))
159
- tmp = np.zeros_like(ov)
160
- cv2.circle(tmp, (px,py), r, p["col"], -1)
161
- # soft glow: blur the circle
162
- tmp_blurred = cv2.GaussianBlur(tmp, (r|1, r|1), r/2)
163
- ov = ov*(1-a) + tmp_blurred.astype(np.float32)*a
164
- return np.clip(ov, 0, 255).astype(np.uint8)
165
-
166
-
167
- def get_font(size):
168
- for p in ["/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
169
- "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
170
- "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf"]:
171
- if os.path.exists(p):
172
- try: return ImageFont.truetype(p, size)
173
- except: pass
174
- return ImageFont.load_default()
175
-
176
-
177
- def draw_caption(frame_np, text, anim_t, W, H, style):
178
- """Animated caption β€” slide up from bottom."""
179
- if not text.strip(): return frame_np
180
- pil = Image.fromarray(frame_np).convert("RGBA")
181
- overlay = Image.new("RGBA", pil.size, (0,0,0,0))
182
- draw = ImageDraw.Draw(overlay)
183
-
184
- font = get_font(max(30, W//20))
185
- # Word wrap
186
- words = text.split(); lines = []; line = ""
187
- for w in words:
188
- test = (line+" "+w).strip()
189
- try: bbox = font.getbbox(test)
190
- except: bbox = (0,0,len(test)*18,30)
191
- if bbox[2] > W*0.82 and line:
192
- lines.append(line); line = w
193
- else: line = test
194
- if line: lines.append(line)
195
-
196
- lh = max(36, W//18)
197
- total_h = len(lines)*lh + 24
198
- base_y = H - total_h - 80
199
-
200
- # Slide up animation
201
- slide = ease_expo(min(anim_t/0.5, 1.0))
202
- offset = int((1-slide)*50)
203
- alpha = int(min(anim_t/0.4, 1.0) * 255)
204
-
205
- txt_colors = {"premium":(255,210,60),"energetic":(60,200,255),"fun":(255,100,180)}
206
- txt_col = txt_colors.get(style, (255,255,255))
207
-
208
- for i, ln in enumerate(lines):
209
- try: bbox = font.getbbox(ln); tw = bbox[2]-bbox[0]
210
- except: tw = len(ln)*18
211
- tx = (W-tw)//2; ty = base_y + i*lh + offset
212
-
213
- # Background pill
214
- pad = 14
215
- draw.rounded_rectangle([tx-pad, ty-6, tx+tw+pad, ty+lh+4],
216
- radius=12, fill=(0,0,0,min(170,alpha)))
217
- # Shadow
218
- draw.text((tx+2, ty+2), ln, font=font, fill=(0,0,0,min(200,alpha)))
219
- # Text
220
- r,g,b = txt_col
221
- draw.text((tx, ty), ln, font=font, fill=(r,g,b,alpha))
222
-
223
- combined = Image.alpha_composite(pil, overlay)
224
- return np.array(combined.convert("RGB"))
225
 
 
 
226
 
 
 
 
 
227
  def make_bgm(duration_sec, out_path, style="premium"):
228
  import wave
229
- sr = 44100; n = int(sr*duration_sec)
230
- t = np.linspace(0, duration_sec, n, endpoint=False)
231
- bpm = {"premium":90, "energetic":128, "fun":105}.get(style, 90)
232
- beat = 60./bpm
233
 
234
- # Kick
235
- kick = np.zeros(n, np.float32)
236
  for i in range(int(duration_sec/beat)+2):
237
- s = int(i*beat*sr)
238
- if s >= n: break
239
- l = min(int(sr*.10), n-s)
240
- env = np.exp(-20*np.arange(l)/sr)
241
- kick[s:s+l] += env * np.sin(2*math.pi*55*np.exp(-30*np.arange(l)/sr)*np.arange(l)/sr) * 0.6
242
-
243
- # Bassline
244
- bass_f = {"premium":55,"energetic":80,"fun":65}.get(style,55)
245
- bass = np.sin(2*math.pi*bass_f*t)*0.12*(0.5+0.5*np.sin(2*math.pi*(bpm/60/4)*t))
246
-
247
- # Melody
248
- mel_freqs = {"premium":[261,329,392],"energetic":[330,415,494],"fun":[392,494,587]}.get(style,[261,329,392])
249
- mel = np.zeros(n, np.float32)
250
- for j,f in enumerate(mel_freqs):
251
- env = np.clip(0.5+0.5*np.sin(2*math.pi*1.5*t - j*2.1), 0, 1)
252
- mel += np.sin(2*math.pi*f*t)*env*0.05
253
-
254
- # Hi-hat
255
- hat = np.zeros(n, np.float32)
256
- hs = beat/2
257
  for i in range(int(duration_sec/hs)+2):
258
- s = int(i*hs*sr)
259
- if s >= n: break
260
- l = min(int(sr*.03), n-s)
261
- hat[s:s+l] += np.random.randn(l)*np.exp(-80*np.arange(l)/sr)*0.07
262
 
263
- mix = np.clip((kick+bass+mel+hat)*0.20, -1, 1)
264
- fade = int(sr*.4)
265
- mix[:fade] *= np.linspace(0,1,fade)
266
- mix[-fade:] *= np.linspace(1,0,fade)
267
 
268
  with wave.open(out_path,"w") as wf:
269
  wf.setnchannels(1); wf.setsampwidth(2); wf.setframerate(sr)
270
  wf.writeframes((mix*32767).astype(np.int16).tobytes())
271
- return True
272
 
273
-
274
- def add_audio_to_video(video_path, style, duration_sec, caption):
275
- bgm = video_path.replace(".mp4","_bgm.wav")
276
- final= video_path.replace(".mp4","_final.mp4")
277
  make_bgm(duration_sec, bgm, style)
278
 
279
- # Try TTS
280
- tts_ok = False
281
- tts = video_path.replace(".mp4","_tts.mp3")
282
  try:
283
  from gtts import gTTS
284
- gTTS(text=caption[:180], lang="en", slow=False).save(tts)
285
- # Mix tts(loud) + bgm(soft)
286
- mixed = video_path.replace(".mp4","_mix.wav")
287
- os.system(f'ffmpeg -y -i "{bgm}" -i "{tts}" '
288
- f'-filter_complex "[0]volume=0.22[a];[1]volume=1.0[b];[a][b]amix=inputs=2:duration=first" '
 
289
  f'-t {duration_sec} "{mixed}" -loglevel error')
290
- if os.path.exists(mixed): bgm = mixed; tts_ok = True
291
- except: pass
292
 
293
- os.system(f'ffmpeg -y -i "{video_path}" -i "{bgm}" '
294
  f'-c:v copy -c:a aac -b:a 128k -shortest "{final}" -loglevel error')
295
  return final if os.path.exists(final) else video_path
296
 
297
 
298
- # ── MAIN RENDER ───────────────────────────────────────────────────
299
- def render(pil, caption="Step into style.", style="premium",
300
- duration_sec=7, fps=30, add_audio=True, add_caption=True, add_bokeh=True):
301
-
302
- TW, TH = 720, 1280
303
- PAD = 160 # extra canvas for zoom
304
- BW, BH = TW+PAD*2, TH+PAD*2
305
-
306
- base = prep_image(pil, BW, BH) # large canvas β€” FULL COLOR image
307
- total = duration_sec * fps
308
-
309
- bokeh = Bokeh(TW, TH, style) if add_bokeh else None
310
-
311
- # Motion: gentle zoom + pan β€” no 3D, no warp
312
- SEGS = [
313
- (0.00, 0.22, 1.30, 1.12, 0, -int(PAD*.07), 0, -int(PAD*.08)),
314
- (0.22, 0.52, 1.12, 1.07, -int(PAD*.04), int(PAD*.06), -int(PAD*.08),-int(PAD*.22)),
315
- (0.52, 0.78, 1.07, 1.03, int(PAD*.06), int(PAD*.13), -int(PAD*.22),-int(PAD*.12)),
316
- (0.78, 1.00, 1.03, 1.00, int(PAD*.13), 0, -int(PAD*.12), 0),
317
- ]
318
-
319
- tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
320
- writer = cv2.VideoWriter(tmp.name, cv2.VideoWriter_fourcc(*"mp4v"), fps, (TW,TH))
321
-
322
- # Caption segments
323
- cap_words = caption.strip().split()
324
- mid = max(1, len(cap_words)//2)
325
- cap_segs = [
326
- (1.0, 3.5, " ".join(cap_words[:mid])),
327
- (3.8, 6.5, " ".join(cap_words[mid:]) or " ".join(cap_words)),
328
- ]
329
-
330
- for i in range(total):
331
- tg = i / max(total-1, 1)
332
-
333
- # Get motion params
334
- zoom = pan_x = pan_y = None
335
- for (t0,t1,z0,z1,px0,px1,py0,py1) in SEGS:
336
- if t0 <= tg <= t1:
337
- te = ease_cubic((tg-t0)/(t1-t0))
338
- zoom = z0+(z1-z0)*te
339
- pan_x = int(px0+(px1-px0)*te)
340
- pan_y = int(py0+(py1-py0)*te)
341
- break
342
- if zoom is None: zoom,pan_x,pan_y = 1.0,0,0
343
-
344
- # Micro camera shake (first 15%)
345
- if tg < 0.15:
346
- s = (0.15-tg)/0.15 * 1.8
347
- pan_x += int(s*math.sin(i*1.5))
348
- pan_y += int(s*math.cos(i*1.1))
349
-
350
- # Crop from big canvas
351
- cw, ch = int(TW/zoom), int(TH/zoom)
352
- cx, cy = BW//2+pan_x, BH//2+pan_y
353
- x1 = max(0, cx-cw//2); y1 = max(0, cy-ch//2)
354
- x2 = min(BW, x1+cw); y2 = min(BH, y1+ch)
355
- if (x2-x1)<20 or (y2-y1)<20: x1,y1,x2,y2=0,0,TW,TH
356
-
357
- frame = cv2.resize(base[y1:y2,x1:x2], (TW,TH), interpolation=cv2.INTER_LINEAR)
358
-
359
- # Subtle color grade (won't darken)
360
- frame = grade(frame, style)
361
-
362
- # Soft vignette (barely noticeable)
363
- frame = soft_vignette(frame)
364
-
365
- # Film grain β€” very light
366
- frame = np.clip(frame.astype(np.float32) +
367
- np.random.normal(0, 3.0, frame.shape), 0, 255).astype(np.uint8)
368
-
369
- # Bokeh on top
370
- if bokeh: frame = bokeh.draw(frame, tg*duration_sec)
371
-
372
- # Cinematic bars β€” thin
373
- frame[:36, :] = 0; frame[-36:, :] = 0
374
-
375
- # Fade in (first 2%) / out (last 5%)
376
- if tg < 0.02: alpha = ease_expo(tg/0.02)
377
- elif tg > 0.95: alpha = ease(1-(tg-0.95)/0.05)
378
- else: alpha = 1.0
379
- if alpha < 1.0:
380
- frame = np.clip(frame.astype(np.float32)*alpha, 0, 255).astype(np.uint8)
381
-
382
- # Captions
383
- if add_caption:
384
- t_sec = tg*duration_sec
385
- for (cs,ce,ct) in cap_segs:
386
- if cs <= t_sec <= ce:
387
- frame = draw_caption(frame, ct, t_sec-cs, TW, TH, style)
388
-
389
- writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
390
-
391
- writer.release()
392
-
393
- if add_audio:
394
- return add_audio_to_video(tmp.name, style, duration_sec, caption)
395
- return tmp.name
396
-
397
-
398
- # ── PIPELINE ──────────────────────────────────────────────────────
399
- def generate(image, caption, style, add_audio, add_caption, add_bokeh, progress=gr.Progress()):
400
- if image is None: return None, "⚠️ Upload an image first!"
401
- pil = image if isinstance(image,Image.Image) else Image.fromarray(image)
402
- cap = caption.strip() or "Premium Quality. Shop Now."
403
- prompt = f"cinematic product ad, {cap}, smooth motion, dramatic lighting"
404
- lines = []
405
 
406
- def log(msg): lines.append(msg); progress(min(.1+len(lines)*.12,.80),desc=msg)
 
 
407
 
408
- progress(.05, desc="πŸš€ Starting...")
409
- ai_path, model = get_ai_video(pil, prompt, cb=log)
 
 
410
 
411
- if ai_path:
412
- log(f"βœ… AI video: {model}")
413
- if add_audio:
414
- progress(.85, desc="🎡 Adding music...")
415
- ai_path = add_audio_to_video(ai_path, style.lower(), 6, cap)
416
- progress(1.0, desc="βœ… Done!")
417
- return ai_path, "\n".join(lines)+f"\n\nβœ… {model}"
418
 
419
- log("🎬 Cinematic Engine...")
420
- progress(.60, desc="🎬 Rendering...")
421
- out = render(pil, caption=cap, style=style.lower(),
422
- add_audio=add_audio, add_caption=add_caption, add_bokeh=add_bokeh)
423
- progress(1.0, desc="βœ… Done!")
424
- return out, "\n".join(lines)+"\n\nβœ… 🎬 Cinematic Engine"
425
 
426
 
427
  # ── UI ────────────────────────────────────────────────────────────
428
  css="#title{text-align:center;font-size:2.3rem;font-weight:900}#sub{text-align:center;color:#888;margin-bottom:1.5rem}"
429
- with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="violet")) as demo:
430
- gr.Markdown("# 🎬 AI Reel Generator", elem_id="title")
431
- gr.Markdown("Image + caption β†’ cinematic reel with music & captions", elem_id="sub")
432
-
433
  with gr.Row():
434
  with gr.Column(scale=1):
435
- img_in = gr.Image(label="πŸ“Έ Upload Image", type="pil", height=280)
436
- cap_in = gr.Textbox(label="✏️ Caption", value="Step into style. Own the moment.", lines=2)
437
- sty_dd = gr.Dropdown(["Premium","Energetic","Fun"], value="Premium", label="🎨 Style")
438
  with gr.Row():
439
- audio_cb = gr.Checkbox(label="🎡 Music + Voice", value=True)
440
- caption_cb= gr.Checkbox(label="πŸ’¬ Captions", value=True)
441
- bokeh_cb = gr.Checkbox(label="✨ Bokeh", value=True)
442
- gen_btn = gr.Button("πŸš€ Generate Reel", variant="primary", size="lg")
443
- gr.Markdown("**Chain:** fal.ai LTX β†’ HF LTX-2 β†’ 🎬 Cinematic Engine")
444
-
445
  with gr.Column(scale=1):
446
- vid_out = gr.Video(label="πŸŽ₯ Cinematic Reel", height=500)
447
- log_out = gr.Textbox(label="πŸ“Š Log", lines=5, interactive=False)
448
-
449
- gen_btn.click(fn=generate,
450
- inputs=[img_in,cap_in,sty_dd,audio_cb,caption_cb,bokeh_cb],
451
- outputs=[vid_out,log_out])
452
 
453
- if __name__ == "__main__":
454
  demo.launch()
 
1
+ import os, tempfile, io, math, time, threading
2
  import numpy as np
3
  import cv2
4
  import gradio as gr
5
+ from PIL import Image, ImageDraw, ImageFont, ImageFilter, ImageEnhance
6
 
7
# ── TOKENS ────────────────────────────────────────────────────────
# Accept the credential under either env var name; strip stray whitespace.
hf_token = (os.environ.get("HF_TOKEN", "") or os.environ.get("HF_KEY", "")).strip()

hf_client = None
if hf_token:
    try:
        from huggingface_hub import login, InferenceClient
        # NOTE: login() talks to the HF hub at import time; failures are
        # swallowed so the app still starts with the local fallback only.
        login(token=hf_token)
        hf_client = InferenceClient(token=hf_token)
        print("βœ… HF ready")
    except Exception as e:
        print(f"⚠️ HF: {e}")

# ── HF MODELS ─────────────────────────────────────────────────────
# Image-to-video backends, tried in order by get_video(). The
# "__local__" sentinel is the guaranteed Ken Burns fallback and must
# remain the last entry.
HF_MODELS = [
    {"id": "Lightricks/LTX-2", "name": "LTX-2 ⚑"},
    {"id": "Wan-AI/Wan2.2-I2V-A14B", "name": "Wan 2.2"},
    {"id": "stabilityai/stable-video-diffusion-img2vid-xt", "name": "SVD-XT"},
    {"id": "KlingTeam/LivePortrait", "name": "Kling LivePortrait"},
    {"id": "Lightricks/LTX-Video", "name": "LTX-Video"},
    {"id": "__local__", "name": "Ken Burns βœ…"},
]
26
 
27
def pil_to_bytes(img):
    """Encode a PIL image as JPEG (quality 92) and return the raw bytes."""
    buf = io.BytesIO()
    img.save(buf, format="JPEG", quality=92)
    return buf.getvalue()
 
29
 
30
def run_timeout(fn, sec, *a, **kw):
    """Run fn(*a, **kw) on a daemon thread, giving up after `sec` seconds.

    Returns the function's result, or None on timeout or exception;
    errors are printed (truncated), never raised to the caller.
    """
    result = [None]
    error = [None]

    def worker():
        try:
            result[0] = fn(*a, **kw)
        except Exception as exc:
            error[0] = str(exc)

    th = threading.Thread(target=worker, daemon=True)
    th.start()
    th.join(timeout=sec)
    if th.is_alive():
        # The daemon thread keeps running, but we stop waiting for it.
        print(" ⏱ timeout")
        return None
    if error[0]:
        print(f" ❌ {error[0][:80]}")
    return result[0]
39
 
40
def try_hf(model_id, pil, prompt):
    """Request a clip from one HF image-to-video model.

    Returns the raw video bytes, or None when no client is configured
    or the call fails (the error is printed, not raised).
    """
    if not hf_client:
        return None
    try:
        resp = hf_client.image_to_video(
            image=pil_to_bytes(pil), model=model_id, prompt=prompt)
        # The client may return a file-like stream or plain bytes.
        return resp.read() if hasattr(resp, "read") else resp
    except Exception as e:
        print(f" ❌ {model_id}: {e}")
        return None
46
+
47
def get_video(pil, prompt, cb=None):
    """Try each backend in HF_MODELS until one yields a video.

    Parameters:
        pil: source PIL image.
        prompt: text prompt forwarded to the hosted models.
        cb: optional progress callback taking a status string.

    Returns (video_path, model_name). The "__local__" entry — and the
    final fallback — render locally via ken_burns, so a playable file
    is always returned.
    """
    for m in HF_MODELS:
        mid, mname = m["id"], m["name"]
        if cb:
            cb(f"⏳ Trying: {mname}")
        if mid == "__local__":
            return ken_burns(pil), mname
        data = run_timeout(try_hf, 50, mid, pil, prompt)
        if data:
            t = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
            t.write(data)
            t.flush()
            # Fix: close the handle so ffmpeg/gradio can reopen the file
            # (the original leaked the descriptor for the app's lifetime).
            t.close()
            return t.name, mname
        time.sleep(1)  # brief pause before hitting the next backend
    return ken_burns(pil), "Ken Burns"
60
 
61
+
62
+ # ══════════════════════════════════════════════════════════════════
63
+ # KEN BURNS (working, image always shows)
64
+ # ══════════════════════════════════════════════════════════════════
65
def ease(t):
    """Smoothstep easing, with input clamped to [0, 1]."""
    t = min(1.0, max(0.0, t))
    return t * t * (3 - 2 * t)
66
def ease_cubic(t):
    """Cubic ease-in-out, with input clamped to [0, 1]."""
    t = min(1.0, max(0.0, t))
    if t < 0.5:
        return 4 * t * t * t
    return 1 - math.pow(-2 * t + 2, 3) / 2
67
def ease_expo(t):
    """Exponential ease-out; exactly 1.0 once t reaches 1."""
    if t >= 1:
        return 1.0
    return 1 - math.pow(2, -10 * t)
68
def ease_bounce(t):
    """Bounce ease-out: the classic Penner piecewise-parabola curve."""
    if t < 1 / 2.75:
        return 7.5625 * t * t
    if t < 2 / 2.75:
        t -= 1.5 / 2.75
        return 7.5625 * t * t + 0.75
    if t < 2.5 / 2.75:
        t -= 2.25 / 2.75
        return 7.5625 * t * t + 0.9375
    t -= 2.625 / 2.75
    return 7.5625 * t * t + 0.984375
73
+
74
def ken_burns(pil, duration_sec=6, fps=30, style="premium"):
    """Render a local Ken Burns clip (720x1280 portrait) from a still image.

    Center-crops/sharpens the source onto an oversized canvas, then sweeps
    a zooming-and-panning window over it, applying a per-style color grade,
    vignette, film grain, letterbox bars and fade in/out.

    Parameters:
        pil: source PIL image.
        duration_sec, fps: clip length and frame rate.
        style: "premium" | "energetic" | "fun" — picks the color grade.

    Returns the path of the written mp4 file.
    """
    TW, TH = 720, 1280                      # output frame size
    pad = 160                               # extra canvas so the window can roam
    BW, BH = TW + pad * 2, TH + pad * 2
    total = duration_sec * fps

    # Prepare image: crop to the target aspect ratio, gentle sharpen/grade.
    img = pil.convert("RGB")
    sw, sh = img.size
    if sw / sh > TW / TH:
        nw = int(sh * TW / TH)
        img = img.crop(((sw - nw) // 2, 0, (sw - nw) // 2 + nw, sh))
    else:
        nh = int(sw * TH / TW)
        img = img.crop((0, (sh - nh) // 2, sw, (sh - nh) // 2 + nh))
    img = img.filter(ImageFilter.UnsharpMask(radius=1.0, percent=120, threshold=2))
    img = ImageEnhance.Contrast(img).enhance(1.06)
    img = ImageEnhance.Color(img).enhance(1.10)
    base = np.array(img.resize((BW, BH), Image.LANCZOS))

    # Pre-baked vignette mask — only darkens beyond 0.80 from center.
    Y, X = np.ogrid[:TH, :TW]
    dist = np.sqrt(((X - TW / 2) / (TW / 2)) ** 2 + ((Y - TH / 2) / (TH / 2)) ** 2)
    vmask = np.clip(1. - 0.28 * np.maximum(dist - 0.80, 0) ** 2, 0, 1).astype(np.float32)

    # Motion segments: (t0, t1, zoom0, zoom1, panx0, panx1, pany0, pany1).
    SEG = [
        (0.00, 0.25, 1.35, 1.12, 0, -int(pad * .10), 0, -int(pad * .12)),
        (0.25, 0.55, 1.12, 1.07, -int(pad * .05), int(pad * .07), -int(pad * .12), -int(pad * .28)),
        (0.55, 0.78, 1.07, 1.04, int(pad * .07), int(pad * .16), -int(pad * .28), -int(pad * .16)),
        (0.78, 1.00, 1.04, 1.00, int(pad * .16), 0, -int(pad * .16), 0),
    ]

    tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    tmp.close()  # fix: cv2 writes to the path; don't leak the open handle
    writer = cv2.VideoWriter(tmp.name, cv2.VideoWriter_fourcc(*"mp4v"), fps, (TW, TH))
    try:
        for i in range(total):
            tg = i / max(total - 1, 1)      # normalized time in [0, 1]

            # Interpolate zoom/pan within the active motion segment.
            zoom = pan_x = pan_y = None
            for t0, t1, z0, z1, px0, px1, py0, py1 in SEG:
                if t0 <= tg <= t1:
                    te = ease_cubic((tg - t0) / (t1 - t0))
                    zoom = z0 + (z1 - z0) * te
                    pan_x = int(px0 + (px1 - px0) * te)
                    pan_y = int(py0 + (py1 - py0) * te)
                    break
            if zoom is None:
                zoom, pan_x, pan_y = 1., 0, 0

            # Micro camera shake, decaying over the first 20% of the clip.
            if tg < 0.20:
                s = (0.20 - tg) / 0.20 * 1.8
                pan_x += int(s * math.sin(i * 1.4))
                pan_y += int(s * math.cos(i * 1.0))

            # Crop the moving window out of the big canvas.
            cw, ch = int(TW / zoom), int(TH / zoom)
            ox, oy = BW // 2 + pan_x, BH // 2 + pan_y
            x1, y1 = max(0, ox - cw // 2), max(0, oy - ch // 2)
            x2, y2 = min(BW, x1 + cw), min(BH, y1 + ch)
            if (x2 - x1) < 10 or (y2 - y1) < 10:
                x1, y1, x2, y2 = 0, 0, TW, TH

            frame = cv2.resize(base[y1:y2, x1:x2], (TW, TH), interpolation=cv2.INTER_LINEAR)

            # Very subtle per-style color grade.
            f = frame.astype(np.float32) / 255.
            if style == "premium":
                f[:, :, 0] = np.clip(f[:, :, 0] * 1.03 + .01, 0, 1)
                f[:, :, 2] = np.clip(f[:, :, 2] * 1.02, 0, 1)
            elif style == "energetic":
                gray = 0.299 * f[:, :, 0:1] + 0.587 * f[:, :, 1:2] + 0.114 * f[:, :, 2:3]
                f = np.clip(gray + 1.2 * (f - gray), 0, 1)
                f = np.clip(f * 1.04, 0, 1)
            elif style == "fun":
                f[:, :, 0] = np.clip(f[:, :, 0] * 1.05, 0, 1)
                f[:, :, 1] = np.clip(f[:, :, 1] * 1.03, 0, 1)
            frame = np.clip(f * 255, 0, 255).astype(np.uint8)

            # Vignette, light film grain, thin letterbox bars.
            frame = np.clip(frame.astype(np.float32) * vmask[:, :, None], 0, 255).astype(np.uint8)
            frame = np.clip(frame.astype(np.float32) + np.random.normal(0, 3, frame.shape), 0, 255).astype(np.uint8)
            frame[:36, :] = 0
            frame[-36:, :] = 0

            # Fade in over the first 2%, fade out over the last 5%.
            if tg < 0.02:
                alpha = ease_expo(tg / 0.02)
            elif tg > 0.95:
                alpha = ease(1 - (tg - 0.95) / 0.05)
            else:
                alpha = 1.
            if alpha < 1.:
                frame = np.clip(frame.astype(np.float32) * alpha, 0, 255).astype(np.uint8)

            writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    finally:
        # Fix: always finalize the container, even if a frame op raises —
        # the original would leave a corrupt/locked file on error.
        writer.release()
    return tmp.name
153
 
154
 
155
  # ══════════════════════════════════════════════════════════════════
156
+ # CAPTIONS β€” burn into existing video via ffmpeg
157
  # ══════════════════════════════════════════════════════════════════
158
def add_captions_ffmpeg(video_path, caption, duration_sec, style):
    """Burn two animated caption lines into the video using ffmpeg drawtext.

    The caption is split roughly in half: line 1 shows 1.0–3.5s, line 2
    from 3.8s until min(6.5, duration_sec - 0.3), each with a 0.4s
    alpha fade in/out. Returns the captioned file path, or the original
    path unchanged when ffmpeg fails.
    """
    words = caption.strip().split()
    mid = max(1, len(words) // 2)
    line1 = " ".join(words[:mid])
    line2 = " ".join(words[mid:]) if len(words) > 1 else line1

    colors = {"premium": "FFD232", "energetic": "3CC8FF", "fun": "FF78C8"}
    col = colors.get(style, "FFFFFF")

    out = video_path.replace(".mp4", "_cap.mp4")
    font_paths = [
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
        "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf",
    ]
    font = ""
    for p in font_paths:
        if os.path.exists(p):
            font = f":fontfile='{p}'"
            break

    def _safe_text(text):
        # Fix: user captions are interpolated into the filter string. A raw
        # apostrophe would terminate the drawtext quote and break the whole
        # filter graph (the command runs via os.system), so swap it for a
        # typographic quote; "%{" would trigger drawtext text expansion.
        return text.replace("'", "\u2019").replace("%{", "%%{")

    def drawtext(text, start, end):
        fade_dur = 0.4
        return (
            f"drawtext=text='{_safe_text(text)}'{font}"
            f":fontsize=44:fontcolor=#{col}"
            f":x=(w-text_w)/2:y=h-130"
            f":box=1:boxcolor=black@0.55:boxborderw=14"
            f":enable='between(t,{start},{end})'"
            f":alpha='if(lt(t,{start + fade_dur}),(t-{start})/{fade_dur},"
            f"if(gt(t,{end - fade_dur}),({end}-t)/{fade_dur},1))'"
        )

    vf = f"{drawtext(line1, 1.0, 3.5)},{drawtext(line2, 3.8, min(6.5, duration_sec - 0.3))}"
    ret = os.system(f'ffmpeg -y -i "{video_path}" -vf "{vf}" -c:a copy "{out}" -loglevel error')
    return out if (ret == 0 and os.path.exists(out)) else video_path
196
 
197
+
198
+ # ══════════════════════════════════════════════════════════════════
199
+ # AUDIO β€” BGM + optional TTS
200
+ # ══════════════════════════════════════════════════════════════════
201
def make_bgm(duration_sec, out_path, style="premium"):
    """Synthesize a simple backing track and write it as 16-bit mono WAV.

    Layers a kick on every beat, a slowly pulsing bass, a three-note pad
    melody and eighth-note hi-hats; tempo and pitches depend on `style`
    ("premium" | "energetic" | "fun"). The mix gets a 0.5s fade in/out.
    """
    import wave
    sr = 44100
    n = int(sr * duration_sec)
    t = np.linspace(0, duration_sec, n, endpoint=False)
    bpm = {"premium": 88, "energetic": 126, "fun": 104}.get(style, 88)
    beat = 60. / bpm

    # Kick: exponentially decaying sine with a falling pitch, once per beat.
    kick = np.zeros(n, np.float32)
    for beat_i in range(int(duration_sec / beat) + 2):
        start = int(beat_i * beat * sr)
        if start >= n:
            break
        seg = min(int(sr * .10), n - start)
        decay = np.exp(-20 * np.arange(seg) / sr)
        kick[start:start + seg] += decay * np.sin(
            2 * math.pi * 55 * np.exp(-25 * np.arange(seg) / sr) * np.arange(seg) / sr) * 0.55

    # Bass: sine amplitude-modulated at a quarter of the beat rate.
    bass_f = {"premium": 55, "energetic": 80, "fun": 65}.get(style, 55)
    bass = np.sin(2 * math.pi * bass_f * t) * 0.10 * (0.5 + 0.5 * np.sin(2 * math.pi * (bpm / 60 / 4) * t))

    # Melody: three tones cross-fading on phase-shifted envelopes.
    mf = {"premium": [261, 329, 392], "energetic": [330, 415, 494], "fun": [392, 494, 587]}.get(style, [261, 329, 392])
    mel = np.zeros(n, np.float32)
    for j, freq in enumerate(mf):
        env = np.clip(0.5 + 0.5 * np.sin(2 * math.pi * 1.5 * t - j * 2.1), 0, 1)
        mel += np.sin(2 * math.pi * freq * t) * env * 0.045

    # Hi-hat: short bursts of decaying white noise every half beat.
    hat = np.zeros(n, np.float32)
    hs = beat / 2
    for hat_i in range(int(duration_sec / hs) + 2):
        start = int(hat_i * hs * sr)
        if start >= n:
            break
        seg = min(int(sr * .03), n - start)
        hat[start:start + seg] += np.random.randn(seg) * np.exp(-80 * np.arange(seg) / sr) * 0.06

    # Mix down, then apply the half-second fades.
    mix = np.clip((kick + bass + mel + hat) * 0.18, -1, 1)
    fade = int(sr * .5)
    mix[:fade] *= np.linspace(0, 1, fade)
    mix[-fade:] *= np.linspace(1, 0, fade)

    with wave.open(out_path, "w") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(sr)
        wf.writeframes((mix * 32767).astype(np.int16).tobytes())
 
239
 
240
def add_audio(video_path, caption, duration_sec, style):
    """Mux a synthesized BGM track — plus a gTTS voiceover when available —
    into the video.

    Parameters:
        video_path: source mp4 (the video stream is copied, not re-encoded).
        caption: text spoken by the TTS voice (truncated to 200 chars).
        duration_sec: length of the generated audio bed.
        style: forwarded to make_bgm for tempo/pitch selection.

    Returns the path of the new "_final.mp4", or the original `video_path`
    if the final ffmpeg mux failed.
    """
    bgm = video_path.replace(".mp4", "_bgm.wav")
    final = video_path.replace(".mp4", "_final.mp4")
    make_bgm(duration_sec, bgm, style)

    # Best effort: layer a spoken caption (loud) over the music (soft).
    # Any failure — gtts missing, no network, ffmpeg error — falls back
    # to the plain BGM track.
    audio = bgm
    try:
        from gtts import gTTS
        tts_mp3 = video_path.replace(".mp4", "_tts.mp3")
        # (fix: the original also computed an unused "_tts.wav" path)
        gTTS(text=caption[:200], lang="en", slow=False).save(tts_mp3)
        mixed = video_path.replace(".mp4", "_mix.wav")
        os.system(f'ffmpeg -y -i "{bgm}" -i "{tts_mp3}" '
                  f'-filter_complex "[0]volume=0.20[a];[1]volume=0.95[b];[a][b]amix=inputs=2:duration=first" '
                  f'-t {duration_sec} "{mixed}" -loglevel error')
        if os.path.exists(mixed):
            audio = mixed
    except Exception as e:
        print(f" TTS skip: {e}")

    os.system(f'ffmpeg -y -i "{video_path}" -i "{audio}" '
              f'-c:v copy -c:a aac -b:a 128k -shortest "{final}" -loglevel error')
    return final if os.path.exists(final) else video_path
262
 
263
 
264
+ # ══════════════════════════════════════════════════════════════════
265
+ # MAIN
266
+ # ══════════════════════════════════════════════════════════════════
267
def generate(image, caption, style, add_aud, add_cap, progress=gr.Progress()):
    """Gradio callback: turn an uploaded image into a finished reel.

    Pipeline: get_video (hosted models with local fallback), then
    optional burned-in captions, then optional music + voiceover.
    Returns (video_path, log_text).
    """
    if image is None:
        return None, "⚠️ Upload an image!"
    pil = image if isinstance(image, Image.Image) else Image.fromarray(image)
    cap = caption.strip() or "Premium Quality. Shop Now."
    prompt = f"cinematic product ad, {cap}, smooth motion, dramatic lighting"
    lines = []

    def log(msg):
        lines.append(msg)
        progress(min(.1 + len(lines) * .10, .80), desc=msg)

    progress(.05, desc="πŸš€ Starting...")
    video_path, model_used = get_video(pil, prompt, cb=log)
    # NOTE(review): clip length is assumed to be 6s regardless of which
    # backend produced the video — confirm against the hosted models.
    dur = 6

    if add_cap:
        log("πŸ’¬ Adding captions...")
        video_path = add_captions_ffmpeg(video_path, cap, dur, style.lower())

    if add_aud:
        log("🎡 Adding music + voice...")
        video_path = add_audio(video_path, cap, dur, style.lower())

    progress(1.0, desc="βœ… Done!")
    return video_path, "\n".join(lines) + f"\n\nβœ… Used: {model_used}"
 
 
 
 
291
 
292
 
293
# ── UI ────────────────────────────────────────────────────────────
css="#title{text-align:center;font-size:2.3rem;font-weight:900}#sub{text-align:center;color:#888;margin-bottom:1.5rem}"
with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="violet")) as demo:
    gr.Markdown("# 🎬 AI Reel Generator", elem_id="title")
    gr.Markdown("Image β†’ AI video + captions + music", elem_id="sub")

    with gr.Row():
        # Left column: inputs and options.
        with gr.Column(scale=1):
            img_in = gr.Image(label="πŸ“Έ Upload Image", type="pil", height=300)
            cap_in = gr.Textbox(label="✏️ Caption", value="Step into style. Own the moment.", lines=2)
            sty_dd = gr.Dropdown(["Premium", "Energetic", "Fun"], value="Premium", label="🎨 Style")
            with gr.Row():
                aud_cb = gr.Checkbox(label="🎡 Music + Voice", value=True)
                cap_cb = gr.Checkbox(label="πŸ’¬ Captions", value=True)
            gen_btn = gr.Button("πŸš€ Generate Reel", variant="primary", size="lg")
            gr.Markdown("**πŸ”— Chain:** LTX-2 ⚑ β†’ Wan 2.2 β†’ SVD-XT β†’ Kling β†’ LTX-Video β†’ Ken Burns βœ…")

        # Right column: output video and the pipeline log.
        with gr.Column(scale=1):
            vid_out = gr.Video(label="πŸŽ₯ Reel", height=500)
            log_out = gr.Textbox(label="πŸ“Š Log", lines=6, interactive=False)

    gen_btn.click(fn=generate, inputs=[img_in, cap_in, sty_dd, aud_cb, cap_cb], outputs=[vid_out, log_out])

if __name__ == "__main__":
    demo.launch()