Update app.py
Browse files
app.py
CHANGED
|
@@ -1,12 +1,10 @@
|
|
| 1 |
-
import os, tempfile, io, math, time, threading, base64, requests
|
| 2 |
import numpy as np
|
| 3 |
import cv2
|
| 4 |
import gradio as gr
|
| 5 |
from PIL import Image, ImageFilter, ImageEnhance, ImageDraw, ImageFont
|
| 6 |
|
| 7 |
-
#
|
| 8 |
-
# TOKENS
|
| 9 |
-
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 10 |
FAL_KEY = (os.environ.get("FAL_KEY","") or os.environ.get("FAL_API_KEY","")).strip()
|
| 11 |
HF_TOKEN = (os.environ.get("HF_TOKEN","") or os.environ.get("HF_KEY","")).strip()
|
| 12 |
|
|
@@ -17,49 +15,37 @@ if HF_TOKEN:
|
|
| 17 |
login(token=HF_TOKEN); hf_client = InferenceClient(token=HF_TOKEN)
|
| 18 |
print("β
HF ready")
|
| 19 |
except Exception as e: print(f"β οΈ HF: {e}")
|
| 20 |
-
|
| 21 |
if FAL_KEY: os.environ["FAL_KEY"] = FAL_KEY; print("β
fal.ai ready")
|
| 22 |
-
print("β
App ready!")
|
| 23 |
|
| 24 |
-
#
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
def pil_to_bytes(img, quality=92):
|
| 28 |
-
buf = io.BytesIO(); img.save(buf, format="JPEG", quality=quality); return buf.getvalue()
|
| 29 |
|
| 30 |
-
def
|
| 31 |
-
|
| 32 |
-
|
| 33 |
|
| 34 |
-
def
|
| 35 |
box=[None]; err=[None]
|
| 36 |
-
def
|
| 37 |
-
try: box[0]=fn(*
|
| 38 |
except Exception as e: err[0]=str(e)
|
| 39 |
-
t=threading.Thread(target=
|
| 40 |
-
if t.is_alive(): print(f" β± timeout {
|
| 41 |
-
if err[0]: print(f" β {err[0][:
|
| 42 |
return box[0]
|
| 43 |
|
| 44 |
-
def
|
| 45 |
t=max(0.,min(1.,t)); return t*t*(3-2*t)
|
| 46 |
|
| 47 |
-
def
|
| 48 |
t=max(0.,min(1.,t)); return 4*t*t*t if t<.5 else 1-math.pow(-2*t+2,3)/2
|
| 49 |
|
| 50 |
-
def
|
| 51 |
return 1-math.pow(2,-10*t) if t<1 else 1.
|
| 52 |
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
elif t<2/2.75: t-=1.5/2.75; return 7.5625*t*t+0.75
|
| 56 |
-
elif t<2.5/2.75: t-=2.25/2.75; return 7.5625*t*t+0.9375
|
| 57 |
-
else: t-=2.625/2.75; return 7.5625*t*t+0.984375
|
| 58 |
-
|
| 59 |
-
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 60 |
-
# FAL.AI + HF CHAIN
|
| 61 |
-
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 62 |
-
def try_fal_ltx(pil_image, prompt):
|
| 63 |
if not FAL_KEY: return None
|
| 64 |
try:
|
| 65 |
import fal_client
|
|
@@ -73,566 +59,396 @@ def try_fal_ltx(pil_image, prompt):
|
|
| 73 |
if vurl:
|
| 74 |
resp = requests.get(vurl, timeout=60)
|
| 75 |
if resp.status_code==200: return resp.content
|
| 76 |
-
except Exception as e: print(f" β fal
|
| 77 |
return None
|
| 78 |
|
| 79 |
-
def
|
| 80 |
if not hf_client: return None
|
| 81 |
try:
|
| 82 |
-
r = hf_client.image_to_video(image=pil_to_bytes(pil_image),
|
|
|
|
| 83 |
return r.read() if hasattr(r,"read") else r
|
| 84 |
-
except Exception as e: print(f" β HF
|
| 85 |
return None
|
| 86 |
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
]
|
| 92 |
-
|
| 93 |
-
def get_ai_video(pil_image, prompt, style, cb=None):
|
| 94 |
-
for name, fn, timeout in CHAIN:
|
| 95 |
if cb: cb(f"β³ {name}...")
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
return None, "π¬ Cinematic Engine"
|
| 100 |
|
| 101 |
|
| 102 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 103 |
-
#
|
| 104 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 105 |
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
sh=
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
"vx": np.random.uniform(-0.3,0.3),
|
| 185 |
-
"vy": np.random.uniform(-0.6,-0.1),
|
| 186 |
-
"color": c[np.random.randint(len(c))],
|
| 187 |
-
"phase": np.random.uniform(0,math.pi*2),
|
| 188 |
-
} for _ in range(n)]
|
| 189 |
-
|
| 190 |
-
def draw(self, frame, t):
|
| 191 |
-
overlay=frame.astype(np.float32)
|
| 192 |
-
for p in self.particles:
|
| 193 |
-
px=int(p["x"]+p["vx"]*t*60+math.sin(t*2+p["phase"])*8)%self.W
|
| 194 |
-
py=int((p["y"]+p["vy"]*t*60)%self.H)
|
| 195 |
-
pulse=0.7+0.3*math.sin(t*3+p["phase"])
|
| 196 |
-
r=max(2,int(p["r"]*pulse)); a=p["alpha"]*pulse
|
| 197 |
-
# Draw soft glowing circle
|
| 198 |
-
tmp=np.zeros_like(overlay)
|
| 199 |
-
cv2.circle(tmp,(px,py),r,p["color"],-1)
|
| 200 |
-
cv2.circle(tmp,(px,py),r//2,
|
| 201 |
-
tuple(min(255,int(c*1.3)) for c in p["color"]),-1)
|
| 202 |
-
overlay=overlay*(1-a)+tmp.astype(np.float32)*a
|
| 203 |
-
return np.clip(overlay,0,255).astype(np.uint8)
|
| 204 |
-
|
| 205 |
-
# ββ Animated Text Captions βββββββββββββββββββββββββββββββββββββββββ
|
| 206 |
def get_font(size):
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
"/usr/share/fonts/TTF/DejaVuSans-Bold.ttf"]
|
| 211 |
-
for p in paths:
|
| 212 |
if os.path.exists(p):
|
| 213 |
-
try: return ImageFont.truetype(p,size)
|
| 214 |
except: pass
|
| 215 |
return ImageFont.load_default()
|
| 216 |
|
| 217 |
-
def draw_caption(frame_pil, text, t, W, H, style, phase="in"):
|
| 218 |
-
"""Slide-in animated caption with gradient background."""
|
| 219 |
-
if not text.strip(): return frame_pil
|
| 220 |
-
draw=ImageDraw.Draw(frame_pil)
|
| 221 |
|
| 222 |
-
|
| 223 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
|
|
|
|
| 225 |
# Word wrap
|
| 226 |
-
words=text.split(); lines=[]; line=""
|
| 227 |
for w in words:
|
| 228 |
-
test=line+" "+w
|
| 229 |
-
bbox=
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
|
|
|
| 233 |
if line: lines.append(line)
|
| 234 |
|
| 235 |
-
|
| 236 |
-
total_h=len(lines)*
|
| 237 |
-
base_y=H-total_h-
|
| 238 |
-
|
| 239 |
-
# Eased slide-in
|
| 240 |
-
if phase=="in":
|
| 241 |
-
slide=ease_out_bounce(min(t/0.5,1.0))
|
| 242 |
-
alpha_t=min(t/0.3,1.0)
|
| 243 |
-
elif phase=="out":
|
| 244 |
-
slide=ease_in_out(1.-min(t/0.4,1.))
|
| 245 |
-
alpha_t=1.-min(t/0.4,1.)
|
| 246 |
-
else:
|
| 247 |
-
slide=1.; alpha_t=1.
|
| 248 |
-
|
| 249 |
-
offset_y=int((1.-slide)*60)
|
| 250 |
|
| 251 |
-
|
| 252 |
-
|
|
|
|
|
|
|
| 253 |
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
tx=(W-tw)//2; ty=base_y+li*line_h+offset_y
|
| 257 |
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
overlay=Image.new("RGBA",frame_pil.size,(0,0,0,0))
|
| 263 |
-
od=ImageDraw.Draw(overlay)
|
| 264 |
-
od.rounded_rectangle([rx1,ry1,rx2,ry2],radius=10,
|
| 265 |
-
fill=(0,0,0,int(160*alpha_t)))
|
| 266 |
-
frame_pil=Image.alpha_composite(frame_pil.convert("RGBA"),overlay).convert("RGB")
|
| 267 |
-
draw=ImageDraw.Draw(frame_pil)
|
| 268 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
# Shadow
|
| 270 |
-
draw.text((tx+2,ty+2),ln,font=
|
| 271 |
-
#
|
| 272 |
-
r,g,b=
|
| 273 |
-
draw.text((tx,ty),ln,font=
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
return
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
def
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
p=params.get(style,params["premium"])
|
| 321 |
-
|
| 322 |
-
# Kick drum (thump)
|
| 323 |
-
beat_sec=60./p["kick_bpm"]
|
| 324 |
-
kick=np.zeros(n,dtype=np.float32)
|
| 325 |
-
for i in range(int(duration_sec/beat_sec)+1):
|
| 326 |
-
s=int(i*beat_sec*sr)
|
| 327 |
-
if s>=n: break
|
| 328 |
-
env=np.exp(-15*np.arange(min(int(sr*.12),n-s))/sr)
|
| 329 |
-
freq_sweep=p["bass_freq"]*np.exp(-30*np.arange(min(int(sr*.12),n-s))/sr)
|
| 330 |
-
kick[s:s+len(env)]+=env*np.sin(2*math.pi*freq_sweep*np.arange(len(env))/sr)*0.7
|
| 331 |
-
|
| 332 |
-
# Bass line
|
| 333 |
-
bass=np.sin(2*math.pi*p["bass_freq"]*t_arr)*0.15
|
| 334 |
-
bass*=0.5+0.5*np.sin(2*math.pi*(p["kick_bpm"]/60/2)*t_arr)
|
| 335 |
-
|
| 336 |
-
# Melody arp
|
| 337 |
-
melody=np.zeros(n,dtype=np.float32)
|
| 338 |
-
arp_speed=2.0
|
| 339 |
-
for j,freq in enumerate(p["melody_freqs"]):
|
| 340 |
-
phase_off=j*0.33
|
| 341 |
-
env=np.clip(0.5+0.5*np.sin(2*math.pi*arp_speed*t_arr-phase_off*math.pi*2),0,1)
|
| 342 |
-
melody+=np.sin(2*math.pi*freq*t_arr)*env*0.06
|
| 343 |
-
|
| 344 |
-
# Hi-hat (noise burst on 8ths)
|
| 345 |
-
hat_sec=beat_sec/2
|
| 346 |
-
hat=np.zeros(n,dtype=np.float32)
|
| 347 |
-
for i in range(int(duration_sec/hat_sec)+1):
|
| 348 |
-
s=int(i*hat_sec*sr)
|
| 349 |
-
if s>=n: break
|
| 350 |
-
l=min(int(sr*.04),n-s)
|
| 351 |
-
env=np.exp(-60*np.arange(l)/sr)
|
| 352 |
-
hat[s:s+l]+=np.random.randn(l)*env*0.08
|
| 353 |
-
|
| 354 |
-
# Mix
|
| 355 |
-
mix=(kick+bass+melody+hat)*p["gain"]
|
| 356 |
-
# Fade in/out
|
| 357 |
-
fade=int(sr*.5)
|
| 358 |
-
mix[:fade]*=np.linspace(0,1,fade)
|
| 359 |
-
mix[-fade:]*=np.linspace(1,0,fade)
|
| 360 |
-
mix=np.clip(mix,-1,1)
|
| 361 |
-
|
| 362 |
-
# Save as WAV
|
| 363 |
-
import wave, struct
|
| 364 |
-
samples=(mix*32767).astype(np.int16)
|
| 365 |
with wave.open(out_path,"w") as wf:
|
| 366 |
wf.setnchannels(1); wf.setsampwidth(2); wf.setframerate(sr)
|
| 367 |
-
wf.writeframes(
|
| 368 |
return True
|
| 369 |
|
| 370 |
-
def merge_audio_video(video_path, audio_path, out_path):
|
| 371 |
-
"""Use ffmpeg to mux audio + video."""
|
| 372 |
-
ret=os.system(
|
| 373 |
-
f'ffmpeg -y -i "{video_path}" -i "{audio_path}" '
|
| 374 |
-
f'-c:v copy -c:a aac -b:a 128k '
|
| 375 |
-
f'-shortest "{out_path}" -loglevel error'
|
| 376 |
-
)
|
| 377 |
-
return os.path.exists(out_path)
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
# ββ MAIN CINEMATIC RENDERER βββββββββββββββββββββββββββββββββββββββ
|
| 381 |
-
def render_cinematic(
|
| 382 |
-
pil_image,
|
| 383 |
-
caption_text = "Premium Quality",
|
| 384 |
-
style = "premium",
|
| 385 |
-
duration_sec = 7,
|
| 386 |
-
fps = 30,
|
| 387 |
-
add_audio = True,
|
| 388 |
-
add_caption = True,
|
| 389 |
-
add_3d = True,
|
| 390 |
-
add_bokeh = True,
|
| 391 |
-
add_bars = True,
|
| 392 |
-
):
|
| 393 |
-
TW,TH=720,1280; pad=200; BW,BH=TW+pad*2,TH+pad*2
|
| 394 |
-
total=duration_sec*fps
|
| 395 |
-
|
| 396 |
-
# Prepare base image
|
| 397 |
-
img=pil_image.convert("RGB"); sw,sh=img.size
|
| 398 |
-
if sw/sh>TW/TH:
|
| 399 |
-
nw=int(sh*TW/TH); img=img.crop(((sw-nw)//2,0,(sw-nw)//2+nw,sh))
|
| 400 |
-
else:
|
| 401 |
-
nh=int(sw*TH/TW); img=img.crop((0,(sh-nh)//2,sw,(sh-nh)//2+nh))
|
| 402 |
-
img=img.filter(ImageFilter.UnsharpMask(radius=1.2,percent=130,threshold=2))
|
| 403 |
-
img=ImageEnhance.Contrast(img).enhance(1.08)
|
| 404 |
-
img=ImageEnhance.Color(img).enhance(1.15)
|
| 405 |
-
base=np.array(img.resize((BW,BH),Image.LANCZOS))
|
| 406 |
-
|
| 407 |
-
# Pre-build vignette β gentle, not crushing blacks
|
| 408 |
-
Y,X=np.ogrid[:TH,:TW]
|
| 409 |
-
vmask=np.clip(1.-0.40*(np.sqrt(((X-TW/2)/(TW/2*.90))**2+((Y-TH/2)/(TH/2))**2)**1.8),0,1).astype(np.float32)
|
| 410 |
-
|
| 411 |
-
# Bokeh system
|
| 412 |
-
bokeh=BokehSystem(TW,TH,n=22,style=style) if add_bokeh else None
|
| 413 |
-
|
| 414 |
-
# Caption segments [start_t, end_t, text, phase_in_dur, phase_out_dur]
|
| 415 |
-
words=caption_text.strip().split()
|
| 416 |
-
mid=len(words)//2
|
| 417 |
-
cap_lines=[" ".join(words[:mid]) or caption_text, " ".join(words[mid:]) or ""]
|
| 418 |
-
cap_segs=[
|
| 419 |
-
(0.8, 3.2, cap_lines[0]),
|
| 420 |
-
(3.5, 6.5, cap_lines[1] if cap_lines[1] else cap_lines[0]),
|
| 421 |
-
]
|
| 422 |
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 429 |
]
|
| 430 |
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
if not add_3d: return 0,0
|
| 434 |
-
ry = math.sin(tg*math.pi*1.5)*6.0 # Β±6Β° Y rotation
|
| 435 |
-
rx = math.sin(tg*math.pi+0.5)*2.5 # Β±2.5Β° X tilt
|
| 436 |
-
return ry, rx
|
| 437 |
|
| 438 |
-
|
| 439 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 440 |
|
| 441 |
for i in range(total):
|
| 442 |
-
tg=i/(total-1)
|
| 443 |
-
|
| 444 |
-
#
|
| 445 |
-
zoom=pan_x=pan_y=None
|
| 446 |
-
for t0,t1,z0,z1,px0,px1,py0,py1 in
|
| 447 |
-
if t0<=tg<=t1:
|
| 448 |
-
te=
|
| 449 |
-
zoom=z0+(z1-z0)*te
|
|
|
|
|
|
|
| 450 |
break
|
| 451 |
-
if zoom is None: zoom,pan_x,pan_y=1
|
| 452 |
-
|
| 453 |
-
# Micro shake (first 20%)
|
| 454 |
-
if tg<0.20:
|
| 455 |
-
s=(0.20-tg)/0.20*2.0
|
| 456 |
-
pan_x+=int(s*math.sin(i*1.4)); pan_y+=int(s*math.cos(i*1.0))
|
| 457 |
-
|
| 458 |
-
# Crop
|
| 459 |
-
cw,ch=int(TW/zoom),int(TH/zoom)
|
| 460 |
-
ox,oy=BW//2+pan_x,BH//2+pan_y
|
| 461 |
-
x1,y1=max(0,ox-cw//2),max(0,oy-ch//2)
|
| 462 |
-
x2,y2=min(BW,x1+cw),min(BH,y1+ch)
|
| 463 |
-
if (x2-x1)<10 or (y2-y1)<10: x1,y1,x2,y2=0,0,TW,TH
|
| 464 |
|
| 465 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 466 |
|
| 467 |
-
#
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
|
|
|
|
|
|
| 471 |
|
| 472 |
-
|
| 473 |
-
frame=color_grade(frame,style)
|
| 474 |
|
| 475 |
-
#
|
| 476 |
-
frame=
|
| 477 |
|
| 478 |
-
#
|
| 479 |
-
frame=
|
| 480 |
|
| 481 |
-
# Film grain
|
| 482 |
-
frame=np.clip(frame.astype(np.float32)+
|
|
|
|
| 483 |
|
| 484 |
-
# Bokeh
|
| 485 |
-
if bokeh: frame=bokeh.draw(frame, tg*duration_sec)
|
| 486 |
|
| 487 |
-
# Cinematic bars
|
| 488 |
-
|
| 489 |
|
| 490 |
-
# Fade
|
| 491 |
-
if
|
| 492 |
-
elif tg>0.
|
| 493 |
-
else:
|
| 494 |
-
if alpha<1
|
|
|
|
| 495 |
|
| 496 |
-
# Captions
|
| 497 |
if add_caption:
|
| 498 |
-
|
| 499 |
-
t_sec=tg*duration_sec
|
| 500 |
for (cs,ce,ct) in cap_segs:
|
| 501 |
-
if cs<=t_sec<=ce:
|
| 502 |
-
|
| 503 |
-
if seg_t<0.5: phase="in"
|
| 504 |
-
elif seg_t>seg_len-0.4: phase="out"
|
| 505 |
-
else: phase="mid"
|
| 506 |
-
local_t=seg_t if phase=="in" else (seg_t-(seg_len-0.4)) if phase=="out" else 0.5
|
| 507 |
-
fp=draw_caption(fp,ct,local_t,TW,TH,style,phase)
|
| 508 |
-
frame=np.array(fp)
|
| 509 |
|
| 510 |
-
writer.write(cv2.cvtColor(frame,cv2.COLOR_RGB2BGR))
|
| 511 |
|
| 512 |
writer.release()
|
| 513 |
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
|
| 518 |
-
bgm_path =tmp_video.name.replace(".mp4","_bgm.wav")
|
| 519 |
-
tts_path =tmp_video.name.replace(".mp4","_tts.wav")
|
| 520 |
-
audio_path=tmp_video.name.replace(".mp4","_audio.wav")
|
| 521 |
-
final_path=tmp_video.name.replace(".mp4","_final.mp4")
|
| 522 |
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
f'ffmpeg -y -i "{bgm_path}" -i "{tts_path}" '
|
| 531 |
-
f'-filter_complex "[0]volume=0.25[a];[1]volume=1.0[b];[a][b]amix=inputs=2:duration=first" '
|
| 532 |
-
f'-t {duration_sec} "{audio_path}" -loglevel error'
|
| 533 |
-
)
|
| 534 |
-
else:
|
| 535 |
-
audio_path=bgm_path
|
| 536 |
|
| 537 |
-
|
| 538 |
-
if merge_audio_video(tmp_video.name, audio_path, final_path):
|
| 539 |
-
return final_path
|
| 540 |
-
|
| 541 |
-
return tmp_video.name
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 545 |
-
# MAIN PIPELINE
|
| 546 |
-
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 547 |
-
def generate_ad(image, caption, style, add_audio, add_caption,
|
| 548 |
-
add_3d, add_bokeh, add_bars, progress=gr.Progress()):
|
| 549 |
-
if image is None: return None, "β οΈ Upload an image!"
|
| 550 |
-
pil=image if isinstance(image,Image.Image) else Image.fromarray(image)
|
| 551 |
-
cap=caption.strip() or "Premium Quality. Shop Now."
|
| 552 |
-
prompt=f"cinematic product advertisement, {cap}, smooth motion, dramatic lighting"
|
| 553 |
-
lines=[]
|
| 554 |
|
| 555 |
-
|
| 556 |
-
|
| 557 |
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
# Try AI models first
|
| 561 |
-
ai_video, model_used = get_ai_video(pil, prompt, style, cb=log)
|
| 562 |
-
|
| 563 |
-
if ai_video and "Ken Burns" not in model_used and "Cinematic" not in model_used:
|
| 564 |
-
# AI video got β add audio+captions on top via ffmpeg
|
| 565 |
-
log(f"β
AI video from {model_used}")
|
| 566 |
-
progress(.85,desc="π΅ Adding audio + captions...")
|
| 567 |
-
|
| 568 |
-
# For AI video: just add audio (captions would need re-encoding)
|
| 569 |
if add_audio:
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
ai_video=final
|
| 575 |
-
progress(1.0,desc="β
Done!"); return ai_video, "\n".join(lines)+f"\n\nβ
{model_used}"
|
| 576 |
-
|
| 577 |
-
# Cinematic Engine
|
| 578 |
-
log("π¬ Cinematic Engine rendering...")
|
| 579 |
-
progress(.70,desc="π¬ Rendering 3D cinematic video...")
|
| 580 |
-
out=render_cinematic(
|
| 581 |
-
pil, caption_text=cap, style=style.lower(),
|
| 582 |
-
duration_sec=7, fps=30,
|
| 583 |
-
add_audio=add_audio, add_caption=add_caption,
|
| 584 |
-
add_3d=add_3d, add_bokeh=add_bokeh, add_bars=add_bars,
|
| 585 |
-
)
|
| 586 |
-
progress(1.0,desc="β
Done!")
|
| 587 |
-
return out, "\n".join(lines)+"\n\nβ
π¬ Cinematic Engine (3D + Bokeh + Audio + Captions)"
|
| 588 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 589 |
|
| 590 |
-
|
| 591 |
-
#
|
| 592 |
-
#
|
| 593 |
-
css=""
|
| 594 |
-
#title
|
| 595 |
-
|
| 596 |
-
"""
|
| 597 |
-
with gr.Blocks(css=css,theme=gr.themes.Soft(primary_hue="violet")) as demo:
|
| 598 |
-
gr.Markdown("# π¬ AI Reel Generator",elem_id="title")
|
| 599 |
-
gr.Markdown("Image + caption β **cinematic AI video** with 3D, bokeh, music & captions",elem_id="sub")
|
| 600 |
|
| 601 |
with gr.Row():
|
| 602 |
with gr.Column(scale=1):
|
| 603 |
-
img_in=gr.Image(label="πΈ Upload Image",type="pil",height=280)
|
| 604 |
-
cap_in=gr.Textbox(label="βοΈ Caption
|
| 605 |
-
|
| 606 |
-
sty_dd=gr.Dropdown(["Premium","Energetic","Fun"],value="Premium",label="π¨ Style")
|
| 607 |
-
|
| 608 |
-
with gr.Row():
|
| 609 |
-
audio_cb =gr.Checkbox(label="π΅ Music + Voice",value=True)
|
| 610 |
-
caption_cb=gr.Checkbox(label="π¬ Captions", value=True)
|
| 611 |
with gr.Row():
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
gr.Markdown(
|
| 619 |
-
"**π Pipeline:**\n"
|
| 620 |
-
"1. π€ fal.ai LTX-Video (if FAL_KEY set)\n"
|
| 621 |
-
"2. π€ HF LTX-2 (if HF_TOKEN set)\n"
|
| 622 |
-
"3. π¬ **Cinematic Engine** β 3D warp + bokeh\n"
|
| 623 |
-
" + animated captions + lo-fi BGM + TTS voice"
|
| 624 |
-
)
|
| 625 |
|
| 626 |
with gr.Column(scale=1):
|
| 627 |
-
vid_out=gr.Video(label="π₯ Cinematic Reel",height=
|
| 628 |
-
log_out=gr.Textbox(label="π Log",lines=5,interactive=False)
|
| 629 |
|
| 630 |
-
gen_btn.click(
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
outputs=[vid_out,log_out],
|
| 634 |
-
)
|
| 635 |
-
gr.Markdown("---\n**Cinematic Engine:** 3D Perspective Warp Β· Bokeh Particles Β· Split-tone Grade Β· Light Leak Β· Film Grain Β· Animated Captions Β· Lo-fi BGM Β· TTS Voiceover Β· 30fps 720Γ1280")
|
| 636 |
|
| 637 |
-
if __name__=="__main__":
|
| 638 |
demo.launch()
|
|
|
|
| 1 |
+
import os, tempfile, io, math, time, threading, base64, requests
|
| 2 |
import numpy as np
|
| 3 |
import cv2
|
| 4 |
import gradio as gr
|
| 5 |
from PIL import Image, ImageFilter, ImageEnhance, ImageDraw, ImageFont
|
| 6 |
|
| 7 |
+
# ββ TOKENS ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
|
|
|
| 8 |
FAL_KEY = (os.environ.get("FAL_KEY","") or os.environ.get("FAL_API_KEY","")).strip()
|
| 9 |
HF_TOKEN = (os.environ.get("HF_TOKEN","") or os.environ.get("HF_KEY","")).strip()
|
| 10 |
|
|
|
|
| 15 |
login(token=HF_TOKEN); hf_client = InferenceClient(token=HF_TOKEN)
|
| 16 |
print("β
HF ready")
|
| 17 |
except Exception as e: print(f"β οΈ HF: {e}")
|
|
|
|
| 18 |
if FAL_KEY: os.environ["FAL_KEY"] = FAL_KEY; print("β
fal.ai ready")
|
|
|
|
| 19 |
|
| 20 |
+
# ββ HELPERS βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 21 |
+
def pil_to_bytes(img, q=92):
    """Encode a PIL image as JPEG and return the raw bytes.

    q: JPEG quality (0-100); default 92 keeps output near-lossless.
    """
    buf = io.BytesIO()
    img.save(buf, format="JPEG", quality=q)
    return buf.getvalue()
|
|
|
|
|
|
|
| 23 |
|
| 24 |
+
def save_bytes(data):
    """Write raw video bytes to a fresh temporary .mp4 file; return its path."""
    tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    tmp.write(data)
    tmp.flush()
    return tmp.name
|
| 27 |
|
| 28 |
+
def run_timeout(fn, sec, *a, **kw):
    """Run fn(*a, **kw) on a daemon thread, giving up after `sec` seconds.

    Returns fn's result, or None on timeout or if fn raised; the error is
    printed (truncated to 80 chars) instead of propagated.
    """
    result = [None]
    failure = [None]

    def worker():
        try:
            result[0] = fn(*a, **kw)
        except Exception as exc:
            failure[0] = str(exc)

    th = threading.Thread(target=worker, daemon=True)
    th.start()
    th.join(timeout=sec)
    if th.is_alive():
        print(f" β± timeout {sec}s")
        return None
    if failure[0]:
        print(f" β {failure[0][:80]}")
    return result[0]
|
| 37 |
|
| 38 |
+
def ease(t):
    """Smoothstep easing: clamp t to [0, 1], return 3t^2 - 2t^3."""
    t = max(0., min(1., t))
    return t * t * (3 - 2 * t)
|
| 40 |
|
| 41 |
+
def ease_cubic(t):
    """Cubic ease-in-out over t clamped to [0, 1]."""
    t = max(0., min(1., t))
    if t < .5:
        return 4 * t * t * t
    return 1 - math.pow(-2 * t + 2, 3) / 2
|
| 43 |
|
| 44 |
+
def ease_expo(t):
    """Exponential ease-out; reaches exactly 1.0 for t >= 1 (t is NOT clamped below 0)."""
    if t < 1:
        return 1 - math.pow(2, -10 * t)
    return 1.
|
| 46 |
|
| 47 |
+
# ββ FAL + HF CHAIN ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 48 |
+
def try_fal(pil_image, prompt):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
if not FAL_KEY: return None
|
| 50 |
try:
|
| 51 |
import fal_client
|
|
|
|
| 59 |
if vurl:
|
| 60 |
resp = requests.get(vurl, timeout=60)
|
| 61 |
if resp.status_code==200: return resp.content
|
| 62 |
+
except Exception as e: print(f" β fal: {e}")
|
| 63 |
return None
|
| 64 |
|
| 65 |
+
def try_hf(pil_image, prompt):
    """Request an image-to-video render from HF Inference (LTX-2).

    Returns raw video bytes on success, None when no client is configured
    or the call fails (error is printed, not raised).
    """
    if not hf_client:
        return None
    try:
        resp = hf_client.image_to_video(image=pil_to_bytes(pil_image),
                                        model="Lightricks/LTX-2", prompt=prompt)
        if hasattr(resp, "read"):
            return resp.read()
        return resp
    except Exception as e:
        print(f" β HF: {e}")
    return None
|
| 73 |
|
| 74 |
+
def get_ai_video(pil, prompt, cb=None):
    """Try each AI video backend in order; first success wins.

    Returns (saved_mp4_path, backend_name), or (None, "local") when every
    backend fails or times out. `cb`, if given, receives a short status
    string before each attempt.
    """
    backends = [
        ("π€ fal.ai LTX", try_fal, 90),
        ("π€ HF LTX-2", try_hf, 60),
    ]
    for name, fn, sec in backends:
        if cb:
            cb(f"β³ {name}...")
        data = run_timeout(fn, sec, pil, prompt)
        if data:
            return save_bytes(data), name
    return None, "local"
|
|
|
|
| 83 |
|
| 84 |
|
| 85 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 86 |
+
# CINEMATIC ENGINE β image ALWAYS visible
|
| 87 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 88 |
|
| 89 |
+
def prep_image(pil, W, H):
    """Center-crop to the W:H aspect ratio, resize, then lightly enhance.

    Returns an HxWx3 uint8 RGB numpy array; no letterboxing is added.
    """
    img = pil.convert("RGB")
    sw, sh = img.size
    target_ratio = W / H
    if sw / sh > target_ratio:
        # Too wide: trim equal strips off the left and right.
        nw = int(sh * target_ratio)
        left = (sw - nw) // 2
        img = img.crop((left, 0, left + nw, sh))
    else:
        # Too tall: trim equal strips off the top and bottom.
        nh = int(sw / target_ratio)
        top = (sh - nh) // 2
        img = img.crop((0, top, sw, top + nh))
    img = img.resize((W, H), Image.LANCZOS)
    # Gentle sharpen plus mild contrast/saturation lift; deliberately subtle.
    img = img.filter(ImageFilter.UnsharpMask(radius=0.8, percent=110, threshold=3))
    img = ImageEnhance.Contrast(img).enhance(1.05)
    img = ImageEnhance.Color(img).enhance(1.08)
    return np.array(img)
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def grade(frame, style):
    """Apply a subtle per-style color grade to an RGB uint8 frame.

    Kept intentionally mild so the image never visibly darkens; unknown
    styles pass through (up to float rounding). Returns uint8.
    """
    f = frame.astype(np.float32) / 255.0
    if style == "premium":
        # Gentle warm lift on red plus a tiny blue boost.
        f[:, :, 0] = np.clip(f[:, :, 0] * 1.03 + 0.01, 0, 1)
        f[:, :, 2] = np.clip(f[:, :, 2] * 1.02, 0, 1)
    elif style == "energetic":
        # Mild saturation push around the luma axis, then a small gain.
        gray = 0.299 * f[:, :, 0:1] + 0.587 * f[:, :, 1:2] + 0.114 * f[:, :, 2:3]
        f = np.clip(gray + 1.25 * (f - gray), 0, 1)
        f = np.clip(f * 1.05, 0, 1)
    elif style == "fun":
        # Warm pink cast: nudge red hardest, green slightly.
        f[:, :, 0] = np.clip(f[:, :, 0] * 1.06, 0, 1)
        f[:, :, 1] = np.clip(f[:, :, 1] * 1.03, 0, 1)
    return np.clip(f * 255, 0, 255).astype(np.uint8)
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def soft_vignette(frame):
    """Darken only the extreme corners of an RGB uint8 frame (very subtle)."""
    h, w = frame.shape[:2]
    Y, X = np.ogrid[:h, :w]
    # Normalized radial distance from the frame center.
    dist = np.sqrt(((X - w / 2) / (w / 2)) ** 2 + ((Y - h / 2) / (h / 2)) ** 2)
    # Attenuation only kicks in beyond 0.85 of the way to the edge.
    mask = np.clip(1.0 - 0.30 * np.maximum(dist - 0.85, 0) ** 2, 0, 1)
    shaded = frame.astype(np.float32) * mask[:, :, None]
    return np.clip(shaded, 0, 255).astype(np.uint8)
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
class Bokeh:
    """Drifting, pulsing translucent light blobs overlaid on video frames."""

    def __init__(self, W, H, style):
        self.W, self.H = W, H
        palettes = {"premium": [(255, 220, 100), (180, 160, 255)],
                    "energetic": [(80, 180, 255), (255, 80, 80)],
                    "fun": [(255, 150, 200), (150, 255, 180)]}
        colors = palettes.get(style, palettes["premium"])
        self.p = []
        for _ in range(18):
            self.p.append({
                "x": np.random.uniform(0, W), "y": np.random.uniform(0, H),
                "r": np.random.uniform(5, 20),
                "a": np.random.uniform(0.06, 0.20),  # very transparent
                "vx": np.random.uniform(-0.2, 0.2),
                "vy": np.random.uniform(-0.5, -0.05),
                "col": colors[np.random.randint(len(colors))],
                "ph": np.random.uniform(0, math.pi * 2),
            })

    def draw(self, frame, t_sec):
        """Return a copy of `frame` with every particle composited in at time t_sec."""
        canvas = frame.astype(np.float32)
        for p in self.p:
            # Horizontal drift plus a slow sine wobble; wraps at the edges.
            px = int(p["x"] + p["vx"] * t_sec * 50 + math.sin(t_sec * 1.5 + p["ph"]) * 6) % self.W
            py = int((p["y"] + p["vy"] * t_sec * 50) % self.H)
            radius = max(3, int(p["r"] * (0.8 + 0.2 * math.sin(t_sec * 2 + p["ph"]))))
            alpha = p["a"] * (0.7 + 0.3 * math.sin(t_sec * 2 + p["ph"]))
            stamp = np.zeros_like(canvas)
            cv2.circle(stamp, (px, py), radius, p["col"], -1)
            # Soft glow: blur the solid disc before alpha-blending it in.
            blurred = cv2.GaussianBlur(stamp, (radius | 1, radius | 1), radius / 2)
            canvas = canvas * (1 - alpha) + blurred.astype(np.float32) * alpha
        return np.clip(canvas, 0, 255).astype(np.uint8)
|
| 165 |
+
|
| 166 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
def get_font(size):
    """Return a bold TrueType font at `size`, falling back to PIL's default.

    Tries a few common Linux font paths; the first that exists and loads wins.

    Fix: the original used a bare `except:`, which also swallows
    KeyboardInterrupt/SystemExit — narrowed to `except Exception`.
    """
    candidates = [
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
        "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf",
    ]
    for path in candidates:
        if os.path.exists(path):
            try:
                return ImageFont.truetype(path, size)
            except Exception:
                # Corrupt/unreadable font file: keep trying the next path.
                pass
    return ImageFont.load_default()
|
| 175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
|
| 177 |
+
def draw_caption(frame_np, text, anim_t, W, H, style):
    """Render an animated caption (slide-up + fade-in) onto an RGB frame.

    frame_np: HxWx3 uint8 array; anim_t: seconds since the caption appeared;
    W, H: frame dimensions; style: picks the text color. Returns a new RGB
    uint8 array; the input array is not modified.

    Fix: the two bare `except:` clauses around font measurement also caught
    KeyboardInterrupt/SystemExit — narrowed to `except Exception`.
    """
    if not text.strip():
        return frame_np
    pil = Image.fromarray(frame_np).convert("RGBA")
    overlay = Image.new("RGBA", pil.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(overlay)

    font = get_font(max(30, W // 20))
    # Word wrap to roughly 82% of the frame width.
    words = text.split(); lines = []; line = ""
    for w in words:
        test = (line + " " + w).strip()
        try:
            bbox = font.getbbox(test)
        except Exception:
            # Bitmap fallback fonts may lack getbbox; estimate ~18px per char.
            bbox = (0, 0, len(test) * 18, 30)
        if bbox[2] > W * 0.82 and line:
            lines.append(line); line = w
        else:
            line = test
    if line:
        lines.append(line)

    lh = max(36, W // 18)
    total_h = len(lines) * lh + 24
    base_y = H - total_h - 80  # anchored 80px above the bottom edge

    # Slide-up animation: position eases over 0.5s, opacity over 0.4s.
    slide = ease_expo(min(anim_t / 0.5, 1.0))
    offset = int((1 - slide) * 50)
    alpha = int(min(anim_t / 0.4, 1.0) * 255)

    txt_colors = {"premium": (255, 210, 60), "energetic": (60, 200, 255), "fun": (255, 100, 180)}
    txt_col = txt_colors.get(style, (255, 255, 255))

    for i, ln in enumerate(lines):
        try:
            bbox = font.getbbox(ln); tw = bbox[2] - bbox[0]
        except Exception:
            tw = len(ln) * 18
        tx = (W - tw) // 2; ty = base_y + i * lh + offset

        # Background pill behind the line of text.
        pad = 14
        draw.rounded_rectangle([tx - pad, ty - 6, tx + tw + pad, ty + lh + 4],
                               radius=12, fill=(0, 0, 0, min(170, alpha)))
        # Drop shadow, offset by 2px.
        draw.text((tx + 2, ty + 2), ln, font=font, fill=(0, 0, 0, min(200, alpha)))
        # The caption text itself.
        r, g, b = txt_col
        draw.text((tx, ty), ln, font=font, fill=(r, g, b, alpha))

    combined = Image.alpha_composite(pil, overlay)
    return np.array(combined.convert("RGB"))
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
def make_bgm(duration_sec, out_path, style="premium"):
    """Synthesize a simple lo-fi backing track and write it as mono 16-bit WAV.

    Layers a kick drum, a slowly pulsing bass, a three-note melody swell and
    hi-hat noise bursts, then applies a 0.4s fade at both ends. The hi-hat
    uses np.random, so the exact samples vary per call. Returns True.
    """
    import wave
    sr = 44100
    n = int(sr * duration_sec)
    t = np.linspace(0, duration_sec, n, endpoint=False)
    bpm = {"premium": 90, "energetic": 128, "fun": 105}.get(style, 90)
    beat = 60. / bpm

    # Kick: decaying sine with a fast downward pitch sweep on every beat.
    kick = np.zeros(n, np.float32)
    for i in range(int(duration_sec / beat) + 2):
        s = int(i * beat * sr)
        if s >= n:
            break
        l = min(int(sr * .10), n - s)
        env = np.exp(-20 * np.arange(l) / sr)
        sweep = 55 * np.exp(-30 * np.arange(l) / sr)
        kick[s:s + l] += env * np.sin(2 * math.pi * sweep * np.arange(l) / sr) * 0.6

    # Bassline: amplitude-modulated sine at a style-dependent frequency.
    bass_f = {"premium": 55, "energetic": 80, "fun": 65}.get(style, 55)
    bass = np.sin(2 * math.pi * bass_f * t) * 0.12 * (0.5 + 0.5 * np.sin(2 * math.pi * (bpm / 60 / 4) * t))

    # Melody: three phase-offset tones swelling in and out.
    mel_freqs = {"premium": [261, 329, 392], "energetic": [330, 415, 494], "fun": [392, 494, 587]}.get(style, [261, 329, 392])
    mel = np.zeros(n, np.float32)
    for j, f in enumerate(mel_freqs):
        env = np.clip(0.5 + 0.5 * np.sin(2 * math.pi * 1.5 * t - j * 2.1), 0, 1)
        mel += np.sin(2 * math.pi * f * t) * env * 0.05

    # Hi-hat: short white-noise bursts on every half beat.
    hat = np.zeros(n, np.float32)
    hs = beat / 2
    for i in range(int(duration_sec / hs) + 2):
        s = int(i * hs * sr)
        if s >= n:
            break
        l = min(int(sr * .03), n - s)
        hat[s:s + l] += np.random.randn(l) * np.exp(-80 * np.arange(l) / sr) * 0.07

    # Master mix with soft clipping headroom and symmetric fades.
    mix = np.clip((kick + bass + mel + hat) * 0.20, -1, 1)
    fade = int(sr * .4)
    mix[:fade] *= np.linspace(0, 1, fade)
    mix[-fade:] *= np.linspace(1, 0, fade)

    with wave.open(out_path, "w") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(sr)
        wf.writeframes((mix * 32767).astype(np.int16).tobytes())
    return True
|
| 272 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
|
| 274 |
+
def add_audio_to_video(video_path, style, duration_sec, caption):
    """Mux a generated BGM track (plus, if available, a gTTS voiceover) onto a video.

    Returns the path of the new audio-carrying mp4, or `video_path` unchanged
    when muxing failed (e.g. ffmpeg missing). Best-effort: any TTS failure is
    skipped silently and BGM-only audio is used instead.

    Fixes: ffmpeg was previously invoked through `os.system` with f-string
    interpolated paths — fragile quoting and shell-injection prone for paths
    containing quotes/metacharacters. Replaced with `subprocess.run` argument
    lists (shell=False). The bare `except:` is narrowed to `except Exception`.
    """
    import subprocess
    bgm = video_path.replace(".mp4", "_bgm.wav")
    final = video_path.replace(".mp4", "_final.mp4")
    make_bgm(duration_sec, bgm, style)

    # Try a TTS voiceover (optional dependency; needs network access).
    tts = video_path.replace(".mp4", "_tts.mp3")
    try:
        from gtts import gTTS
        gTTS(text=caption[:180], lang="en", slow=False).save(tts)
        # Mix: voice at full volume over a quiet BGM bed.
        mixed = video_path.replace(".mp4", "_mix.wav")
        subprocess.run(
            ["ffmpeg", "-y", "-i", bgm, "-i", tts,
             "-filter_complex",
             "[0]volume=0.22[a];[1]volume=1.0[b];[a][b]amix=inputs=2:duration=first",
             "-t", str(duration_sec), mixed, "-loglevel", "error"],
            check=False)
        if os.path.exists(mixed):
            bgm = mixed
    except Exception:
        # Best-effort: fall back to BGM-only audio.
        pass

    # Mux the audio onto the video without re-encoding the video stream.
    subprocess.run(
        ["ffmpeg", "-y", "-i", video_path, "-i", bgm,
         "-c:v", "copy", "-c:a", "aac", "-b:a", "128k",
         "-shortest", final, "-loglevel", "error"],
        check=False)
    return final if os.path.exists(final) else video_path
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
# ββ MAIN RENDER βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 299 |
+
def render(pil, caption="Step into style.", style="premium",
           duration_sec=7, fps=30, add_audio=True, add_caption=True, add_bokeh=True):
    """Render a 720x1280 vertical "cinematic reel" from a single still image.

    The source image is placed on an oversized canvas and a virtual camera
    crops/zooms/pans across it per frame (Ken Burns style), with color grade,
    vignette, light film grain, optional bokeh particles, letterbox bars,
    fade in/out, and timed two-part captions.

    Args:
        pil: source PIL image.
        caption: text split in two halves and shown as timed overlays.
        style: visual/music style key ("premium" / "energetic" / "fun").
        duration_sec: clip length in seconds.
        fps: output frame rate.
        add_audio: mux generated music/TTS via ``add_audio_to_video``.
        add_caption: draw the timed caption overlays.
        add_bokeh: overlay animated bokeh particles.

    Returns:
        Path to the rendered ``.mp4`` (with audio if ``add_audio``).
    """
    TW, TH = 720, 1280
    PAD = 160                       # extra canvas margin that gives the camera room to zoom/pan
    BW, BH = TW + PAD*2, TH + PAD*2

    base = prep_image(pil, BW, BH)  # large canvas — full-color image
    total = duration_sec * fps

    bokeh = Bokeh(TW, TH, style) if add_bokeh else None

    # Motion plan: gentle zoom + pan only — no 3D, no warp.
    # Each segment: (t0, t1, zoom0, zoom1, panx0, panx1, pany0, pany1) in normalized time.
    SEGS = [
        (0.00, 0.22, 1.30, 1.12, 0, -int(PAD*.07), 0, -int(PAD*.08)),
        (0.22, 0.52, 1.12, 1.07, -int(PAD*.04), int(PAD*.06), -int(PAD*.08), -int(PAD*.22)),
        (0.52, 0.78, 1.07, 1.03, int(PAD*.06), int(PAD*.13), -int(PAD*.22), -int(PAD*.12)),
        (0.78, 1.00, 1.03, 1.00, int(PAD*.13), 0, -int(PAD*.12), 0),
    ]

    # mkstemp + close instead of NamedTemporaryFile(delete=False): does not keep
    # an open handle on the very file cv2 is about to write (problematic on Windows).
    fd, out_path = tempfile.mkstemp(suffix=".mp4")
    os.close(fd)
    writer = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (TW, TH))

    # Caption segments: caption split in two halves. Timings scale with duration
    # (identical to the fixed 1.0-3.5 / 3.8-6.5 windows at the 7 s default, but
    # short clips now still show the second half).
    cap_words = caption.strip().split()
    mid = max(1, len(cap_words)//2)
    tscale = duration_sec / 7.0
    cap_segs = [
        (1.0*tscale, 3.5*tscale, " ".join(cap_words[:mid])),
        (3.8*tscale, 6.5*tscale, " ".join(cap_words[mid:]) or " ".join(cap_words)),
    ]

    for i in range(total):
        tg = i / max(total-1, 1)    # normalized time in [0, 1]

        # Interpolate motion params from the active segment (eased).
        zoom = pan_x = pan_y = None
        for (t0, t1, z0, z1, px0, px1, py0, py1) in SEGS:
            if t0 <= tg <= t1:
                te = ease_cubic((tg-t0)/(t1-t0))
                zoom = z0 + (z1-z0)*te
                pan_x = int(px0 + (px1-px0)*te)
                pan_y = int(py0 + (py1-py0)*te)
                break
        if zoom is None:
            zoom, pan_x, pan_y = 1.0, 0, 0

        # Micro camera shake, decaying over the first 15% of the clip.
        if tg < 0.15:
            s = (0.15-tg)/0.15 * 1.8
            pan_x += int(s*math.sin(i*1.5))
            pan_y += int(s*math.cos(i*1.1))

        # Crop the virtual-camera window from the big canvas.
        cw, ch = int(TW/zoom), int(TH/zoom)
        cx, cy = BW//2 + pan_x, BH//2 + pan_y
        x1 = max(0, cx - cw//2); y1 = max(0, cy - ch//2)
        x2 = min(BW, x1 + cw);   y2 = min(BH, y1 + ch)
        if (x2-x1) < 20 or (y2-y1) < 20:     # degenerate crop — fall back to top-left
            x1, y1, x2, y2 = 0, 0, TW, TH

        frame = cv2.resize(base[y1:y2, x1:x2], (TW, TH), interpolation=cv2.INTER_LINEAR)

        # Subtle color grade (won't darken).
        frame = grade(frame, style)

        # Soft vignette (barely noticeable).
        frame = soft_vignette(frame)

        # Film grain — very light.
        frame = np.clip(frame.astype(np.float32) +
                        np.random.normal(0, 3.0, frame.shape), 0, 255).astype(np.uint8)

        # Bokeh particles on top.
        if bokeh:
            frame = bokeh.draw(frame, tg*duration_sec)

        # Thin cinematic letterbox bars.
        frame[:36, :] = 0; frame[-36:, :] = 0

        # Fade in (first 2%) / fade out (last 5%).
        if tg < 0.02:   alpha = ease_expo(tg/0.02)
        elif tg > 0.95: alpha = ease(1-(tg-0.95)/0.05)
        else:           alpha = 1.0
        if alpha < 1.0:
            frame = np.clip(frame.astype(np.float32)*alpha, 0, 255).astype(np.uint8)

        # Timed caption overlays.
        if add_caption:
            t_sec = tg*duration_sec
            for (cs, ce, ct) in cap_segs:
                if cs <= t_sec <= ce:
                    frame = draw_caption(frame, ct, t_sec-cs, TW, TH, style)

        writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))

    writer.release()

    if add_audio:
        return add_audio_to_video(out_path, style, duration_sec, caption)
    return out_path
|
| 396 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
|
| 398 |
+
# ββ PIPELINE ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 399 |
+
def generate(image, caption, style, add_audio, add_caption, add_bokeh, progress=gr.Progress()):
    """Top-level pipeline behind the Generate button.

    Tries the AI video chain first (`get_ai_video`); if no provider returns a
    clip, falls back to the local cinematic renderer (`render`).

    Args:
        image: uploaded image (PIL image or numpy array from Gradio).
        caption: user caption; empty input falls back to a default slogan.
        style: UI style choice ("Premium" / "Energetic" / "Fun"), lowercased downstream.
        add_audio / add_caption / add_bokeh: feature toggles from the checkboxes.
        progress: Gradio progress reporter (injected by Gradio at call time).

    Returns:
        (video_path_or_None, log_text) tuple for the two output components.
    """
    # NOTE(review): emoji in user-facing strings below were reconstructed from a
    # mojibake'd source — confirm against the deployed app.
    if image is None: return None, "⚠️ Upload an image first!"
    pil = image if isinstance(image, Image.Image) else Image.fromarray(image)
    cap = caption.strip() or "Premium Quality. Shop Now."
    prompt = f"cinematic product ad, {cap}, smooth motion, dramatic lighting"
    lines = []

    # Collects log lines and advances the progress bar (capped at 80% until done).
    def log(msg): lines.append(msg); progress(min(.1+len(lines)*.12,.80),desc=msg)

    progress(.05, desc="🚀 Starting...")
    ai_path, model = get_ai_video(pil, prompt, cb=log)

    if ai_path:
        # AI provider succeeded — optionally add music, then return its clip.
        log(f"✅ AI video: {model}")
        if add_audio:
            progress(.85, desc="🎵 Adding music...")
            # Fixed 6 s: the AI clip's true duration is unknown here — TODO confirm.
            ai_path = add_audio_to_video(ai_path, style.lower(), 6, cap)
        progress(1.0, desc="✅ Done!")
        return ai_path, "\n".join(lines)+f"\n\n✅ {model}"

    # Fallback: local Ken Burns-style renderer.
    log("🎬 Cinematic Engine...")
    progress(.60, desc="🎬 Rendering...")
    out = render(pil, caption=cap, style=style.lower(),
                 add_audio=add_audio, add_caption=add_caption, add_bokeh=add_bokeh)
    progress(1.0, desc="✅ Done!")
    return out, "\n".join(lines)+"\n\n✅ 🎬 Cinematic Engine"
|
| 425 |
|
| 426 |
+
|
| 427 |
+
# ββ UI ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 428 |
+
# ── UI ────────────────────────────────────────────────────────────
# Minimal CSS for the centered title/subtitle banners.
css="#title{text-align:center;font-size:2.3rem;font-weight:900}#sub{text-align:center;color:#888;margin-bottom:1.5rem}"
with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="violet")) as demo:
    gr.Markdown("# 🎬 AI Reel Generator", elem_id="title")
    gr.Markdown("Image + caption → cinematic reel with music & captions", elem_id="sub")

    with gr.Row():
        # Left column: inputs and options.
        with gr.Column(scale=1):
            img_in = gr.Image(label="📸 Upload Image", type="pil", height=280)
            cap_in = gr.Textbox(label="✏️ Caption", value="Step into style. Own the moment.", lines=2)
            sty_dd = gr.Dropdown(["Premium","Energetic","Fun"], value="Premium", label="🎨 Style")
            with gr.Row():
                audio_cb = gr.Checkbox(label="🎵 Music + Voice", value=True)
                caption_cb= gr.Checkbox(label="💬 Captions", value=True)
                bokeh_cb = gr.Checkbox(label="✨ Bokeh", value=True)
            gen_btn = gr.Button("🚀 Generate Reel", variant="primary", size="lg")
            # Provider fallback order shown to the user.
            gr.Markdown("**Chain:** fal.ai LTX → HF LTX-2 → 🎬 Cinematic Engine")

        # Right column: outputs.
        with gr.Column(scale=1):
            vid_out = gr.Video(label="🎥 Cinematic Reel", height=500)
            log_out = gr.Textbox(label="📋 Log", lines=5, interactive=False)

    # Wire the button to the pipeline; gr.Progress is injected automatically.
    gen_btn.click(fn=generate,
                  inputs=[img_in,cap_in,sty_dd,audio_cb,caption_cb,bokeh_cb],
                  outputs=[vid_out,log_out])

if __name__ == "__main__":
    demo.launch()
|