# ADDENEATOR / app.py
# GLAkavya's picture
# Update app.py
# 6078e17 verified
import os, tempfile, io, math, time, threading, re, random, json
import numpy as np
import cv2
import gradio as gr
from PIL import Image, ImageDraw, ImageFont, ImageFilter, ImageEnhance
# ── TOKENS ────────────────────────────────────────────────────────
# Read the Hugging Face token from HF_TOKEN, falling back to HF_KEY.
hf_token = (os.environ.get("HF_TOKEN", "") or os.environ.get("HF_KEY", "")).strip()

# Stays None when no token is configured or the client cannot be created;
# the rest of the file checks this before making any remote call.
hf_client = None
if hf_token:
    try:
        from huggingface_hub import login, InferenceClient

        login(token=hf_token)
        hf_client = InferenceClient(token=hf_token)
        print("✅ HF ready")
    except Exception as e:
        # Best effort: a missing package or a rejected token must not stop startup.
        print(f"⚠️ HF: {e}")
# ── TEMPLATE STORAGE ──────────────────────────────────────────────
# JSON file (relative to the working directory) holding all saved templates.
TEMPLATES_FILE = "saved_templates.json"


def load_templates():
    """Return the saved templates as a ``{name: settings}`` dict.

    Returns an empty dict when the file does not exist, cannot be read,
    does not contain valid JSON, or its top-level value is not an object.
    """
    if not os.path.exists(TEMPLATES_FILE):
        return {}
    try:
        with open(TEMPLATES_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and decoding errors.
        print(f"⚠️ Templates unreadable: {e}")
        return {}
    # Callers index the result by template name, so anything but a dict is unusable.
    return data if isinstance(data, dict) else {}
def save_template(name, style, language, duration, caption, add_aud, add_cap):
    """Store the given generator settings under ``name`` in TEMPLATES_FILE.

    An existing template with the same name is overwritten. A blank name is
    rejected and nothing is written.

    Returns:
        ``(status_message, template_names)`` where ``template_names`` is the
        list of all template names after the operation.
    """
    templates = load_templates()
    if not name or not str(name).strip():
        # Without this guard the settings would be stored under an empty key.
        return "⚠️ Enter a template name first!", list(templates.keys())
    templates[name] = {
        "style": style,
        "language": language,
        "duration": duration,
        "caption": caption,
        "add_audio": add_aud,
        "add_captions": add_cap,
        "created": time.strftime("%Y-%m-%d %H:%M"),
    }
    with open(TEMPLATES_FILE, "w") as f:
        json.dump(templates, f, indent=2)
    return f"✅ Template '{name}' saved!", list(templates.keys())
def get_template_names():
    """Return the names of all saved templates, in stored order."""
    saved = load_templates()
    return list(saved.keys())
def load_template(name):
    """Return the settings stored under ``name`` as a 6-tuple.

    The tuple order is ``(style, language, duration, caption, add_audio,
    add_captions)``. When the template does not exist the built-in defaults
    are returned; when a stored template lacks a field (for example a
    hand-edited JSON file), that field falls back to its default instead of
    raising ``KeyError``.
    """
    defaults = {
        "style": "Premium",
        "language": "English",
        "duration": 6,
        "caption": "",
        "add_audio": True,
        "add_captions": True,
    }
    templates = load_templates()
    stored = templates[name] if name in templates else {}
    if not isinstance(stored, dict):
        stored = {}
    return tuple(stored.get(key, fallback) for key, fallback in defaults.items())
def delete_template(name):
    """Remove the template called ``name`` from TEMPLATES_FILE.

    Returns:
        ``(status_message, remaining_template_names)``. The file is only
        rewritten when the template existed.
    """
    templates = load_templates()
    if name not in templates:
        return "Template not found!", list(templates.keys())

    templates.pop(name)
    with open(TEMPLATES_FILE, "w") as f:
        json.dump(templates, f, indent=2)
    remaining = list(templates.keys())
    return f"🗑️ Template '{name}' deleted!", remaining
def export_template(name):
    """Write one template to ``template_<name>.json`` and return that path.

    The file is created in the working directory and contains a single-entry
    ``{name: settings}`` object. Returns ``None`` when no template called
    ``name`` exists.
    """
    templates = load_templates()
    if name not in templates:
        return None
    # Template names are free text; collapse anything that is not a word
    # character or hyphen so separators such as "/" cannot redirect the write.
    safe = re.sub(r"[^\w\-]+", "_", str(name)).strip("_") or "template"
    out = f"template_{safe}.json"
    with open(out, "w") as f:
        json.dump({name: templates[name]}, f, indent=2)
    return out
# ══════════════════════════════════════════════════════════════════
# AUTO-DETECT
# ══════════════════════════════════════════════════════════════════
def auto_detect(pil_image, user_caption=""):
    """Guess a product category and build a video prompt for an image.

    The image is sent to the remote classifier when ``hf_client`` is set;
    the top label is mapped to a category by substring match. If that gives
    nothing, keywords in ``user_caption`` are tried. The default category is
    "Product/Other".

    Returns:
        ``(category, auto_prompt, label)`` where ``label`` is the lower-cased
        classifier label, or "" when no label was obtained.
    """
    category = "Product/Other"
    label = ""

    if hf_client:
        try:
            buf = io.BytesIO()
            pil_image.save(buf, format="JPEG", quality=85)
            result = hf_client.image_classification(
                image=buf.getvalue(), model="google/vit-base-patch16-224"
            )
            if result:
                label = result[0].get("label", "").lower()
        except Exception as e:
            # Classification is optional; fall through to caption keywords.
            print(f" ⚠️ classifier skip: {e}")

    label_map = {
        "shoe": "Fashion", "sneaker": "Fashion", "boot": "Fashion", "dress": "Fashion",
        "shirt": "Fashion", "jacket": "Fashion", "jean": "Fashion", "sandal": "Fashion", "bag": "Fashion",
        "pizza": "Food", "burger": "Food", "cake": "Food", "food": "Food", "coffee": "Food", "sushi": "Food",
        "laptop": "Tech", "phone": "Tech", "camera": "Tech", "keyboard": "Tech", "monitor": "Tech", "tablet": "Tech",
        "lipstick": "Beauty", "cream": "Beauty", "perfume": "Beauty", "cosmetic": "Beauty", "makeup": "Beauty",
        "dumbbell": "Fitness", "yoga": "Fitness", "bottle": "Fitness", "bicycle": "Fitness", "jersey": "Fitness",
        "plant": "Lifestyle", "candle": "Lifestyle", "chair": "Lifestyle", "sofa": "Lifestyle", "lamp": "Lifestyle",
    }
    # First key (in insertion order) contained in the label wins.
    category = next(
        (mapped for key, mapped in label_map.items() if key in label),
        category,
    )

    if category == "Product/Other" and user_caption:
        cap_low = user_caption.lower()
        # Checked top to bottom; the first category with a matching keyword wins.
        caption_keywords = (
            ("Fashion", ["shoe", "sneaker", "dress", "outfit", "wear", "fashion", "style", "cloth", "kurta"]),
            ("Food", ["food", "eat", "recipe", "cook", "restaurant", "cafe", "pizza", "biryani"]),
            ("Tech", ["phone", "laptop", "tech", "gadget", "device", "app", "camera"]),
            ("Beauty", ["skin", "beauty", "makeup", "lipstick", "cream", "hair", "glow"]),
            ("Fitness", ["gym", "fit", "workout", "protein", "yoga", "health", "sport"]),
            ("Lifestyle", ["home", "decor", "interior", "lifestyle", "aesthetic", "candle"]),
        )
        for candidate, keywords in caption_keywords:
            if any(word in cap_low for word in keywords):
                category = candidate
                break

    prompts = {
        "Fashion": "cinematic fashion product shot, model wearing outfit, soft studio lighting, slow zoom, luxury feel",
        "Food": "cinematic food photography, steam rising, dramatic close-up, warm golden lighting, slow reveal",
        "Tech": "cinematic tech product reveal, sleek background, blue accent lighting, smooth rotation, premium feel",
        "Beauty": "cinematic beauty product shot, soft pink bokeh, gentle sparkle, slow zoom, elegant lighting",
        "Fitness": "cinematic fitness product shot, energetic motion blur, bold lighting, dynamic angle, powerful",
        "Lifestyle": "cinematic lifestyle shot, warm ambient light, cozy aesthetic, slow pan, aspirational feel",
        "Product/Other": "cinematic product advertisement, dramatic lighting, smooth zoom, professional commercial look",
    }
    auto_prompt = prompts.get(category, prompts["Product/Other"])
    if label:
        auto_prompt = f"{label} product, {auto_prompt}"
    return category, auto_prompt, label
# ══════════════════════════════════════════════════════════════════
# SMART INSIGHTS
# ══════════════════════════════════════════════════════════════════
# Per-category posting schedule shown in the insights text:
# "best" = headline time, "days" = recommended weekdays, "slots" = all time slots.
POSTING_TIMES = {
"Fashion": {"best":"9:00 PM", "days":"Tue, Thu, Fri", "slots":["7AM","12PM","6PM","9PM"]},
"Food": {"best":"12:00 PM", "days":"Mon, Wed, Sat", "slots":["11AM","1PM","7PM"]},
"Tech": {"best":"8:00 AM", "days":"Mon, Tue, Wed", "slots":["8AM","12PM","5PM"]},
"Beauty": {"best":"8:00 PM", "days":"Wed, Fri, Sun", "slots":["8AM","1PM","8PM"]},
"Fitness": {"best":"6:00 AM", "days":"Mon, Wed, Fri", "slots":["6AM","12PM","7PM"]},
"Lifestyle": {"best":"7:00 PM", "days":"Thu, Fri, Sat", "slots":["9AM","2PM","7PM"]},
"Product/Other":{"best":"8:00 PM", "days":"Tue, Thu, Sat", "slots":["10AM","3PM","8PM"]},
}
# Per-category target-audience blurb, inserted verbatim into the insights text.
AUDIENCES = {
"Fashion": "👗 18-35 yo females · Fashion lovers · Insta scrollers · Trend followers",
"Food": "🍕 18-45 · Foodies · Home cooks · Restaurant goers · Food bloggers",
"Tech": "💻 20-40 · Tech enthusiasts · Early adopters · Gadget buyers",
"Beauty": "💄 16-35 yo · Beauty lovers · Skincare fans · Self-care community",
"Fitness": "💪 18-40 · Gym goers · Health-conscious · Athletes · Wellness seekers",
"Lifestyle": "🌿 22-40 · Aspirational buyers · Aesthetic lovers · Home decor fans",
"Product/Other":"🛍️ 18-45 · Online shoppers · Deal hunters · Value-conscious buyers",
}
# Caption templates keyed by language, then style. Each style has a list of
# alternatives; "{cap}" is the placeholder replaced with the cleaned caption.
CAPTIONS = {
"English": {
"Premium": ["✨ {cap} Quality that speaks for itself. 🛒 Shop Now → Link in bio",
"Elevate your game. {cap} 💫 DM to order!"],
"Energetic": ["🔥 {cap} Hit different. Grab yours NOW 👆 Limited stock!",
"⚡ Game changer! {cap} Don't sleep on this 🚀"],
"Fun": ["Obsessed!! 😍 {cap} Tag someone who needs this 👇",
"POV: You just found your new fav 🎉 {cap} Link in bio!"],
},
"Hindi": {
"Premium": ["✨ {cap} क्वालिटी जो बोलती है। 🛒 अभी खरीदें → Bio में link",
"अपना स्टाइल बढ़ाएं। {cap} 💫 Order के लिए DM करें!"],
"Energetic": ["🔥 {cap} एकदम अलग! अभी grab करो 👆 Limited stock!",
"⚡ Game changer! {cap} मत सोचो, order करो 🚀"],
"Fun": ["दीवाने हो जाओगे!! 😍 {cap} किसी को tag करो 👇",
"POV: नया favourite मिल गया 🎉 {cap} Bio में link!"],
},
"Hinglish": {
"Premium": ["✨ {cap} Quality toh dekho yaar! 🛒 Shop karo → Bio mein link",
"Style upgrade time! {cap} 💫 DM karo order ke liye!"],
"Energetic": ["🔥 {cap} Bilkul alag hai bhai! Abhi lo 👆 Limited stock!",
"⚡ Ek dum fire! {cap} Mat ruko, order karo 🚀"],
"Fun": ["Yaar yeh toh kamaal hai!! 😍 {cap} Kisi ko tag karo 👇",
"POV: Naya fav mil gaya 🎉 {cap} Bio mein link!"],
},
}
# Per-category hashtag line appended to the insights text.
HASHTAGS = {
"Fashion": "#Fashion #OOTD #StyleInspo #NewCollection #Trending #ShopNow #Reels",
"Food": "#FoodLovers #Foodie #FoodPhotography #Yummy #FoodReels #MustTry",
"Tech": "#TechReview #Gadgets #TechLovers #Innovation #NewTech #MustHave",
"Beauty": "#BeautyTips #Skincare #MakeupLovers #GlowUp #BeautyReels #GRWM",
"Fitness": "#FitnessMotivation #GymLife #HealthyLifestyle #FitFam #WorkoutReels",
"Lifestyle": "#Lifestyle #Aesthetic #HomeDecor #VibeCheck #DailyInspo #Reels",
"Product/Other":"#NewProduct #MustHave #ShopNow #Trending #Viral #Reels #ForYou",
}
def get_insights(category, style, language, cap):
    """Build the "Smart Insights" text and a marketing caption.

    Args:
        category: Key into the category tables; unknown categories fall back
            to the "Product/Other" entries.
        style: Caption style; unknown styles use the English "Premium" list.
        language: Caption language; unknown languages use English.
        cap: Raw caption text. It is stripped of characters outside word
            characters, whitespace and ``!.,'-``, then cut to 60 characters.

    Returns:
        ``(insight_text, ai_caption)``. The caption template is chosen at
        random from the alternatives for the language and style.
    """
    fallback = "Product/Other"
    # Use the same fallback for every table so an unknown category cannot raise.
    pt = POSTING_TIMES.get(category, POSTING_TIMES[fallback])
    audience = AUDIENCES.get(category, AUDIENCES[fallback])
    tags = HASHTAGS.get(category, HASHTAGS[fallback])

    clean_cap = re.sub(r"[^\w\s!.,'-]", "", cap).strip()[:60]
    tmpl = CAPTIONS.get(language, CAPTIONS["English"]).get(style, CAPTIONS["English"]["Premium"])
    ai_cap = random.choice(tmpl).replace("{cap}", clean_cap)

    insight = (
        f"📊 SMART INSIGHTS\n"
        f"{'━'*38}\n"
        f"🎯 Category: {category}\n\n"
        f"👥 Target Audience:\n{audience}\n\n"
        f"⏰ Best Time to Post:\n"
        f"🏆 Prime: {pt['best']} | 📅 Days: {pt['days']}\n"
        f"🕐 All slots: {', '.join(pt['slots'])}\n\n"
        f"💬 AI Caption ({language} · {style}):\n{ai_cap}\n\n"
        f"#️⃣ Hashtags:\n{tags}\n"
        f"{'━'*38}"
    )
    return insight, ai_cap
# ══════════════════════════════════════════════════════════════════
# ══════════════════════════════════════════════════════════════════
# HF VIDEO CHAIN
# ══════════════════════════════════════════════════════════════════
# Ordered fallback chain for video generation: entries are tried top to bottom.
# "id" is the model identifier passed to the inference client, "name" is the
# label shown in the log. The sentinel id "__local__" selects the local
# Ken Burns renderer instead of a remote model.
HF_MODELS = [
{"id":"Lightricks/LTX-2", "name":"LTX-2 ⚡"},
{"id":"Wan-AI/Wan2.2-I2V-A14B", "name":"Wan 2.2"},
{"id":"stabilityai/stable-video-diffusion-img2vid-xt", "name":"SVD-XT"},
{"id":"KlingTeam/LivePortrait", "name":"Kling"},
{"id":"Lightricks/LTX-Video", "name":"LTX-Video"},
{"id":"__local__", "name":"Ken Burns ✅"},
]
def run_timeout(fn, sec, *a, **kw):
    """Call ``fn(*a, **kw)`` in a daemon thread and wait at most ``sec`` seconds.

    Returns:
        The function's return value, or ``None`` when the call timed out or
        raised. Failures are printed rather than raised so callers can simply
        test the result.

    Note:
        A timed-out call is not cancelled; the daemon thread keeps running in
        the background and its eventual result is discarded.
    """
    box = [None]
    err = [None]

    def r():
        try:
            box[0] = fn(*a, **kw)
        except Exception as e:
            err[0] = str(e)

    t = threading.Thread(target=r, daemon=True)
    t.start()
    t.join(timeout=sec)

    label = getattr(fn, "__name__", "call")
    if t.is_alive():
        print(f" ⏱️ {label}: timed out after {sec}s")
        return None
    if err[0] is not None:
        # Previously the captured error was dropped without any trace.
        print(f" ❌ {label}: {err[0]}")
    return box[0]
def try_hf(model_id, pil, prompt):
    """Ask one remote model to turn ``pil`` into a video.

    Returns the video payload (the result's ``read()`` output when it has
    one, otherwise the result itself), or ``None`` when no client is
    configured or the request fails.
    """
    if not hf_client:
        return None
    try:
        jpeg = io.BytesIO()
        pil.save(jpeg, format="JPEG", quality=92)
        response = hf_client.image_to_video(
            image=jpeg.getvalue(), model=model_id, prompt=prompt
        )
        if hasattr(response, "read"):
            return response.read()
        return response
    except Exception as e:
        print(f" ❌ {model_id}: {e}")
        return None
def get_video(pil, prompt, dur, cb=None):
    """Produce a video clip for ``pil`` using the first source that works.

    Walks ``HF_MODELS`` in order. Remote models get 50 seconds each; the
    "__local__" entry renders a Ken Burns clip and always ends the search.

    Args:
        pil: Source image.
        prompt: Text prompt passed to remote models.
        dur: Clip duration in seconds (used by the local renderer).
        cb: Optional callable receiving a progress message per attempt.

    Returns:
        ``(video_path, model_name)``.
    """
    for m in HF_MODELS:
        mid, mname = m["id"], m["name"]
        if cb:
            cb(f"⏳ Trying: {mname}")
        if mid == "__local__":
            return ken_burns(pil, duration_sec=dur), mname
        data = run_timeout(try_hf, 50, mid, pil, prompt)
        if data:
            # Close the handle before returning so the file is fully written
            # and no descriptor is leaked; delete=False keeps it on disk.
            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as t:
                t.write(data)
            return t.name, mname
        time.sleep(0.5)
    # Only reached if HF_MODELS contains no "__local__" entry.
    return ken_burns(pil, duration_sec=dur), "Ken Burns"
# ══════════════════════════════════════════════════════════════════
# KEN BURNS
# ══════════════════════════════════════════════════════════════════
def ease_c(t):
    """Cubic ease-in-out of ``t`` after clamping it to [0, 1]."""
    t = max(0., min(1., t))
    if t < .5:
        return 4 * t * t * t
    return 1 - math.pow(-2 * t + 2, 3) / 2
def ease_e(t):
    """Exponential ease-out of ``t``, clamped to [0, 1].

    Inputs below 0 are clamped (previously they produced negative values),
    matching how the other easing helpers in this file treat their input.
    """
    t = max(0., min(1., t))
    return 1 - math.pow(2, -10 * t) if t < 1 else 1.
def ease_s(t):
    """Smoothstep (3t² − 2t³) of ``t`` after clamping it to [0, 1]."""
    t = max(0., min(1., t))
    squared = t * t
    return squared * (3 - 2 * t)
def ken_burns(pil, duration_sec=6, fps=30, style="premium"):
    """Render a 720x1280 zoom/pan ("Ken Burns") clip from a still image.

    The image is fitted inside the frame over a blurred, darkened copy of
    itself, then animated through four zoom/pan segments with colour grading,
    a vignette, film grain, letterbox bars and fade in/out.

    Args:
        pil: Source PIL image.
        duration_sec: Clip length in seconds.
        fps: Frames per second.
        style: "premium", "energetic" or "fun" colour grade; any other value
            applies no grade.

    Returns:
        Path of the written ``.mp4`` temp file (the caller owns it).
    """
    TW, TH = 720, 1280          # output frame size
    pad = 60                    # extra border so pans never leave the canvas
    BW, BH = TW + pad * 2, TH + pad * 2
    total = int(duration_sec * fps)

    img = pil.convert("RGB")
    sw, sh = img.size
    scale = min(TH / sh, TW / sw)
    # Guard against a zero-sized resize for extremely thin images.
    nw, nh = max(1, int(sw * scale)), max(1, int(sh * scale))
    img_r = img.resize((nw, nh), Image.LANCZOS)
    img_r = img_r.filter(ImageFilter.UnsharpMask(radius=0.8, percent=110, threshold=2))
    img_r = ImageEnhance.Contrast(img_r).enhance(1.05)
    img_r = ImageEnhance.Color(img_r).enhance(1.08)

    # Background: the same picture stretched to the frame, blurred and dimmed.
    bg = img.resize((TW, TH), Image.LANCZOS).filter(ImageFilter.GaussianBlur(18))
    bg = ImageEnhance.Brightness(bg).enhance(0.55)
    canvas = bg.copy()
    canvas.paste(img_r, ((TW - nw) // 2, (TH - nh) // 2))
    base = np.array(canvas.resize((BW, BH), Image.LANCZOS))

    # Vignette mask: 1.0 in the centre, darkening past 0.85 of the normalised radius.
    Y, X = np.ogrid[:TH, :TW]
    dist = np.sqrt(((X - TW / 2) / (TW / 2)) ** 2 + ((Y - TH / 2) / (TH / 2)) ** 2)
    vmask = np.clip(1. - 0.22 * np.maximum(dist - 0.85, 0) ** 2, 0, 1).astype(np.float32)

    # Each segment: (t_start, t_end, zoom_start, zoom_end, panx_start, panx_end, pany_start, pany_end)
    SEG = [
        (0.00, 0.30, 1.00, 1.04, 0, -int(pad * .4), 0, -int(pad * .4)),
        (0.30, 0.60, 1.04, 1.06, -int(pad * .3), int(pad * .3), -int(pad * .4), -int(pad * .7)),
        (0.60, 0.80, 1.06, 1.04, int(pad * .3), int(pad * .5), -int(pad * .7), -int(pad * .4)),
        (0.80, 1.00, 1.04, 1.00, int(pad * .5), 0, -int(pad * .4), 0),
    ]

    tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    # Only the name is needed; the writer opens the path itself.
    tmp.close()
    writer = cv2.VideoWriter(tmp.name, cv2.VideoWriter_fourcc(*"mp4v"), fps, (TW, TH))
    try:
        for i in range(total):
            tg = i / max(total - 1, 1)   # global progress in [0, 1]

            zoom = pan_x = pan_y = None
            for t0, t1, z0, z1, px0, px1, py0, py1 in SEG:
                if t0 <= tg <= t1:
                    te = ease_c((tg - t0) / (t1 - t0))
                    zoom = z0 + (z1 - z0) * te
                    pan_x = int(px0 + (px1 - px0) * te)
                    pan_y = int(py0 + (py1 - py0) * te)
                    break
            if zoom is None:
                zoom, pan_x, pan_y = 1., 0, 0

            # Crop window around the (panned) centre of the padded canvas.
            cw, ch = int(TW / zoom), int(TH / zoom)
            ox, oy = BW // 2 + pan_x, BH // 2 + pan_y
            x1, y1 = max(0, ox - cw // 2), max(0, oy - ch // 2)
            x2, y2 = min(BW, x1 + cw), min(BH, y1 + ch)
            if (x2 - x1) < 10 or (y2 - y1) < 10:
                x1, y1, x2, y2 = 0, 0, TW, TH
            frame = cv2.resize(base[y1:y2, x1:x2], (TW, TH), interpolation=cv2.INTER_LINEAR)

            # Colour grade on RGB floats (channel 0 = red, 1 = green, 2 = blue).
            f = frame.astype(np.float32) / 255.
            if style == "premium":
                f[:, :, 0] = np.clip(f[:, :, 0] * 1.03 + .01, 0, 1)
                f[:, :, 2] = np.clip(f[:, :, 2] * 1.02, 0, 1)
            elif style == "energetic":
                # Push colours away from luma (saturation), then brighten.
                g = 0.299 * f[:, :, 0:1] + 0.587 * f[:, :, 1:2] + 0.114 * f[:, :, 2:3]
                f = np.clip(g + 1.2 * (f - g), 0, 1)
                f = np.clip(f * 1.04, 0, 1)
            elif style == "fun":
                f[:, :, 0] = np.clip(f[:, :, 0] * 1.05, 0, 1)
                f[:, :, 1] = np.clip(f[:, :, 1] * 1.03, 0, 1)
            frame = np.clip(f * 255, 0, 255).astype(np.uint8)

            # Vignette, then random grain.
            frame = np.clip(frame.astype(np.float32) * vmask[:, :, None], 0, 255).astype(np.uint8)
            frame = np.clip(
                frame.astype(np.float32) + np.random.normal(0, 2.5, frame.shape), 0, 255
            ).astype(np.uint8)

            # Letterbox bars at top and bottom.
            frame[:36, :] = 0
            frame[-36:, :] = 0

            # Fade in over the first 2% and out over the last 5%.
            if tg < 0.02:
                alpha = ease_e(tg / 0.02)
            elif tg > 0.95:
                alpha = ease_s(1 - (tg - 0.95) / 0.05)
            else:
                alpha = 1.
            if alpha < 1.:
                frame = np.clip(frame.astype(np.float32) * alpha, 0, 255).astype(np.uint8)

            writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    finally:
        # Always finalise the container, even if a frame failed to render.
        writer.release()
    return tmp.name
# ══════════════════════════════════════════════════════════════════
# MULTI-VIDEO MERGE
# ══════════════════════════════════════════════════════════════════
def merge_videos(paths):
    """Concatenate clips with ffmpeg's concat demuxer.

    Args:
        paths: Non-empty list of video file paths, in playback order.

    Returns:
        The merged file path (first path with ``_merged`` inserted before
        ``.mp4``). A single-element list is returned as-is; if ffmpeg is
        missing or fails, the last input path is returned instead.
    """
    import subprocess

    if len(paths) == 1:
        return paths[0]

    out = paths[0].replace(".mp4", "_merged.mp4")
    # Write the concat list and close it so ffmpeg sees the full content.
    with tempfile.NamedTemporaryFile(suffix=".txt", mode="w", delete=False) as lst:
        for p in paths:
            # Inside a single-quoted concat entry a quote is written as '\''.
            escaped = p.replace("'", "'\\''")
            lst.write(f"file '{escaped}'\n")

    cmd = [
        "ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", lst.name,
        "-c:v", "libx264", "-c:a", "aac", "-b:a", "128k",
        "-movflags", "+faststart", out, "-loglevel", "error",
    ]
    try:
        # Argument list instead of a shell string: no quoting pitfalls.
        ret = subprocess.run(cmd).returncode
    except OSError as e:
        print(f" ⚠️ ffmpeg unavailable: {e}")
        ret = -1
    finally:
        try:
            os.unlink(lst.name)
        except OSError:
            pass
    return out if (ret == 0 and os.path.exists(out)) else paths[-1]
# ══════════════════════════════════════════════════════════════════
# CAPTIONS
# ══════════════════════════════════════════════════════════════════
def add_captions_ffmpeg(video_path, caption, duration_sec, style):
    """Burn caption overlays into a video with ffmpeg's ``drawtext`` filter.

    The caption is split into two halves shown one after the other, plus a
    "Shop Now >" call to action and a "#NewCollection" tag. Text is reduced
    to ASCII letters, digits, spaces, ``!.,-`` and Devanagari before use.

    Args:
        video_path: Input ``.mp4`` path.
        caption: Caption text.
        duration_sec: Clip length in seconds, used to cap overlay end times.
        style: "premium", "energetic" or "fun"; selects overlay colours.

    Returns:
        Path of the captioned file (``_cap`` inserted before ``.mp4``), or
        ``video_path`` unchanged if ffmpeg is missing or fails.
    """
    import subprocess

    def clean(t):
        return re.sub(r"[^A-Za-z0-9 !.,\-\u0900-\u097F]", "", t).strip()

    words = caption.strip().split()
    mid = max(1, len(words) // 2)
    line1 = clean(" ".join(words[:mid]))
    # A one-word caption is shown in both slots.
    line2 = clean(" ".join(words[mid:])) if len(words) > 1 else line1

    col = {"premium": "FFD232", "energetic": "3CC8FF", "fun": "FF78C8"}.get(style, "FFFFFF")
    cta_col = {"premium": "FF9900", "energetic": "FF4444", "fun": "AA44FF"}.get(style, "FF9900")
    out = video_path.replace(".mp4", "_cap.mp4")

    # Use the first available bold font; otherwise let ffmpeg pick its default.
    font = ""
    for p in ["/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
              "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf"]:
        if os.path.exists(p):
            font = f":fontfile='{p}'"
            break

    def dt(text, start, end, y, size=42, color=None, box_a="0.60"):
        # One drawtext filter, visible in [start, end] with a 0.4 s fade at each end.
        c = color or col
        fd = 0.4
        return (f"drawtext=text='{text}'{font}:fontsize={size}:fontcolor=#{c}"
                f":x=(w-text_w)/2:y={y}:box=1:boxcolor=black@{box_a}:boxborderw=14"
                f":enable='between(t,{start},{end})'"
                f":alpha='if(lt(t,{start+fd}),(t-{start})/{fd},if(gt(t,{end-fd}),({end}-t)/{fd},1))'")

    e2 = min(duration_sec - 0.2, 6.5)
    vf = ",".join([
        dt(line1, 1.0, 3.5, "h-190"),
        dt(line2, 3.8, e2, "h-190"),
        dt("Shop Now >", min(4.5, e2 - 0.5), e2, "h-130", size=32, color=cta_col, box_a="0.70"),
        dt("#NewCollection", 0.5, 3.0, "60", size=28, color="FFFFFF", box_a="0.40"),
    ])

    cmd = ["ffmpeg", "-y", "-i", video_path, "-vf", vf, "-c:a", "copy", out, "-loglevel", "error"]
    try:
        # Argument list: caption text never passes through a shell.
        ret = subprocess.run(cmd).returncode
    except OSError as e:
        print(f" ⚠️ ffmpeg unavailable: {e}")
        ret = -1
    return out if (ret == 0 and os.path.exists(out)) else video_path
# ══════════════════════════════════════════════════════════════════
# AUDIO
# ══════════════════════════════════════════════════════════════════
def make_bgm(duration_sec, out_path, style="premium"):
    """Synthesise a simple backing track and write it as a mono 16-bit WAV.

    The mix has four layers: a kick on every beat, a pulsing sine bass,
    three sustained melody tones and noise hi-hats on every half beat.
    Tempo, bass pitch and melody notes depend on ``style`` ("premium",
    "energetic", "fun"; anything else uses the "premium" values).

    Args:
        duration_sec: Track length in seconds.
        out_path: Destination ``.wav`` path (44.1 kHz).
        style: Style name selecting tempo and pitches.
    """
    import wave

    sr = 44100
    n = int(sr * duration_sec)
    t = np.linspace(0, duration_sec, n, endpoint=False)
    bpm = {"premium": 88, "energetic": 126, "fun": 104}.get(style, 88)
    beat = 60. / bpm

    # Kick: 100 ms decaying sine with a falling pitch, on every beat.
    kick = np.zeros(n, np.float32)
    for i in range(int(duration_sec / beat) + 2):
        s = int(i * beat * sr)
        if s >= n:
            break
        length = min(int(sr * .10), n - s)
        idx = np.arange(length)
        env = np.exp(-20 * idx / sr)
        kick[s:s + length] += env * np.sin(2 * math.pi * 55 * np.exp(-25 * idx / sr) * idx / sr) * 0.55

    # Bass: sine with slow amplitude modulation tied to the tempo.
    bf = {"premium": 55, "energetic": 80, "fun": 65}.get(style, 55)
    bass = np.sin(2 * math.pi * bf * t) * 0.10 * (0.5 + 0.5 * np.sin(2 * math.pi * (bpm / 60 / 4) * t))

    # Melody: three tones, each gated by a phase-shifted 1.5 Hz envelope.
    mf = {"premium": [261, 329, 392], "energetic": [330, 415, 494], "fun": [392, 494, 587]}.get(style, [261, 329, 392])
    mel = np.zeros(n, np.float32)
    for j, freq in enumerate(mf):
        mel += np.sin(2 * math.pi * freq * t) * np.clip(0.5 + 0.5 * np.sin(2 * math.pi * 1.5 * t - j * 2.1), 0, 1) * 0.045

    # Hi-hat: 30 ms decaying noise burst on every half beat.
    hat = np.zeros(n, np.float32)
    for i in range(int(duration_sec / (beat / 2)) + 2):
        s = int(i * (beat / 2) * sr)
        if s >= n:
            break
        length = min(int(sr * .03), n - s)
        hat[s:s + length] += np.random.randn(length) * np.exp(-80 * np.arange(length) / sr) * 0.06

    mix = np.clip((kick + bass + mel + hat) * 0.18, -1, 1)

    # Half-second fade in/out, shortened for tracks under one second so the
    # ramp never exceeds the signal length (which used to raise).
    fade = min(int(sr * .5), n // 2)
    if fade > 0:
        mix[:fade] *= np.linspace(0, 1, fade)
        mix[-fade:] *= np.linspace(1, 0, fade)

    with wave.open(out_path, "w") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(sr)
        wf.writeframes((mix * 32767).astype(np.int16).tobytes())
def add_audio(video_path, caption, duration_sec, style):
    """Add generated background music, plus spoken caption when possible.

    A backing track is synthesised next to the video. If gTTS is available
    the first 200 characters of ``caption`` are narrated (English voice) and
    mixed over the music at 20% music / 95% voice; any failure there falls
    back to music only. The audio is then muxed onto the video.

    Args:
        video_path: Input ``.mp4`` path.
        caption: Text to narrate.
        duration_sec: Length of the audio track in seconds.
        style: Music style passed to ``make_bgm``.

    Returns:
        Path of the file with audio (``_final`` inserted before ``.mp4``),
        or ``video_path`` unchanged if muxing failed.
    """
    import subprocess

    def run_ffmpeg(args):
        # Argument list, no shell; False when ffmpeg is missing or fails.
        try:
            return subprocess.run(["ffmpeg", "-y", *args, "-loglevel", "error"]).returncode == 0
        except OSError as e:
            print(f" ⚠️ ffmpeg unavailable: {e}")
            return False

    bgm = video_path.replace(".mp4", "_bgm.wav")
    final = video_path.replace(".mp4", "_final.mp4")
    make_bgm(duration_sec, bgm, style)
    audio = bgm

    try:
        from gtts import gTTS

        tts = video_path.replace(".mp4", "_tts.mp3")
        gTTS(text=caption[:200], lang="en", slow=False).save(tts)
        mixed = video_path.replace(".mp4", "_mix.wav")
        ok = run_ffmpeg([
            "-i", bgm, "-i", tts, "-filter_complex",
            "[0]volume=0.20[a];[1]volume=0.95[b];[a][b]amix=inputs=2:duration=first",
            "-t", str(duration_sec), mixed,
        ])
        # Require success so a stale file from an earlier run is not reused.
        if ok and os.path.exists(mixed):
            audio = mixed
    except Exception as e:
        # Narration is optional (package missing, no network, empty text).
        print(f" ⚠️ voice-over skipped: {e}")

    ok = run_ffmpeg([
        "-i", video_path, "-i", audio,
        "-c:v", "copy", "-c:a", "aac", "-b:a", "128k", "-shortest", final,
    ])
    return final if (ok and os.path.exists(final)) else video_path
# ══════════════════════════════════════════════════════════════════
# MAIN PIPELINE (FIXED: safe image conversion)
# ══════════════════════════════════════════════════════════════════
def _to_pil(item):
    """Convert one gallery entry to an RGB PIL image, or None if unusable."""
    # Gallery inputs may arrive as (media, caption) pairs; keep the media part.
    if isinstance(item, (tuple, list)):
        item = item[0] if item else None
    if isinstance(item, dict):
        # Pick the first key that is present; avoids `ndarray or ...`,
        # which raises on array truthiness.
        item = next(
            (item[k] for k in ("composite", "image", "path") if item.get(k) is not None),
            None,
        )
    if item is None:
        return None
    if isinstance(item, Image.Image):
        return item.convert("RGB")
    if isinstance(item, np.ndarray):
        return Image.fromarray(item).convert("RGB")
    if isinstance(item, str) and os.path.exists(item):
        with Image.open(item) as opened:
            return opened.convert("RGB")
    return None


def generate(images, caption, style, language, duration, add_aud, add_cap, progress=gr.Progress()):
    """Run the full pipeline: images → clips → captions → merge → audio.

    Args:
        images: Gallery value; entries may be PIL images, arrays, file
            paths, dicts, or (media, caption) pairs.
        caption: User caption; blank or None triggers an automatic one.
        style: "Premium", "Energetic" or "Fun".
        language: "English", "Hindi" or "Hinglish".
        duration: Requested total duration in seconds.
        add_aud: Whether to add music and voice-over.
        add_cap: Whether to burn captions into each clip.
        progress: Gradio progress tracker.

    Returns:
        ``(video_path, log_text, insights_text)``; ``video_path`` is None
        when no usable image was supplied.
    """
    pils = []
    for img in images or []:
        try:
            converted = _to_pil(img)
        except Exception as e:
            print(f" ⚠️ Skipping image: {e}")
            continue
        if converted is not None:
            pils.append(converted)
    if not pils:
        return None, "⚠️ Upload at least 1 valid image!", "No image provided."

    cap = (caption or "").strip()
    dur = int(duration)
    lines = []

    def log(msg):
        lines.append(msg)
        progress(min(.05 + len(lines) * .08, .80), desc=msg)

    progress(.02, desc="🔍 Auto-detecting category...")
    category, auto_prompt, detected_label = auto_detect(pils[0], cap)
    log(f"🔍 Detected: {detected_label or category}")

    if not cap:
        cap_hints = {
            "Fashion": "Step into style. Own the moment.",
            "Food": "Every bite tells a story.",
            "Tech": "The future is here.",
            "Beauty": "Glow different.",
            "Fitness": "Push your limits.",
            "Lifestyle": "Live the aesthetic.",
            "Product/Other": "Quality that speaks for itself.",
        }
        cap = cap_hints.get(category, "Premium quality. Shop now.")
        log(f"💡 Auto caption: {cap}")

    insight, ai_cap = get_insights(category, style, language, cap)

    video_paths = []
    # Split the requested duration across images, but never below 4 s per clip.
    clip_dur = max(4, dur // len(pils))
    for idx, pil in enumerate(pils):
        log(f"🎬 Image {idx+1}/{len(pils)}...")
        if idx == 0:
            # The first image was already analysed above; skip a second
            # classifier round trip.
            img_prompt = auto_prompt
        else:
            _, img_prompt, _ = auto_detect(pil, cap)
        full_prompt = f"{img_prompt}, {cap[:60]}"
        vpath, model = get_video(pil, full_prompt, clip_dur, cb=log if idx == 0 else None)
        if add_cap:
            log(f"💬 Captions {idx+1}...")
            # Non-English reels use the localised marketing caption.
            video_caption = ai_cap if language != "English" else cap
            vpath = add_captions_ffmpeg(vpath, video_caption, clip_dur, style.lower())
        video_paths.append(vpath)
        log(f"✅ Clip {idx+1} done ({model})")

    if len(video_paths) > 1:
        log("🔗 Merging clips...")
        final = merge_videos(video_paths)
    else:
        final = video_paths[0]

    if add_aud:
        log("🎵 Adding music + voice...")
        final = add_audio(final, cap, dur, style.lower())

    progress(1.0, desc="✅ Done!")
    return final, "\n".join(lines), insight
# ══════════════════════════════════════════════════════════════════
# UI
# ══════════════════════════════════════════════════════════════════
def _save_template_ui(name, style, language, duration, caption, add_aud, add_cap):
    """Save a template and refresh the dropdown's choices (not its value)."""
    status, names = save_template(name, style, language, duration, caption, add_aud, add_cap)
    # A plain list returned to a Dropdown output is treated as its value;
    # gr.update(choices=...) is what actually refreshes the option list.
    return status, gr.update(choices=names, value=name if name in names else None)


def _delete_template_ui(name):
    """Delete a template and refresh the dropdown's choices."""
    status, names = delete_template(name)
    return status, gr.update(choices=names, value=None)


with gr.Blocks() as demo:
    gr.Markdown("# 🎬 AI Reel Generator", elem_id="title")
    gr.Markdown(
        "Upload 1-5 images → AI auto-detects category → cinematic reel + smart posting strategy\n\n"
        '<span class="feature-badge">Multi-Image</span>'
        '<span class="feature-badge">Multilingual</span>'
        '<span class="feature-badge">AI Chain</span>'
        '<span class="feature-badge">Template Save/Share</span>',
        elem_id="sub"
    )
    with gr.Tabs():
        # ── TAB 1: GENERATOR ─────────────────────────────────────
        with gr.Tab("🎬 Generator", elem_classes="tab-label"):
            with gr.Row():
                # LEFT: inputs
                with gr.Column(scale=1):
                    img_in = gr.Gallery(
                        label="📸 Upload 1–5 Images (drag & drop)",
                        type="pil",
                        columns=5, rows=1,
                        height=200,
                        object_fit="contain",
                    )
                    cap_in = gr.Textbox(
                        label="✏️ Caption / Description (leave blank = auto-detect)",
                        placeholder="e.g. Premium sneakers with star design... or leave empty!",
                        lines=2,
                    )
                    with gr.Row():
                        sty_dd = gr.Dropdown(["Premium","Energetic","Fun"], value="Premium", label="🎨 Style")
                        lang_dd = gr.Dropdown(["English","Hindi","Hinglish"], value="English", label="🌐 Language")
                    dur_sl = gr.Slider(minimum=5, maximum=20, value=6, step=1,
                                       label="⏱️ Total Duration (seconds)")
                    with gr.Row():
                        aud_cb = gr.Checkbox(label="🎵 Music + Voice", value=True)
                        cap_cb = gr.Checkbox(label="💬 Captions", value=True)
                    gen_btn = gr.Button("🚀 Generate Reel + Smart Insights", variant="primary", size="lg")
                    gr.Markdown(
                        "**🔗 AI Chain:** LTX-2 ⚡ → Wan 2.2 → SVD-XT → Kling → LTX-Video → Ken Burns ✅\n\n"
                        "💡 Upload multiple images for a multi-clip reel!"
                    )
                # RIGHT: outputs
                with gr.Column(scale=1):
                    vid_out = gr.Video(label="🎥 Cinematic Reel", height=400)
                    insight_out = gr.Textbox(
                        label="📊 Smart Insights",
                        lines=16, interactive=False, elem_classes="insight",
                    )
                    log_out = gr.Textbox(label="🔧 Log", lines=4, interactive=False)
        # ── TAB 2: TEMPLATES ─────────────────────────────────────
        with gr.Tab("💾 Templates", elem_classes="tab-label"):
            gr.Markdown("### 💾 Save, Load & Share Your Reel Settings")
            with gr.Row(elem_classes="save-row"):
                tpl_name_in = gr.Textbox(label="Template Name", placeholder="e.g. My Brand Style", scale=3)
                save_btn = gr.Button("💾 Save Current Settings", variant="primary", scale=1)
            tpl_status = gr.Textbox(label="Status", interactive=False, lines=1)
            tpl_list = gr.Dropdown(label="📂 Saved Templates", choices=get_template_names(), interactive=True)
            with gr.Row():
                load_btn = gr.Button("📂 Load Template", variant="secondary")
                del_btn = gr.Button("🗑️ Delete Template", variant="stop")
                export_btn = gr.Button("📤 Export as JSON")
            export_file = gr.File(label="⬇️ Download Template JSON", visible=True)
            gr.Markdown("""
**How to use Templates:**
1. Configure your settings in the Generator tab
2. Give it a name and click **Save Current Settings**
3. Next time, just pick from the dropdown and **Load Template**
4. **Export** to share with teammates or save as backup
""")
        # ── TAB 3: TECH EXPLAINED ────────────────────────────────
        with gr.Tab("📚 Tech Stack", elem_classes="tab-label"):
            gr.Markdown("""
## 🛠️ Technology Used — Full Breakdown
### 🎬 Video Generation
| Component | Technology | Purpose |
|-----------|-----------|---------|
| **Ken Burns Effect** | OpenCV + NumPy | Cinematic zoom/pan animation |
| **Color Grading** | NumPy array ops | Style-based color correction |
| **Vignette** | NumPy distance map | Cinematic edge darkening |
| **Video Encoding** | OpenCV VideoWriter | MP4 output @ 30fps |
| **AI Video** | HuggingFace InferenceClient | Image-to-video (when available) |
### 🤗 AI Model Chain
| Priority | Model | Provider | Type |
|----------|-------|----------|------|
| 1 | LTX-2 ⚡ | Lightricks | Fast I2V |
| 2 | Wan 2.2 | Wan-AI | High quality I2V |
| 3 | SVD-XT | Stability AI | Stable Video Diffusion |
| 4 | Kling | KlingTeam | LivePortrait |
| 5 | LTX-Video | Lightricks | Fallback I2V |
| 6 ✅ | Ken Burns | Local | Always works! |
### 🎵 Audio System
| Component | Technology | Details |
|-----------|-----------|---------|
| **BGM Generation** | NumPy + wave | Sine waves, kick drum, hi-hat |
| **TTS Voice** | gTTS (Google TTS) | Caption narration |
| **Audio Mixing** | ffmpeg amix | BGM 20% + Voice 95% |
| **BPM by Style** | Custom logic | Premium=88, Energetic=126, Fun=104 |
### 💬 Caption System
| Feature | Technology |
|---------|-----------|
| Text Overlay | ffmpeg drawtext filter |
| Fade Animation | ffmpeg alpha expression |
| Font | DejaVu / Liberation Sans Bold |
| Languages | English / Hindi / Hinglish |
### 🔍 Auto-Detection
| Step | Technology |
|------|-----------|
| Image Classification | google/vit-base-patch16-224 |
| Label Mapping | Custom Python dict |
| Caption Fallback | Keyword matching |
### 🌟 Unique Points
> ✅ **No GPU required** — Ken Burns always as fallback
> ✅ **Multilingual** — Hindi captions with Devanagari support
> ✅ **Programmatic BGM** — No audio files needed
> ✅ **Template system** — Save/load/export settings as JSON
> ✅ **AI fallback chain** — 5 models tried before local fallback
> ✅ **Multi-image merge** — Up to 5 clips concatenated
> ✅ **Auto posting strategy** — AI-driven best time recommendation
""")
    # ── EVENTS ────────────────────────────────────────────────────
    gen_btn.click(
        fn=generate,
        inputs=[img_in, cap_in, sty_dd, lang_dd, dur_sl, aud_cb, cap_cb],
        outputs=[vid_out, log_out, insight_out],
    )
    # Template events: save/delete go through wrappers that refresh the
    # dropdown's choices.
    save_btn.click(
        fn=_save_template_ui,
        inputs=[tpl_name_in, sty_dd, lang_dd, dur_sl, cap_in, aud_cb, cap_cb],
        outputs=[tpl_status, tpl_list],
    )
    load_btn.click(
        fn=load_template,
        inputs=[tpl_list],
        outputs=[sty_dd, lang_dd, dur_sl, cap_in, aud_cb, cap_cb],
    )
    del_btn.click(
        fn=_delete_template_ui,
        inputs=[tpl_list],
        outputs=[tpl_status, tpl_list],
    )
    export_btn.click(
        fn=export_template,
        inputs=[tpl_list],
        outputs=[export_file],
    )

if __name__ == "__main__":
    demo.launch()