Spaces:

ADXabhi
/

caption-txp1

Running

App Files Files Community

caption-txp1 / app.py

ADXabhi

Upload app.py

76e3db8 verified 3 months ago

raw

history blame contribute delete

47.5 kB

	"""
	Caption Greenscreen Service — FastAPI + Async Jobs
	V6: 8 styles + dynamic colors + animations (none, pop, bounce, slam, slam_shake, underline, minimalist_sweep, typewriter, slide_in, karaoke_wipe) + sliding_toggle + sliding_toggle_light
	"""

	import os, uuid, time, math, shutil, tempfile, subprocess
	from typing import Dict, List, Optional, Tuple
	from fastapi import FastAPI, BackgroundTasks, HTTPException
	from fastapi.middleware.cors import CORSMiddleware
	from pydantic import BaseModel
	from PIL import Image, ImageDraw, ImageFont, ImageFilter
	import cloudinary, cloudinary.uploader

	# ── CONFIG ──────────────────────────────────────────────
	def _fetch_cloud_name():
	    """Fetch the Cloudinary cloud name from the media proxy's config endpoint.

	    Makes up to three GET requests to ``https://media.toolxp.org/config``
	    and returns the first non-empty ``cloud_name`` found in the JSON body.
	    Every failed attempt (network/parse error or empty value) is printed.

	    Returns:
	        The ``cloud_name`` value from the response.

	    Raises:
	        RuntimeError: if no attempt yields a non-empty name. The last
	            underlying exception, if any, is chained as ``__cause__``.
	    """
	    import urllib.request as _ur, json as _j, ssl as _ssl
	    ctx = _ssl.create_default_context()
	    req = _ur.Request("https://media.toolxp.org/config", headers={"User-Agent": "Mozilla/5.0"})
	    last_err = None
	    for _i in range(3):
	        try:
	            with _ur.urlopen(req, timeout=10, context=ctx) as r:
	                name = _j.loads(r.read().decode())["cloud_name"]
	        except Exception as _e:
	            # Best-effort retry: remember the cause so the final error can chain it.
	            last_err = _e
	            print(f"[config] attempt {_i+1} failed: {_e}")
	            continue
	        if name:
	            print(f"[config] cloud_name={name}")
	            return name
	        # A well-formed response with an empty value used to be retried silently.
	        print(f"[config] attempt {_i+1} failed: empty cloud_name")
	    raise RuntimeError("[config] FATAL: could not fetch cloud_name after 3 attempts") from last_err
	# Resolved once at import time; a RuntimeError from _fetch_cloud_name() propagates out of the module import.
	CLOUD_NAME = _fetch_cloud_name()
	# Upload preset name; overridable via the CLOUDINARY_UPLOAD_PRESET environment variable.
	UPLOAD_PRESET = os.environ.get("CLOUDINARY_UPLOAD_PRESET", "testing")

	# Base URL that proxy_url() substitutes for the Cloudinary delivery prefix.
	MEDIA_PROXY = "https://media.toolxp.org"

	def proxy_url(url: str) -> str:
	    """Rewrite a Cloudinary delivery URL so it points at the media proxy.

	    Each occurrence of ``https://res.cloudinary.com/<CLOUD_NAME>`` inside
	    *url* is swapped for ``MEDIA_PROXY``; any other URL is returned as is.
	    """
	    cloudinary_base = f"https://res.cloudinary.com/{CLOUD_NAME}"
	    rewritten = url.replace(cloudinary_base, MEDIA_PROXY)
	    return rewritten
	# Output frame size in pixels; every canvas in this file is WIDTH x HEIGHT.
	WIDTH = 1280
	HEIGHT = 200
	# NOTE(review): presumably frames per second of the rendered overlay — usage is outside this view.
	FPS = 12
	# RGBA value used to initialise blank canvases (alpha 0).
	TRANSPARENT = (0, 0, 0, 0)
	# NOTE(review): presumably job id -> job state for the async jobs; readers/writers are outside this view.
	JOBS: Dict[str, dict] = {}

	# Lazily built raw bytes of an all-transparent frame (see blank_bytes).
	_BLANK = None


	def blank_bytes():
	    """Return the raw RGBA bytes of a fully transparent WIDTH x HEIGHT frame.

	    The buffer is created on the first call and memoised in the
	    module-level ``_BLANK``; later calls hand back the same object.
	    """
	    global _BLANK
	    if _BLANK is not None:
	        return _BLANK
	    empty_frame = Image.new('RGBA', (WIDTH, HEIGHT), TRANSPARENT)
	    _BLANK = empty_frame.tobytes()
	    return _BLANK

	# ── APP ────────────────────────────────────────────────
	# CORS is wide open: any origin, method and header is accepted.
	# allow_methods / allow_headers must use the "*" wildcard — a list holding
	# an empty string matches no real method or header, so browser preflight
	# requests would be rejected.
	app = FastAPI(title="Caption Greenscreen V5")
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_credentials=True,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	# ── MODELS ─────────────────────────────────────────────
	# One transcript word plus its start/end timestamps.
	# (Kept as a comment: a class docstring would show up in the generated schema.)
	class TranscriptWord(BaseModel):
	    text: str
	    start: float
	    end: float

	# Colour/stroke settings for one caption style. The *_fill, *_stroke and *_bg
	# values are hex colour strings that render_frame converts with hex_rgb().
	class CaptionColors(BaseModel):
	active_fill: str = "#FFD700" # text colour of the active word
	active_stroke: str = "#000000" # outline colour of the active word
	active_stroke_width: int = 7 # passed as stroke_width when drawing the active word
	inactive_fill: str = "#FFFFFF" # text colour of every other word
	inactive_stroke: str = "#000000" # outline colour of every other word
	inactive_stroke_width: int = 5 # passed as stroke_width when drawing inactive words
	active_bg: Optional[str] = None # box colour behind the active word; None draws no box
	inactive_bg: Optional[str] = None # box colour behind inactive words; None draws no box

	# Caption job parameters. NOTE(review): presumably the request body of an
	# endpoint defined outside this view — confirm against the route handlers.
	class CaptionRequest(BaseModel):
	transcript: List[TranscriptWord] # words to caption, each with start/end
	style: Optional[str] = "hormozi" # presumably a DEFAULT_COLORS key — confirm against the caller
	duration: Optional[float] = None # NOTE(review): meaning and units are not visible in this view
	colors: Optional[CaptionColors] = None # presumably overrides the style's default colours when set — confirm
	animation: Optional[str] = "pop" # none, pop, bounce, slam, slam_shake, underline, minimalist_sweep, typewriter, slide_in, karaoke_wipe

	# ── HELPERS ────────────────────────────────────────────
	def hex_rgb(h: str) -> Tuple[int,int,int]:
	    """Convert a hex colour string to an ``(r, g, b)`` tuple of ints in 0-255.

	    Accepts ``RRGGBB`` and the 3-digit shorthand ``RGB`` (each digit is
	    doubled), with or without a leading ``#`` and surrounding whitespace.
	    Digits beyond the first six (e.g. an alpha pair) are ignored.

	    Raises:
	        ValueError: if the string is too short or contains non-hex digits.
	    """
	    digits = h.strip().lstrip('#')
	    if len(digits) == 3:
	        # CSS-style shorthand: "F0A" means "FF00AA".
	        digits = ''.join(ch * 2 for ch in digits)
	    if len(digits) < 6:
	        # Reject rather than silently parse a truncated channel.
	        raise ValueError(f"invalid hex colour: {h!r}")
	    return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))

	# ── FONTS ──────────────────────────────────────────────
	# Font caches keyed by size; filled lazily by get_font / get_devanagari_font.
	_FC = {} # Latin font cache
	_FC_DEVA = {} # Devanagari font cache

	def _is_devanagari(text: str) -> bool:
	"""Return True if text contains any Devanagari Unicode character (U+0900–U+097F)."""
	return any('\u0900' <= ch <= '\u097f' for ch in text)

	def get_font(size=72):
	    """Return a Latin/universal bold font at the given size.

	    Lookup order: the bundled and system font files listed below, then
	    fontconfig's best ``sans:bold`` match, then Pillow's built-in default
	    font. The chosen font is cached per size in ``_FC``.
	    """
	    if size in _FC:
	        return _FC[size]
	    candidates = [
	        "/app/fonts/Inter-Black.ttf", "/app/fonts/Inter-Bold.ttf",
	        "/app/fonts/DejaVuSans-Bold.ttf",
	        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
	        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
	        "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf",
	    ]
	    for p in candidates:
	        if os.path.exists(p):
	            try:
	                f = ImageFont.truetype(p, size)
	            except Exception:
	                # File exists but cannot be loaded: try the next candidate.
	                # (Exception, not bare except, so Ctrl-C / SystemExit still propagate.)
	                continue
	            _FC[size] = f
	            return f
	    try:
	        r = subprocess.run(["fc-match","--format=%{file}","sans:bold"], capture_output=True, text=True)
	        if r.returncode == 0 and r.stdout.strip():
	            f = ImageFont.truetype(r.stdout.strip(), size)
	            _FC[size] = f
	            return f
	    except Exception:
	        # fc-match unavailable or its answer unusable: fall through to the default font.
	        pass
	    f = ImageFont.load_default()
	    _FC[size] = f
	    return f

	def get_devanagari_font(size=72):
	    """Return a Devanagari-capable font (Noto Sans Devanagari) at the given size.

	    Lookup order: the bundled and system Noto files listed below, then
	    fontconfig's ``:script=deva:bold`` match, then ``get_font(size)``.
	    Whatever is chosen — including the Latin fallback — is cached per size
	    in ``_FC_DEVA``, so the file probing and the fc-match subprocess run at
	    most once per size instead of on every call.
	    """
	    if size in _FC_DEVA:
	        return _FC_DEVA[size]
	    candidates = [
	        "/app/fonts/NotoSansDevanagari-Bold.ttf",
	        "/app/fonts/NotoSansDevanagari-Regular.ttf",
	        "/app/fonts/NotoSans-Bold.ttf",
	        "/usr/share/fonts/truetype/noto/NotoSansDevanagari-Bold.ttf",
	        "/usr/share/fonts/truetype/noto/NotoSansDevanagari-Regular.ttf",
	        "/usr/share/fonts/noto/NotoSansDevanagari-Bold.ttf",
	        "/usr/share/fonts/noto/NotoSansDevanagari-Regular.ttf",
	    ]
	    for p in candidates:
	        if os.path.exists(p):
	            try:
	                f = ImageFont.truetype(p, size)
	            except Exception:
	                # File exists but cannot be loaded: try the next candidate.
	                continue
	            _FC_DEVA[size] = f
	            return f
	    # Fallback: try fc-match for devanagari script
	    try:
	        r = subprocess.run(["fc-match","--format=%{file}",":script=deva:bold"], capture_output=True, text=True)
	        if r.returncode == 0 and r.stdout.strip():
	            f = ImageFont.truetype(r.stdout.strip(), size)
	            _FC_DEVA[size] = f
	            return f
	    except Exception:
	        # fc-match unavailable or its answer unusable: use the Latin font below.
	        pass
	    # Last resort: fall back to the Latin font (still better than load_default).
	    # Cache it so the probing above is not repeated for this size.
	    f = get_font(size)
	    _FC_DEVA[size] = f
	    return f

	def get_font_for_text(text: str, size=72):
	    """Pick the font loader that matches *text*'s script and return its font at *size*.

	    Text containing any Devanagari character goes to get_devanagari_font;
	    everything else goes to get_font.
	    """
	    loader = get_devanagari_font if _is_devanagari(text) else get_font
	    return loader(size)

	# Font sizes handed to the font loaders.
	FONT_N = 72  # normal
	FONT_A = 86  # active (pop)
	# Horizontal gap between neighbouring words.
	PAD = 45
	# Background box: horizontal padding, vertical padding, corner radius.
	BOX_PX = 16
	BOX_PY = 8
	BOX_R = 12

	# Animation timing
	WORD_ANIM_DUR = 0.4  # 400ms — matches CSS transition: left/width 400ms cubic-bezier(1,0,0.4,1)
	LINE_ANIM_DUR = 0.25  # seconds for slide-in

	# ── DEFAULT STYLE COLORS ──────────────────────────────
	# Default colour set per style name. Entries with a stroke width of 0 draw no
	# outline; entries with active_bg / inactive_bg carry a background colour.
	# NOTE(review): presumably used when a request supplies no colours — the lookup is outside this view.
	DEFAULT_COLORS = {
	"hormozi": CaptionColors(active_fill="#FFD700",active_stroke="#000000",active_stroke_width=7,inactive_fill="#FFFFFF",inactive_stroke="#000000",inactive_stroke_width=5),
	"netflix": CaptionColors(active_fill="#E50914",active_stroke="#000000",active_stroke_width=7,inactive_fill="#FFFFFF",inactive_stroke="#000000",inactive_stroke_width=5),
	"karaoke": CaptionColors(active_fill="#00FF00",active_stroke="#000000",active_stroke_width=0,inactive_fill="#FFFFFF",inactive_stroke="#000000",inactive_stroke_width=0,active_bg="#1A1A1A",inactive_bg="#333333"),
	"mrbeast": CaptionColors(active_fill="#000000",active_stroke="#000000",active_stroke_width=0,inactive_fill="#FFFFFF",inactive_stroke="#000000",inactive_stroke_width=5,active_bg="#FFE100"),
	"minimal": CaptionColors(active_fill="#FFFFFF",active_stroke="#000000",active_stroke_width=0,inactive_fill="#999999",inactive_stroke="#000000",inactive_stroke_width=0),
	"gradient_pop": CaptionColors(active_fill="#FF00FF",active_stroke="#000000",active_stroke_width=7,inactive_fill="#FFFFFF",inactive_stroke="#000000",inactive_stroke_width=5),
	"boxing": CaptionColors(active_fill="#FFFFFF",active_stroke="#000000",active_stroke_width=0,inactive_fill="#FFFFFF",inactive_stroke="#000000",inactive_stroke_width=0,active_bg="#7C3AED",inactive_bg="#333333"),
	# Sliding toggle dark: dark pill slides behind active word
	"sliding_toggle": CaptionColors(active_fill="#FFFFFF",active_stroke="#000000",active_stroke_width=0,inactive_fill="#CCCCCC",inactive_stroke="#000000",inactive_stroke_width=0,active_bg="#3A3A3C",inactive_bg=None),
	# Sliding toggle light: frosted white container, white pill, dark text
	"sliding_toggle_light": CaptionColors(active_fill="#141428",active_stroke="#000000",active_stroke_width=0,inactive_fill="#505070",inactive_stroke="#000000",inactive_stroke_width=0,active_bg="#FFFFFF",inactive_bg=None),
	}


	# ── CORE RENDERER ─────────────────────────────────────
	def render_frame(words_in_line, active_word_idx, style, colors: CaptionColors,
	                 animation="pop",
	                 word_anim_t=1.0,     # 0→1 progress of bounce/slam (1=settled)
	                 line_anim_t=1.0,     # 0→1 progress of slide_in (1=settled)
	                 word_time_pct=0.0):  # 0→1 how far through the active word's time
	    """
	    Render one caption frame with style, colors, and animation state.

	    Args:
	        words_in_line: sequence of dicts; each entry's ``'text'`` is drawn.
	        active_word_idx: index of the highlighted word. An index matching
	            no word leaves every word in its inactive look.
	        style: style name. ``sliding_toggle`` / ``sliding_toggle_light``
	            are delegated to their own renderers; other names only toggle
	            pop sizing, background boxes and stroke.
	        colors: hex colour strings and stroke widths for both word states.
	        animation: one of none, pop, bounce, slam, slam_shake, underline,
	            minimalist_sweep, typewriter, slide_in, karaoke_wipe.
	        word_anim_t: 0→1 progress of the per-word animation.
	        line_anim_t: 0→1 progress of the slide_in line animation.
	        word_time_pct: 0→1 progress through the active word, driving
	            typewriter, underline and karaoke sweeps.

	    Returns:
	        Raw RGBA bytes of a WIDTH x HEIGHT image.
	    """
	    img = Image.new('RGBA', (WIDTH, HEIGHT), TRANSPARENT)
	    if not words_in_line:
	        return img.tobytes()

	    draw = ImageDraw.Draw(img)

	    # ── SLIDING TOGGLE: special self-contained renderers ──
	    if style == 'sliding_toggle':
	        return _render_sliding_toggle(img, draw, words_in_line, active_word_idx, colors, slide_t=word_anim_t)
	    if style == 'sliding_toggle_light':
	        return _render_sliding_toggle_light(img, draw, words_in_line, active_word_idx, colors, slide_t=word_anim_t)

	    # Style behavior flags
	    use_pop = style in ('hormozi', 'netflix', 'gradient_pop', 'mrbeast')
	    draw_all_boxes = style in ('karaoke', 'boxing')
	    draw_active_box = style == 'mrbeast'
	    # Minimal sweep should not have stroke to keep it clean
	    use_stroke = style not in ('minimal',) and animation != 'minimalist_sweep'

	    # Colors → RGB
	    a_fill = hex_rgb(colors.active_fill)
	    i_fill = hex_rgb(colors.inactive_fill)
	    a_stroke = hex_rgb(colors.active_stroke)
	    i_stroke = hex_rgb(colors.inactive_stroke)
	    a_bg = hex_rgb(colors.active_bg) if colors.active_bg else None
	    i_bg = hex_rgb(colors.inactive_bg) if colors.inactive_bg else None

	    # ── SLAM & SLAM_SHAKE animation: active word font size varies with progress
	    if animation in ('slam', 'slam_shake') and active_word_idx >= 0 and word_anim_t < 1.0:
	        # Start at 1.5x, ease down to 1.0x
	        ease = 1.0 - (1.0 - word_anim_t) ** 2  # ease-out
	        slam_scale = 1.5 - 0.5 * ease
	        slam_font_size = int(FONT_A * slam_scale)
	    else:
	        slam_font_size = None

	    # ── Measure all words ──
	    # Note: font selection is per-word to support mixed scripts (e.g., Hindi + Latin)
	    word_data = []
	    for idx, w in enumerate(words_in_line):
	        is_active = (idx == active_word_idx)
	        raw_text = w['text']
	        # Only uppercase Latin; Devanagari has no case concept
	        text = raw_text.upper() if not _is_devanagari(raw_text) else raw_text

	        # Typewriter: show partial text for active word
	        if animation == 'typewriter' and is_active and word_time_pct < 1.0:
	            chars = max(1, int(math.ceil(len(text) * word_time_pct)))
	            text = text[:chars]

	        # Choose font size, then get the right font for this word's script.
	        # The slam size also goes through the script-aware lookup so a
	        # Devanagari active word is not rendered with the Latin font.
	        if is_active and slam_font_size is not None:
	            font = get_font_for_text(text, slam_font_size)
	        elif is_active and use_pop:
	            font = get_font_for_text(text, FONT_A)
	        else:
	            font = get_font_for_text(text, FONT_N)

	        bbox = draw.textbbox((0, 0), text, font=font)
	        word_data.append({
	            'text': text, 'font': font,
	            'width': bbox[2] - bbox[0],
	            'height': bbox[3] - bbox[1],
	            'is_active': is_active,
	            'full_width': None,  # for underline (full word width)
	        })

	        # For underline and minimalist_sweep, also measure full word width
	        if animation in ('underline', 'minimalist_sweep') and is_active:
	            full_raw = w['text']
	            full_text = full_raw.upper() if not _is_devanagari(full_raw) else full_raw
	            fbbox = draw.textbbox((0, 0), full_text, font=font)
	            word_data[-1]['full_width'] = fbbox[2] - fbbox[0]

	    has_boxes = draw_all_boxes or draw_active_box
	    total_w = sum(d['width'] for d in word_data) + PAD * (len(word_data) - 1)
	    if has_boxes:
	        total_w += BOX_PX * 2 * len(word_data)

	    base_x = (WIDTH - total_w) // 2

	    # ── SLIDE_IN animation: offset entire line horizontally
	    if animation == 'slide_in' and line_anim_t < 1.0:
	        ease = 1.0 - (1.0 - line_anim_t) ** 3  # ease-out cubic
	        x_offset = int(-400 * (1.0 - ease))
	    else:
	        x_offset = 0

	    cur_x = base_x + x_offset

	    # ── Draw each word ──
	    for wd in word_data:
	        text = wd['text']
	        font = wd['font']
	        is_active = wd['is_active']
	        text_y = (HEIGHT - wd['height']) // 2
	        # Per-word draw origin. Animation offsets go on this copy so they
	        # never leak into cur_x, which positions the words that follow.
	        word_x = cur_x

	        # ── BOUNCE animation: active word shifts up
	        if animation == 'bounce' and is_active and word_anim_t < 1.0:
	            bounce_y = int(-22 * math.sin(word_anim_t * math.pi))
	            text_y += bounce_y

	        # ── SLAM_SHAKE animation: active word shakes after slamming
	        if animation == 'slam_shake' and is_active and word_anim_t < 1.0:
	            # Shake only between 0.2 and 0.9 of the animation curve
	            if 0.2 < word_anim_t < 0.9:
	                shake_intensity = (1.0 - word_anim_t) * 15  # Damps out over time
	                shake_x = int(math.sin(word_anim_t * 50) * shake_intensity)
	                shake_y = int(math.cos(word_anim_t * 55) * shake_intensity)
	                word_x += shake_x
	                text_y += shake_y

	        # Draw bg box
	        if draw_all_boxes:
	            bg_c = a_bg if is_active else i_bg
	            if bg_c:
	                bx1, by1 = word_x - BOX_PX, text_y - BOX_PY
	                bx2, by2 = word_x + wd['width'] + BOX_PX, text_y + wd['height'] + BOX_PY

	                # ── BOX GROW animation for box styles: active bg grows from center
	                if animation in ('slam', 'slam_shake') and is_active and word_anim_t < 1.0:
	                    ease = 1.0 - (1.0 - word_anim_t) ** 2
	                    cx = (bx1 + bx2) // 2
	                    cy = (by1 + by2) // 2
	                    hw = int((bx2 - bx1) * 0.5 * ease)
	                    hh = int((by2 - by1) * 0.5 * ease)
	                    bx1, by1, bx2, by2 = cx - hw, cy - hh, cx + hw, cy + hh

	                draw.rounded_rectangle([bx1, by1, bx2, by2], radius=BOX_R, fill=bg_c)

	        elif draw_active_box and is_active and a_bg:
	            bx1, by1 = word_x - BOX_PX, text_y - BOX_PY
	            bx2, by2 = word_x + wd['width'] + BOX_PX, text_y + wd['height'] + BOX_PY
	            draw.rounded_rectangle([bx1, by1, bx2, by2], radius=BOX_R, fill=a_bg)

	        # Draw text
	        fill_c = a_fill if is_active else i_fill

	        # ── MINIMALIST_SWEEP animation: dim inactive words
	        if animation == 'minimalist_sweep' and not is_active:
	            # Dim the inactive words to 40% opacity
	            r, g, b = i_fill
	            fill_c = (r, g, b, 102)  # 40% of 255 ≈ 102
	            # Set stroke to fully transparent if any
	            i_stroke_color = (0, 0, 0, 0)
	        else:
	            i_stroke_color = i_stroke

	        if use_stroke:
	            sc = a_stroke if is_active else i_stroke_color
	            sw = colors.active_stroke_width if is_active else colors.inactive_stroke_width

	            if animation == 'karaoke_wipe' and is_active:
	                # KARAOKE_WIPE: Draw the inactive color base first
	                draw.text((word_x, text_y), text, font=font, fill=i_fill, stroke_width=sw, stroke_fill=sc)

	                # Create a temporary image for the sweeping active color
	                active_img = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
	                active_draw = ImageDraw.Draw(active_img)
	                active_draw.text((word_x, text_y), text, font=font, fill=a_fill, stroke_width=sw, stroke_fill=sc)

	                # Calculate sweep mask
	                full_w = wd.get('full_width') or wd['width']
	                sweep_w = int(full_w * word_time_pct)

	                if sweep_w > 0:
	                    # Create a mask for the sweep progress
	                    mask = Image.new('L', (WIDTH, HEIGHT), 0)
	                    mask_draw = ImageDraw.Draw(mask)
	                    # Mask exactly over the active part of the current word
	                    mask_draw.rectangle([word_x - sw, text_y - sw, word_x + sweep_w, text_y + wd['height'] + sw], fill=255)

	                    # Composite the swept portion over the main image
	                    img = Image.composite(active_img, img, mask)
	                    draw = ImageDraw.Draw(img)
	            else:
	                draw.text((word_x, text_y), text, font=font, fill=fill_c, stroke_width=sw, stroke_fill=sc)
	        else:
	            if animation == 'karaoke_wipe' and is_active:
	                # KARAOKE_WIPE without stroke
	                draw.text((word_x, text_y), text, font=font, fill=i_fill)

	                # Active sweep layer
	                active_img = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
	                active_draw = ImageDraw.Draw(active_img)
	                active_draw.text((word_x, text_y), text, font=font, fill=a_fill)

	                full_w = wd.get('full_width') or wd['width']
	                sweep_w = int(full_w * word_time_pct)

	                if sweep_w > 0:
	                    mask = Image.new('L', (WIDTH, HEIGHT), 0)
	                    mask_draw = ImageDraw.Draw(mask)
	                    mask_draw.rectangle([word_x, text_y - 10, word_x + sweep_w, text_y + wd['height'] + 10], fill=255)
	                    img = Image.composite(active_img, img, mask)
	                    draw = ImageDraw.Draw(img)
	            else:
	                draw.text((word_x, text_y), text, font=font, fill=fill_c)

	        # ── UNDERLINE & MINIMALIST_SWEEP: draw a line under active word
	        if animation in ('underline', 'minimalist_sweep') and is_active:
	            full_w = wd.get('full_width') or wd['width']
	            line_w = int(full_w * word_time_pct)
	            if line_w > 0:
	                ul_y = text_y + wd['height'] + 4

	                if animation == 'minimalist_sweep':
	                    # Glowing underline. Drawing on an RGBA image replaces
	                    # pixels instead of blending, so the glow is built on
	                    # its own layer (faintest/widest first, solid core
	                    # last) and then alpha-composited onto the frame.
	                    r, g, b = a_fill
	                    glow = Image.new('RGBA', (WIDTH, HEIGHT), TRANSPARENT)
	                    glow_draw = ImageDraw.Draw(glow)
	                    glow_draw.rectangle([word_x, ul_y - 4, word_x + line_w, ul_y + 8], fill=(r, g, b, 40))
	                    glow_draw.rectangle([word_x, ul_y - 2, word_x + line_w, ul_y + 6], fill=(r, g, b, 100))
	                    glow_draw.rectangle([word_x, ul_y, word_x + line_w, ul_y + 4], fill=a_fill)
	                    img = Image.alpha_composite(img, glow)
	                    draw = ImageDraw.Draw(img)
	                else:
	                    # Standard underline
	                    draw.rectangle([word_x, ul_y, word_x + line_w, ul_y + 5], fill=a_fill)

	        cur_x += wd['width'] + PAD
	        if has_boxes:
	            cur_x += BOX_PX * 2

	    return img.tobytes()


	# ── SLIDING TOGGLE — PREMIUM DARK GLASS ───────────────────────────────────────
	# 11-layer composited render — maximum quality within Pillow:
	# L1+L2. Dual drop shadow (ambient wide + tight contact)
	# L3. Container flat dark fill
	# L4. Container top-gradient overlay (masked)
	# L5. Container border + edge highlights
	# L6. Pill tight shadow
	# L7. Pill gradient fill top→bottom (translucent amber-gold → burnished gold, masked)
	# L8. Pill specular gloss ellipse (blurred)
	# L9. Pill border + edge highlights
	# L10. Active word text glow (blurred)
	# L11. All words text sharp

	# Layout metrics shared by both sliding-toggle renderers; all are used
	# directly in pixel coordinates.
	ST_FONT_SIZE = 72
	ST_WORD_GAP = 16 # CSS gap:8px × 2.25 scale ≈ 18 → rounded to 16
	ST_WORD_PAD_X = 28 # horizontal padding on each side of a word (also the pill's overhang)
	ST_CONT_PAD_X = 44 # container padding left/right of the word row
	ST_CONT_PAD_Y = 32 # container padding above/below the tallest word
	ST_CONT_R = 200 # corner radius requested for the container shapes
	ST_INNER_R = 100 # corner radius requested for the pill shapes
	ST_TRACKING = -1 # CSS letter-spacing:-0.01em at 72px ≈ -0.7px → -1px (tight, not spread)

	# Dark-mode text colours (RGBA).
	_ST_ACTIVE_TXT = (252, 253, 255, 255) # near-white, very slight cool shimmer
	_ST_INACT_TXT = (190, 190, 192, 255) # neutral mid-grey — no color tint (matches CSS #bbbbbc)
	# The three values below are not referenced in the visible code: the
	# dark-mode highlight helper (_draw_glass_highlights) is a no-op.
	_ST_TOP_HILIGHT = (255, 255, 255, 110)
	_ST_SIDE_HILIGHT = (255, 255, 255, 45)
	_ST_BOT_SHADOW = (0, 0, 0, 60)


	def _gradient_layer(x1, y1, x2, y2, radius, rgba_top, rgba_bottom, bands=80):
	    """Build a full-frame RGBA layer: vertical gradient inside a rounded rectangle.

	    The gradient runs from *rgba_top* at ``y1`` to *rgba_bottom* at ``y2``
	    in *bands* horizontal strips; everything outside the rounded rectangle
	    ``[x1, y1, x2, y2]`` stays fully transparent.
	    """
	    canvas = (WIDTH, HEIGHT)
	    clear = (0, 0, 0, 0)

	    # Clip shape: white where the gradient may show.
	    clip = Image.new('L', canvas, 0)
	    ImageDraw.Draw(clip).rounded_rectangle([x1, y1, x2, y2], radius=radius, fill=255)

	    gradient = Image.new('RGBA', canvas, clear)
	    painter = ImageDraw.Draw(gradient)
	    span = max(y2 - y1, 1)
	    last = max(bands - 1, 1)
	    for band in range(bands):
	        top = y1 + int(band / bands * span)
	        # +1 makes neighbouring strips overlap so no gap rows appear.
	        bottom = y1 + int((band + 1) / bands * span) + 1
	        mix = band / last
	        colour = tuple(
	            int(rgba_top[ch] + (rgba_bottom[ch] - rgba_top[ch]) * mix)
	            for ch in range(4)
	        )
	        painter.rectangle([x1, top, x2, bottom], fill=colour)

	    return Image.composite(gradient, Image.new('RGBA', canvas, clear), clip)


	def _draw_glass_highlights(draw, x1, y1, x2, y2, r, *, scale=1):
	"""No-op: flat lines look artificial in Pillow. Border handled by rounded_rectangle outline."""
	pass


	def _pill_bounds(positions, idx, cont_y1, pi):
	    """
	    Give the pill's horizontal extent ``(px1, px2)`` for word *idx*.

	    *positions* holds ``(x, y, word_dict)`` tuples. *idx* is clamped into
	    the valid range, so -1 ("no previous word") resolves to the first word.
	    *cont_y1* and *pi* are accepted but not used.
	    """
	    last = len(positions) - 1
	    clamped = min(idx, last)
	    if clamped < 0:
	        clamped = 0
	    word_x, _, word = positions[clamped]
	    left = word_x - ST_WORD_PAD_X
	    right = word_x + word['w'] + ST_WORD_PAD_X
	    return left, right


	def _ease_out_cubic(t):
	"""easeOutBack — iOS spring constant (c=1.70158): slight overshoot then settle."""
	t = max(0.0, min(1.0, t))
	c = 1.70158
	return 1.0 + (c + 1) * (t - 1) ** 3 + c * (t - 1) ** 2


	def _measure_tracked(draw, text, font, tracking=ST_TRACKING):
	    """Width of *text* when laid out one character at a time with tracking.

	    Sums each character's bounding-box width and adds *tracking* between
	    consecutive characters (not after the last one). Empty text gives 0.
	    """
	    total = 0
	    for pos, glyph in enumerate(text):
	        if pos:
	            # Gap between the previous character and this one.
	            total += tracking
	        left, _, right, _ = draw.textbbox((0, 0), glyph, font=font)
	        total += right - left
	    return total


	def _draw_tracked(draw_obj, x, y, text, font, fill, ink_top=0, tracking=ST_TRACKING):
	    """Draw *text* one character at a time, advancing by glyph width plus tracking.

	    Every character is drawn at ``y - ink_top``, cancelling the top offset
	    the caller measured for the word.

	    NOTE(review): characters are rendered independently, so scripts that
	    depend on shaping across characters (e.g. Devanagari conjuncts) may
	    not join as expected — confirm with real text.
	    """
	    pen_x = x
	    top_y = y - ink_top
	    for glyph in text:
	        draw_obj.text((pen_x, top_y), glyph, font=font, fill=fill)
	        left, _, right, _ = draw_obj.textbbox((0, 0), glyph, font=font)
	        pen_x += (right - left) + tracking


	# Dark sliding-toggle renderer: draws a rounded container holding every word
	# of the line plus a gold "pill" behind the active word, and returns the raw
	# RGBA bytes of the frame. `slide_t` (0→1) moves the pill from the previous
	# word's slot to the active word's slot. `colors` is accepted for signature
	# parity but never read here — every colour below is hard-coded.
	def _render_sliding_toggle(img, draw, words_in_line, active_word_idx, colors: CaptionColors, slide_t=1.0):
	"""Premium 11-layer glass-pill caption renderer."""
	# measure
	word_data, max_h = [], 0
	for idx, w in enumerate(words_in_line):
	raw = w['text']
	text = raw.upper() if not _is_devanagari(raw) else raw
	font = get_font_for_text(text, ST_FONT_SIZE)
	bbox = draw.textbbox((0, 0), text, font=font)
	tw = _measure_tracked(draw, text, font) # tracked width (priority #3)
	th = bbox[3] - bbox[1]
	max_h = max(max_h, th)
	word_data.append({'text': text, 'font': font, 'w': tw, 'h': th,
	'ink_top': bbox[1], # Pillow top-offset; subtract at draw for true centering
	'is_active': (idx == active_word_idx)})
	# Nothing to draw: return the canvas exactly as it was passed in.
	if not word_data:
	return img.tobytes()

	# geometry
	# Container is sized from the words (each padded by ST_WORD_PAD_X on both
	# sides, separated by ST_WORD_GAP) and centred on the WIDTH x HEIGHT canvas.
	inner_w = sum(d['w'] + ST_WORD_PAD_X * 2 for d in word_data) + ST_WORD_GAP * (len(word_data) - 1)
	cont_w = inner_w + ST_CONT_PAD_X * 2
	cont_h = max_h + ST_CONT_PAD_Y * 2
	cx1 = (WIDTH - cont_w) // 2
	cy1 = (HEIGHT - cont_h) // 2
	cx2 = cx1 + cont_w
	cy2 = cy1 + cont_h
	pi = 12 # pill inset from container edges

	# Each layer below is drawn on its own transparent canvas and merged with
	# alpha_composite, so the order of these statements is the z-order.
	# L1: wide ambient shadow — neutral dark
	amb = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
	ImageDraw.Draw(amb).rounded_rectangle(
	[cx1 - 12, cy1 + 18, cx2 + 12, cy2 + 18], radius=ST_CONT_R, fill=(0, 0, 0, 55))
	img = Image.alpha_composite(img, amb.filter(ImageFilter.GaussianBlur(radius=30)))

	# L2: tight contact shadow — neutral dark
	ctc = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
	ImageDraw.Draw(ctc).rounded_rectangle(
	[cx1 - 2, cy1 + 6, cx2 + 2, cy2 + 6], radius=ST_CONT_R, fill=(0, 0, 0, 120))
	img = Image.alpha_composite(img, ctc.filter(ImageFilter.GaussianBlur(radius=8)))

	# L3: container flat dark fill
	c_flat = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
	ImageDraw.Draw(c_flat).rounded_rectangle(
	[cx1, cy1, cx2, cy2], radius=ST_CONT_R, fill=(18, 18, 20, 30)) # neutral dark grey — no blue tint
	img = Image.alpha_composite(img, c_flat)

	# L4: container top-gradient overlay (subtle top lighting)
	img = Image.alpha_composite(img, _gradient_layer(
	cx1, cy1, cx2, cy2, ST_CONT_R,
	rgba_top=(255, 255, 255, 18),
	rgba_bottom=(0, 0, 0, 0), bands=50))

	# L5: container border + edge highlights
	c_edge = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
	ced = ImageDraw.Draw(c_edge)
	ced.rounded_rectangle([cx1, cy1, cx2, cy2], radius=ST_CONT_R,
	outline=(255, 255, 255, 26), width=1) # CSS: inset 0 0 0 1px rgba(white,10%)
	_draw_glass_highlights(ced, cx1, cy1, cx2, cy2, ST_CONT_R, scale=1.0)
	img = Image.alpha_composite(img, c_edge)
	# img was rebound by alpha_composite, so the draw handle must be recreated.
	draw = ImageDraw.Draw(img)

	# word positions
	# NOTE(review): cur_x starts at the container's inner edge without adding
	# ST_WORD_PAD_X, although inner_w budgets that padding on both sides of
	# every word — confirm the intended horizontal alignment.
	cur_x = cx1 + ST_CONT_PAD_X
	text_y = cy1 + ST_CONT_PAD_Y
	positions = []
	for wd in word_data:
	positions.append((cur_x, text_y, wd))
	cur_x += wd['w'] + ST_WORD_PAD_X * 2 + ST_WORD_GAP

	# per-pill layers — pill position is lerped for smooth sliding
	py1_p, py2_p = cy1 + pi, cy2 - pi

	# Compute pill bounds with smooth spring interpolation (priority #1)
	# _pill_bounds clamps the index, so for the first word "previous" and
	# "current" coincide and the pill does not move.
	if active_word_idx >= 0 and len(positions) > 0:
	curr_px1, curr_px2 = _pill_bounds(positions, active_word_idx, cy1, pi)
	prev_px1, prev_px2 = _pill_bounds(positions, active_word_idx - 1, cy1, pi)
	e = _ease_out_cubic(min(1.0, slide_t)) # spring easeOutBack
	pill_x1 = int(prev_px1 + (curr_px1 - prev_px1) * e)
	pill_x2 = int(prev_px2 + (curr_px2 - prev_px2) * e)
	else:
	# Placeholder only: with a negative index no word is flagged active, so
	# the pill loop below never draws with these values.
	pill_x1, pill_x2 = cy1, cy1 # off-screen fallback

	# Squish/stretch — horizontal stretch + vertical compress simultaneously
	# Mirrors CSS scaleToggle2: scale(1.08, 1) at 50% → pill elongates horizontally
	# and flattens very slightly vertically (like a water drop in motion)
	# The code applies up to +12% width and -4% height at mid-slide, and only
	# when there is a previous word to slide from (active_word_idx > 0).
	if active_word_idx > 0 and slide_t < 1.0:
	s = math.sin(math.pi * min(1.0, slide_t)) # 0→1→0 arc
	squish_x = 1.0 + 0.12 * s
	squish_y = 1.0 - 0.04 * s # subtle vertical compress
	p_cx = (pill_x1 + pill_x2) / 2
	p_hw = (pill_x2 - pill_x1) / 2
	pill_x1 = int(p_cx - p_hw * squish_x)
	pill_x2 = int(p_cx + p_hw * squish_x)
	# Vertical: grow py1 down, shrink py2 up by squish_y factor
	p_cy = (py1_p + py2_p) / 2
	p_hh = (py2_p - py1_p) / 2
	py1_p = int(p_cy - p_hh * squish_y)
	py2_p = int(p_cy + p_hh * squish_y)

	# The pill layers run once, for the word flagged active; the loop's own
	# px/py are unused because the pill uses the interpolated bounds above.
	for (px, py, wd) in positions:
	if not wd['is_active']:
	continue
	px1, px2 = pill_x1, pill_x2
	py1, py2 = py1_p, py2_p

	# L6: pill tight shadow — neutral dark
	pshadow = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
	ImageDraw.Draw(pshadow).rounded_rectangle(
	[px1, py1 + 4, px2, py2 + 4], radius=ST_INNER_R, fill=(0, 0, 0, 130))
	img = Image.alpha_composite(img, pshadow.filter(ImageFilter.GaussianBlur(radius=6)))

	# L7: pill gradient fill — translucent gold
	img = Image.alpha_composite(img, _gradient_layer(
	px1, py1, px2, py2, ST_INNER_R,
	rgba_top=(215, 175, 55, 190), # warm amber-gold top
	rgba_bottom=(160, 118, 18, 170), # deep burnished gold bottom
	bands=80))

	# L8: specular gloss — top-LEFT (Apple light direction: priority #4)
	spec_cx = px1 + int((px2 - px1) * 0.30) # 30% from left = upper-left highlight
	spec_cy = py1 + int((py2 - py1) * 0.20)
	spec_rx = (px2 - px1) // 4
	spec_ry = max(10, (py2 - py1) // 5)
	spec = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
	ImageDraw.Draw(spec).ellipse(
	[spec_cx - spec_rx, spec_cy - spec_ry,
	spec_cx + spec_rx, spec_cy + spec_ry],
	fill=(255, 255, 255, 70))
	img = Image.alpha_composite(img, spec.filter(ImageFilter.GaussianBlur(radius=max(5, spec_ry // 2))))

	# L9: pill border + edge highlights
	p_edge = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
	ped = ImageDraw.Draw(p_edge)
	ped.rounded_rectangle([px1, py1, px2, py2], radius=ST_INNER_R,
	outline=(255, 255, 255, 26), width=1) # CSS: inset 0 0 0 1px rgba(white,10%)
	_draw_glass_highlights(ped, px1, py1, px2, py2, ST_INNER_R, scale=0.6)
	img = Image.alpha_composite(img, p_edge)
	draw = ImageDraw.Draw(img)

	# L10: active word text glow (priority #5: fade with slide)
	# Fade factor is 0 for the first half of the slide, then ramps to 1.
	t_active_fade = max(0.0, (min(1.0, slide_t) - 0.5) / 0.5) if slide_t < 1.0 else 1.0
	glow = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
	gd = ImageDraw.Draw(glow)
	for (px, py, wd) in positions:
	if wd['is_active']:
	glow_a = int(120 * t_active_fade)
	_draw_tracked(gd, px, py, wd['text'], wd['font'],
	(255, 255, 255, glow_a), ink_top=wd['ink_top'])
	img = Image.alpha_composite(img, glow.filter(ImageFilter.GaussianBlur(radius=6)))
	draw = ImageDraw.Draw(img)

	# L11: all words sharp text (priority #3 tracked + #5 crossfade)
	for i, (px, py, wd) in enumerate(positions):
	if wd['is_active']:
	# Active: fade from inactive color (slide_t=0.5) to full white (slide_t=1.0)
	r_i, g_i, b_i, _ = _ST_INACT_TXT
	r_a, g_a, b_a, _ = _ST_ACTIVE_TXT
	ta = max(0.0, (min(1.0, slide_t) - 0.5) / 0.5) if slide_t < 1.0 else 1.0
	fill = (int(r_i + (r_a - r_i) * ta), int(g_i + (g_a - g_i) * ta),
	int(b_i + (b_a - b_i) * ta), 255)
	elif i == active_word_idx - 1 and slide_t < 1.0:
	# Previous word: fade from active white to inactive as pill leaves
	r_a, g_a, b_a, _ = _ST_ACTIVE_TXT
	r_i, g_i, b_i, _ = _ST_INACT_TXT
	# NOTE(review): td is not capped at 1, so for slide_t > 0.4 the blend
	# extrapolates past the inactive colour — confirm whether a clamp was intended.
	td = max(0.0, min(1.0, slide_t) / 0.4) # 0→1 over first 40% of slide
	fill = (int(r_a + (r_i - r_a) * td), int(g_a + (g_i - g_a) * td),
	int(b_a + (b_i - b_a) * td), 255)
	else:
	fill = _ST_INACT_TXT
	_draw_tracked(draw, px, py, wd['text'], wd['font'], fill, ink_top=wd['ink_top'])

	return img.tobytes()


	# ── SLIDING TOGGLE LIGHT ───────────────────────────────────────────────────────
	# Same 11-layer approach as dark, but frosted white container + white pill +
	# dark navy text — premium macOS/iOS frosted glass look.

	# Light-mode sliding-toggle colours (RGBA). The three highlight/shadow values
	# are read by _draw_glass_highlights_l, which scales their alpha channel.
	_STL_ACTIVE_TXT = (15, 15, 40, 255) # deep navy on white
	_STL_INACT_TXT = (80, 85, 115, 220) # muted slate-blue on frosted container
	_STL_TOP_HILIGHT = (255, 255, 255, 200) # strong top reflex (white surface)
	_STL_SIDE_HILIGHT = (255, 255, 255, 100)
	_STL_BOT_SHADOW = (0, 0, 0, 25) # very soft bottom shadow on light


	def _draw_glass_highlights_l(draw, x1, y1, x2, y2, r, *, scale=1):
	    """Draw light-mode edge highlights just inside the box ``[x1, y1, x2, y2]``.

	    Three 2px lines are drawn, each only if its inset span is non-empty:
	    a top highlight, a left-side highlight and a bottom shadow. Colours
	    come from the ``_STL_*`` constants with alpha multiplied by *scale*
	    (capped at 255). Lines are inset from the corners by ``min(r // 2, 40)``.
	    """
	    inset = min(r // 2, 40)

	    def _scaled(rgba):
	        # Same RGB, alpha scaled and capped.
	        return (*rgba[:3], min(255, int(rgba[3] * scale)))

	    top_colour = _scaled(_STL_TOP_HILIGHT)
	    side_colour = _scaled(_STL_SIDE_HILIGHT)
	    bottom_colour = _scaled(_STL_BOT_SHADOW)

	    left, right = x1 + inset, x2 - inset
	    has_horizontal_span = right > left

	    if has_horizontal_span:
	        draw.line([(left, y1 + 2), (right, y1 + 2)], fill=top_colour, width=2)

	    side_x = x1 + 2
	    upper, lower = y1 + inset, y2 - inset
	    if lower > upper:
	        draw.line([(side_x, upper), (side_x, lower)], fill=side_colour, width=2)

	    if has_horizontal_span:
	        draw.line([(left, y2 - 3), (right, y2 - 3)], fill=bottom_colour, width=2)


	def _render_sliding_toggle_light(img, draw, words_in_line, active_word_idx, colors: CaptionColors, slide_t=1.0):
	"""Light-mode sliding toggle: frosted white container, white gradient pill, dark text."""
	# measure
	word_data, max_h = [], 0
	for idx, w in enumerate(words_in_line):
	raw = w['text']
	text = raw.upper() if not _is_devanagari(raw) else raw
	font = get_font_for_text(text, ST_FONT_SIZE)
	bbox = draw.textbbox((0, 0), text, font=font)
	tw = _measure_tracked(draw, text, font) # tracked width (priority #3)
	th = bbox[3] - bbox[1]
	max_h = max(max_h, th)
	word_data.append({'text': text, 'font': font, 'w': tw, 'h': th,
	'ink_top': bbox[1], # Pillow top-offset; subtract at draw for true centering
	'is_active': (idx == active_word_idx)})
	if not word_data:
	return img.tobytes()

	# geometry
	inner_w = sum(d['w'] + ST_WORD_PAD_X * 2 for d in word_data) + ST_WORD_GAP * (len(word_data) - 1)
	cont_w = inner_w + ST_CONT_PAD_X * 2
	cont_h = max_h + ST_CONT_PAD_Y * 2
	cx1 = (WIDTH - cont_w) // 2
	cy1 = (HEIGHT - cont_h) // 2
	cx2 = cx1 + cont_w
	cy2 = cy1 + cont_h
	pi = 12

	# L1: soft blue-grey ambient shadow (light-mode window shadow)
	amb = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
	ImageDraw.Draw(amb).rounded_rectangle(
	[cx1 - 10, cy1 + 16, cx2 + 10, cy2 + 16], radius=ST_CONT_R, fill=(80, 90, 140, 40))
	img = Image.alpha_composite(img, amb.filter(ImageFilter.GaussianBlur(radius=28)))

	# L2: tight contact shadow
	ctc = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
	ImageDraw.Draw(ctc).rounded_rectangle(
	[cx1 - 2, cy1 + 5, cx2 + 2, cy2 + 5], radius=ST_CONT_R, fill=(60, 70, 110, 80))
	img = Image.alpha_composite(img, ctc.filter(ImageFilter.GaussianBlur(radius=7)))

	# L3: container — frosted white fill
	c_flat = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
	ImageDraw.Draw(c_flat).rounded_rectangle(
	[cx1, cy1, cx2, cy2], radius=ST_CONT_R, fill=(245, 246, 252, 185))
	img = Image.alpha_composite(img, c_flat)

	# L4: container top-gradient overlay (brighter at top)
	img = Image.alpha_composite(img, _gradient_layer(
	cx1, cy1, cx2, cy2, ST_CONT_R,
	rgba_top=(255, 255, 255, 60),
	rgba_bottom=(200, 202, 220, 0), bands=50))

	# L5: container border (subtle dark outline on light surface) + highlights
	c_edge = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
	ced = ImageDraw.Draw(c_edge)
	ced.rounded_rectangle([cx1, cy1, cx2, cy2], radius=ST_CONT_R,
	outline=(120, 125, 160, 45), width=1)
	_draw_glass_highlights_l(ced, cx1, cy1, cx2, cy2, ST_CONT_R, scale=1.0)
	img = Image.alpha_composite(img, c_edge)
	draw = ImageDraw.Draw(img)

	# word positions
	cur_x = cx1 + ST_CONT_PAD_X
	text_y = cy1 + ST_CONT_PAD_Y
	positions = []
	for wd in word_data:
	positions.append((cur_x, text_y, wd))
	cur_x += wd['w'] + ST_WORD_PAD_X * 2 + ST_WORD_GAP

	# per-pill layers — pill position is lerped for smooth sliding
	py1_p, py2_p = cy1 + pi, cy2 - pi

	# Compute pill bounds with smooth spring interpolation (priority #1)
	if active_word_idx >= 0 and len(positions) > 0:
	curr_px1, curr_px2 = _pill_bounds(positions, active_word_idx, cy1, pi)
	prev_px1, prev_px2 = _pill_bounds(positions, active_word_idx - 1, cy1, pi)
	e = _ease_out_cubic(min(1.0, slide_t)) # spring easeOutBack
	pill_x1 = int(prev_px1 + (curr_px1 - prev_px1) * e)
	pill_x2 = int(prev_px2 + (curr_px2 - prev_px2) * e)
	else:
	pill_x1, pill_x2 = cy1, cy1

	# Priority #2: squish/stretch — pill elongates 7% at mid-flight
	if active_word_idx > 0 and slide_t < 1.0:
	squish = 1.0 + 0.07 * math.sin(math.pi * min(1.0, slide_t))
	p_cx = (pill_x1 + pill_x2) / 2
	p_hw = (pill_x2 - pill_x1) / 2
	pill_x1 = int(p_cx - p_hw * squish)
	pill_x2 = int(p_cx + p_hw * squish)

	for (px, py, wd) in positions:
	if not wd['is_active']:
	continue
	px1, px2 = pill_x1, pill_x2
	py1, py2 = py1_p, py2_p

	# L6: pill shadow (soft blue-grey)
	pshadow = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
	ImageDraw.Draw(pshadow).rounded_rectangle(
	[px1, py1 + 3, px2, py2 + 3], radius=ST_INNER_R, fill=(60, 70, 110, 70))
	img = Image.alpha_composite(img, pshadow.filter(ImageFilter.GaussianBlur(radius=5)))

	# L7: pill gradient — pure white top → soft blue-white bottom (neumorphic)
	img = Image.alpha_composite(img, _gradient_layer(
	px1, py1, px2, py2, ST_INNER_R,
	rgba_top=(255, 255, 255, 255),
	rgba_bottom=(218, 220, 240, 255), bands=80))

	# L8: specular gloss — top-LEFT, Apple light direction (priority #4)
	spec_cx = px1 + int((px2 - px1) * 0.30) # 30% from left
	spec_cy = py1 + int((py2 - py1) * 0.20)
	spec_rx = (px2 - px1) // 4
	spec_ry = max(8, (py2 - py1) // 6)
	spec = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
	ImageDraw.Draw(spec).ellipse(
	[spec_cx - spec_rx, spec_cy - spec_ry,
	spec_cx + spec_rx, spec_cy + spec_ry],
	fill=(255, 255, 255, 120))
	img = Image.alpha_composite(img, spec.filter(ImageFilter.GaussianBlur(radius=max(4, spec_ry // 2))))

	# L9: pill border (subtle grey) + highlights
	p_edge = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
	ped = ImageDraw.Draw(p_edge)
	ped.rounded_rectangle([px1, py1, px2, py2], radius=ST_INNER_R,
	outline=(160, 165, 200, 60), width=1)
	_draw_glass_highlights_l(ped, px1, py1, px2, py2, ST_INNER_R, scale=0.8)
	img = Image.alpha_composite(img, p_edge)
	draw = ImageDraw.Draw(img)

	# L10: active word dark text glow (priority #5: fade with slide)
	t_active_fade = max(0.0, (min(1.0, slide_t) - 0.5) / 0.5) if slide_t < 1.0 else 1.0
	glow = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
	gd = ImageDraw.Draw(glow)
	for (px, py, wd) in positions:
	if wd['is_active']:
	glow_a = int(60 * t_active_fade)
	_draw_tracked(gd, px, py, wd['text'], wd['font'],
	(15, 15, 40, glow_a), ink_top=wd['ink_top'])
	img = Image.alpha_composite(img, glow.filter(ImageFilter.GaussianBlur(radius=4)))
	draw = ImageDraw.Draw(img)

	# L11: all words sharp text (priority #3 tracked + #5 crossfade)
	for i, (px, py, wd) in enumerate(positions):
	if wd['is_active']:
	r_i, g_i, b_i, _ = _STL_INACT_TXT
	r_a, g_a, b_a, _ = _STL_ACTIVE_TXT
	ta = max(0.0, (min(1.0, slide_t) - 0.5) / 0.5) if slide_t < 1.0 else 1.0
	fill = (int(r_i + (r_a - r_i) * ta), int(g_i + (g_a - g_i) * ta),
	int(b_i + (b_a - b_i) * ta), 255)
	elif i == active_word_idx - 1 and slide_t < 1.0:
	r_a, g_a, b_a, _ = _STL_ACTIVE_TXT
	r_i, g_i, b_i, _ = _STL_INACT_TXT
	td = max(0.0, min(1.0, slide_t) / 0.4)
	fill = (int(r_a + (r_i - r_a) * td), int(g_a + (g_i - g_a) * td),
	int(b_a + (b_i - b_a) * td), 255)
	else:
	fill = _STL_INACT_TXT
	_draw_tracked(draw, px, py, wd['text'], wd['font'], fill, ink_top=wd['ink_top'])

	return img.tobytes()




	# ── BACKGROUND WORKER ─────────────────────────────────
	def process_caption_job(job_id: str, req: CaptionRequest):
	    """Render the caption overlay video for one job and upload the result.

	    Words from ``req.transcript`` are grouped three per line, every output
	    frame is produced as raw RGBA bytes and piped into ffmpeg (VP9, yuva420p),
	    and the resulting ``output.webm`` is uploaded with Cloudinary's unsigned
	    upload. Status, progress, result and error are reported by mutating
	    ``JOBS[job_id]``; nothing is returned. Failures during rendering,
	    encoding or upload are recorded on the job instead of propagating.

	    Args:
	        job_id: Key of an existing entry in ``JOBS``.
	        req: The submitted request; ``style``, ``colors``, ``animation``,
	            ``transcript`` and ``duration`` are read from it.
	    """
	    style = req.style or "hormozi"
	    colors = req.colors or DEFAULT_COLORS.get(style, DEFAULT_COLORS["hormozi"])
	    animation = req.animation or "pop"

	    print(f"[{job_id}] style={style} anim={animation} words={len(req.transcript)}")
	    JOBS[job_id]["status"] = "processing"
	    work_dir = tempfile.mkdtemp(prefix=f"cap_{job_id[:8]}_")
	    ffproc = None  # set once ffmpeg is started so `finally` can reap it

	    try:
	        JOBS[job_id]["progress"] = "Preparing..."
	        transcript = [{"text": w.text, "start": w.start, "end": w.end} for w in req.transcript]
	        if req.duration and req.duration > 0:
	            total_dur = req.duration
	        elif transcript:
	            total_dur = max(w['end'] for w in transcript) + 0.5
	        else:
	            # Without words or an explicit duration there is nothing to size the video by.
	            raise ValueError("Transcript is empty and no duration was provided")
	        total_frames = int(total_dur * FPS)

	        # Group words (3 per line)
	        lines = []
	        for i in range(0, len(transcript), 3):
	            g = transcript[i:i+3]
	            lines.append({'start': g[0]['start'], 'end': g[-1]['end'], 'words': g})

	        # Decide rendering strategy: these animations are rendered once per
	        # (line, active word) state and looked up per frame.
	        use_cache = animation in ('none', 'pop')

	        if use_cache:
	            # ── PRE-RENDER static frames (fast path) ──
	            JOBS[job_id]["progress"] = "Pre-rendering frames..."
	            cache: Dict[tuple, bytes] = {(-1, -1): blank_bytes()}
	            for li, line in enumerate(lines):
	                cache[(li, -1)] = render_frame(line['words'], -1, style, colors, animation)
	                for wi in range(len(line['words'])):
	                    cache[(li, wi)] = render_frame(line['words'], wi, style, colors, animation)
	            print(f"[{job_id}] Cached {len(cache)} states")

	        # ── FFMPEG pipe ──
	        JOBS[job_id]["progress"] = f"Encoding {total_frames} frames..."
	        out = os.path.join(work_dir, "output.webm")
	        log_path = os.path.join(work_dir, "ff.log")
	        progress_step = max(1, total_frames // 4)
	        pipe_broken = False

	        # The log handle is closed by the `with` even when rendering raises.
	        with open(log_path, "w") as log_fh:
	            ffproc = subprocess.Popen(
	                ["ffmpeg", "-y", "-f", "rawvideo", "-pix_fmt", "rgba",
	                 "-s", f"{WIDTH}x{HEIGHT}", "-r", str(FPS), "-i", "pipe:0",
	                 "-c:v", "libvpx-vp9", "-b:v", "2M", "-pix_fmt", "yuva420p",
	                 "-auto-alt-ref", "0", "-deadline", "realtime", "-cpu-used", "8", out],
	                stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=log_fh)

	            t0 = time.time()
	            last_li = 0
	            prev_line_idx = -1  # track line changes for slide_in

	            try:
	                for fi in range(total_frames):
	                    ct = fi / FPS

	                    # Find active line & word, resuming the scan at the last hit
	                    ali, awi = -1, -1
	                    for i in range(last_li, len(lines)):
	                        candidate = lines[i]
	                        if candidate['start'] <= ct <= candidate['end']:
	                            ali = i
	                            last_li = i
	                            for wi, w in enumerate(candidate['words']):
	                                if w['start'] <= ct < w['end']:
	                                    awi = wi
	                                    break
	                            break
	                        if candidate['start'] > ct:
	                            break
	                    if lines and ct < lines[0]['start']:
	                        last_li = 0

	                    if use_cache:
	                        # Static — look up cached frame
	                        frame_bytes = cache.get((ali, awi), cache[(-1, -1)])
	                    elif ali >= 0:
	                        # Animated — render per frame
	                        word_anim_t = 1.0
	                        line_anim_t = 1.0
	                        word_time_pct = 0.0

	                        if awi >= 0:
	                            word = lines[ali]['words'][awi]
	                            word_elapsed = ct - word['start']
	                            word_anim_t = min(1.0, word_elapsed / WORD_ANIM_DUR)
	                            word_time_pct = min(
	                                1.0, word_elapsed / max(0.01, word['end'] - word['start']))

	                        # NOTE(review): line_anim_t drops below 1.0 only on the
	                        # first frame of each line; every later frame passes 1.0.
	                        # If the line animation is meant to run for LINE_ANIM_DUR
	                        # it should be recomputed every frame — confirm against
	                        # render_frame before changing.
	                        if ali != prev_line_idx:
	                            line_anim_t = min(1.0, (ct - lines[ali]['start']) / LINE_ANIM_DUR)
	                            prev_line_idx = ali

	                        frame_bytes = render_frame(
	                            lines[ali]['words'], awi, style, colors, animation,
	                            word_anim_t, line_anim_t, word_time_pct)
	                    else:
	                        frame_bytes = blank_bytes()

	                    ffproc.stdin.write(frame_bytes)

	                    # Progress
	                    if fi > 0 and fi % progress_step == 0:
	                        JOBS[job_id]["progress"] = f"Encoding ({int(fi/total_frames*100)}%, {time.time()-t0:.1f}s)..."

	                ffproc.stdin.close()
	            except BrokenPipeError:
	                # ffmpeg stopped reading; make sure it is gone and report its
	                # log below instead of the uninformative pipe error.
	                pipe_broken = True
	                ffproc.kill()
	            ffproc.wait()

	        if pipe_broken or ffproc.returncode != 0:
	            with open(log_path) as f:
	                raise RuntimeError(f"FFmpeg: {f.read()[-500:]}")
	        if not os.path.exists(out) or os.path.getsize(out) < 500:
	            raise RuntimeError("FFmpeg empty output")

	        print(f"[{job_id}] Encoded in {time.time()-t0:.1f}s — {os.path.getsize(out)} bytes")

	        # Upload
	        JOBS[job_id]["progress"] = "Uploading..."
	        res = cloudinary.uploader.unsigned_upload(out, UPLOAD_PRESET, cloud_name=CLOUD_NAME, resource_type="video")
	        JOBS[job_id].update(status="completed", progress="Done",
	                            result={"public_id": res.get("public_id",""), "secure_url": proxy_url(res.get("secure_url",""))})
	        print(f"[{job_id}] Done: {res.get('public_id')}")

	    except Exception as e:
	        import traceback
	        print(f"[{job_id}] FAIL: {traceback.format_exc()}")
	        JOBS[job_id].update(status="failed", error=str(e))
	    finally:
	        # Never leave an encoder running against a directory we are about to delete.
	        if ffproc is not None and ffproc.poll() is None:
	            ffproc.kill()
	            ffproc.wait()
	        shutil.rmtree(work_dir, ignore_errors=True)


	# ── ENDPOINTS ──────────────────────────────────────────

	@app.post("/jobs")
	def submit_job(req: CaptionRequest, bg: BackgroundTasks):
	    """Register a new caption job and schedule it as a background task.

	    A fresh UUID becomes the job id, an initial "queued" record is stored in
	    ``JOBS``, and ``process_caption_job`` is handed to ``bg``. The response
	    carries the id and the queued status.
	    """
	    new_id = str(uuid.uuid4())
	    record = {
	        "status": "queued",
	        "progress": "Waiting...",
	        "result": None,
	        "created_at": time.time(),
	    }
	    JOBS[new_id] = record
	    bg.add_task(process_caption_job, new_id, req)
	    return {"job_id": new_id, "status": "queued"}

	@app.get("/jobs/{job_id}")
	def get_job(job_id: str):
	    """Return the stored record for ``job_id``, or respond 404 when there is none."""
	    record = JOBS.get(job_id)
	    if record:
	        return record
	    raise HTTPException(404, "Job not found")

	@app.get("/")
	def home():
	    """Describe the service: name, status, style names from DEFAULT_COLORS, and animation names."""
	    animation_names = ["none", "pop", "bounce", "slam", "underline", "typewriter", "slide_in"]
	    style_names = list(DEFAULT_COLORS)
	    return {
	        "service": "Caption Greenscreen V6",
	        "status": "running",
	        "styles": style_names,
	        "animations": animation_names,
	    }

	@app.get("/styles")
	def list_styles():
	    """Return each DEFAULT_COLORS entry converted with its ``dict()`` method, keyed by style name."""
	    palettes = {}
	    for style_name, palette in DEFAULT_COLORS.items():
	        palettes[style_name] = palette.dict()
	    return palettes

	@app.get("/debug/fonts")
	def debug_fonts():
	    """Report the loaded caption font, canvas settings and installed font families.

	    Families are read from ``fc-list : family``. Sorting and the 20-entry cap
	    are done in Python rather than through a shell pipeline, so no shell is
	    involved. If ``fc-list`` cannot be started the list is empty.
	    """
	    f = get_font(FONT_N)
	    try:
	        r = subprocess.run(["fc-list", ":", "family"], capture_output=True, text=True)
	        families = sorted(ln for ln in r.stdout.splitlines() if ln.strip())[:20]
	    except OSError:
	        # fontconfig tools are not installed or not executable in this image.
	        families = []
	    return {"font": str(f), "canvas": f"{WIDTH}x{HEIGHT}", "fps": FPS,
	            "fonts": families}