"""
Caption Greenscreen Service — FastAPI + Async Jobs
V6: 8 styles + dynamic colors + animations (none, pop, bounce, slam, slam_shake, underline, minimalist_sweep, typewriter, slide_in, karaoke_wipe) + sliding_toggle + sliding_toggle_light
"""

import os, uuid, time, math, shutil, tempfile, subprocess
from typing import Dict, List, Optional, Tuple
from fastapi import FastAPI, BackgroundTasks, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from PIL import Image, ImageDraw, ImageFont, ImageFilter
import cloudinary, cloudinary.uploader

# ── CONFIG ──────────────────────────────────────────────
def _fetch_cloud_name():
    """Fetch the Cloudinary cloud name from the media proxy's config endpoint.

    Makes up to three HTTPS GET requests to ``https://media.toolxp.org/config``
    and returns the first non-empty ``cloud_name`` found in the JSON response.
    Every failed attempt (including a response whose ``cloud_name`` is empty)
    is logged, and a short pause is inserted between attempts so transient
    failures are not retried back-to-back.

    Returns:
        The cloud name string.

    Raises:
        RuntimeError: if no attempt yields a non-empty cloud name. The last
            underlying exception, if any, is chained as ``__cause__``.
    """
    import urllib.request as _ur, json as _j, ssl as _ssl, time as _t
    ctx = _ssl.create_default_context()
    req = _ur.Request("https://media.toolxp.org/config", headers={"User-Agent": "Mozilla/5.0"})
    attempts = 3
    last_err = None
    for _i in range(attempts):
        try:
            with _ur.urlopen(req, timeout=10, context=ctx) as r:
                name = _j.loads(r.read().decode())["cloud_name"]
            if name:
                print(f"[config] cloud_name={name}")
                return name
            # A well-formed response with an empty value is still a failed attempt.
            print(f"[config] attempt {_i+1} failed: empty cloud_name in response")
        except Exception as _e:
            # Best-effort boundary: network, TLS, JSON and schema errors are all retried.
            last_err = _e
            print(f"[config] attempt {_i+1} failed: {_e}")
        if _i < attempts - 1:
            _t.sleep(0.5 * (_i + 1))  # brief linear backoff; skipped after the final attempt
    raise RuntimeError("[config] FATAL: could not fetch cloud_name after 3 attempts") from last_err
# Resolved once at import time: this call performs network I/O and raises
# RuntimeError (aborting the module import) if the lookup fails.
CLOUD_NAME = _fetch_cloud_name()
# Upload preset name, overridable through the CLOUDINARY_UPLOAD_PRESET
# environment variable (presumably consumed by the Cloudinary upload code
# elsewhere in this file — not visible in this section).
UPLOAD_PRESET = os.environ.get("CLOUDINARY_UPLOAD_PRESET", "testing")

# Base URL that proxy_url() substitutes for the Cloudinary delivery origin.
MEDIA_PROXY = "https://media.toolxp.org"

def proxy_url(url: str) -> str:
    """Rewrite a Cloudinary delivery URL so it points at the media proxy.

    Every occurrence of ``https://res.cloudinary.com/<CLOUD_NAME>`` in *url*
    is swapped for ``MEDIA_PROXY``; a URL without that prefix is returned
    unchanged.
    """
    cloudinary_origin = f"https://res.cloudinary.com/{CLOUD_NAME}"
    rewritten = url.replace(cloudinary_origin, MEDIA_PROXY)
    return rewritten
# Canvas width and height in pixels (every frame is rendered at this size),
# plus FPS — presumably the output frame rate; its consumer is outside this section.
WIDTH, HEIGHT, FPS = 1280, 200, 12
# Fully transparent RGBA colour used as the background of every rendered frame.
TRANSPARENT = (0, 0, 0, 0)
# In-memory registry of job records keyed by string (presumably a job id);
# the record schema is not visible in this section.
JOBS: Dict[str, dict] = {}

_BLANK = None
def blank_bytes():
    """Return the raw RGBA bytes of an empty (fully transparent) frame.

    The buffer is built on the first call and memoised in the module-level
    ``_BLANK`` so later calls hand back the same bytes object.
    """
    global _BLANK
    if _BLANK is not None:
        return _BLANK
    empty_frame = Image.new('RGBA', (WIDTH, HEIGHT), TRANSPARENT)
    _BLANK = empty_frame.tobytes()
    return _BLANK

# ── APP ────────────────────────────────────────────────
# FastAPI application instance with a permissive CORS policy
# (any origin, any method, any header, credentials allowed).
# NOTE(review): the title still says "V5" while the module docstring describes V6 —
# confirm which is intended before changing this user-visible string.
# NOTE(review): FastAPI's CORS documentation states that wildcard origins/methods/headers
# cannot be combined with allow_credentials=True — confirm the intended policy.
app = FastAPI(title="Caption Greenscreen V5")
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True,
                   allow_methods=["*"], allow_headers=["*"])

# ── MODELS ─────────────────────────────────────────────
class TranscriptWord(BaseModel):
    # One transcript token: its text plus start/end times
    # (presumably seconds on the media timeline — TODO confirm against the caller).
    text: str
    start: float
    end: float

class CaptionColors(BaseModel):
    # Colour palette for one caption style. Colours are hex strings that are
    # parsed by hex_rgb(); stroke widths are passed to Pillow as pixel widths.
    # "active" applies to the word currently highlighted, "inactive" to the
    # other words on the same line.
    active_fill: str = "#FFD700"          # text colour of the active word
    active_stroke: str = "#000000"        # outline colour of the active word
    active_stroke_width: int = 7          # outline width of the active word
    inactive_fill: str = "#FFFFFF"        # text colour of inactive words
    inactive_stroke: str = "#000000"      # outline colour of inactive words
    inactive_stroke_width: int = 5        # outline width of inactive words
    active_bg: Optional[str] = None       # box fill behind the active word (box styles only); None = no box
    inactive_bg: Optional[str] = None     # box fill behind inactive words (box styles only); None = no box

class CaptionRequest(BaseModel):
    # Request payload for a caption render job. How each field is consumed by
    # the endpoints is outside this section; the notes below describe only
    # what the renderer in this file does with the corresponding values.
    transcript: List[TranscriptWord]          # words with timings to caption
    style: Optional[str] = "hormozi"          # style name; DEFAULT_COLORS holds a palette per known style
    duration: Optional[float] = None          # presumably total output duration — TODO confirm units/usage
    colors: Optional[CaptionColors] = None    # optional palette (presumably overrides the style default — verify in caller)
    animation: Optional[str] = "pop"   # none, pop, bounce, slam, slam_shake, underline, minimalist_sweep, typewriter, slide_in, karaoke_wipe

# ── HELPERS ────────────────────────────────────────────
def hex_rgb(h: str) -> Tuple[int,int,int]:
    """Convert a CSS-style hex colour string to an ``(r, g, b)`` tuple.

    Accepted forms (leading ``#`` optional, surrounding whitespace ignored):
    ``RRGGBB``, ``RRGGBBAA`` (the alpha pair is ignored), and the shorthand
    ``RGB`` / ``RGBA`` forms, where each digit is doubled (``#1A2`` is
    ``#11AA22``).

    Raises:
        ValueError: if the string cannot be parsed as hexadecimal colour digits.
    """
    digits = h.strip().lstrip('#')
    if len(digits) in (3, 4):
        # Expand CSS shorthand so the pairwise slicing below applies uniformly.
        digits = ''.join(ch * 2 for ch in digits)
    return tuple(int(digits[i:i+2], 16) for i in (0,2,4))

# ── FONTS ──────────────────────────────────────────────
# Font caches keyed by pixel size. Entries are never evicted.
_FC = {}   # Latin font cache (filled by get_font)
_FC_DEVA = {}  # Devanagari font cache (filled by get_devanagari_font)

def _is_devanagari(text: str) -> bool:
    """Return True if text contains any Devanagari Unicode character (U+0900–U+097F)."""
    return any('\u0900' <= ch <= '\u097f' for ch in text)

def get_font(size=72):
    """Return a Latin/universal bold font at the given pixel size.

    Lookup order: a list of well-known bold TTF paths, then whatever
    ``fc-match sans:bold`` reports, then Pillow's built-in default font.
    The result is cached in ``_FC`` per size, so the filesystem and
    ``fc-match`` are consulted at most once per size.

    Font loading is best-effort: a candidate that fails to load is skipped
    rather than raised, so this function always returns a usable font object.
    """
    cached = _FC.get(size)
    if cached is not None:
        return cached

    candidates = ["/app/fonts/Inter-Black.ttf", "/app/fonts/Inter-Bold.ttf",
                  "/app/fonts/DejaVuSans-Bold.ttf",
                  "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
                  "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
                  "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf"]
    for p in candidates:
        if not os.path.exists(p):
            continue
        try:
            f = ImageFont.truetype(p, size)
        except Exception:
            # Unreadable/corrupt font file: try the next candidate.
            # (Exception, not a bare except, so Ctrl-C / SystemExit still propagate.)
            continue
        _FC[size] = f
        return f

    try:
        r = subprocess.run(["fc-match","--format=%{file}","sans:bold"], capture_output=True, text=True)
        if r.returncode == 0 and r.stdout.strip():
            f = ImageFont.truetype(r.stdout.strip(), size)
            _FC[size] = f
            return f
    except Exception:
        # fc-match missing or its result unloadable: fall through to the default font.
        pass

    try:
        # Pillow >= 10.1 can scale its built-in font; honour the requested size when possible.
        f = ImageFont.load_default(size=size)
    except (TypeError, ValueError, OSError):
        # Older Pillow (no ``size`` parameter) or an unusable size: fixed-size bitmap font.
        f = ImageFont.load_default()
    _FC[size] = f
    return f

def get_devanagari_font(size=72):
    """Return a Devanagari-capable font (Noto Sans Devanagari) at the given pixel size.

    Lookup order: a list of well-known Noto font paths, then whatever
    ``fc-match :script=deva:bold`` reports, then the Latin font from
    ``get_font``. The outcome — including the Latin fallback — is cached in
    ``_FC_DEVA`` per size, so the path scan and the ``fc-match`` subprocess run
    at most once per size even when no Devanagari font is installed.
    """
    cached = _FC_DEVA.get(size)
    if cached is not None:
        return cached

    candidates = ["/app/fonts/NotoSansDevanagari-Bold.ttf",
                  "/app/fonts/NotoSansDevanagari-Regular.ttf",
                  "/app/fonts/NotoSans-Bold.ttf",
                  "/usr/share/fonts/truetype/noto/NotoSansDevanagari-Bold.ttf",
                  "/usr/share/fonts/truetype/noto/NotoSansDevanagari-Regular.ttf",
                  "/usr/share/fonts/noto/NotoSansDevanagari-Bold.ttf",
                  "/usr/share/fonts/noto/NotoSansDevanagari-Regular.ttf"]
    for p in candidates:
        if not os.path.exists(p):
            continue
        try:
            f = ImageFont.truetype(p, size)
        except Exception:
            # Unreadable/corrupt font file: try the next candidate.
            # (Exception, not a bare except, so Ctrl-C / SystemExit still propagate.)
            continue
        _FC_DEVA[size] = f
        return f

    # Fallback: try fc-match for devanagari script
    try:
        r = subprocess.run(["fc-match","--format=%{file}",":script=deva:bold"], capture_output=True, text=True)
        if r.returncode == 0 and r.stdout.strip():
            f = ImageFont.truetype(r.stdout.strip(), size)
            _FC_DEVA[size] = f
            return f
    except Exception:
        # fc-match missing or its result unloadable: fall through to the Latin font.
        pass

    # Last resort: fall back to the Latin font (still better than load_default).
    # Cache it too, otherwise every call would repeat the scan and the subprocess.
    f = get_font(size)
    _FC_DEVA[size] = f
    return f

def get_font_for_text(text: str, size=72):
    """Pick the font loader matching the script of *text* and return its font at *size*.

    Text containing any Devanagari character gets the Devanagari font;
    everything else gets the Latin/universal font.
    """
    loader = get_devanagari_font if _is_devanagari(text) else get_font
    return loader(size)

# Layout metrics used by render_frame (all in pixels).
FONT_N = 72   # normal size: inactive words, and active words in non-"pop" styles
FONT_A = 86   # active (pop) size; also the base size that the slam animation scales
PAD = 45      # horizontal gap between adjacent words
BOX_PX, BOX_PY, BOX_R = 16, 8, 12  # background box: horizontal padding, vertical padding, corner radius

# Anim timing
WORD_ANIM_DUR = 0.4    # 400ms — matches CSS transition: left/width 400ms cubic-bezier(1,0,0.4,1)
LINE_ANIM_DUR = 0.25   # seconds for slide-in

# ── DEFAULT STYLE COLORS ──────────────────────────────
# Built-in palette per style name. A stroke width of 0 means no outline;
# active_bg / inactive_bg are only set for styles that draw background boxes.
# NOTE(review): the sliding_toggle renderer visible in this file paints with
# hard-coded colours and does not read its palette entry — confirm whether
# these two entries are consumed elsewhere.
DEFAULT_COLORS = {
    "hormozi": CaptionColors(active_fill="#FFD700",active_stroke="#000000",active_stroke_width=7,inactive_fill="#FFFFFF",inactive_stroke="#000000",inactive_stroke_width=5),
    "netflix": CaptionColors(active_fill="#E50914",active_stroke="#000000",active_stroke_width=7,inactive_fill="#FFFFFF",inactive_stroke="#000000",inactive_stroke_width=5),
    "karaoke": CaptionColors(active_fill="#00FF00",active_stroke="#000000",active_stroke_width=0,inactive_fill="#FFFFFF",inactive_stroke="#000000",inactive_stroke_width=0,active_bg="#1A1A1A",inactive_bg="#333333"),
    "mrbeast": CaptionColors(active_fill="#000000",active_stroke="#000000",active_stroke_width=0,inactive_fill="#FFFFFF",inactive_stroke="#000000",inactive_stroke_width=5,active_bg="#FFE100"),
    "minimal": CaptionColors(active_fill="#FFFFFF",active_stroke="#000000",active_stroke_width=0,inactive_fill="#999999",inactive_stroke="#000000",inactive_stroke_width=0),
    "gradient_pop": CaptionColors(active_fill="#FF00FF",active_stroke="#000000",active_stroke_width=7,inactive_fill="#FFFFFF",inactive_stroke="#000000",inactive_stroke_width=5),
    "boxing": CaptionColors(active_fill="#FFFFFF",active_stroke="#000000",active_stroke_width=0,inactive_fill="#FFFFFF",inactive_stroke="#000000",inactive_stroke_width=0,active_bg="#7C3AED",inactive_bg="#333333"),
    # Sliding toggle dark: dark pill slides behind active word
    "sliding_toggle": CaptionColors(active_fill="#FFFFFF",active_stroke="#000000",active_stroke_width=0,inactive_fill="#CCCCCC",inactive_stroke="#000000",inactive_stroke_width=0,active_bg="#3A3A3C",inactive_bg=None),
    # Sliding toggle light: frosted white container, white pill, dark text
    "sliding_toggle_light": CaptionColors(active_fill="#141428",active_stroke="#000000",active_stroke_width=0,inactive_fill="#505070",inactive_stroke="#000000",inactive_stroke_width=0,active_bg="#FFFFFF",inactive_bg=None),
}


# ── CORE RENDERER ─────────────────────────────────────
def _overlay_wipe(img, xy, text, font, fill, text_kwargs, x_start, x_end):
    """Alpha-composite *text* (drawn in *fill*) onto *img*, clipped to columns x_start..x_end.

    The text is rendered on its own transparent layer, the layer is clipped to
    a full-height column band, and the result is composited *over* the image so
    pixels already on the image (e.g. background boxes) survive wherever the
    layer is transparent. Returns the new image.
    """
    size = img.size
    layer = Image.new('RGBA', size, TRANSPARENT)
    ImageDraw.Draw(layer).text(xy, text, font=font, fill=fill, **text_kwargs)
    band = Image.new('L', size, 0)
    ImageDraw.Draw(band).rectangle([x_start, 0, x_end, size[1]], fill=255)
    clipped = Image.composite(layer, Image.new('RGBA', size, TRANSPARENT), band)
    return Image.alpha_composite(img, clipped)


def render_frame(words_in_line, active_word_idx, style, colors: CaptionColors,
                 animation="pop",
                 word_anim_t=1.0,      # 0→1 progress of bounce/slam (1=settled)
                 line_anim_t=1.0,      # 0→1 progress of slide_in (1=settled)
                 word_time_pct=0.0):   # 0→1 how far through the active word's time
    """
    Render one caption frame with style, colors, and animation state.

    Args:
        words_in_line: sequence of mappings, each with a ``'text'`` entry; the
            words are laid out left to right on one horizontally centred line.
        active_word_idx: index of the highlighted word; an index matching no
            word (e.g. -1) means nothing is highlighted.
        style: style name. ``sliding_toggle`` / ``sliding_toggle_light`` are
            delegated to their dedicated renderers; the remaining names only
            select behaviour flags (pop sizing, boxes, stroke).
        colors: palette used for fills, strokes and boxes.
        animation: animation name; unrecognised names render statically.
        word_anim_t: 0→1 progress of the per-word animation (bounce, slam,
            slam_shake, or the pill slide of the sliding-toggle styles).
        line_anim_t: 0→1 progress of the slide_in line animation.
        word_time_pct: 0→1 progress through the active word's time span
            (typewriter, underline, minimalist_sweep, karaoke_wipe).

    Returns:
        Raw RGBA bytes of a WIDTH×HEIGHT frame.
    """
    img = Image.new('RGBA', (WIDTH, HEIGHT), TRANSPARENT)
    if not words_in_line:
        return img.tobytes()

    draw = ImageDraw.Draw(img)

    # ── SLIDING TOGGLE: special self-contained renderers ──
    if style == 'sliding_toggle':
        return _render_sliding_toggle(img, draw, words_in_line, active_word_idx, colors, slide_t=word_anim_t)
    if style == 'sliding_toggle_light':
        return _render_sliding_toggle_light(img, draw, words_in_line, active_word_idx, colors, slide_t=word_anim_t)

    # Style behavior flags
    use_pop = style in ('hormozi', 'netflix', 'gradient_pop', 'mrbeast')
    draw_all_boxes = style in ('karaoke', 'boxing')
    draw_active_box = style == 'mrbeast'
    has_boxes = draw_all_boxes or draw_active_box
    # Minimal sweep should not have stroke to keep it clean
    use_stroke = style not in ('minimal',) and animation != 'minimalist_sweep'

    # Colors → RGB
    a_fill = hex_rgb(colors.active_fill)
    i_fill = hex_rgb(colors.inactive_fill)
    a_stroke = hex_rgb(colors.active_stroke)
    i_stroke = hex_rgb(colors.inactive_stroke)
    a_bg = hex_rgb(colors.active_bg) if colors.active_bg else None
    i_bg = hex_rgb(colors.inactive_bg) if colors.inactive_bg else None

    word_animating = word_anim_t < 1.0
    is_slam = animation in ('slam', 'slam_shake')

    # ── SLAM & SLAM_SHAKE: the active word starts at 1.5x and eases down to 1.0x
    slam_font_size = None
    if is_slam and active_word_idx >= 0 and word_animating:
        ease = 1.0 - (1.0 - word_anim_t) ** 2  # ease-out
        slam_font_size = int(FONT_A * (1.5 - 0.5 * ease))

    # ── Measure all words ──
    # Font selection is per-word to support mixed scripts (e.g., Hindi + Latin).
    word_data = []
    for idx, w in enumerate(words_in_line):
        is_active = (idx == active_word_idx)
        raw_text = w['text']
        # Only uppercase Latin; Devanagari has no case concept
        full_text = raw_text if _is_devanagari(raw_text) else raw_text.upper()
        text = full_text

        # Typewriter: show partial text for active word
        if animation == 'typewriter' and is_active and word_time_pct < 1.0:
            chars = max(1, int(math.ceil(len(text) * word_time_pct)))
            text = text[:chars]

        if is_active and slam_font_size is not None:
            size = slam_font_size
        elif is_active and use_pop:
            size = FONT_A
        else:
            size = FONT_N
        # The script is detected on the full word so the font stays the same while
        # the typewriter reveals it, and so slam also respects the word's script.
        font = get_font_for_text(full_text, size)

        bbox = draw.textbbox((0, 0), text, font=font)
        word_data.append({
            'text': text, 'font': font,
            'width': bbox[2] - bbox[0],
            'height': bbox[3] - bbox[1],
            # Offset of the ink from the draw origin (Pillow measures from the
            # ascender line, so the top offset is normally > 0).
            'ink_left': bbox[0],
            'ink_top': bbox[1],
            'is_active': is_active,
        })

    total_w = sum(d['width'] for d in word_data) + PAD * (len(word_data) - 1)
    if has_boxes:
        total_w += BOX_PX * 2 * len(word_data)

    base_x = (WIDTH - total_w) // 2

    # ── SLIDE_IN animation: offset entire line horizontally
    if animation == 'slide_in' and line_anim_t < 1.0:
        ease = 1.0 - (1.0 - line_anim_t) ** 3  # ease-out cubic
        x_offset = int(-400 * (1.0 - ease))
    else:
        x_offset = 0

    cur_x = base_x + x_offset

    # ── Draw each word ──
    for wd in word_data:
        text = wd['text']
        font = wd['font']
        is_active = wd['is_active']
        # draw_x / text_y are this word's (possibly animated) position; cur_x is the
        # layout cursor and must never absorb per-word animation offsets.
        draw_x = cur_x
        text_y = (HEIGHT - wd['height']) // 2

        if is_active and word_animating:
            if animation == 'bounce':
                # BOUNCE: active word shifts up along a half sine arc
                text_y += int(-22 * math.sin(word_anim_t * math.pi))
            elif animation == 'slam_shake' and 0.2 < word_anim_t < 0.9:
                # SLAM_SHAKE: damped jitter applied to the active word only
                shake_intensity = (1.0 - word_anim_t) * 15
                draw_x += int(math.sin(word_anim_t * 50) * shake_intensity)
                text_y += int(math.cos(word_anim_t * 55) * shake_intensity)

        # Draw bg box
        if draw_all_boxes:
            bg_c = a_bg if is_active else i_bg
            if bg_c:
                bx1, by1 = draw_x - BOX_PX, text_y - BOX_PY
                bx2, by2 = draw_x + wd['width'] + BOX_PX, text_y + wd['height'] + BOX_PY

                # BOX GROW for box styles: active bg grows from its centre
                if is_slam and is_active and word_animating:
                    ease = 1.0 - (1.0 - word_anim_t) ** 2
                    cx = (bx1 + bx2) // 2
                    cy = (by1 + by2) // 2
                    hw = int((bx2 - bx1) * 0.5 * ease)
                    hh = int((by2 - by1) * 0.5 * ease)
                    bx1, by1, bx2, by2 = cx - hw, cy - hh, cx + hw, cy + hh

                draw.rounded_rectangle([bx1, by1, bx2, by2], radius=BOX_R, fill=bg_c)

        elif draw_active_box and is_active and a_bg:
            bx1, by1 = draw_x - BOX_PX, text_y - BOX_PY
            bx2, by2 = draw_x + wd['width'] + BOX_PX, text_y + wd['height'] + BOX_PY
            draw.rounded_rectangle([bx1, by1, bx2, by2], radius=BOX_R, fill=a_bg)

        # Text fill
        if animation == 'minimalist_sweep' and not is_active:
            # MINIMALIST_SWEEP: dim inactive words to 40% opacity (102 ≈ 0.4 × 255)
            r, g, b = i_fill
            fill_c = (r, g, b, 102)
        else:
            fill_c = a_fill if is_active else i_fill

        # Stroke arguments (omitted entirely for stroke-less styles)
        if use_stroke:
            sw = colors.active_stroke_width if is_active else colors.inactive_stroke_width
            text_kwargs = {'stroke_width': sw,
                           'stroke_fill': a_stroke if is_active else i_stroke}
        else:
            sw = 0
            text_kwargs = {}

        # Subtract the ink top offset so the glyphs really occupy
        # text_y .. text_y + height — the span the boxes and underline assume.
        text_xy = (draw_x, text_y - wd['ink_top'])

        if animation == 'karaoke_wipe' and is_active:
            # KARAOKE_WIPE: inactive-coloured base, then the active colour is
            # revealed left→right in proportion to word_time_pct.
            draw.text(text_xy, text, font=font, fill=i_fill, **text_kwargs)
            ink_x1 = draw_x + wd['ink_left'] - sw          # left edge of ink incl. stroke
            sweep_w = int((wd['width'] + 2 * sw) * word_time_pct)
            if sweep_w > 0:
                img = _overlay_wipe(img, text_xy, text, font, a_fill, text_kwargs,
                                    ink_x1, ink_x1 + sweep_w)
                draw = ImageDraw.Draw(img)   # img was replaced; rebind the drawing context
        else:
            draw.text(text_xy, text, font=font, fill=fill_c, **text_kwargs)

        # ── UNDERLINE & MINIMALIST_SWEEP: progress line under the active word
        if animation in ('underline', 'minimalist_sweep') and is_active:
            line_w = int(wd['width'] * word_time_pct)
            if line_w > 0:
                ul_y = text_y + wd['height'] + 4
                if animation == 'minimalist_sweep':
                    # ImageDraw overwrites pixels on an RGBA image (no blending), so
                    # the glow is painted widest/faintest first and the solid core last.
                    r, g, b = a_fill
                    draw.rectangle([draw_x, ul_y - 4, draw_x + line_w, ul_y + 8], fill=(r, g, b, 40))
                    draw.rectangle([draw_x, ul_y - 2, draw_x + line_w, ul_y + 6], fill=(r, g, b, 100))
                    draw.rectangle([draw_x, ul_y, draw_x + line_w, ul_y + 4], fill=a_fill)
                else:
                    # Standard underline
                    draw.rectangle([draw_x, ul_y, draw_x + line_w, ul_y + 5], fill=a_fill)

        cur_x += wd['width'] + PAD
        if has_boxes:
            cur_x += BOX_PX * 2

    return img.tobytes()


# ── SLIDING TOGGLE — PREMIUM DARK GLASS ───────────────────────────────────────
# 11-layer composited render — maximum quality within Pillow:
#   L1+L2. Dual drop shadow (ambient wide + tight contact)
#   L3.    Container flat dark fill
#   L4.    Container top-gradient overlay (masked)
#   L5.    Container border + edge highlights
#   L6.    Pill tight shadow
#   L7.    Pill gradient fill top→bottom (translucent amber-gold → burnished gold, masked)
#   L8.    Pill specular gloss ellipse (blurred)
#   L9.    Pill border + edge highlights
#   L10.   Active word text glow (blurred)
#   L11.   All words text sharp

# Sliding-toggle layout metrics (pixels), used by the sliding-toggle renderers.
ST_FONT_SIZE  = 72    # font size for every word (active and inactive alike)
ST_WORD_GAP   = 16    # gap between adjacent word slots (CSS gap:8px × 2.25 scale ≈ 18 → rounded to 16)
ST_WORD_PAD_X = 28    # horizontal padding on each side of a word inside its slot / pill
ST_CONT_PAD_X = 44    # container padding left/right of the word row
ST_CONT_PAD_Y = 32    # container padding above/below the tallest word
ST_CONT_R     = 200   # container corner radius
ST_INNER_R    = 100   # pill corner radius
ST_TRACKING   = -1    # CSS letter-spacing:-0.01em at 72px ≈ -0.7px → -1px (tight, not spread)

# Dark-theme RGBA colours.
_ST_ACTIVE_TXT   = (252, 253, 255, 255)   # near-white, very slight cool shimmer
_ST_INACT_TXT    = (190, 190, 192, 255)   # neutral mid-grey — no color tint (matches CSS #bbbbbc)
# NOTE(review): the three colours below are not referenced by the code visible in
# this section (_draw_glass_highlights is a no-op) — confirm before removing.
_ST_TOP_HILIGHT  = (255, 255, 255, 110)
_ST_SIDE_HILIGHT = (255, 255, 255, 45)
_ST_BOT_SHADOW   = (0,   0,   0,   60)


def _gradient_layer(x1, y1, x2, y2, radius, rgba_top, rgba_bottom, bands=80):
    """Build a full-canvas RGBA layer holding a vertical gradient inside a rounded rectangle.

    The gradient runs from ``rgba_top`` at ``y1`` to ``rgba_bottom`` at ``y2``
    and is approximated with ``bands`` horizontal stripes; everything outside
    the rounded rectangle ``[x1, y1, x2, y2]`` is fully transparent.
    """
    canvas_size = (WIDTH, HEIGHT)
    clear = (0, 0, 0, 0)

    # Clip shape: opaque inside the rounded rectangle, zero elsewhere.
    clip = Image.new('L', canvas_size, 0)
    ImageDraw.Draw(clip).rounded_rectangle([x1, y1, x2, y2], radius=radius, fill=255)

    stripes = Image.new('RGBA', canvas_size, clear)
    painter = ImageDraw.Draw(stripes)
    span = max(y2 - y1, 1)
    blend_steps = max(bands - 1, 1)
    for band in range(bands):
        band_top = y1 + int(band / bands * span)
        # +1 makes neighbouring stripes overlap by a row so no gaps appear.
        band_bottom = y1 + int((band + 1) / bands * span) + 1
        mix = band / blend_steps
        shade = tuple(
            int(rgba_top[ch] + (rgba_bottom[ch] - rgba_top[ch]) * mix)
            for ch in range(4)
        )
        painter.rectangle([x1, band_top, x2, band_bottom], fill=shade)

    return Image.composite(stripes, Image.new('RGBA', canvas_size, clear), clip)


def _draw_glass_highlights(draw, x1, y1, x2, y2, r, *, scale=1):
    """No-op: flat lines look artificial in Pillow. Border handled by rounded_rectangle outline."""
    pass


def _pill_bounds(positions, idx, cont_y1, pi):
    """
    Compute the horizontal extent (px1, px2) of the pill for the word at `idx`.

    `positions` holds (x, y, word_dict) tuples; `idx` is clamped into the valid
    range, so idx=-1 ("no previous word") resolves to the first word and an
    index past the end resolves to the last. The pill spans the word's tracked
    width plus ST_WORD_PAD_X on each side. `cont_y1` and `pi` are accepted but
    not used.
    """
    slot = idx
    last = len(positions) - 1
    if slot > last:
        slot = last
    if slot < 0:
        slot = 0
    word_x, _, word = positions[slot]
    left = word_x - ST_WORD_PAD_X
    right = word_x + word['w'] + ST_WORD_PAD_X
    return left, right


def _ease_out_cubic(t):
    """easeOutBack — iOS spring constant (c=1.70158): slight overshoot then settle."""
    t = max(0.0, min(1.0, t))
    c = 1.70158
    return 1.0 + (c + 1) * (t - 1) ** 3 + c * (t - 1) ** 2


def _measure_tracked(draw, text, font, tracking=ST_TRACKING):
    """Total pixel width of text as `_draw_tracked` will lay it out.

    Ordinary text is measured character by character, adding `tracking` pixels
    between neighbouring characters. Text containing Devanagari is measured as
    one string with no tracking: its combining vowel signs and conjuncts only
    lay out correctly when the renderer sees the whole run, and
    `_draw_tracked` draws such text as one string too.
    """
    if _is_devanagari(text):
        whole = draw.textbbox((0, 0), text, font=font)
        return whole[2] - whole[0]
    w = 0
    last = len(text) - 1
    for i, ch in enumerate(text):
        b = draw.textbbox((0, 0), ch, font=font)
        w += b[2] - b[0]
        if i < last:
            w += tracking
    return w


def _draw_tracked(draw_obj, x, y, text, font, fill, ink_top=0, tracking=ST_TRACKING):
    """Draw text with per-character tracking; y is corrected for the ink_top offset.

    `ink_top` is the top offset reported by textbbox for the word; it is
    subtracted so the visible ink starts at `y`. Text containing Devanagari is
    drawn as a single string (tracking not applied) so combining marks stay
    attached to their base characters — this mirrors `_measure_tracked`.
    """
    if _is_devanagari(text):
        draw_obj.text((x, y - ink_top), text, font=font, fill=fill)
        return
    cx = x
    for ch in text:
        draw_obj.text((cx, y - ink_top), ch, font=font, fill=fill)
        b = draw_obj.textbbox((0, 0), ch, font=font)
        cx += (b[2] - b[0]) + tracking


def _render_sliding_toggle(img, draw, words_in_line, active_word_idx, colors: CaptionColors, slide_t=1.0):
    """Render one frame of the dark "sliding toggle" style: a glass container with a pill behind the active word.

    Args:
        img: transparent RGBA canvas to composite onto (a new image object is
            produced by each compositing step; the input is not drawn in place).
        draw: ImageDraw context, used here only for text measurement.
        words_in_line: sequence of mappings with a ``'text'`` entry.
        active_word_idx: index of the highlighted word (no pill is drawn when
            it matches no word).
        colors: accepted for signature parity with the other renderers; this
            renderer paints with its own fixed colours and does not read it.
        slide_t: 0→1 progress of the pill sliding from the previous word to
            the active one (values >= 1 mean settled).

    Returns:
        Raw RGBA bytes of the composited frame.
    """
    # ── measure ──
    word_data, max_h = [], 0
    for idx, w in enumerate(words_in_line):
        raw  = w['text']
        text = raw.upper() if not _is_devanagari(raw) else raw
        font = get_font_for_text(text, ST_FONT_SIZE)
        bbox = draw.textbbox((0, 0), text, font=font)
        tw   = _measure_tracked(draw, text, font)   # width including letter tracking
        th   = bbox[3] - bbox[1]
        max_h = max(max_h, th)
        word_data.append({'text': text, 'font': font, 'w': tw, 'h': th,
                          'ink_top': bbox[1],   # Pillow top-offset; subtract at draw for true centering
                          'is_active': (idx == active_word_idx)})
    if not word_data:
        return img.tobytes()

    # ── geometry: container is centred on the canvas ──
    inner_w = sum(d['w'] + ST_WORD_PAD_X * 2 for d in word_data) + ST_WORD_GAP * (len(word_data) - 1)
    cont_w  = inner_w + ST_CONT_PAD_X * 2
    cont_h  = max_h   + ST_CONT_PAD_Y * 2
    cx1 = (WIDTH  - cont_w) // 2
    cy1 = (HEIGHT - cont_h) // 2
    cx2 = cx1 + cont_w
    cy2 = cy1 + cont_h
    pi  = 12    # pill inset from container top/bottom edges

    # L1: wide ambient shadow — neutral dark
    amb = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
    ImageDraw.Draw(amb).rounded_rectangle(
        [cx1 - 12, cy1 + 18, cx2 + 12, cy2 + 18], radius=ST_CONT_R, fill=(0, 0, 0, 55))
    img = Image.alpha_composite(img, amb.filter(ImageFilter.GaussianBlur(radius=30)))

    # L2: tight contact shadow — neutral dark
    ctc = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
    ImageDraw.Draw(ctc).rounded_rectangle(
        [cx1 - 2, cy1 + 6, cx2 + 2, cy2 + 6], radius=ST_CONT_R, fill=(0, 0, 0, 120))
    img = Image.alpha_composite(img, ctc.filter(ImageFilter.GaussianBlur(radius=8)))

    # L3: container flat dark fill
    c_flat = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
    ImageDraw.Draw(c_flat).rounded_rectangle(
        [cx1, cy1, cx2, cy2], radius=ST_CONT_R, fill=(18, 18, 20, 30))  # neutral dark grey — no blue tint
    img = Image.alpha_composite(img, c_flat)

    # L4: container top-gradient overlay (subtle top lighting)
    img = Image.alpha_composite(img, _gradient_layer(
        cx1, cy1, cx2, cy2, ST_CONT_R,
        rgba_top=(255, 255, 255, 18),
        rgba_bottom=(0, 0, 0, 0), bands=50))

    # L5: container border + edge highlights
    c_edge = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
    ced = ImageDraw.Draw(c_edge)
    ced.rounded_rectangle([cx1, cy1, cx2, cy2], radius=ST_CONT_R,
                           outline=(255, 255, 255, 26), width=1)   # CSS: inset 0 0 0 1px rgba(white,10%)
    _draw_glass_highlights(ced, cx1, cy1, cx2, cy2, ST_CONT_R, scale=1.0)
    img = Image.alpha_composite(img, c_edge)
    draw = ImageDraw.Draw(img)

    # ── word positions: each word owns a slot of width w + 2*ST_WORD_PAD_X ──
    cur_x  = cx1 + ST_CONT_PAD_X
    text_y = cy1 + ST_CONT_PAD_Y
    positions = []
    for wd in word_data:
        positions.append((cur_x, text_y, wd))
        cur_x += wd['w'] + ST_WORD_PAD_X * 2 + ST_WORD_GAP

    # ── animation progress values ──
    settled  = not (slide_t < 1.0)
    progress = min(1.0, slide_t)
    # Active word brightens over the second half of the slide.
    fade_in  = 1.0 if settled else max(0.0, (progress - 0.5) / 0.5)
    # Previous word dims over the first 40% of the slide. Clamped to 1.0:
    # unclamped, the factor reaches 2.5 and pushes the colour past the
    # inactive grey until the slide ends.
    fade_out = min(1.0, max(0.0, progress / 0.4))

    # ── pill bounds: interpolated from the previous word's slot to the active one ──
    py1_p, py2_p = cy1 + pi, cy2 - pi
    if active_word_idx >= 0 and len(positions) > 0:
        curr_px1, curr_px2 = _pill_bounds(positions, active_word_idx, cy1, pi)
        prev_px1, prev_px2 = _pill_bounds(positions, active_word_idx - 1, cy1, pi)
        e = _ease_out_cubic(progress)   # spring easeOutBack
        pill_x1 = int(prev_px1 + (curr_px1 - prev_px1) * e)
        pill_x2 = int(prev_px2 + (curr_px2 - prev_px2) * e)
    else:
        # Placeholder only: pill layers are drawn solely for an active word,
        # which cannot exist on this branch.
        pill_x1, pill_x2 = cy1, cy1

    # Squish/stretch — horizontal stretch + vertical compress simultaneously.
    # Mirrors CSS scaleToggle2: the pill elongates horizontally and flattens
    # very slightly while in motion.
    if active_word_idx > 0 and not settled:
        s = math.sin(math.pi * progress)   # 0→1→0 arc
        squish_x = 1.0 + 0.12 * s
        squish_y = 1.0 - 0.04 * s   # subtle vertical compress
        p_cx = (pill_x1 + pill_x2) / 2
        p_hw = (pill_x2 - pill_x1) / 2
        pill_x1 = int(p_cx - p_hw * squish_x)
        pill_x2 = int(p_cx + p_hw * squish_x)
        p_cy  = (py1_p + py2_p) / 2
        p_hh  = (py2_p - py1_p) / 2
        py1_p = int(p_cy - p_hh * squish_y)
        py2_p = int(p_cy + p_hh * squish_y)

    # ── per-pill layers (drawn once, for the active word) ──
    for (px, py, wd) in positions:
        if not wd['is_active']:
            continue
        px1, px2 = pill_x1, pill_x2
        py1, py2 = py1_p, py2_p

        # L6: pill tight shadow — neutral dark
        pshadow = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
        ImageDraw.Draw(pshadow).rounded_rectangle(
            [px1, py1 + 4, px2, py2 + 4], radius=ST_INNER_R, fill=(0, 0, 0, 130))
        img = Image.alpha_composite(img, pshadow.filter(ImageFilter.GaussianBlur(radius=6)))

        # L7: pill gradient fill — translucent gold
        img = Image.alpha_composite(img, _gradient_layer(
            px1, py1, px2, py2, ST_INNER_R,
            rgba_top=(215, 175, 55, 190),    # warm amber-gold top
            rgba_bottom=(160, 118, 18, 170), # deep burnished gold bottom
            bands=80))

        # L8: specular gloss — upper-left of the pill
        spec_cx = px1 + int((px2 - px1) * 0.30)   # 30% from left
        spec_cy = py1 + int((py2 - py1) * 0.20)
        spec_rx = (px2 - px1) // 4
        spec_ry = max(10, (py2 - py1) // 5)
        spec = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
        ImageDraw.Draw(spec).ellipse(
            [spec_cx - spec_rx, spec_cy - spec_ry,
             spec_cx + spec_rx, spec_cy + spec_ry],
            fill=(255, 255, 255, 70))
        img = Image.alpha_composite(img, spec.filter(ImageFilter.GaussianBlur(radius=max(5, spec_ry // 2))))

        # L9: pill border + edge highlights
        p_edge = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
        ped = ImageDraw.Draw(p_edge)
        ped.rounded_rectangle([px1, py1, px2, py2], radius=ST_INNER_R,
                               outline=(255, 255, 255, 26), width=1)   # CSS: inset 0 0 0 1px rgba(white,10%)
        _draw_glass_highlights(ped, px1, py1, px2, py2, ST_INNER_R, scale=0.6)
        img = Image.alpha_composite(img, p_edge)
        draw = ImageDraw.Draw(img)

    # L10: active word text glow, fading in with the slide
    glow = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
    gd   = ImageDraw.Draw(glow)
    for (px, py, wd) in positions:
        if wd['is_active']:
            glow_a = int(120 * fade_in)
            _draw_tracked(gd, px, py, wd['text'], wd['font'],
                          (255, 255, 255, glow_a), ink_top=wd['ink_top'])
    img = Image.alpha_composite(img, glow.filter(ImageFilter.GaussianBlur(radius=6)))
    draw = ImageDraw.Draw(img)

    # L11: all words, sharp text with colour crossfade
    r_a, g_a, b_a, _ = _ST_ACTIVE_TXT
    r_i, g_i, b_i, _ = _ST_INACT_TXT
    for i, (px, py, wd) in enumerate(positions):
        if wd['is_active']:
            # Active: inactive grey → active white as fade_in goes 0→1
            fill = (int(r_i + (r_a - r_i) * fade_in), int(g_i + (g_a - g_i) * fade_in),
                    int(b_i + (b_a - b_i) * fade_in), 255)
        elif i == active_word_idx - 1 and not settled:
            # Previous word: active white → inactive grey as the pill leaves
            fill = (int(r_a + (r_i - r_a) * fade_out), int(g_a + (g_i - g_a) * fade_out),
                    int(b_a + (b_i - b_a) * fade_out), 255)
        else:
            fill = _ST_INACT_TXT
        _draw_tracked(draw, px, py, wd['text'], wd['font'], fill, ink_top=wd['ink_top'])

    return img.tobytes()


# ── SLIDING TOGGLE LIGHT ───────────────────────────────────────────────────────
# Same 11-layer approach as dark, but frosted white container + white pill +
# dark navy text — premium macOS/iOS frosted glass look.

# Light-theme RGBA colours. The three highlight/shadow colours are read by
# _draw_glass_highlights_l; the two text colours are presumably read by the
# light renderer's text layers (beyond the end of this section — verify).
_STL_ACTIVE_TXT   = (15,  15,  40,  255)   # deep navy on white
_STL_INACT_TXT    = (80,  85, 115,  220)   # muted slate-blue on frosted container
_STL_TOP_HILIGHT  = (255, 255, 255, 200)   # strong top reflex (white surface)
_STL_SIDE_HILIGHT = (255, 255, 255, 100)   # left-edge highlight
_STL_BOT_SHADOW   = (0,   0,   0,   25)    # very soft bottom shadow on light


def _draw_glass_highlights_l(draw, x1, y1, x2, y2, r, *, scale=1):
    """Paint light-theme edge accents inside the rectangle (x1, y1)-(x2, y2).

    Up to three 2px lines are drawn: a top highlight, a left-side highlight and
    a bottom shadow. Each line is inset from the corners by min(r // 2, 40)
    and is skipped when the inset leaves no length. `scale` multiplies the
    alpha of the three base colours (capped at 255).
    """
    inset = min(r // 2, 40)

    def _scaled(base):
        # Base RGB with its alpha multiplied by `scale`, capped at fully opaque.
        return (*base[:3], min(255, int(base[3] * scale)))

    top_rgba = _scaled(_STL_TOP_HILIGHT)
    side_rgba = _scaled(_STL_SIDE_HILIGHT)
    bottom_rgba = _scaled(_STL_BOT_SHADOW)

    left, right = x1 + inset, x2 - inset
    upper, lower = y1 + inset, y2 - inset
    has_horizontal_run = right > left

    if has_horizontal_run:
        draw.line([(left, y1 + 2), (right, y1 + 2)], fill=top_rgba, width=2)
    if lower > upper:
        edge_x = x1 + 2
        draw.line([(edge_x, upper), (edge_x, lower)], fill=side_rgba, width=2)
    if has_horizontal_run:
        draw.line([(left, y2 - 3), (right, y2 - 3)], fill=bottom_rgba, width=2)


def _render_sliding_toggle_light(img, draw, words_in_line, active_word_idx, colors: CaptionColors, slide_t=1.0):
    """Render one frame of the light sliding-toggle style and return its raw bytes.

    The frame is built by alpha-compositing layers onto ``img``: two container
    shadows, a frosted white container with gradient and border, a white
    gradient pill (shadow, gloss, border) under the active word, a blurred
    text glow, and finally the sharp tracked text.

    Args:
        img: Canvas the layers are composited onto. Every layer created here
            is a WIDTH x HEIGHT RGBA image, so ``img`` has to match that.
        draw: ``ImageDraw`` used for measuring text; it is re-created from the
            composited image before any text is drawn.
        words_in_line: Sequence of dicts; only the ``'text'`` key is read.
        active_word_idx: Index of the highlighted word. A negative value means
            no word is active and no pill is drawn.
        colors: Not read by this function (the palette is fixed).
        slide_t: Progress of the pill travelling from the previous word to the
            active one. Values >= 1.0 mean the pill has settled.

    Returns:
        ``img.tobytes()`` of the composited frame, or of the untouched ``img``
        when ``words_in_line`` is empty.
    """

    def _mix(src, dst, t):
        # Opaque per-channel blend from src to dst, truncated to ints.
        return (int(src[0] + (dst[0] - src[0]) * t),
                int(src[1] + (dst[1] - src[1]) * t),
                int(src[2] + (dst[2] - src[2]) * t), 255)

    # ── measure ──
    word_data, max_h = [], 0
    for idx, w in enumerate(words_in_line):
        raw  = w['text']
        text = raw.upper() if not _is_devanagari(raw) else raw   # Devanagari is left as-is
        font = get_font_for_text(text, ST_FONT_SIZE)
        bbox = draw.textbbox((0, 0), text, font=font)
        tw   = _measure_tracked(draw, text, font)   # width including letter tracking
        th   = bbox[3] - bbox[1]
        max_h = max(max_h, th)
        word_data.append({'text': text, 'font': font, 'w': tw, 'h': th,
                          'ink_top': bbox[1],   # Pillow top-offset; subtract at draw for true centering
                          'is_active': (idx == active_word_idx)})
    if not word_data:
        return img.tobytes()

    # ── geometry: container centred on the canvas ──
    inner_w = sum(d['w'] + ST_WORD_PAD_X * 2 for d in word_data) + ST_WORD_GAP * (len(word_data) - 1)
    cont_w  = inner_w + ST_CONT_PAD_X * 2
    cont_h  = max_h   + ST_CONT_PAD_Y * 2
    cx1 = (WIDTH  - cont_w) // 2
    cy1 = (HEIGHT - cont_h) // 2
    cx2 = cx1 + cont_w
    cy2 = cy1 + cont_h
    pi  = 12   # vertical inset of the pill inside the container

    # L1: soft blue-grey ambient shadow (light-mode window shadow)
    amb = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
    ImageDraw.Draw(amb).rounded_rectangle(
        [cx1 - 10, cy1 + 16, cx2 + 10, cy2 + 16], radius=ST_CONT_R, fill=(80, 90, 140, 40))
    img = Image.alpha_composite(img, amb.filter(ImageFilter.GaussianBlur(radius=28)))

    # L2: tight contact shadow
    ctc = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
    ImageDraw.Draw(ctc).rounded_rectangle(
        [cx1 - 2, cy1 + 5, cx2 + 2, cy2 + 5], radius=ST_CONT_R, fill=(60, 70, 110, 80))
    img = Image.alpha_composite(img, ctc.filter(ImageFilter.GaussianBlur(radius=7)))

    # L3: container — frosted white fill
    c_flat = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
    ImageDraw.Draw(c_flat).rounded_rectangle(
        [cx1, cy1, cx2, cy2], radius=ST_CONT_R, fill=(245, 246, 252, 185))
    img = Image.alpha_composite(img, c_flat)

    # L4: container top-gradient overlay (brighter at top)
    img = Image.alpha_composite(img, _gradient_layer(
        cx1, cy1, cx2, cy2, ST_CONT_R,
        rgba_top=(255, 255, 255, 60),
        rgba_bottom=(200, 202, 220, 0), bands=50))

    # L5: container border (subtle dark outline on light surface) + highlights
    c_edge = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
    ced = ImageDraw.Draw(c_edge)
    ced.rounded_rectangle([cx1, cy1, cx2, cy2], radius=ST_CONT_R,
                           outline=(120, 125, 160, 45), width=1)
    _draw_glass_highlights_l(ced, cx1, cy1, cx2, cy2, ST_CONT_R, scale=1.0)
    img = Image.alpha_composite(img, c_edge)

    # ── word positions, laid out left to right ──
    cur_x  = cx1 + ST_CONT_PAD_X
    text_y = cy1 + ST_CONT_PAD_Y
    positions = []
    for wd in word_data:
        positions.append((cur_x, text_y, wd))
        cur_x += wd['w'] + ST_WORD_PAD_X * 2 + ST_WORD_GAP

    # ── pill bounds: lerped from the previous word's slot to the active one ──
    py1, py2 = cy1 + pi, cy2 - pi
    if active_word_idx >= 0:
        curr_px1, curr_px2 = _pill_bounds(positions, active_word_idx, cy1, pi)
        prev_px1, prev_px2 = _pill_bounds(positions, active_word_idx - 1, cy1, pi)
        e = _ease_out_cubic(min(1.0, slide_t))   # cubic ease-out of the slide progress
        pill_x1 = int(prev_px1 + (curr_px1 - prev_px1) * e)
        pill_x2 = int(prev_px2 + (curr_px2 - prev_px2) * e)
    else:
        # No active word: the pill is never drawn, these are placeholders only.
        pill_x1, pill_x2 = cx1, cx1

    # Squish/stretch: the pill is up to 7% wider at mid-flight (sin peaks at t=0.5).
    if active_word_idx > 0 and slide_t < 1.0:
        squish = 1.0 + 0.07 * math.sin(math.pi * min(1.0, slide_t))
        p_cx = (pill_x1 + pill_x2) / 2
        p_hw = (pill_x2 - pill_x1) / 2
        pill_x1 = int(p_cx - p_hw * squish)
        pill_x2 = int(p_cx + p_hw * squish)

    # ── pill layers: drawn only when one of the words is the active one ──
    if any(wd['is_active'] for wd in word_data):
        px1, px2 = pill_x1, pill_x2

        # L6: pill shadow (soft blue-grey)
        pshadow = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
        ImageDraw.Draw(pshadow).rounded_rectangle(
            [px1, py1 + 3, px2, py2 + 3], radius=ST_INNER_R, fill=(60, 70, 110, 70))
        img = Image.alpha_composite(img, pshadow.filter(ImageFilter.GaussianBlur(radius=5)))

        # L7: pill gradient — pure white top → soft blue-white bottom
        img = Image.alpha_composite(img, _gradient_layer(
            px1, py1, px2, py2, ST_INNER_R,
            rgba_top=(255, 255, 255, 255),
            rgba_bottom=(218, 220, 240, 255), bands=80))

        # L8: specular gloss — blurred ellipse towards the top-left of the pill
        spec_cx = px1 + int((px2 - px1) * 0.30)   # 30% from left
        spec_cy = py1 + int((py2 - py1) * 0.20)   # 20% from top
        spec_rx = (px2 - px1) // 4
        spec_ry = max(8, (py2 - py1) // 6)
        spec = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
        ImageDraw.Draw(spec).ellipse(
            [spec_cx - spec_rx, spec_cy - spec_ry,
             spec_cx + spec_rx, spec_cy + spec_ry],
            fill=(255, 255, 255, 120))
        img = Image.alpha_composite(img, spec.filter(ImageFilter.GaussianBlur(radius=max(4, spec_ry // 2))))

        # L9: pill border (subtle grey) + highlights
        p_edge = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
        ped = ImageDraw.Draw(p_edge)
        ped.rounded_rectangle([px1, py1, px2, py2], radius=ST_INNER_R,
                               outline=(160, 165, 200, 60), width=1)
        _draw_glass_highlights_l(ped, px1, py1, px2, py2, ST_INNER_R, scale=0.8)
        img = Image.alpha_composite(img, p_edge)

    # Active-word fade-in: 0 during the first half of the slide, ramping to 1
    # over the second half; 1 once the slide has finished.
    t_active_fade = max(0.0, (min(1.0, slide_t) - 0.5) / 0.5) if slide_t < 1.0 else 1.0

    # L10: blurred dark glow behind the active word, faded with the slide
    glow = Image.new('RGBA', (WIDTH, HEIGHT), (0, 0, 0, 0))
    gd   = ImageDraw.Draw(glow)
    for (px, py, wd) in positions:
        if wd['is_active']:
            glow_a = int(60 * t_active_fade)
            _draw_tracked(gd, px, py, wd['text'], wd['font'],
                          (15, 15, 40, glow_a), ink_top=wd['ink_top'])
    img = Image.alpha_composite(img, glow.filter(ImageFilter.GaussianBlur(radius=4)))
    draw = ImageDraw.Draw(img)   # img was rebound by compositing; draw on the final image

    # L11: sharp tracked text for every word, cross-fading colours during a slide
    for i, (px, py, wd) in enumerate(positions):
        if wd['is_active']:
            fill = _mix(_STL_INACT_TXT, _STL_ACTIVE_TXT, t_active_fade)
        elif i == active_word_idx - 1 and slide_t < 1.0:
            # Previous word: fade active → inactive over the first 40% of the
            # slide. The clamp wraps the whole ratio so the blend stops at the
            # inactive colour instead of overshooting past it.
            td = max(0.0, min(1.0, slide_t / 0.4))
            fill = _mix(_STL_ACTIVE_TXT, _STL_INACT_TXT, td)
        else:
            fill = _STL_INACT_TXT
        _draw_tracked(draw, px, py, wd['text'], wd['font'], fill, ink_top=wd['ink_top'])

    return img.tobytes()


# ── BACKGROUND WORKER ─────────────────────────────────
def _write_caption_frames(job_id, sink, lines, total_frames, style, colors, animation, cache, t0):
    """Write ``total_frames`` raw frames to ``sink``, one per 1/FPS time step.

    ``cache`` maps (line_idx, word_idx) to pre-rendered frame bytes; when it is
    None every frame is rendered individually with animation progress values.
    ``t0`` is the encode start time, used only for the progress message.
    """
    last_li = 0
    progress_step = max(1, total_frames // 4)

    for fi in range(total_frames):
        ct = fi / FPS

        # Find the active line & word (-1 = none). The scan resumes from the
        # last matched line instead of restarting at 0.
        ali, awi = -1, -1
        for i in range(last_li, len(lines)):
            line = lines[i]
            if line['start'] <= ct <= line['end']:
                ali = last_li = i
                for wi, w in enumerate(line['words']):
                    if w['start'] <= ct < w['end']:
                        awi = wi
                        break
                break
            if line['start'] > ct:
                break
        if lines and ct < lines[0]['start']:
            last_li = 0

        if cache is not None:
            # Static — look up the cached frame, falling back to the blank one.
            frame_bytes = cache.get((ali, awi), cache[(-1, -1)])
        elif ali < 0:
            frame_bytes = blank_bytes()
        else:
            # Animated — render per frame.
            word_anim_t = 1.0
            word_time_pct = 0.0
            if awi >= 0:
                word = lines[ali]['words'][awi]
                word_elapsed = ct - word['start']
                word_anim_t = min(1.0, word_elapsed / WORD_ANIM_DUR)
                word_time_pct = min(1.0, word_elapsed / max(0.01, word['end'] - word['start']))

            # Line progress is derived from the line's start time on EVERY
            # frame so it ramps 0→1 across LINE_ANIM_DUR; computing it only on
            # the frame where the line changes would cut the animation to a
            # single frame.
            line_anim_t = min(1.0, (ct - lines[ali]['start']) / LINE_ANIM_DUR)

            frame_bytes = render_frame(
                lines[ali]['words'], awi, style, colors, animation,
                word_anim_t, line_anim_t, word_time_pct)

        sink.write(frame_bytes)

        # Progress (roughly every quarter of the frames)
        if fi > 0 and fi % progress_step == 0:
            JOBS[job_id]["progress"] = f"Encoding ({int(fi/total_frames*100)}%, {time.time()-t0:.1f}s)..."


def process_caption_job(job_id: str, req: CaptionRequest):
    """Render the caption video for one job, upload it and record the outcome in JOBS.

    Steps: group the transcript into lines of three words, optionally
    pre-render the static frames, stream raw RGBA frames into an ffmpeg
    process that encodes a VP9 WebM, then upload the file via Cloudinary.

    Args:
        job_id: Key of an existing entry in ``JOBS``; its ``status``,
            ``progress``, ``result`` and ``error`` fields are updated here.
        req: The caption request; ``style``, ``colors``, ``animation``,
            ``transcript`` and ``duration`` are read from it.

    Any exception raised after the working directory is created is caught:
    the traceback is printed and the job is marked ``failed`` with the error
    text. The temporary working directory is always removed.
    """
    style = req.style or "hormozi"
    colors = req.colors or DEFAULT_COLORS.get(style, DEFAULT_COLORS["hormozi"])
    animation = req.animation or "pop"

    print(f"[{job_id}] style={style} anim={animation} words={len(req.transcript)}")
    JOBS[job_id]["status"] = "processing"
    work_dir = tempfile.mkdtemp(prefix=f"cap_{job_id[:8]}_")

    try:
        JOBS[job_id]["progress"] = "Preparing..."
        transcript = [{"text": w.text, "start": w.start, "end": w.end} for w in req.transcript]

        # Explicit duration wins; otherwise run 0.5s past the last word.
        if req.duration and req.duration > 0:
            total_dur = req.duration
        elif transcript:
            total_dur = max(w['end'] for w in transcript) + 0.5
        else:
            raise ValueError("Transcript is empty and no duration was provided")
        total_frames = int(total_dur * FPS)

        # Group words (3 per line)
        lines = []
        for i in range(0, len(transcript), 3):
            g = transcript[i:i+3]
            lines.append({'start': g[0]['start'], 'end': g[-1]['end'], 'words': g})

        # Static animations are pre-rendered once per (line, word) state;
        # everything else is rendered per frame (cache stays None).
        cache = None
        if animation in ('none', 'pop'):
            JOBS[job_id]["progress"] = "Pre-rendering frames..."
            cache = {(-1, -1): blank_bytes()}
            for li, line in enumerate(lines):
                cache[(li, -1)] = render_frame(line['words'], -1, style, colors, animation)
                for wi in range(len(line['words'])):
                    cache[(li, wi)] = render_frame(line['words'], wi, style, colors, animation)
            print(f"[{job_id}] Cached {len(cache)} states")

        # ── FFMPEG pipe ──
        JOBS[job_id]["progress"] = f"Encoding {total_frames} frames..."
        out = os.path.join(work_dir, "output.webm")
        log_path = os.path.join(work_dir, "ff.log")

        # The `with` closes the log handle on every path, including errors.
        with open(log_path, "w") as log_fh:
            ffproc = subprocess.Popen(
                ["ffmpeg", "-y", "-f", "rawvideo", "-pix_fmt", "rgba",
                 "-s", f"{WIDTH}x{HEIGHT}", "-r", str(FPS), "-i", "pipe:0",
                 "-c:v", "libvpx-vp9", "-b:v", "2M", "-pix_fmt", "yuva420p",
                 "-auto-alt-ref", "0", "-deadline", "realtime", "-cpu-used", "8", out],
                stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=log_fh)
            t0 = time.time()
            try:
                try:
                    _write_caption_frames(job_id, ffproc.stdin, lines, total_frames,
                                          style, colors, animation, cache, t0)
                    ffproc.stdin.close()
                except BrokenPipeError:
                    # ffmpeg stopped reading before all frames were written.
                    # Fall through so the return-code check below reports its
                    # log rather than a bare "Broken pipe".
                    pass
                ffproc.wait()
            finally:
                # On any other error, do not leave the encoder running.
                if ffproc.poll() is None:
                    ffproc.kill()
                    ffproc.wait()
                if not ffproc.stdin.closed:
                    try:
                        ffproc.stdin.close()
                    except OSError:
                        pass   # flushing into a dead pipe; nothing left to save

        if ffproc.returncode != 0:
            with open(log_path) as f:
                raise Exception(f"FFmpeg: {f.read()[-500:]}")
        if not os.path.exists(out) or os.path.getsize(out) < 500:
            raise Exception("FFmpeg empty output")

        print(f"[{job_id}] Encoded in {time.time()-t0:.1f}s — {os.path.getsize(out)} bytes")

        # Upload
        JOBS[job_id]["progress"] = "Uploading..."
        res = cloudinary.uploader.unsigned_upload(out, UPLOAD_PRESET, cloud_name=CLOUD_NAME, resource_type="video")
        JOBS[job_id].update(status="completed", progress="Done",
            result={"public_id": res.get("public_id",""), "secure_url": proxy_url(res.get("secure_url",""))})
        print(f"[{job_id}] Done: {res.get('public_id')}")

    except Exception as e:
        import traceback
        print(f"[{job_id}] FAIL: {traceback.format_exc()}")
        JOBS[job_id].update(status="failed", error=str(e))
    finally:
        shutil.rmtree(work_dir, ignore_errors=True)


# ── ENDPOINTS ──────────────────────────────────────────

@app.post("/jobs")
def submit_job(req: CaptionRequest, bg: BackgroundTasks):
    """Register a new caption job as queued and schedule it as a background task.

    Returns the generated job id together with the initial ``queued`` status.
    """
    job_id = str(uuid.uuid4())
    JOBS[job_id] = dict(
        status="queued",
        progress="Waiting...",
        result=None,
        created_at=time.time(),
    )
    bg.add_task(process_caption_job, job_id, req)
    return dict(job_id=job_id, status="queued")

@app.get("/jobs/{job_id}")
def get_job(job_id: str):
    """Return the stored record for ``job_id``; respond with 404 if there is none."""
    record = JOBS.get(job_id)
    if record:
        return record
    raise HTTPException(status_code=404, detail="Job not found")

@app.get("/")
def home():
    """Service banner: name, status, known style names and animation names."""
    animation_names = [
        "none", "pop", "bounce", "slam", "underline", "typewriter", "slide_in",
    ]
    return {
        "service": "Caption Greenscreen V6",
        "status": "running",
        "styles": list(DEFAULT_COLORS),
        "animations": animation_names,
    }

@app.get("/styles")
def list_styles():
    """Return every default style as ``{style name: its colour settings as a dict}``."""
    catalogue = {}
    for style_name, palette in DEFAULT_COLORS.items():
        catalogue[style_name] = palette.dict()
    return catalogue

@app.get("/debug/fonts")
def debug_fonts():
    """Debug endpoint: the resolved caption font, canvas settings and installed fonts.

    The font list is the first 20 sorted family names reported by ``fc-list``.
    """
    resolved_font = get_font(FONT_N)
    listing = subprocess.run(
        "fc-list : family | sort | head -20",
        shell=True,
        capture_output=True,
        text=True,
    )
    families = listing.stdout.strip().split("\n")
    return {
        "font": str(resolved_font),
        "canvas": f"{WIDTH}x{HEIGHT}",
        "fps": FPS,
        "fonts": families,
    }