diff --git "a/backend/lens_core.py" "b/backend/lens_core.py" new file mode 100644--- /dev/null +++ "b/backend/lens_core.py" @@ -0,0 +1,4039 @@ +import base64, copy, hashlib, json, math, os, re, struct, time, unicodedata, cv2, httpx, numpy as np, budoux + +from urllib.parse import parse_qs, urlencode, urlparse +from PIL import Image, ImageChops, ImageDraw, ImageFilter, ImageFont + +IMAGE_PATH = "33.jpg" +OUT_JSON = "output.json" +LANG = "th" + +AI_API_KEY = os.getenv("AI_API_KEY", "").strip() + +FIREBASE_URL = "https://cookie-6e1cd-default-rtdb.asia-southeast1.firebasedatabase.app/lens/cookie.json" + +WRITE_OUT_JSON = True + +DECODE_IMAGEURL_TO_DATAURI = True + +DO_ORIGINAL = True +DO_TRANSLATED = True +DO_ORIGINAL_HTML = True +DO_TRANSLATED_HTML = True +DO_AI_HTML = True +HTML_INCLUDE_CSS = True + +DRAW_OVERLAY_ORIGINAL = False +DRAW_OVERLAY_TRANSLATED = False +OVERLAY_ORIGINAL_PATH = "overlay_original.png" +OVERLAY_TRANSLATED_PATH = "overlay_translated.png" + +TRANSLATED_OVERLAY_FONT_SCALE = 1.0 +TRANSLATED_OVERLAY_FIT_TO_BOX = True + +AI_OVERLAY_FONT_SCALE = 1.5 +AI_OVERLAY_FIT_TO_BOX = True + +DO_AI = True +DO_AI_JSON = False +DO_AI_OVERLAY = False +AI_CACHE = False +AI_CACHE_PATH = "ai_cache.json" +AI_PATH_OVERLAY = "overlay_ai.png" +AI_PROVIDER = "auto" +AI_MODEL = "auto" +AI_BASE_URL = "auto" +AI_TEMPERATURE = 0.2 + +AI_MAX_TOKENS = 1200 +AI_TIMEOUT_SEC = 120 + +DRAW_BOX_OUTLINE = True +AUTO_TEXT_COLOR = True +TEXT_COLOR = (0, 0, 0, 255) +TEXT_COLOR_DARK = (0, 0, 0, 255) +TEXT_COLOR_LIGHT = (255, 255, 255, 255) +BOX_OUTLINE = (0, 255, 0, 255) +BOX_OUTLINE_WIDTH = 2 + +DRAW_OUTLINE_PARA = False +DRAW_OUTLINE_ITEM = False +DRAW_OUTLINE_SPAN = False +PARA_OUTLINE = (0, 0, 255, 255) +ITEM_OUTLINE = (255, 0, 0, 255) +SPAN_OUTLINE = BOX_OUTLINE +PARA_OUTLINE_WIDTH = 3 +ITEM_OUTLINE_WIDTH = 2 +SPAN_OUTLINE_WIDTH = BOX_OUTLINE_WIDTH + +ERASE_OLD_TEXT_WITH_ORIGINAL_BOXES = True +ERASE_PADDING_PX = 2 +ERASE_SAMPLE_MARGIN_PX = 6 +ERASE_MODE = "inpaint" 
+ERASE_MOSAIC_BLOCK_PX = 10 +ERASE_CLONE_GAP_PX = 4 +ERASE_CLONE_BORDER_PX = 6 +ERASE_CLONE_FEATHER_PX = 3 + +ERASE_BLEND_GAP_PX = 3 +ERASE_BLEND_FEATHER_PX = 4 + +INPAINT_RADIUS = 3 +INPAINT_METHOD = "telea" +INPAINT_DILATE_PX = 1 + +BG_SAMPLE_BORDER_PX = 3 + +BASELINE_SHIFT = True +BASELINE_SHIFT_FACTOR = 0.40 + +FONT_DOWNLOD = True +FONT_THAI_PATH = "NotoSansThai-Regular.ttf" +FONT_LATIN_PATH = "NotoSans-Regular.ttf" + +FONT_THAI_URLS = [ + "https://github.com/google/fonts/raw/main/ofl/notosansthai/NotoSansThai-Regular.ttf", + "https://github.com/google/fonts/raw/main/ofl/notosansthaiui/NotoSansThaiUI-Regular.ttf", +] +FONT_LATIN_URLS = [ + "https://github.com/google/fonts/raw/main/ofl/notosans/NotoSans-Regular.ttf", +] +FONT_JA_PATH = "NotoSansCJKjp-Regular.otf" +FONT_JA_URLS = [ + "https://raw.githubusercontent.com/googlefonts/noto-cjk/main/Sans/OTF/Japanese/NotoSansCJKjp-Regular.otf", + "https://github.com/googlefonts/noto-cjk/raw/main/Sans/OTF/Japanese/NotoSansCJKjp-Regular.otf", +] +FONT_ZH_SC_PATH = "NotoSansCJKsc-Regular.otf" +FONT_ZH_SC_URLS = [ + "https://raw.githubusercontent.com/googlefonts/noto-cjk/main/Sans/OTF/SimplifiedChinese/NotoSansCJKsc-Regular.otf", + "https://github.com/googlefonts/noto-cjk/raw/main/Sans/OTF/SimplifiedChinese/NotoSansCJKsc-Regular.otf", +] +FONT_ZH_TC_PATH = "NotoSansCJKtc-Regular.otf" +FONT_ZH_TC_URLS = [ + "https://raw.githubusercontent.com/googlefonts/noto-cjk/main/Sans/OTF/TraditionalChinese/NotoSansCJKtc-Regular.otf", + "https://github.com/googlefonts/noto-cjk/raw/main/Sans/OTF/TraditionalChinese/NotoSansCJKtc-Regular.otf", +] + +UI_LANGUAGES = [ + {"code": "en", "name": "English"}, + {"code": "th", "name": "Thai"}, + {"code": "ja", "name": "Japanese"}, + {"code": "ko", "name": "Korean"}, + {"code": "zh-CN", "name": "Chinese (Simplified)"}, + {"code": "vi", "name": "Vietnamese"}, + {"code": "es", "name": "Spanish"}, + {"code": "de", "name": "German"}, + {"code": "fr", "name": "French"}, +] + +AI_PROVIDER_DEFAULTS = { + 
"gemini": { + "model": "gemini-2.5-flash", + "base_url": "", + }, + "openai": { + "model": "gpt-4o-mini", + "base_url": "https://api.openai.com/v1", + }, + "openrouter": { + "model": "openai/o4-mini", + "base_url": "https://openrouter.ai/api/v1", + }, + "huggingface": { + "model": "google/gemma-2-2b-it", + "base_url": "https://router.huggingface.co/v1", + }, + "featherless": { + "model": "Qwen/Qwen2.5-7B-Instruct", + "base_url": "https://api.featherless.ai/v1", + }, + "groq": { + "model": "openai/gpt-oss-20b", + "base_url": "https://api.groq.com/openai/v1", + }, + "together": { + "model": "openai/gpt-oss-20b", + "base_url": "https://api.together.xyz/v1", + }, + "deepseek": { + "model": "deepseek-chat", + "base_url": "https://api.deepseek.com/v1", + }, + "anthropic": { + "model": "claude-sonnet-4-20250514", + "base_url": "https://api.anthropic.com", + }, +} + +AI_PROVIDER_ALIASES = { + "hf": "huggingface", + "huggingface_router": "huggingface", + "hf_router": "huggingface", + "openai_compat": "openai", + "openai-compatible": "openai", + "gemini3": "gemini", + "gemini-3": "gemini", + "google": "gemini", +} + +AI_MODEL_ALIASES = { + "gemini": { + "flash-lite": "gemini-2.5-flash-lite", + "flash": "gemini-2.5-flash", + "pro": "gemini-2.5-pro", + "3-flash": "gemini-3-flash-preview", + "3-pro": "gemini-3-pro-preview", + "3-pro-image": "gemini-3-pro-image-preview", + "flash-image": "gemini-2.5-flash-image", + } +} + +AI_PROMPT_SYSTEM_BASE = ( + "You are a professional manga translator and dialogue localizer.\n" + "Rewrite each paragraph as natural dialogue in the target language while preserving meaning, tone, intent, and character voice.\n" + "Keep lines concise for speech bubbles. Do not add new information. Do not omit meaning. Do not explain.\n" + "Preserve emphasis (… ! ?). Avoid excessive punctuation.\n" + "If the input is already in the target language, improve it (dialogue polish) without changing meaning." 
+) + +AI_LANG_STYLE = { + "th": ( + "Target language: Thai\\n" + "Write Thai manga dialogue that reads like a high-quality Thai scanlation: natural, concise, and in-character.\\n" + "Keep lines short for speech bubbles; avoid stiff, literal phrasing.\\n" + "Default: omit pronouns and omit gendered polite sentence-final particles unless the source line clearly requires them.\\n" + "Never use a male-coded second-person pronoun. When addressing someone by name, do not add a second-person pronoun after the name; prefer NAME + clause.\\n" + "If a second-person reference is unavoidable, use a neutral/casual form appropriate to tone, but keep it gender-neutral and consistent with the line.\\n" + "Use particles/interjections sparingly to match tone; do not overuse.\\n" + "Keep names/terms consistent; transliterate when appropriate.\\n" + "Output only the translated text." + ), + "en": ( + "Target language: English\n" + "Write natural English manga dialogue: concise, conversational, with contractions where natural.\n" + "Localize tone and character voice; keep emotion and emphasis.\n" + "Keep proper nouns consistent; do not over-explain." + ), + "ja": ( + "Target language: Japanese\n" + "Write natural Japanese manga dialogue: concise, spoken.\n" + "Choose 丁寧語/タメ口 to match context; keep emotion and emphasis.\n" + "Keep proper nouns consistent; keep SFX natural in Japanese." + ), + "default": ( + "Write natural manga dialogue in the target language: concise, spoken, faithful to meaning and tone." + ), +} + +AI_PROMPT_USER_BY_LANG = { + "th": """Thai manga translation guidelines (OCR input) + +Goal: Produce Thai text that reads like a skilled Thai manga translator: natural, concise, and faithful to tone/intent, without guessing wildly. + +A) Identify the type of text and translate accordingly +- Narration / inner monologue: smooth Thai narration, natural flow. +- Spoken dialogue: real spoken Thai, short and punchy for speech bubbles. 
+- Labels / status / announcements / UI text: short, clear, list-like formatting when appropriate. + +B) Character voice & register +- Match intensity (calm / angry / teasing / rude) but do not add extra rudeness that is not present. +- Use particles/interjections only when they help the voice; do not overuse. +- Keep SFX / elongated sounds manga-like (elongation, repetition) but not excessively long. + +C) Addressing, pronouns, and gendered endings +- Default: omit pronouns and omit gendered polite sentence-final particles unless the source line clearly requires them. +- Never use a male-coded second-person pronoun. +- When a line addresses someone by name, keep the name and write the sentence without inserting a second-person pronoun after the name. Prefer: NAME + sentence. +- If a second-person reference is truly needed for readability, pick a neutral/casual option appropriate to tone, and keep it gender-neutral; do not guess gender from the name alone. +- Do not guess speaker gender. Only use clearly gendered first-person forms or gendered sentence endings when the same source line strongly signals them. Keep consistency within the line and never mix conflicting forms. + +D) OCR noise / incomplete words (be conservative) +- OCR may drop/swap letters or insert duplicates. Fix ONLY when it is high-confidence and obvious (1–2 characters off and the intended word is clear). +- Do not “correct” words that already look valid. Do not over-correct names, terms, or stylistic spellings. +- If uncertain, keep the original token or transliterate; do not invent a different word. + +E) Proper nouns & recurring terms +- Keep character names, places, skills, and key terms consistent across the page. +- Preserve honorifics only when present and meaningful. + +Do not add explanations. Return only the translated Thai text, preserving paragraph boundaries and order.""".strip(), + "en": """Style preferences: +- Keep English dialogue concise and conversational. 
+- Keep lines short for speech bubbles. +- Keep names and recurring terms consistent. +- Keep SFX short; avoid very long repeated characters. +""".strip(), + "ja": """Style preferences: +- Keep Japanese dialogue concise and natural for manga. +- Keep lines short for speech bubbles. +- Keep names and recurring terms consistent. +- Keep SFX short; avoid very long repeated characters. +""".strip(), + "default": """Style preferences: +- Keep dialogue concise, spoken, and faithful to tone. +- Keep lines short for speech bubbles. +- Keep names and recurring terms consistent. +- Keep SFX short; avoid very long repeated characters. +""".strip(), +} + +AI_PROMPT_RESPONSE_CONTRACT_JSON = ( + "Return ONLY valid JSON (no markdown, no extra text).\n" + "Output JSON MUST have exactly one key: \"aiTextFull\".\n" + "\"aiTextFull\" MUST be a single JSON string WITHOUT raw newlines.\n" + "Use literal \\n and \\n\\n to represent line breaks.\n" + "You MUST preserve paragraph boundaries and order. Paragraphs are separated by a blank line (\\n\\n).\n" + "Do NOT add extra paragraphs. Do NOT remove paragraphs.\n" + "Never include code fences or XML/HTML tags.\n" + "All string values MUST NOT contain raw newlines." +) + +AI_PROMPT_RESPONSE_CONTRACT_TEXT = ( + "Return ONLY the translated text (no JSON, no markdown, no commentary).\n" + "You MUST preserve paragraph boundaries and order. Paragraphs are separated by a blank line.\n" + "Use actual newlines for line breaks.\n" + "Do NOT add extra paragraphs. Do NOT remove paragraphs.\n" + "Never include code fences or XML/HTML tags." +) +AI_PROMPT_DATA_TEMPLATE = ( + "Input JSON:\n{input_json}\n\n" + "Output JSON schema (MUST match exactly):\n{output_schema}" +) + +AI_PROMPT_DATA_TEMPLATE_TEXT = ( + "Input JSON:\n{input_json}\n\n" + "Return the translation as plain text only." 
+) + +FIREBASE_COOKIE_TTL_SEC = int(os.getenv("FIREBASE_COOKIE_TTL_SEC", "900")) +_FIREBASE_COOKIE_CACHE = {"ts": 0.0, "url": "", "data": None} +_FONT_RESOLVE_CACHE = {} +_HF_MODELS_CACHE = {} +_FONT_PAIR_CACHE = {} +_TP_HTML_EPS_PX = 0.0 +ZWSP = "\u200b" + +def ai_prompt_user_default(lang: str) -> str: + l = _normalize_lang(lang) + return (AI_PROMPT_USER_BY_LANG.get(l) or AI_PROMPT_USER_BY_LANG.get("default") or "").strip() + +def _active_ai_contract() -> str: + return AI_PROMPT_RESPONSE_CONTRACT_JSON if DO_AI_JSON else AI_PROMPT_RESPONSE_CONTRACT_TEXT + +def _active_ai_data_template() -> str: + return AI_PROMPT_DATA_TEMPLATE if DO_AI_JSON else AI_PROMPT_DATA_TEMPLATE_TEXT + +def _canonical_provider(provider: str) -> str: + p = (provider or "").strip().lower() + return AI_PROVIDER_ALIASES.get(p, p) + +def _resolve_model(provider: str, model: str) -> str: + m = (model or "").strip() + if not m or m.lower() == "auto": + d = AI_PROVIDER_DEFAULTS.get(provider) or {} + return (d.get("model") or "").strip() or AI_PROVIDER_DEFAULTS["openai"]["model"] + key = m.lower() + aliases = AI_MODEL_ALIASES.get(provider) or {} + return aliases.get(key) or m + +def _normalize_lang(lang: str) -> str: + t = (lang or "").strip().lower() + if t in ("jp", "jpn", "japanese"): + return "ja" + if t in ("thai",): + return "th" + if t in ("eng", "english"): + return "en" + if t.startswith("zh"): + return t + if len(t) >= 2: + return t[:2] + return t + +def _sha1(s: str) -> str: + return hashlib.sha1(s.encode("utf-8")).hexdigest() + +def _hf_router_available_models(api_key: str, base_url: str) -> list[str]: + if not api_key or not base_url: + return [] + key = _sha1(f"{_sha1(api_key)}|{base_url}") + now = time.time() + cached = _HF_MODELS_CACHE.get(key) or {} + if cached.get("ts") and now - float(cached["ts"]) < 3600 and isinstance(cached.get("models"), list): + return cached["models"] + + url = base_url.rstrip("/") + "/models" + headers = {"Authorization": f"Bearer {api_key}"} + try: + with 
httpx.Client(timeout=float(AI_TIMEOUT_SEC)) as client: + r = client.get(url, headers=headers) + r.raise_for_status() + data = r.json() + except Exception: + return [] + + models = [] + for m in (data.get("data") or []): + mid = (m.get("id") if isinstance(m, dict) else None) + if isinstance(mid, str) and mid.strip(): + models.append(mid.strip()) + _HF_MODELS_CACHE[key] = {"ts": now, "models": models} + return models + +def _pick_hf_fallback_model(models: list[str]) -> str: + if not models: + return "" + priority_substrings = ( + "gemma-3", + "gemma-2", + "llama-3.1", + "llama-3", + "mistral", + "qwen", + "glm", + ) + lowered = [(m, m.lower()) for m in models] + for sub in priority_substrings: + for m, ml in lowered: + if sub in ml and ("instruct" in ml or ml.endswith("-it") or ":" in ml): + return m + for m, ml in lowered: + if "instruct" in ml or ml.endswith("-it") or ":" in ml: + return m + return models[0] + +def _load_ai_cache(path: str): + if not path: + return {} + if not os.path.exists(path): + return {} + try: + with open(path, "r", encoding="utf-8") as f: + d = json.load(f) + return d if isinstance(d, dict) else {} + except Exception: + return {} + +def _save_ai_cache(path: str, cache: dict): + if not path: + return + tmp = path + ".tmp" + with open(tmp, "w", encoding="utf-8") as f: + json.dump(cache, f, ensure_ascii=False) + os.replace(tmp, path) + +def _build_ai_prompt_packet(target_lang: str, original_text_full: str): + lang = _normalize_lang(target_lang) + input_json = json.dumps( + {"target_lang": lang, "originalTextFull": original_text_full}, ensure_ascii=False) + output_schema = json.dumps({"aiTextFull": "..."}, ensure_ascii=False) + data_template = _active_ai_data_template() + if DO_AI_JSON: + data_text = data_template.format( + input_json=input_json, output_schema=output_schema) + else: + data_text = data_template.format(input_json=input_json) + + style = AI_LANG_STYLE.get(lang) or AI_LANG_STYLE.get("default") or "" + editable = 
(ai_prompt_user_default(lang) or "").strip() + + system_parts = [AI_PROMPT_SYSTEM_BASE] + if style: + system_parts.append(style) + system_parts.append(_active_ai_contract()) + system_text = "\n\n".join([p for p in system_parts if p]) + + user_parts = [] + if editable: + user_parts.append(editable) + user_parts.append(data_text) + return system_text, user_parts + +def _gemini_generate_json(api_key: str, model: str, system_text: str, user_parts: list[str]): + url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}" + parts = [{"text": p} for p in user_parts if (p or "").strip()] + payload = { + "systemInstruction": {"parts": [{"text": system_text}]}, + "contents": [{"role": "user", "parts": parts}], + "generationConfig": { + "temperature": float(AI_TEMPERATURE), + "maxOutputTokens": int(AI_MAX_TOKENS), + "responseMimeType": "text/plain", + }, + } + with httpx.Client(timeout=float(AI_TIMEOUT_SEC)) as client: + r = client.post(url, json=payload) + try: + r.raise_for_status() + except httpx.HTTPStatusError as e: + raise Exception(f"Gemini HTTP {r.status_code}: {r.text}") from e + data = r.json() + candidates = data.get("candidates") or [] + if not candidates: + raise Exception("Gemini returned no candidates") + c = (candidates[0].get("content") or {}) + out_parts = c.get("parts") or [] + if not out_parts: + raise Exception("Gemini returned empty content parts") + txt = "".join([str(p.get("text") or "") for p in out_parts]).strip() + if not txt: + raise Exception("Gemini returned empty text") + return txt + +def _read_first_env(*names: str) -> str: + for n in names: + v = (os.environ.get(n) or "").strip() + if v: + return v + return "" + +def _detect_ai_provider_from_key(api_key: str) -> str: + k = (api_key or "").strip() + if k.startswith("AIza"): + return "gemini" + if k.startswith("hf_"): + return "huggingface" + if k.startswith("sk-or-"): + return "openrouter" + if k.startswith("sk-ant-"): + return "anthropic" + if 
k.startswith("gsk_"): + return "groq" + return "openai" + +def _resolve_ai_config(): + api_key = (AI_API_KEY or _read_first_env( + "AI_API_KEY", + "OPENAI_API_KEY", + "HF_TOKEN", + "HUGGINGFACEHUB_API_TOKEN", + "GEMINI_API_KEY", + "OPENROUTER_API_KEY", + "FEATHERLESS_API_KEY", + "GROQ_API_KEY", + "TOGETHER_API_KEY", + "DEEPSEEK_API_KEY", + "ANTHROPIC_API_KEY", + )).strip() + + provider = _canonical_provider((AI_PROVIDER or "auto")) + model = (AI_MODEL or "auto").strip() + base_url = (AI_BASE_URL or "auto").strip() + + if provider in ("", "auto"): + provider = _canonical_provider(_detect_ai_provider_from_key(api_key)) + + preset = AI_PROVIDER_DEFAULTS.get(provider) or {} + + model = _resolve_model(provider, model) + + if base_url in ("", "auto"): + base_url = (preset.get("base_url") or "").strip() + + if provider not in ("gemini", "anthropic"): + if not base_url: + base_url = (AI_PROVIDER_DEFAULTS.get("openai") or {}).get( + "base_url") or "https://api.openai.com/v1" + + return provider, api_key, model, base_url + +def _openai_compat_generate_json(api_key: str, base_url: str, model: str, system_text: str, user_parts: list[str]): + url = (base_url.rstrip("/") + "/chat/completions") + messages = [{"role": "system", "content": system_text}] + for p in user_parts: + if (p or "").strip(): + messages.append({"role": "user", "content": p}) + payload = { + "model": model, + "messages": messages, + "temperature": float(AI_TEMPERATURE), + "max_tokens": int(AI_MAX_TOKENS), + } + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + } + used_model = model + with httpx.Client(timeout=float(AI_TIMEOUT_SEC)) as client: + r = client.post(url, json=payload, headers=headers) + try: + r.raise_for_status() + data = r.json() + except httpx.HTTPStatusError as e: + if ( + r.status_code == 400 + and "router.huggingface.co" in (base_url or "") + and ((AI_MODEL or "").strip().lower() in ("", "auto") or model == (AI_PROVIDER_DEFAULTS.get("huggingface") or 
{}).get("model")) + ): + try: + err = r.json().get("error") or {} + except Exception: + err = {} + if (err.get("code") or "") == "model_not_supported": + models = _hf_router_available_models(api_key, base_url) + fallback = _pick_hf_fallback_model(models) + if fallback and fallback != model: + payload["model"] = fallback + used_model = fallback + r2 = client.post(url, json=payload, headers=headers) + try: + r2.raise_for_status() + except httpx.HTTPStatusError as e2: + raise Exception( + f"AI HTTP {r2.status_code}: {r2.text}") from e2 + data = r2.json() + else: + preview = ", ".join(models[:8]) + hint = f"\nAvailable models (first 8): {preview}" if preview else "" + raise Exception( + f"AI HTTP {r.status_code}: {r.text}{hint}") from e + else: + raise Exception( + f"AI HTTP {r.status_code}: {r.text}") from e + else: + raise Exception(f"AI HTTP {r.status_code}: {r.text}") from e + choices = data.get("choices") or [] + if not choices: + raise Exception("AI returned no choices") + msg = (choices[0].get("message") or {}) + txt = (msg.get("content") or "").strip() + if not txt: + raise Exception("AI returned empty text") + return txt, used_model + +def _anthropic_generate_json(api_key: str, model: str, system_text: str, user_parts: list[str]): + url = "https://api.anthropic.com/v1/messages" + messages = [] + for p in user_parts: + if (p or "").strip(): + messages.append({"role": "user", "content": p}) + payload = { + "model": model, + "max_tokens": int(AI_MAX_TOKENS), + "temperature": float(AI_TEMPERATURE), + "system": system_text, + "messages": messages, + } + headers = { + "x-api-key": api_key, + "content-type": "application/json", + } + with httpx.Client(timeout=float(AI_TIMEOUT_SEC)) as client: + r = client.post(url, json=payload, headers=headers) + try: + r.raise_for_status() + except httpx.HTTPStatusError as e: + raise Exception(f"Anthropic HTTP {r.status_code}: {r.text}") from e + data = r.json() + content = data.get("content") or [] + txt = "".join([(c.get("text") 
or "") for c in content if isinstance( + c, dict) and c.get("type") == "text"]).strip() + if not txt: + raise Exception("Anthropic returned empty text") + return txt + +def _strip_wrappers(s: str) -> str: + t = (s or "").strip() + if not t: + return "" + t = t.replace("\r\n", "\n").replace("\r", "\n") + if "```" in t: + t = re.sub(r"```[a-zA-Z0-9_-]*", "", t) + t = t.replace("```", "") + t = re.sub(r"", "", t, flags=re.IGNORECASE).strip() + return t + +def _sanitize_json_like_text(raw: str) -> str: + t = _strip_wrappers(raw) + if not t: + return "" + out = [] + in_str = False + esc = False + run_ch = "" + run_len = 0 + + def _flush_run(): + nonlocal run_ch, run_len + if run_len: + out.append(run_ch * min(run_len, 3)) + run_ch = "" + run_len = 0 + + for ch in t: + if in_str: + if esc: + _flush_run() + out.append(ch) + esc = False + continue + if ch == "\\": + _flush_run() + out.append(ch) + esc = True + continue + if ch == '"': + _flush_run() + out.append(ch) + in_str = False + continue + if ch == "\n": + _flush_run() + out.append("\\n") + continue + if ch == "\t": + _flush_run() + out.append("\\t") + continue + if ch == run_ch: + run_len += 1 + continue + _flush_run() + run_ch = ch + run_len = 1 + continue + + _flush_run() + if ch == '"': + out.append(ch) + in_str = True + esc = False + continue + out.append(ch) + + _flush_run() + return "".join(out) + +def _extract_first_json(raw: str): + t = _sanitize_json_like_text(raw) + if not t: + raise Exception("AI returned empty text") + start = t.find("{") + if start < 0: + raise Exception("AI returned no JSON object") + + in_str = False + esc = False + depth = 0 + json_start = None + + for i in range(start, len(t)): + ch = t[i] + if in_str: + if esc: + esc = False + elif ch == "\\": + esc = True + elif ch == '"': + in_str = False + continue + + if ch == '"': + in_str = True + continue + if ch == "{": + if depth == 0: + json_start = i + depth += 1 + continue + if ch == "}": + if depth > 0: + depth -= 1 + if depth == 0 and 
json_start is not None: + cand = t[json_start: i + 1] + return json.loads(cand) + + raise Exception("Failed to parse AI JSON") + +def _parse_ai_textfull_only(raw: str) -> str: + obj = _extract_first_json(raw) + if not isinstance(obj, dict): + raise Exception("AI JSON is not an object") + txt = obj.get("aiTextFull") + if txt is None: + txt = obj.get("textFull") + if txt is None: + raise Exception("AI JSON missing aiTextFull") + t = str(txt) + if "\\n" in t and "\n" not in t: + t = t.replace("\\n", "\n") + t = t.replace("\r\n", "\n").replace("\r", "\n").strip() + return t + +def _parse_ai_textfull_text_only(raw: str) -> str: + t = _strip_wrappers(raw) + if not t: + raise Exception("AI returned empty text") + if t.lstrip().startswith("{"): + return _parse_ai_textfull_only(t) + if "\\n" in t and "\n" not in t: + t = t.replace("\\n", "\n") + t = re.sub(r"^aiTextFull\s*[:=]\s*", "", t, flags=re.IGNORECASE).strip() + return t + +def _budoux_parser_for_lang(lang: str): + lang = _normalize_lang(lang) + if not budoux: + return None + if lang == "th": + return budoux.load_default_thai_parser() + if lang == "ja": + return budoux.load_default_japanese_parser() + if lang in ("zh", "zh-hans", "zh_cn", "zh-cn", "zh_hans"): + return budoux.load_default_simplified_chinese_parser() + if lang in ("zh-hant", "zh_tw", "zh-tw", "zh_hant"): + return budoux.load_default_traditional_chinese_parser() + model_path = os.environ.get("BUDOUX_MODEL_PATH") + if not model_path: + return None + with open(model_path, "r", encoding="utf-8") as f: + model = json.load(f) + return budoux.Parser(model) + +def _ensure_box_fields(box: dict): + if not isinstance(box, dict): + return {} + b = copy.deepcopy(box) + if "rotation_deg" not in b: + b["rotation_deg"] = 0.0 + if "rotation_deg_css" not in b: + b["rotation_deg_css"] = 0.0 + if "center" not in b and all(k in b for k in ("left", "top", "width", "height")): + b["center"] = {"x": b["left"] + b["width"] / + 2.0, "y": b["top"] + b["height"]/2.0} + if all(k 
in b for k in ("left", "top", "width", "height")): + if "left_pct" not in b: + b["left_pct"] = b["left"] * 100.0 + if "top_pct" not in b: + b["top_pct"] = b["top"] * 100.0 + if "width_pct" not in b: + b["width_pct"] = b["width"] * 100.0 + if "height_pct" not in b: + b["height_pct"] = b["height"] * 100.0 + return b + +def _tokens_with_spaces(text: str, parser, lang: str): + t = (text or "") + if not t: + return [] + out = [] + parts = re.findall(r"\s+|\S+", t) + for part in parts: + if not part: + continue + if part.isspace(): + out.append(("space", part)) + continue + segs = parser.parse(part) if parser else [part] + for seg in segs: + if seg: + out.append(("word", seg)) + return out + +def _line_cap_px_for_item(item: dict, img_w: int, img_h: int) -> float: + p1 = item.get("baseline_p1") or {} + p2 = item.get("baseline_p2") or {} + dx = (float(p2.get("x") or 0.0) - float(p1.get("x") or 0.0)) * float(img_w) + dy = (float(p2.get("y") or 0.0) - float(p1.get("y") or 0.0)) * float(img_h) + cap = float(math.hypot(dx, dy)) + if cap > 1e-6: + return cap + b = _ensure_box_fields(item.get("box") or {}) + return float(b.get("width") or 0.0) * float(img_w) + +def _wrap_tokens_to_lines_px(tokens, items, img_w: int, img_h: int, thai_font: str, latin_font: str, font_size: int, min_lines: int): + max_lines = len(items) + if max_lines <= 0: + return [] + + caps = [_line_cap_px_for_item(it, img_w, img_h) for it in items] + desired = max(1, min(int(min_lines), max_lines)) + soft_factor = 0.90 if desired > 1 else 1.0 + + lines = [[]] + cur_w = 0.0 + li = 0 + + last_word_hint = "" + pending_space = "" + + tmp = Image.new("RGBA", (10, 10), (0, 0, 0, 0)) + dtmp = ImageDraw.Draw(tmp) + + def _measure_w(font, txt: str) -> float: + try: + return float(font.getlength(txt)) + except Exception: + try: + bb = dtmp.textbbox((0, 0), txt, font=font, anchor="ls") + return float(bb[2] - bb[0]) + except Exception: + w, _ = dtmp.textsize(txt, font=font) + return float(w) + + def _cap_for_line(idx: 
int) -> float: + return float(caps[min(idx, max_lines - 1)]) + + for k, s in (tokens or []): + if k == "space": + if not lines[-1]: + continue + pending_space += str(s) + continue + + if k != "word": + continue + + txt = str(s) + if not txt: + continue + + font = pick_font(txt, thai_font, latin_font, int(font_size)) + w = _measure_w(font, txt) + + sw = 0.0 + if pending_space: + hint = last_word_hint or txt + font_s = pick_font(hint, thai_font, latin_font, int(font_size)) + sw = _measure_w(font_s, pending_space) + + cap = _cap_for_line(li) + soft_cap = cap * soft_factor if (li < desired and cap > 0.0) else cap + + need_w = cur_w + sw + w + if lines[-1] and li < max_lines - 1: + if cap > 0.0 and need_w > cap: + lines.append([]) + li += 1 + cur_w = 0.0 + pending_space = "" + sw = 0.0 + elif soft_cap > 0.0 and need_w > soft_cap: + lines.append([]) + li += 1 + cur_w = 0.0 + pending_space = "" + sw = 0.0 + + if pending_space and lines[-1]: + lines[-1].append(("space", pending_space, sw)) + cur_w += sw + pending_space = "" + + lines[-1].append(("word", txt, w)) + cur_w += w + last_word_hint = txt + + if len(lines) > max_lines: + head = lines[: max_lines - 1] + tail = [] + for seg in lines[max_lines - 1:]: + tail.extend(seg) + lines = head + [tail] + + for i in range(len(lines)): + while lines[i] and lines[i][0][0] == "space": + lines[i] = lines[i][1:] + while lines[i] and lines[i][-1][0] == "space": + lines[i] = lines[i][:-1] + + return lines + +def _ensure_min_lines_by_split(lines, min_lines: int, max_lines: int): + if not lines: + return [] + min_lines = int(min_lines) + max_lines = int(max_lines) + if min_lines <= 1: + return lines + + target = min(min_lines, max_lines) + lines = [list(seg) for seg in (lines or [])] + + def _trim(seg): + while seg and seg[0][0] == "space": + seg.pop(0) + while seg and seg[-1][0] == "space": + seg.pop() + return seg + + while len(lines) < target: + idx = None + best = 0 + for i, seg in enumerate(lines): + n_words = sum(1 for k, s, _ in 
seg if k == "word" and s != ZWSP) + if n_words > best and n_words > 1: + best = n_words + idx = i + if idx is None: + break + + seg = lines[idx] + word_pos = [i for i, (k, s, _) in enumerate(seg) + if k == "word" and s != ZWSP] + if len(word_pos) <= 1: + break + cut_word = len(word_pos) // 2 + cut_pos = word_pos[cut_word] + + left = _trim(seg[:cut_pos]) + right = _trim(seg[cut_pos:]) + + lines[idx] = left + lines.insert(idx + 1, right) + if len(lines) >= max_lines: + break + + return lines + +def _fit_para_size_and_lines(ptext: str, parser, items, img_w: int, img_h: int, thai_font: str, latin_font: str, base_size: int, min_lines: int, lang: str): + tokens2 = _tokens_with_spaces(ptext, parser, lang) + if not tokens2 or not items: + return int(base_size), [[] for _ in range(len(items))] + + max_lines = len(items) + n_words = 0 + for k, s in tokens2: + if k == "word" and str(s): + n_words += 1 + desired_lines = max(1, min(max_lines, n_words)) + size = max(10, int(base_size)) + + heights = [] + for it in items: + b = _ensure_box_fields(it.get("box") or {}) + heights.append(float(b.get("height") or 0.0) * float(img_h)) + + while size >= 10: + lines = _wrap_tokens_to_lines_px( + tokens2, items, img_w, img_h, thai_font, latin_font, size, min_lines=desired_lines) + lines = _ensure_min_lines_by_split( + lines, min_lines=desired_lines, max_lines=max_lines) + + if len(lines) <= max_lines: + ok = True + for ii, seg in enumerate(lines): + words = [s for k, s, _ in seg if k == "word" and s != ZWSP] + if not words: + continue + line_text = "".join(words) + mline = _line_metrics_px( + line_text, thai_font, latin_font, size) + if mline is None: + continue + _, th, _ = mline + if ii < len(heights) and heights[ii] > 0.0 and th > heights[ii] * 1.01: + ok = False + break + if ok: + return size, lines + + size -= 1 + + lines10 = _wrap_tokens_to_lines_px( + tokens2, items, img_w, img_h, thai_font, latin_font, 10, min_lines=desired_lines) + lines10 = _ensure_min_lines_by_split( + lines10, 
        min_lines=desired_lines, max_lines=max_lines)
    return 10, lines10

def _pad_lines(lines, max_lines: int):
    """Clip or right-pad `lines` (a list of token lists) to exactly `max_lines` entries."""
    max_lines = int(max_lines)
    if max_lines <= 0:
        return []
    lines = list(lines or [])
    if len(lines) > max_lines:
        return lines[:max_lines]
    if len(lines) < max_lines:
        # pad with empty lines so every target item gets a (possibly empty) line
        lines.extend([[] for _ in range(max_lines - len(lines))])
    return lines

def _contains_thai(text: str) -> bool:
    """True if any character of `text` is Thai (delegates to `_is_thai_char`)."""
    for ch in (text or ""):
        if _is_thai_char(ch):
            return True
    return False

def _apply_line_to_item(
    item: dict,
    line_tokens,
    para_index: int,
    item_index: int,
    abs_line_start_raw: int,
    W: int,
    H: int,
    thai_path: str,
    latin_path: str,
    forced_size_px: int | None,
    apply_baseline_shift: bool = True,
    kerning_adjust: bool = False,
):
    """Lay one wrapped line of (kind, text, width) tokens onto an OCR item.

    Measures each word/space at a reference font size (96 px), scales the line to
    fit the item's baseline length and box height (or uses `forced_size_px`),
    optionally re-centers the baseline vertically, and rebuilds `item["spans"]`
    with per-token boxes expressed as t-fractions along the baseline.

    W/H are the image dimensions in pixels; box fields are normalized [0..1].
    Mutates `item` in place ("text", "valid_text", "box", "font_size_px",
    "baseline_p1/2", "spans").
    """
    # Normalize incoming tokens to (kind, text, width) triples.
    tokens = []
    for t in (line_tokens or []):
        if not isinstance(t, (list, tuple)) or len(t) < 2:
            continue
        k = str(t[0])
        s = str(t[1])
        w = float(t[2]) if len(t) > 2 and isinstance(
            t[2], (int, float)) else 0.0
        tokens.append((k, s, w))

    words = [s for k, s, _ in tokens if k == "word" and s != ZWSP]
    # ZWSP (zero-width space) tokens are layout hints only — excluded from text.
    item_text = "".join(s for _, s, _ in tokens if s != ZWSP).strip()
    item["text"] = item_text
    item["valid_text"] = bool(item_text)

    b = _ensure_box_fields(item.get("box") or {})
    item["box"] = b
    base_left = float(b.get("left") or 0.0)
    base_top = float(b.get("top") or 0.0)
    base_w = float(b.get("width") or 0.0)
    base_h = float(b.get("height") or 0.0)

    # Degenerate geometry or no drawable words → clear spans and bail.
    if not words or base_w <= 0.0 or base_h <= 0.0 or W <= 0 or H <= 0:
        item["spans"] = []
        return

    # Baseline endpoints in pixel space.
    p1 = item.get("baseline_p1") or {}
    p2 = item.get("baseline_p2") or {}
    x1 = float(p1.get("x") or 0.0) * float(W)
    y1 = float(p1.get("y") or 0.0) * float(H)
    x2 = float(p2.get("x") or 0.0) * float(W)
    y2 = float(p2.get("y") or 0.0) * float(H)

    dx = x2 - x1
    dy = y2 - y1
    L = float(math.hypot(dx, dy))
    if L <= 1e-9:
        item["spans"] = []
        return

    # Unit direction (ux, uy) along baseline; normal (nx, ny) forced downward.
    ux = dx / L
    uy = dy / L
    nx = -uy
    ny = ux
    if ny < 0:
        nx, ny = -nx, -ny

    base_w_px = L
    base_h_px = base_h * float(H)

    # Reference size used for measuring; final size is a scale of this.
    base_size = 96

    widths_px = []
    max_ascent = 0
    max_descent = 0

    # Drawable units: words and spaces, ZWSP dropped.
    layout_units = []
    for k, s, _ in tokens:
        if s == ZWSP:
            continue
        if k == "space":
            layout_units.append(("space", _sanitize_draw_text(s)))
        elif k == "word":
            layout_units.append(("word", _sanitize_draw_text(s)))

    def _measure_len_px(font, text: str) -> float:
        # Prefer FreeType getlength; fall back to textbbox, then legacy textsize.
        try:
            return float(font.getlength(text))
        except Exception:
            tmp = Image.new("RGBA", (10, 10), (0, 0, 0, 0))
            dtmp = ImageDraw.Draw(tmp)
            try:
                bb = dtmp.textbbox((0, 0), text, font=font, anchor="ls")
                return float(bb[2] - bb[0])
            except Exception:
                w, _ = dtmp.textsize(text, font=font)
                return float(w)

    for i, (k, t) in enumerate(layout_units):
        if k == "space":
            # Measure the space with the font of a neighboring word (script-aware).
            hint = ""
            for j in range(i - 1, -1, -1):
                if layout_units[j][0] == "word":
                    hint = layout_units[j][1]
                    break
            if not hint:
                for j in range(i + 1, len(layout_units)):
                    if layout_units[j][0] == "word":
                        hint = layout_units[j][1]
                        break
            font0 = pick_font(hint or "a", thai_path, latin_path, base_size)
            widths_px.append(max(0.0, _measure_len_px(font0, t)))
            continue

        font0 = pick_font(t, thai_path, latin_path, base_size)
        try:
            ascent, descent = font0.getmetrics()
        except Exception:
            ascent, descent = base_size, int(base_size * 0.25)
        if ascent > max_ascent:
            max_ascent = ascent
        if descent > max_descent:
            max_descent = descent

        if kerning_adjust and (i + 1) < len(layout_units) and layout_units[i + 1][0] == "word":
            # Approximate kerning against the next word's first char when the
            # two pieces share a script (Thai vs non-Thai).
            nxt = layout_units[i + 1][1]
            nxt1 = nxt[:1] if nxt else ""
            if nxt1 and (_contains_thai(t) == _contains_thai(nxt1)):
                tw = _measure_len_px(font0, t + nxt1) - \
                    _measure_len_px(font0, nxt1)
            else:
                tw = _measure_len_px(font0, t)
        else:
            tw = _measure_len_px(font0, t)

        widths_px.append(max(0.0, tw))

    line_tw = sum(widths_px)
    bo_base = _baseline_offset_px_for_text(
        item_text, thai_path, latin_path, base_size)
    if bo_base is not None:
        _, total_h_base = bo_base
        line_th = float(total_h_base)
    else:
        line_th = float(max_ascent + max_descent)

    if line_tw <= 1e-9 or line_th <= 1e-9:
        item["spans"] = []
        return

    if forced_size_px is None:
        # Fit both width (baseline length) and height (0.5% slack on height).
        scale_line = min((base_w_px * 1.0) / line_tw,
                         (base_h_px * 0.995) / line_th)
        if scale_line <= 0.0:
            item["spans"] = []
            return
        final_size = max(10, int(base_size * scale_line))
    else:
        final_size = int(max(10, forced_size_px))
        scale_line = float(final_size) / float(base_size)

    item["font_size_px"] = final_size

    w_scaled = [w * scale_line for w in widths_px]
    total_scaled = sum(w_scaled)
    # Center the line horizontally when it underfills the box.
    margin_px = (base_w_px - total_scaled) / \
        2.0 if total_scaled < base_w_px else 0.0

    bo = _baseline_offset_px_for_text(
        item_text, thai_path, latin_path, final_size)
    if apply_baseline_shift and bo is not None:
        # Project the baseline so text is vertically centered in the box:
        # move it along the normal to pass through center + ascent offset.
        baseline_offset_px, _ = bo
        cx = (base_left + (base_w / 2.0)) * float(W)
        cy = (base_top + (base_h / 2.0)) * float(H)
        target = (cx + (baseline_offset_px * nx),
                  cy + (baseline_offset_px * ny))
        s = ((target[0] - x1) * nx) + ((target[1] - y1) * ny)
        x1 += nx * s
        y1 += ny * s
        x2 += nx * s
        y2 += ny * s

    item["baseline_p1"] = {"x": x1 / float(W), "y": y1 / float(H)}
    item["baseline_p2"] = {"x": x2 / float(W), "y": y2 / float(H)}

    raw_pos = 0
    span_i = 0
    unit_i = 0
    cum_px = 0.0
    spans = []

    for kind, s, _ in tokens:
        if s == ZWSP:
            continue

        # Raw-text offsets are absolute within the paragraph's text stream.
        start_raw = abs_line_start_raw + raw_pos
        raw_pos += len(s)
        end_raw = abs_line_start_raw + raw_pos

        if unit_i >= len(w_scaled):
            break

        wpx = w_scaled[unit_i]
        # t0/t1 are fractions of the baseline length.
        t0 = (margin_px + cum_px) / base_w_px
        cum_px += wpx
        t1 = (margin_px + cum_px) / base_w_px

        if kind == "space":
            # Spaces advance the cursor but produce no span.
            unit_i += 1
            continue

        span_box = _ensure_box_fields({
            "left": base_left + (base_w * t0),
            "top": base_top,
            "width": base_w * (t1 - t0),
            "height": base_h,
            "rotation_deg": float(b.get("rotation_deg") or 0.0),
            "rotation_deg_css": float(b.get("rotation_deg_css") or 0.0),
        })

        spans.append({
            "side": "Ai",
            "para_index": para_index,
            "item_index": item_index,
            "span_index": span_i,
            "text": s,
            "valid_text": True,
            "start_raw": start_raw,
            "end_raw": end_raw,
            "t0_raw": t0,
            "t1_raw": t1,
            "box": span_box,
            "height_raw": item.get("height_raw"),
            "baseline_p1": item.get("baseline_p1"),
            "baseline_p2": item.get("baseline_p2"),
            "font_size_px": final_size,
        })
        span_i += 1
        unit_i += 1
    item["spans"] = spans
+ ai_paras = ai_paras[:len(paragraphs)] + ai_text_full_clean = "\n\n".join(ai_paras) + + raw_cursor = 0 + for pi, (p, ptext) in enumerate(zip(paragraphs, ai_paras)): + p["side"] = "Ai" + p["para_index"] = int(p.get("para_index", pi)) + items = p.get("items") or [] + max_lines = len(items) + if max_lines <= 0: + continue + + base_size_ref = None + if isinstance(p.get("para_font_size_px"), int) and int(p.get("para_font_size_px")) > 0: + base_size_ref = int(p.get("para_font_size_px")) + else: + ref_sizes = [] + for it in items: + fs = it.get("font_size_px") + if isinstance(fs, int) and fs > 0: + ref_sizes.append(fs) + if ref_sizes: + base_size_ref = min(ref_sizes) + + base_size = int(base_size_ref or 96) + min_lines = int(max_lines) + + para_size, lines = _fit_para_size_and_lines( + ptext, + parser, + items, + img_w, + img_h, + thai_font, + latin_font, + base_size, + min_lines=min_lines, + lang=lang_norm, + ) + lines = _pad_lines(lines, max_lines) + p["para_font_size_px"] = int(para_size) + + p["text"] = ptext + p["valid_text"] = bool(ptext) + p["start_raw"] = raw_cursor + p["end_raw"] = raw_cursor + len(ptext) + + line_start = raw_cursor + for ii in range(max_lines): + it = items[ii] + it["side"] = "Ai" + it["para_index"] = pi + it["item_index"] = ii + _apply_line_to_item( + it, + (lines[ii] if ii < len(lines) else []), + pi, + ii, + line_start, + img_w, + img_h, + thai_font, + latin_font, + para_size, + apply_baseline_shift=True, + kerning_adjust=True, + ) + line_raw_len = sum(len(s) for k, s, w in ( + lines[ii] if ii < len(lines) else []) if s != ZWSP) + line_start += line_raw_len + raw_cursor = p["end_raw"] + 2 + + return {"Ai": {"aiTextFull": ai_text_full_clean, "aiTree": out_tree}} + +def _uniformize_ai_item_span_font_size(item: dict, img_w: int, img_h: int, thai_font: str, latin_font: str): + spans = item.get("spans") or [] + if not spans or img_w <= 0 or img_h <= 0: + return + + base_size = item.get("font_size_px") + try: + base_size = int(base_size) if 
base_size is not None else None + except Exception: + base_size = None + + if not base_size: + for sp in spans: + fs = sp.get("font_size_px") if isinstance(sp, dict) else None + if isinstance(fs, int) and fs > 0: + base_size = fs + break + + if not base_size or base_size <= 0: + return + + tmp = Image.new("RGBA", (10, 10), (0, 0, 0, 0)) + dtmp = ImageDraw.Draw(tmp) + font_cache = {} + + def _font_for(text: str, size: int): + key = (int(size), 1 if _contains_thai(text) else 0) + f = font_cache.get(key) + if f: + return f + f = pick_font(text, thai_font, latin_font, int(size)) + font_cache[key] = f + return f + + min_size = int(base_size) + + for sp in spans: + if not isinstance(sp, dict): + continue + txt = _sanitize_draw_text(sp.get("text") or "") + if txt.strip() == "": + continue + + b = sp.get("box") or {} + aw = float(b.get("width") or 0.0) * float(img_w) + ah = float(b.get("height") or 0.0) * float(img_h) + if aw <= 0.0 or ah <= 0.0: + continue + + font = _font_for(txt, base_size) + try: + bb = dtmp.textbbox((0, 0), txt, font=font, anchor="ls") + tw = float(bb[2] - bb[0]) + th = float(bb[3] - bb[1]) + except Exception: + tw, th = dtmp.textsize(txt, font=font) + tw = float(tw) + th = float(th) + + if tw <= 0.0 or th <= 0.0: + continue + + s = min((aw * 0.995) / tw, (ah * 0.995) / th) + if s < 1.0: + req = max(10, int(base_size * s)) + if req < min_size: + min_size = req + + if min_size != base_size: + item["font_size_px"] = int(min_size) + for sp in spans: + if isinstance(sp, dict): + sp["font_size_px"] = int(min_size) + +def _rebuild_ai_spans_after_font_resize(ai_tree: dict, img_w: int, img_h: int, thai_font: str, latin_font: str, lang: str | None = None): + if not ai_tree or img_w <= 0 or img_h <= 0: + return + lang_norm = _normalize_lang(lang or LANG) + parser = _budoux_parser_for_lang(lang_norm) + for pi, p in _iter_paragraphs(ai_tree): + items = p.get("items") or [] + for ii, it in enumerate(items): + txt = _item_line_text(it) + if not str(txt).strip(): + 
it["spans"] = [] + continue + tokens = _tokens_with_spaces(str(txt), parser, lang_norm) + line_tokens = [(k, s, 0.0) for k, s in tokens] + + forced = it.get("font_size_px") or p.get("para_font_size_px") + if isinstance(forced, float): + forced = int(forced) + elif isinstance(forced, str) and forced.strip().isdigit(): + forced = int(forced.strip()) + + _apply_line_to_item( + it, + line_tokens, + int(p.get("para_index", pi)), + int(it.get("item_index", ii)), + int(it.get("start_raw", 0)), + img_w, + img_h, + thai_font, + latin_font, + forced, + apply_baseline_shift=False, + kerning_adjust=True, + ) + _uniformize_ai_item_span_font_size( + it, img_w, img_h, thai_font, latin_font) + +def ai_translate_original_text(original_text_full: str, target_lang: str): + provider, api_key, model, base_url = _resolve_ai_config() + if not api_key: + raise Exception("AI_API_KEY is required for AI translation") + + lang = _normalize_lang(target_lang) + prompt_sig = _sha1( + json.dumps( + { + "sys": AI_PROMPT_SYSTEM_BASE, + "edit": AI_PROMPT_USER_BY_LANG, + "contract": _active_ai_contract(), + "data": _active_ai_data_template(), + "style": AI_LANG_STYLE.get(lang) or AI_LANG_STYLE.get("default") or "", + }, + ensure_ascii=False, + ) + ) + + cache = None + cache_key = None + if AI_CACHE: + cache = _load_ai_cache(AI_CACHE_PATH) + cache_key = _sha1( + json.dumps( + {"provider": provider, "m": model, "u": base_url, + "l": lang, "p": prompt_sig, "t": original_text_full}, + ensure_ascii=False, + ) + ) + if cache_key in cache: + cached = cache[cache_key] + if lang == "th" and cached: + t = str(cached.get("aiTextFull") or "") + if t: + t2 = re.sub(r"(?:(?<=^)|(?<=[\s\"'“”‘’()\[\]{}<>]))\u0e19\u0e32\u0e22(?=(?:\s|$))", "", t) + t2 = re.sub(r"[ \t]{2,}", " ", t2) + t2 = re.sub(r"^[ \t]+", "", t2, flags=re.MULTILINE) + if t2 != t: + cached = dict(cached) + cached["aiTextFull"] = t2 + cache[cache_key] = cached + _save_ai_cache(AI_CACHE_PATH, cache) + return cached + + system_text, user_parts = 
_build_ai_prompt_packet(lang, original_text_full) + + started = time.time() + used_model = model + if provider == "gemini": + raw = _gemini_generate_json(api_key, model, system_text, user_parts) + elif provider == "anthropic": + raw = _anthropic_generate_json(api_key, model, system_text, user_parts) + else: + raw, used_model = _openai_compat_generate_json( + api_key, base_url, model, system_text, user_parts) + + ai_text_full = _parse_ai_textfull_only( + raw) if DO_AI_JSON else _parse_ai_textfull_text_only(raw) + + if lang == "th" and ai_text_full: + ai_text_full = re.sub(r"(?:(?<=^)|(?<=[\s\"'“”‘’()\[\]{}<>]))\u0e19\u0e32\u0e22(?=(?:\s|$))", "", ai_text_full) + ai_text_full = re.sub(r"[ \t]{2,}", " ", ai_text_full) + ai_text_full = re.sub(r"^[ \t]+", "", ai_text_full, flags=re.MULTILINE) + + result = { + "aiTextFull": ai_text_full, + "meta": {"model": used_model, "provider": provider, "base_url": base_url, "latency_sec": round(time.time() - started, 3)}, + } + if AI_CACHE and cache is not None and cache_key is not None: + cache[cache_key] = result + _save_ai_cache(AI_CACHE_PATH, cache) + return result + +def to_translated(u, lang="th"): + q = parse_qs(urlparse(u).query) + return "https://lens.google.com/translatedimage?" 
+ urlencode( + dict( + vsrid=q["vsrid"][0], + gsessionid=q["gsessionid"][0], + sl="auto", + tl=lang, + se=1, + ib="1", + ) + ) + +def _b64pad(s: str) -> str: + return s + "=" * ((4 - (len(s) % 4)) % 4) + +def decode_imageurl_to_datauri(imageUrl: str): + if not imageUrl: + return None + if isinstance(imageUrl, str) and imageUrl.startswith("data:image") and "base64," in imageUrl: + return imageUrl + for fn in (base64.b64decode, base64.urlsafe_b64decode): + try: + b = fn(_b64pad(imageUrl)) + try: + t = b.decode("utf-8") + except Exception: + t = b.decode("utf-8", errors="ignore") + if "data:image" in t and "base64," in t: + i = t.find("data:image") + return t[i:].strip() if i >= 0 else t.strip() + except Exception: + pass + return None + +def read_varint(buf, i): + shift = 0 + result = 0 + while True: + if i >= len(buf): + raise ValueError("eof varint") + b = buf[i] + i += 1 + result |= ((b & 0x7F) << shift) + if (b & 0x80) == 0: + return result, i + shift += 7 + if shift > 70: + raise ValueError("varint too long") + +def parse_proto(buf, start=0, end=None): + if end is None: + end = len(buf) + i = start + out = [] + while i < end: + key, i = read_varint(buf, i) + field = key >> 3 + wire = key & 7 + if wire == 0: + val, i = read_varint(buf, i) + out.append((field, wire, val)) + elif wire == 1: + val = buf[i: i + 8] + i += 8 + out.append((field, wire, val)) + elif wire == 2: + l, i = read_varint(buf, i) + val = buf[i: i + l] + i += l + out.append((field, wire, val)) + elif wire == 5: + val = buf[i: i + 4] + i += 4 + out.append((field, wire, val)) + else: + raise ValueError(f"wiretype {wire}") + return out + +def b2f(b4): + return struct.unpack("= 2 and height is not None: + return pts[0], pts[1], height + return None, None, None + +def _looks_like_geom(geom_bytes): + geom_fields = parse_proto(geom_bytes) + pts = 0 + has_height = False + for f, w, v in geom_fields: + if f == 1 and w == 2: + p_fields = parse_proto(v) + if _get_float_field(p_fields, 1) is not None and 
_get_float_field(p_fields, 2) is not None: + pts += 1 + elif f == 3 and w == 5: + has_height = True + return pts >= 2 and has_height + +def _looks_like_span(span_bytes): + span_fields = parse_proto(span_bytes) + has_t = False + has_range = False + for f, w, v in span_fields: + if f in (3, 4) and w == 5: + has_t = True + elif f in (1, 2) and w == 0: + has_range = True + return has_t and has_range + +def _is_item_message(msg_bytes): + fields = parse_proto(msg_bytes) + geom_ok = False + span_ok = 0 + for f, w, v in fields: + if f == 1 and w == 2 and not geom_ok: + geom_ok = _looks_like_geom(v) + elif f == 2 and w == 2: + if _looks_like_span(v): + span_ok += 1 + return geom_ok and span_ok > 0 + +def _extract_items_from_paragraph(par_bytes): + top = parse_proto(par_bytes) + items = [] + for _, w, v in top: + if w == 2 and _is_item_message(v): + items.append(v) + if items: + return items + items = [] + seen = set() + nodes = 0 + + def walk(buf, depth): + nonlocal nodes + if depth >= 4 or nodes > 20000: + return + for _, w, v in parse_proto(buf): + if w != 2: + continue + nodes += 1 + if nodes > 20000: + return + if _is_item_message(v): + if v in seen: + continue + seen.add(v) + items.append(v) + else: + walk(v, depth + 1) + walk(par_bytes, 0) + return items + +def _extract_item_geom_spans(item_bytes): + fields = parse_proto(item_bytes) + geom_bytes = None + spans_bytes = [] + for f, w, v in fields: + if f == 1 and w == 2: + geom_bytes = v + if f == 2 and w == 2: + spans_bytes.append(v) + return geom_bytes, spans_bytes + +def _extract_span(span_bytes): + span_fields = parse_proto(span_bytes) + start = None + end = None + t0 = None + t1 = None + for f, w, v in span_fields: + if f == 1 and w == 0: + start = int(v) + elif f == 2 and w == 0: + end = int(v) + elif f == 3 and w == 5: + t0 = b2f(v) + elif f == 4 and w == 5: + t1 = b2f(v) + return start, end, t0, t1, span_fields + +def _normalize_angle_deg(angle_deg): + while angle_deg <= -180.0: + angle_deg += 360.0 + while 
def _slice_text(full_text, start, end):
    """Safe substring: '' on None/negative/inverted/out-of-range indices."""
    if start is None or end is None:
        return ""
    if start < 0 or end < 0 or start > end or end > len(full_text):
        return ""
    return full_text[start:end]

def _range_min_max(ranges):
    """(min start, max end) over (start, end) pairs; (None, None) when empty."""
    if not ranges:
        return None, None
    s = min(r[0] for r in ranges)
    e = max(r[1] for r in ranges)
    return s, e

def decode_tree(paragraphs_b64, full_text, side, img_w, img_h, want_raw=True):
    """Decode base64 paragraph protos into a paragraph/item/span tree.

    For each paragraph: extract item messages, derive each item's baseline
    geometry (normalized → pixel space), decode spans with their t-fractions
    along the baseline, and slice their text out of `full_text` by the raw
    start/end offsets. Returns (tree, raw_dump); raw_dump carries the b64 and
    hex bytes per paragraph when `want_raw` is set.
    """
    raw_dump = []
    paragraphs = []

    # Monotonic cursor over raw text offsets; fills in spans that omit `start`.
    cursor = 0

    for para_index, b64s in enumerate(paragraphs_b64):
        par_bytes = base64.b64decode(b64s)
        if want_raw:
            raw_dump.append({"para_index": para_index,
                             "b64": b64s, "bytes_hex": b2hex(par_bytes)})

        item_msgs = _extract_items_from_paragraph(par_bytes)
        items = []
        para_ranges = []
        para_bounds = None

        for item_index, item_bytes in enumerate(item_msgs):
            geom_bytes, spans_bytes = _extract_item_geom_spans(item_bytes)
            if geom_bytes is None:
                continue

            p1, p2, height_norm = _get_points_from_geom(geom_bytes)
            if p1 is None or p2 is None or height_norm is None:
                continue

            x1n, y1n = p1
            x2n, y2n = p2
            x1 = x1n * img_w
            y1 = y1n * img_h
            x2 = x2n * img_w
            y2 = y2n * img_h

            dx = x2 - x1
            dy = y2 - y1
            # Canonicalize baseline direction: left→right (or top→bottom when vertical).
            if dx < 0 or (abs(dx) < 1e-12 and dy < 0):
                x1, y1, x2, y2 = x2, y2, x1, y1
                x1n, y1n, x2n, y2n = x2n, y2n, x1n, y1n
                dx = x2 - x1
                dy = y2 - y1

            L = math.hypot(dx, dy)
            if L <= 1e-12:
                continue

            ux = dx / L
            uy = dy / L

            angle_deg_raw = math.degrees(math.atan2(dy, dx))
            angle_deg = _normalize_angle_deg(angle_deg_raw)

            angle_deg_css = angle_deg

            height_px = height_norm * img_h

            item_spans = []
            item_ranges = []
            item_bounds = None

            for span_index, sb in enumerate(spans_bytes):
                start, end, t0, t1, _ = _extract_span(sb)

                # Missing start → assume the running cursor; otherwise advance it.
                if start is None:
                    start = cursor
                else:
                    cursor = max(cursor, start)
                if end is None:
                    continue
                cursor = max(cursor, end)

                if t0 is None and t1 is None:
                    continue
                if t0 is None:
                    t0 = 0.0
                if t1 is None:
                    t1 = 1.0

                valid_text = False
                span_text = ""
                if start is not None and end is not None and 0 <= start <= end <= len(full_text):
                    span_text = full_text[start:end]
                    valid_text = span_text.strip() != ""
                if valid_text:
                    item_ranges.append((start, end))

                # Span endpoints: interpolate along the baseline by t0/t1.
                e1x = x1 + ux * (t0 * L)
                e1y = y1 + uy * (t0 * L)
                e2x = x1 + ux * (t1 * L)
                e2y = y1 + uy * (t1 * L)

                cx = (e1x + e2x) / 2.0
                cy = (e1y + e2y) / 2.0

                width_px = abs(t1 - t0) * L
                left_px = cx - width_px / 2.0
                top_px = cy - height_px / 2.0

                # Back to normalized [0..1] coordinates for the output node.
                left = left_px / img_w
                top = top_px / img_h
                width = width_px / img_w
                height = height_px / img_h

                span_node = {
                    "side": side,
                    "para_index": para_index,
                    "item_index": item_index,
                    "span_index": span_index,
                    "start_raw": start,
                    "end_raw": end,
                    "t0_raw": t0,
                    "t1_raw": t1,
                    "height_raw": height_norm,
                    "baseline_p1": {"x": x1n, "y": y1n},
                    "baseline_p2": {"x": x2n, "y": y2n},
                    "box": {
                        "left": left,
                        "top": top,
                        "width": width,
                        "height": height,
                        "rotation_deg": angle_deg,
                        "rotation_deg_css": angle_deg_css,
                        "center": {"x": cx / img_w, "y": cy / img_h},
                        "left_pct": left * 100.0,
                        "top_pct": top * 100.0,
                        "width_pct": width * 100.0,
                        "height_pct": height * 100.0,
                    },
                    "text": span_text,
                    "valid_text": valid_text,
                }

                # Accumulate the item's pixel AABB from the rotated span quad.
                quad = _token_box_quad_px(span_node, img_w, img_h, pad_px=0)
                if quad:
                    xs = [p[0] for p in quad]
                    ys = [p[1] for p in quad]
                    b = (min(xs), min(ys), max(xs), max(ys))
                    item_bounds = b if item_bounds is None else (min(item_bounds[0], b[0]), min(
                        item_bounds[1], b[1]), max(item_bounds[2], b[2]), max(item_bounds[3], b[3]))
                    item_bounds = item_bounds  # no-op (kept from original)
                item_spans.append(span_node)

            s0, s1 = _range_min_max(item_ranges)
            item_text = _slice_text(
                full_text, s0, s1).strip() if s0 is not None else ""
            item_valid_text = item_text.strip() != ""
            if s0 is not None:
                para_ranges.append((s0, s1))

            cx = (x1 + x2) / 2.0
            cy = (y1 + y2) / 2.0
            left_px = cx - L / 2.0
            top_px = cy - height_px / 2.0

            item_box = {
                "left": left_px / img_w,
                "top": top_px / img_h,
                "width": L / img_w,
                "height": height_px / img_h,
                "rotation_deg": angle_deg,
                "rotation_deg_css": angle_deg_css,
                "center": {"x": cx / img_w, "y": cy / img_h},
            }

            if item_bounds is not None:
                para_bounds = item_bounds if para_bounds is None else (min(para_bounds[0], item_bounds[0]), min(
                    para_bounds[1], item_bounds[1]), max(para_bounds[2], item_bounds[2]), max(para_bounds[3], item_bounds[3]))

            items.append(
                {
                    "side": side,
                    "para_index": para_index,
                    "item_index": item_index,
                    "start_raw": s0,
                    "end_raw": s1,
                    "text": item_text,
                    "valid_text": item_valid_text,
                    "height_raw": height_norm,
                    "baseline_p1": {"x": x1n, "y": y1n},
                    "baseline_p2": {"x": x2n, "y": y2n},
                    "box": item_box,
                    "bounds_px": item_bounds,
                    "spans": item_spans,
                }
            )

        p0, p1 = _range_min_max(para_ranges)
        para_text = _slice_text(
            full_text, p0, p1).strip() if p0 is not None else ""
        para_valid_text = para_text.strip() != ""
        paragraphs.append(
            {
                "side": side,
                "para_index": para_index,
                "start_raw": p0,
                "end_raw": p1,
                "text": para_text,
                "valid_text": para_valid_text,
                "bounds_px": para_bounds,
                "items": items,
            }
        )

    tree = {"side": side, "paragraphs": paragraphs}
    return tree, raw_dump

def flatten_tree_spans(tree):
    """All span nodes of the tree in document order."""
    spans = []
    for p in tree.get("paragraphs") or []:
        for it in p.get("items") or []:
            for sp in it.get("spans") or []:
                spans.append(sp)
    return spans
"t1_raw": 1.0, + "height_raw": it.get("height_raw"), + "baseline_p1": it.get("baseline_p1"), + "baseline_p2": it.get("baseline_p2"), + "box": it.get("box"), + "text": it.get("text") or "", + "valid_text": it.get("valid_text", False), + } + toks.append(t) + return toks + +def _mean_angle_deg(angles_deg): + vals = [a for a in (angles_deg or []) if a is not None] + if not vals: + return 0.0 + xs = [math.cos(math.radians(a)) for a in vals] + ys = [math.sin(math.radians(a)) for a in vals] + return math.degrees(math.atan2(sum(ys) / len(ys), sum(xs) / len(xs))) + +def _rotate_xy(x, y, cos_a, sin_a): + return (x * cos_a - y * sin_a, x * sin_a + y * cos_a) + +def _para_obb_quad_px(para_node, W, H): + items = para_node.get("items") or [] + if not items: + return None + + angles = [] + pts = [] + for it in items: + b = (it.get("box") or {}) + angles.append(b.get("rotation_deg", 0.0)) + q = _token_box_quad_px(it, W, H, pad_px=0) + if q: + pts.extend(q) + + if len(pts) < 4: + return None + + ang = _mean_angle_deg(angles) + cos_a = math.cos(math.radians(ang)) + sin_a = math.sin(math.radians(ang)) + cos_n = cos_a + sin_n = -sin_a + + rpts = [_rotate_xy(x, y, cos_n, sin_n) for (x, y) in pts] + xs = [p[0] for p in rpts] + ys = [p[1] for p in rpts] + minx, maxx = min(xs), max(xs) + miny, maxy = min(ys), max(ys) + corners = [(minx, miny), (maxx, miny), (maxx, maxy), (minx, maxy)] + return [_rotate_xy(x, y, cos_a, sin_a) for (x, y) in corners] + +def build_level_outlines(tree, W, H): + outlines = [] + if not tree: + return outlines + + if DRAW_OUTLINE_PARA: + for para in tree.get("paragraphs") or []: + q = _para_obb_quad_px(para, W, H) + if q: + outlines.append( + {"quad": q, "color": PARA_OUTLINE, "width": PARA_OUTLINE_WIDTH}) + + if DRAW_OUTLINE_ITEM: + for itok in flatten_tree_items_as_tokens(tree, W, H): + q = _token_box_quad_px(itok, W, H, pad_px=0) + if q: + outlines.append( + {"quad": q, "color": ITEM_OUTLINE, "width": ITEM_OUTLINE_WIDTH}) + + return outlines + +def 
def tokens_to_html(tokens, container_class="RTMDre"):
    """Render token boxes as absolutely-positioned divs inside a container.

    NOTE(review): every HTML literal of the original was destroyed by
    markup-stripping during extraction; the surviving logic (style string,
    aria text, data index `wi`, font-size/line-height handling) is intact and
    the div markup below is a reconstruction — confirm class names against
    the frontend CSS before relying on them.
    """
    parts = []
    parts.append(f'<div class="{container_class}">')
    for t in tokens:
        if not t.get("valid_text"):
            continue
        b = t["box"]
        # Attribute-safe text: escape double quotes, flatten newlines.
        aria = (t.get("text") or "").replace('"', "&quot;").replace("\n", " ")
        wi = t.get("wi", 0)
        rot = b.get("rotation_deg_css", b.get("rotation_deg", 0.0))
        fs = t.get("font_size_px") or b.get("font_size_px")
        lh = None
        if fs:
            try:
                # line-height ≈ 105% of the font size, at least 1px
                lh = max(1, int(round(float(fs) * 1.05)))
            except Exception:
                lh = None
        style = (
            f'top: calc({b["top_pct"]}%); '
            f'left: calc({b["left_pct"]}%); '
            f'width: calc({b["width_pct"]}%); '
            f'height: calc({b["height_pct"]}%); '
            f"transform: rotate({rot}deg);"
        )
        if fs:
            style += f" font-size: {float(fs):.4g}px;"
        if lh:
            style += f" line-height: {lh}px;"
        parts.append(
            f'<div class="tkn" data-wi="{wi}" aria-label="{aria}" style="{style}">{aria}</div>'
        )
    parts.append("</div>")
    return "".join(parts)
") + return "".join(parts) + +def tp_overlay_css(): + return ( + ".tp-draw-root{position:absolute;inset:0;pointer-events:none;}" + ".tp-draw-scope{position:absolute;left:0;top:0;transform-origin:0 0;}" + ".tp-para{position:absolute;left:0;top:0;}" + ".tp-item{position:absolute;left:0;top:0;display:flex;align-items:center;justify-content:center;" + "white-space:pre;pointer-events:none;box-sizing:border-box;overflow:visible;" + "font-family:var(--tp-font,system-ui);font-weight:500;" + "color:var(--tp-fg,rgba(20,20,20,.98));" + "text-shadow:0 0 2px rgba(255,255,255,.90),0 0 2px rgba(0,0,0,.60),0 1px 1px rgba(0,0,0,.35);}" + ".tp-item>span{display:inline-block;white-space:pre;transform-origin:center;" + "padding:0;border-radius:3px;" + "background:var(--tp-bg,rgba(255,255,255,.65));" + "box-decoration-break:clone;-webkit-box-decoration-break:clone;}" + ".tp-item[data-wrap='1'],.tp-item[data-wrap='1']>span{white-space:pre-wrap;word-break:break-word;}" + ".tp-item[data-wrap='1']>span{text-align:center;}" + ) + +def _tp_norm_list(v): + if isinstance(v, list): + return v + if isinstance(v, dict): + try: + return [v[k] for k in sorted(v.keys(), key=lambda x: int(x) if str(x).isdigit() else str(x))] + except Exception: + return list(v.values()) + return [] + +def _tp_num(x): + try: + n = float(x) + return n if math.isfinite(n) else None + except Exception: + return None + +def _tp_escape_text(s: str) -> str: + if not s: + return "" + s = s.replace("\r", "") + s = s.replace("&", "&").replace("<", "<").replace(">", ">") + return s + +def _tp_get_rect(obj: dict, base_w: float, base_h: float): + if not isinstance(obj, dict): + return None + box = obj.get("box") if isinstance(obj.get("box"), dict) else {} + + l0 = _tp_num(box.get("left")) + t0 = _tp_num(box.get("top")) + w0 = _tp_num(box.get("width")) + h0 = _tp_num(box.get("height")) + if None not in (l0, t0, w0, h0) and w0 > 0 and h0 > 0: + l = l0 * base_w + t = t0 * base_h + r = (l0 + w0) * base_w + b = (t0 + h0) * base_h + 
deg = _tp_num(box.get("rotation_deg_css")) + if deg is None: + deg = _tp_num(box.get("rotation_deg")) + return {"l": l, "t": t, "r": r, "b": b, "deg": deg or 0.0} + + lp = _tp_num(box.get("left_pct")) + tp = _tp_num(box.get("top_pct")) + wp = _tp_num(box.get("width_pct")) + hp = _tp_num(box.get("height_pct")) + if None not in (lp, tp, wp, hp) and wp > 0 and hp > 0: + l0p = lp / 100.0 + t0p = tp / 100.0 + w0p = wp / 100.0 + h0p = hp / 100.0 + l = l0p * base_w + t = t0p * base_h + r = (l0p + w0p) * base_w + b = (t0p + h0p) * base_h + deg = _tp_num(box.get("rotation_deg_css")) + if deg is None: + deg = _tp_num(box.get("rotation_deg")) + return {"l": l, "t": t, "r": r, "b": b, "deg": deg or 0.0} + + bpx = obj.get("bounds_px") + if isinstance(bpx, list) and len(bpx) == 4: + l = _tp_num(bpx[0]) + t = _tp_num(bpx[1]) + r = _tp_num(bpx[2]) + bb = _tp_num(bpx[3]) + if None not in (l, t, r, bb) and r > l and bb > t: + return {"l": l, "t": t, "r": r, "b": bb, "deg": 0.0} + return None + +def _tp_union_rect(items: list, base_w: float, base_h: float): + l = float("inf") + t = float("inf") + r = float("-inf") + b = float("-inf") + for it in items: + bx = _tp_get_rect(it, base_w, base_h) + if not bx: + continue + l = min(l, bx["l"]) + t = min(t, bx["t"]) + r = max(r, bx["r"]) + b = max(b, bx["b"]) + if not math.isfinite(l) or not math.isfinite(t) or not math.isfinite(r) or not math.isfinite(b): + return None + return {"l": l, "t": t, "r": r, "b": b, "deg": 0.0} + +def _tp_mean_item_deg(items: list, base_w: float, base_h: float) -> float: + angles = [] + for it in items or []: + bx = _tp_get_rect(it, base_w, base_h) + if not bx: + continue + a = _tp_num(bx.get("deg")) + if a is None: + continue + angles.append(float(a)) + if not angles: + return 0.0 + return float(_mean_angle_deg(angles)) + +def _tp_oriented_rect_from_points(pts: list, para_deg: float) -> dict | None: + if len(pts) < 2: + return None + + ang = float(para_deg or 0.0) + if not math.isfinite(ang): + ang = 0.0 + + 
rad_n = math.radians(-ang) + cn = math.cos(rad_n) + sn = math.sin(rad_n) + rpts = [(x * cn - y * sn, x * sn + y * cn) for x, y in pts] + xs = [p[0] for p in rpts] + ys = [p[1] for p in rpts] + minx, maxx = min(xs), max(xs) + miny, maxy = min(ys), max(ys) + + w = float(maxx - minx) + h = float(maxy - miny) + if w <= 0.0 or h <= 0.0: + return None + + cx0 = float((minx + maxx) / 2.0) + cy0 = float((miny + maxy) / 2.0) + rad_a = math.radians(ang) + ca = math.cos(rad_a) + sa = math.sin(rad_a) + cx = (cx0 * ca) - (cy0 * sa) + cy = (cx0 * sa) + (cy0 * ca) + + l = cx - (w / 2.0) + t = cy - (h / 2.0) + return {"l": float(l), "t": float(t), "r": float(l + w), "b": float(t + h), "deg": float(ang)} + +def _tp_rect_corners(l: float, t: float, r: float, b: float, deg: float) -> list: + w = float(r - l) + h = float(b - t) + if w <= 0.0 or h <= 0.0: + return [] + cx = float((l + r) / 2.0) + cy = float((t + b) / 2.0) + hw = w / 2.0 + hh = h / 2.0 + rad = math.radians(float(deg or 0.0)) + c = math.cos(rad) + s = math.sin(rad) + out = [] + for x, y in ((-hw, -hh), (hw, -hh), (hw, hh), (-hw, hh)): + rx = (x * c) - (y * s) + ry = (x * s) + (y * c) + out.append((cx + rx, cy + ry)) + return out + +def _tp_para_rect_from_items(items: list, base_w: float, base_h: float, para_deg: float) -> dict | None: + if not items: + return None + + pts = [] + for it in items: + ibx = _tp_get_rect(it, base_w, base_h) + if not ibx: + continue + w = float(ibx["r"] - ibx["l"]) + h = float(ibx["b"] - ibx["t"]) + if w <= 0.0 or h <= 0.0: + continue + deg = float(ibx.get("deg") or 0.0) + cx = float(ibx["l"] + w / 2.0) + cy = float(ibx["t"] + h / 2.0) + hw = w / 2.0 + hh = h / 2.0 + rad = math.radians(deg) + c = math.cos(rad) + s = math.sin(rad) + for x, y in ((-hw, -hh), (hw, -hh), (hw, hh), (-hw, hh)): + rx = (x * c) - (y * s) + ry = (x * s) + (y * c) + pts.append((cx + rx, cy + ry)) + + return _tp_oriented_rect_from_points(pts, para_deg) + +def _tp_extract_item_text(it: dict) -> str: + if not 
isinstance(it, dict): + return "" + for k in ( + "text", + "translated_text", + "translatedText", + "ai_text", + "aiText", + "display_text", + "displayText", + ): + v = it.get(k) + if isinstance(v, str) and v: + return v + spans = _tp_norm_list(it.get("spans")) + if spans: + return "".join(s.get("text") if isinstance(s, dict) and isinstance(s.get("text"), str) else "" for s in spans) + return "" + +def ai_tree_to_tp_html(tree: dict, base_w: int, base_h: int) -> str: + base_w = int(base_w or 0) + base_h = int(base_h or 0) + if base_w <= 0 or base_h <= 0: + return "" + paras = _tp_norm_list(tree.get("paragraphs") + if isinstance(tree, dict) else None) + if not paras: + return "" + + parts = [ + f'
'] + for pi, p in enumerate(paras): + if not isinstance(p, dict): + continue + items = _tp_norm_list(p.get("items")) + if len(items) > 1 and any(isinstance(x, dict) and _tp_num(x.get("item_index")) is not None for x in items): + items = sorted( + items, + key=lambda x: _tp_num( + x.get("item_index")) if isinstance(x, dict) else 0.0, + ) + + para_idx = int(_tp_num(p.get("para_index")) or pi) + pbx = _tp_get_rect(p, base_w, base_h) or _tp_union_rect( + items, base_w, base_h) + if not pbx: + continue + + para_deg = float(pbx.get("deg") or 0.0) + if abs(para_deg) <= 0.01: + derived = _tp_mean_item_deg(items, base_w, base_h) + if abs(derived) > 0.01: + pbx2 = _tp_para_rect_from_items(items, base_w, base_h, derived) + if pbx2: + pbx = pbx2 + para_deg = float(pbx.get("deg") or 0.0) + + pbx_items = _tp_para_rect_from_items(items, base_w, base_h, para_deg) + if pbx_items: + pts = _tp_rect_corners( + pbx["l"], pbx["t"], pbx["r"], pbx["b"], para_deg) + pts += _tp_rect_corners(pbx_items["l"], pbx_items["t"], + pbx_items["r"], pbx_items["b"], para_deg) + merged = _tp_oriented_rect_from_points(pts, para_deg) + if merged: + pbx = merged + + eps = float(_TP_HTML_EPS_PX or 0.0) + if eps > 0.0: + pbx = { + "l": float(pbx["l"] - eps), + "t": float(pbx["t"] - eps), + "r": float(pbx["r"] + eps), + "b": float(pbx["b"] + eps), + "deg": float(pbx.get("deg") or para_deg or 0.0), + } + + pw = max(0.0, pbx["r"] - pbx["l"]) + ph = max(0.0, pbx["b"] - pbx["t"]) + + para_style = ( + f'left: {pbx["l"]:.6f}px; ' + f'top: {pbx["t"]:.6f}px; ' + f'width: {pw:.6f}px; ' + f'height: {ph:.6f}px;' + ) + if abs(para_deg) > 0.01: + para_style += f' transform: rotate({para_deg:.6g}deg); transform-origin: center center;' + + parts.append( + f'
' + ) + + para_cx = (pbx["l"] + pbx["r"]) / 2.0 + para_cy = (pbx["t"] + pbx["b"]) / 2.0 + inv_c = inv_s = None + if abs(para_deg) > 0.01: + rad_inv = math.radians(-para_deg) + inv_c = math.cos(rad_inv) + inv_s = math.sin(rad_inv) + + raw_texts = [_tp_extract_item_text(it) for it in items] + mapped = list(raw_texts) + p_text = p.get("text") if isinstance(p.get("text"), str) else "" + non_empty = sum( + 1 for t in raw_texts if isinstance(t, str) and t.strip()) + any_nl = any(isinstance(t, str) and re.search(r"\r?\n", t) + for t in raw_texts) + first_nl = bool(raw_texts and isinstance( + raw_texts[0], str) and re.search(r"\r?\n", raw_texts[0])) + lines = None + if p_text and re.search(r"\r?\n", p_text) and (non_empty <= 1 or any_nl): + lines = [s.rstrip() + for s in re.split(r"\r?\n+", p_text) if s.strip()] + elif first_nl and (non_empty <= 1 or all(not (t or "").strip() for t in raw_texts[1:])): + lines = [s.rstrip() for s in re.split( + r"\r?\n+", raw_texts[0]) if s.strip()] + if lines: + mapped = [lines[i] if i < len(lines) else ( + raw_texts[i] if i < len(raw_texts) else "") for i in range(len(items))] + + for ii, it in enumerate(items): + if not isinstance(it, dict): + continue + text = (mapped[ii] if ii < len(mapped) else "") or "" + if not text.strip(): + continue + + ibx = _tp_get_rect(it, base_w, base_h) + if not ibx: + continue + + w0 = max(0.0, ibx["r"] - ibx["l"]) + h0 = max(0.0, ibx["b"] - ibx["t"]) + if w0 <= 0 or h0 <= 0: + continue + + w = float(w0 + (2.0 * eps)) if eps > 0.0 else float(w0) + h = float(h0 + (2.0 * eps)) if eps > 0.0 else float(h0) + + item_idx = int(_tp_num(it.get("item_index")) or ii) + + fs_raw = _tp_num(it.get("font_size_px")) + + fs = int(round(fs_raw)) if fs_raw and fs_raw > 0 else max( + 10, int(round(h0 * 0.85))) + fs = max(6, min(fs, max(6, int(math.floor(h0 * 0.95))))) + lh = max(1, min(int(round(h0)), int(round(fs * 1.12)))) + if inv_c is not None and inv_s is not None: + icx = (ibx["l"] + ibx["r"]) / 2.0 + icy = (ibx["t"] + 
ibx["b"]) / 2.0 + dx = icx - para_cx + dy = icy - para_cy + rcx = para_cx + (dx * inv_c - dy * inv_s) + rcy = para_cy + (dx * inv_s + dy * inv_c) + left = (rcx - (w / 2.0)) - pbx["l"] + top = (rcy - (h / 2.0)) - pbx["t"] + else: + left = (ibx["l"] - pbx["l"]) - eps + top = (ibx["t"] - pbx["t"]) - eps + + style = ( + f'left: {left:.6f}px; ' + f'top: {top:.6f}px; ' + f'width: {w:.6f}px; ' + f'height: {h:.6f}px; ' + f'font-size: {fs}px; ' + f'line-height: {lh}px; ' + 'padding-bottom: 0px;' + ) + deg = float(ibx.get("deg") or 0.0) + if inv_c is not None: + deg = deg - para_deg + if abs(deg) > 0.01: + style += f' transform: rotate({deg:.6g}deg); transform-origin: center center;' + + wrap_attr = ' data-wrap="1"' if it.get("_tp_wrap") else "" + parts.append( + f'
' + f'{_tp_escape_text(text)}
' + ) + + parts.append("
") + parts.append("
") + return "".join(parts) + +def overlay_css(container_class="RTMDre", token_class="IwqbBf"): + c = container_class + t = token_class + return ( + f".{c}{{" + "position:absolute!important;" + "inset:0!important;" + "width:100%!important;" + "height:100%!important;" + "display:block!important;" + "opacity:1!important;" + "visibility:visible!important;" + "pointer-events:none!important;" + "overflow:visible!important;" + "z-index:2147483647!important;" + "transform:none!important;" + "contain:layout style paint!important;" + "--lens-text-color:#fff;" + "--lens-font-family:\"Noto Sans Thai\",\"Noto Sans Thai UI\",\"Noto Sans\",system-ui,-apple-system,BlinkMacSystemFont,\"Segoe UI\",Roboto,Arial,sans-serif;" + "--lens-text-shadow:0 1px 2px rgba(0,0,0,.85),0 0 1px rgba(0,0,0,.85);" + "}}" + f".{c} *{{box-sizing:border-box!important;}}" + f".{c} .{t}{{" + "position:absolute!important;" + "display:flex!important;" + "align-items:center!important;" + "justify-content:center!important;" + "opacity:1!important;" + "visibility:visible!important;" + "pointer-events:none!important;" + "user-select:none!important;" + "overflow:visible!important;" + "white-space:pre!important;" + "transform-origin:top left!important;" + "filter:none!important;" + "mix-blend-mode:normal!important;" + "text-transform:none!important;" + "letter-spacing:normal!important;" + "}}" + f".{c} .{t}::before{{" + "content:attr(aria-label)!important;" + "display:block!important;" + "white-space:pre!important;" + "color:var(--lens-text-color)!important;" + "font-family:var(--lens-font-family)!important;" + "text-shadow:var(--lens-text-shadow)!important;" + "font-weight:400!important;" + "font-style:normal!important;" + "line-height:inherit!important;" + "text-rendering:geometricPrecision!important;" + "}}" + ) + +def ensure_font(path, urls): + key = str(path or "") + cached = _FONT_RESOLVE_CACHE.get(key) + if cached is not None: + return cached or None + + if path and os.path.isfile(path): + 
def pick_font(text, thai_path, latin_path, size):
    """Load a PIL font for *text* at the given pixel *size*.

    Chooses the Thai font file when the text contains any character in
    the Thai Unicode block (U+0E00..U+0E7F), otherwise the Latin one.
    Falls back to Pillow's built-in bitmap font when the chosen file is
    missing or cannot be loaded.
    """
    def has_thai(s):
        # True if any character falls in the Thai Unicode block.
        for ch in s:
            o = ord(ch)
            if 0x0E00 <= o <= 0x0E7F:
                return True
        return False

    fp = thai_path if has_thai(text) else latin_path
    if fp and os.path.isfile(fp):
        try:
            # Prefer the RAQM layout engine (complex-script shaping);
            # getattr guards Pillow builds that do not expose it.
            return ImageFont.truetype(fp, size=size, layout_engine=getattr(ImageFont, "LAYOUT_RAQM", 0))
        except Exception:
            try:
                # Retry with the default layout engine.
                return ImageFont.truetype(fp, size=size)
            except Exception:
                pass
    return ImageFont.load_default()
def _draw_text_centered_fallback(draw_ctx, center_xy, text, thai_path, latin_path, size, fill):
    """Draw *text* centered at *center_xy*, switching fonts per script run.

    Two passes: first measure the total width/height of all Thai/Latin
    runs laid end to end on a shared baseline, then draw each run at the
    computed start position. Newline tokens are skipped (single-line
    rendering only).
    """
    t = _sanitize_draw_text(text)
    if not t:
        return
    f_th, f_lat = _get_font_pair(thai_path, latin_path, size)
    runs = _split_runs_for_fallback(t)

    # Pass 1: measure. x advances along the baseline; min_t/max_b track
    # the vertical extent across all runs (baseline-relative, "ls" anchor).
    x = 0.0
    min_t = 0.0
    max_b = 0.0
    for run, is_th in runs:
        if run == "\n":
            continue
        f = f_th if is_th else f_lat
        try:
            bb = draw_ctx.textbbox((x, 0), run, font=f, anchor="ls")
            min_t = min(min_t, float(bb[1]))
            max_b = max(max_b, float(bb[3]))
            x = float(bb[2])
        except Exception:
            # Older Pillow without textbbox; last resort is a crude
            # width/height estimate from the character count.
            try:
                w, h = draw_ctx.textsize(run, font=f)
            except Exception:
                w, h = (len(run) * size * 0.5, size)
            min_t = min(min_t, -float(h) * 0.8)
            max_b = max(max_b, float(h) * 0.2)
            x += float(w)

    total_w = max(1.0, x)
    total_h = max(1.0, max_b - min_t)

    # Derive start x and baseline y so the measured block is centered.
    cx, cy = center_xy
    start_x = float(cx) - (total_w / 2.0)
    baseline_y = float(cy) - (total_h / 2.0) - min_t

    # Pass 2: draw each run and advance by its measured length.
    x = start_x
    for run, is_th in runs:
        if run == "\n":
            continue
        f = f_th if is_th else f_lat
        draw_ctx.text((x, baseline_y), run, font=f, fill=fill, anchor="ls")
        try:
            x += float(draw_ctx.textlength(run, font=f))
        except Exception:
            try:
                w, _ = draw_ctx.textsize(run, font=f)
            except Exception:
                w = len(run) * size * 0.5
            x += float(w)
def _baseline_offset_px_for_text(text: str, thai_path: str, latin_path: str, size: int):
    """Measure *text* at *size* px and return (baseline_offset, total_h).

    baseline_offset is the vertical distance from the text block's center
    to its baseline (suitable for center-anchored drawing with the "ls"
    anchor); total_h is the measured height of the block. Returns None
    for empty/sanitized-away text. Measurement uses a throwaway 16x16
    image, switching fonts per Thai/Latin run.
    """
    t = _sanitize_draw_text(text)
    if not t:
        return None
    f_th, f_lat = _get_font_pair(thai_path, latin_path, size)
    runs = _split_runs_for_fallback(t)

    tmp = Image.new("RGBA", (16, 16), (0, 0, 0, 0))
    dtmp = ImageDraw.Draw(tmp)

    # Lay runs end to end on one baseline and accumulate vertical extents.
    x = 0.0
    min_t = 0.0
    max_b = 0.0
    for run, is_th in runs:
        if run == "\n":
            continue
        f = f_th if is_th else f_lat
        try:
            bb = dtmp.textbbox((x, 0), run, font=f, anchor="ls")
            min_t = min(min_t, float(bb[1]))
            max_b = max(max_b, float(bb[3]))
            x = float(bb[2])
        except Exception:
            # Fallback estimates for Pillow builds without textbbox.
            try:
                w, h = dtmp.textsize(run, font=f)
            except Exception:
                w, h = (len(run) * size * 0.5, size)
            min_t = min(min_t, -float(h) * 0.8)
            max_b = max(max_b, float(h) * 0.2)
            x += float(w)

    total_h = max(1.0, max_b - min_t)
    # Shift from block center down to the baseline (min_t is <= 0).
    baseline_offset = -(total_h / 2.0) - min_t
    return baseline_offset, total_h
min(min_t, float(bb[1])) + max_b = max(max_b, float(bb[3])) + x = float(bb[2]) + except Exception: + try: + w, h = dtmp.textsize(run, font=f) + except Exception: + w, h = (len(run) * size * 0.5, size) + min_t = min(min_t, -float(h) * 0.8) + max_b = max(max_b, float(h) * 0.2) + x += float(w) + + width = max(1.0, x) + total_h = max(1.0, max_b - min_t) + baseline_to_center = -((min_t + max_b) / 2.0) + return width, total_h, baseline_to_center + +def _item_avail_w_px(item: dict, W: int, H: int) -> float: + b = item.get("box") or {} + w_box = float(b.get("width") or 0.0) * float(W) + + L = 0.0 + p1 = item.get("baseline_p1") or {} + p2 = item.get("baseline_p2") or {} + if ("x" in p1 and "y" in p1 and "x" in p2 and "y" in p2): + dx = (float(p2.get("x") or 0.0) - float(p1.get("x") or 0.0)) * float(W) + dy = (float(p2.get("y") or 0.0) - float(p1.get("y") or 0.0)) * float(H) + L = float(math.hypot(dx, dy)) + + avail = max(w_box, L) + return max(1.0, float(avail)) + +def _item_avail_h_px(item: dict, H: int) -> float: + b = item.get("box") or {} + return max(1.0, (float(b.get("height") or 0.0) * float(H)) - 2.0) + +def _item_line_text(item: dict) -> str: + t = str(item.get("text") or "") + if t.strip(): + return t + spans = item.get("spans") or [] + return "".join(str(s.get("text") or "") for s in spans) + +def _compute_fit_size_px_for_item(item: dict, thai_path: str, latin_path: str, W: int, H: int, base_size: int = 96) -> int | None: + item.pop("_tp_wrap", None) + text = _item_line_text(item) + if not text.strip(): + return None + m = _line_metrics_px(text, thai_path, latin_path, base_size) + if m is None: + return None + tw, th, _ = m + avail_w = _item_avail_w_px(item, W, H) + avail_h = _item_avail_h_px(item, H) + if tw <= 1e-6 or th <= 1e-6: + return None + + is_thai = any(_is_thai_char(ch) for ch in text) + scale_w = (avail_w * 0.98) / tw + scale_h = (avail_h * (0.90 if is_thai else 0.94)) / th + scale = min(scale_w, scale_h) + if scale <= 0: + return None + + size = 
max(10, int(base_size * scale)) + + while size > 10: + mm = _line_metrics_px(text, thai_path, latin_path, size) + if mm is None: + return None + tw2, th2, _ = mm + if (tw2 <= avail_w * 0.999) and (th2 <= avail_h * 0.999): + break + size -= 1 + + if size <= 12 and avail_h >= 24: + tw0, th0, _ = m + if tw0 > (avail_w * 1.2): + def _wrap_fits(s: int) -> bool: + if s <= 0: + return False + k = float(s) / float(base_size) + tw = float(tw0) * k + th = float(th0) * k + lines = int(math.ceil(max(1.0, tw) / max(1.0, avail_w))) + return (float(lines) * th) <= float(avail_h) + + hi = int(min(max(16, avail_h), base_size * 3)) + lo = int(size) + best = int(size) + while lo <= hi: + mid = (lo + hi) // 2 + if _wrap_fits(mid): + best = int(mid) + lo = mid + 1 + else: + hi = mid - 1 + + if best >= int(size * 1.25): + item["_tp_wrap"] = True + size = int(best) + + return int(size) + +def fit_tree_font_sizes_for_tp_html(tree: dict, thai_path: str, latin_path: str, W: int, H: int) -> dict: + paras = tree.get("paragraphs") or [] + for p in paras: + items = p.get("items") or [] + if not items: + continue + + per_item_fit: dict[int, int] = {} + fits: list[int] = [] + + for i, it in enumerate(items): + s = _compute_fit_size_px_for_item(it, thai_path, latin_path, W, H) + if s is None: + continue + per_item_fit[i] = int(s) + fits.append(int(s)) + + if not fits: + continue + + fits.sort() + p["para_font_size_px"] = int(fits[len(fits) // 2]) + + for i, it in enumerate(items): + fs = per_item_fit.get(i) + if fs is None: + continue + it["font_size_px"] = int(fs) + for sp in (it.get("spans") or []): + sp["font_size_px"] = int(fs) + + return tree + +def _iter_paragraphs(tree: dict): + ps = (tree or {}).get("paragraphs") or [] + for i, p in enumerate(ps): + yield i, p + +def _apply_para_font_size(tree: dict, para_sizes: dict[int, int]): + if not tree: + return + for pi, p in _iter_paragraphs(tree): + sz = para_sizes.get(pi) + if not sz: + continue + p["para_font_size_px"] = int(sz) + for it in 
(p.get("items") or []): + it["font_size_px"] = int(sz) + for sp in (it.get("spans") or []): + sp["font_size_px"] = int(sz) + +def _compute_shared_para_sizes(trees: list[dict], thai_path: str, latin_path: str, W: int, H: int) -> dict[int, int]: + sizes: dict[int, int] = {} + for tree in trees: + if not tree: + continue + for pi, p in _iter_paragraphs(tree): + for it in (p.get("items") or []): + fit = _compute_fit_size_px_for_item( + it, thai_path, latin_path, W, H) + if fit is None: + continue + cur = sizes.get(pi) + sizes[pi] = fit if cur is None else min(cur, fit) + + vals = [v for v in sizes.values() if isinstance(v, int) and v > 0] + if not vals: + return sizes + vals.sort() + mid = len(vals) // 2 + target = vals[mid] if (len(vals) % 2 == 1) else int( + round((vals[mid - 1] + vals[mid]) / 2)) + for k in list(sizes.keys()): + try: + sizes[k] = int(min(int(sizes[k]), int(target))) + except Exception: + pass + return sizes + +def _sanitize_draw_text(s: str) -> str: + t = (s or "").replace("\r\n", "\n").replace("\r", "\n") + t = t.replace("\u200b", "").replace("\ufeff", "") + t = "".join(ch for ch in t if (ch == "\n") or ( + unicodedata.category(ch)[0] != "C")) + return t + +def _token_box_px(t, W, H, pad_px=0): + b = t.get("box") or {} + left = int(round(float(b.get("left", 0.0)) * W)) - pad_px + top = int(round(float(b.get("top", 0.0)) * H)) - pad_px + right = int(round((float(b.get("left", 0.0)) + + float(b.get("width", 0.0))) * W)) + pad_px + bottom = int( + round((float(b.get("top", 0.0)) + float(b.get("height", 0.0))) * H)) + pad_px + left = max(0, min(W, left)) + top = max(0, min(H, top)) + right = max(0, min(W, right)) + bottom = max(0, min(H, bottom)) + if right <= left or bottom <= top: + return None + return left, top, right, bottom + +def _token_quad_px(t, W, H, pad_px=0, apply_baseline_shift=True): + if not t.get("valid_text"): + return None + + p1 = t.get("baseline_p1") or {} + p2 = t.get("baseline_p2") or {} + x1 = float(p1.get("x", 0.0)) * W + y1 = 
float(p1.get("y", 0.0)) * H + x2 = float(p2.get("x", 0.0)) * W + y2 = float(p2.get("y", 0.0)) * H + + dx = x2 - x1 + dy = y2 - y1 + if dx < 0 or (abs(dx) < 1e-12 and dy < 0): + x1, y1, x2, y2 = x2, y2, x1, y1 + dx = x2 - x1 + dy = y2 - y1 + + L = math.hypot(dx, dy) + if L <= 1e-9: + return None + + ux = dx / L + uy = dy / L + + nx = -uy + ny = ux + if ny < 0: + nx, ny = -nx, -ny + + t0 = float(t.get("t0_raw") if t.get("t0_raw") is not None else 0.0) + t1 = float(t.get("t1_raw") if t.get("t1_raw") is not None else 1.0) + + sx = x1 + ux * (t0 * L) + sy = y1 + uy * (t0 * L) + ex = x1 + ux * (t1 * L) + ey = y1 + uy * (t1 * L) + + h = max(1.0, float(t.get("height_raw") or 0.0) * H) + if apply_baseline_shift and BASELINE_SHIFT: + shift = h * BASELINE_SHIFT_FACTOR + sx += nx * shift + sy += ny * shift + ex += nx * shift + ey += ny * shift + + pad = max(0.0, float(pad_px)) + sx -= ux * pad + sy -= uy * pad + ex += ux * pad + ey += uy * pad + + hh = (h / 2.0) + pad + ox = nx * hh + oy = ny * hh + + return [(sx - ox, sy - oy), (ex - ox, ey - oy), (ex + ox, ey + oy), (sx + ox, sy + oy)] + +def _token_box_quad_px(t, W, H, pad_px=0): + b = t.get("box") or {} + w = float(b.get("width", 0.0)) * W + h = float(b.get("height", 0.0)) * H + if w <= 0.0 or h <= 0.0: + return None + + left = float(b.get("left", 0.0)) * W + top = float(b.get("top", 0.0)) * H + cx = left + (w / 2.0) + cy = top + (h / 2.0) + + hw = (w / 2.0) + float(pad_px) + hh = (h / 2.0) + float(pad_px) + + angle_deg = float(b.get("rotation_deg", 0.0)) + rad = math.radians(angle_deg) + c = math.cos(rad) + s = math.sin(rad) + + corners = [(-hw, -hh), (hw, -hh), (hw, hh), (-hw, hh)] + out = [] + for x, y in corners: + rx = (x * c) - (y * s) + ry = (x * s) + (y * c) + out.append((cx + rx, cy + ry)) + return out + +def _quad_bbox(quad, W, H): + xs = [p[0] for p in quad] + ys = [p[1] for p in quad] + l = max(0, min(W, int(math.floor(min(xs))))) + t = max(0, min(H, int(math.floor(min(ys))))) + r = max(0, min(W, 
int(math.ceil(max(xs))))) + b = max(0, min(H, int(math.ceil(max(ys))))) + if r <= l or b <= t: + return None + return l, t, r, b + +def _median_rgba(pixels): + if not pixels: + return None + rs = sorted(p[0] for p in pixels) + gs = sorted(p[1] for p in pixels) + bs = sorted(p[2] for p in pixels) + a = 255 + mid = len(rs) // 2 + return (rs[mid], gs[mid], bs[mid], a) + +def _rel_luminance(rgb): + r, g, b = rgb + + def lin(c): + c = c / 255.0 + return c / 12.92 if c <= 0.04045 else ((c + 0.055) / 1.055) ** 2.4 + return 0.2126 * lin(r) + 0.7152 * lin(g) + 0.0722 * lin(b) + +def _contrast_ratio(l1, l2): + a = max(l1, l2) + 0.05 + b = min(l1, l2) + 0.05 + return a / b + +def _pick_bw_text_color(bg_rgb): + Lb = _rel_luminance(bg_rgb) + c_black = _contrast_ratio(Lb, 0.0) + c_white = _contrast_ratio(Lb, 1.0) + return TEXT_COLOR_LIGHT if c_white >= c_black else TEXT_COLOR_DARK + +def _sample_bg_color_from_quad(base_rgb, quad, rect, border_px=3, margin_px=6): + l, t, r, b = rect + w = r - l + h = b - t + if w <= 0 or h <= 0: + return _sample_bg_color(base_rgb, rect, margin_px) + mask = Image.new("L", (w, h), 0) + d = ImageDraw.Draw(mask) + qrel = [(x - l, y - t) for x, y in quad] + d.polygon(qrel, fill=255) + bp = int(max(0, border_px or 0)) + if bp > 0: + k = min(w, h) + bp = min(bp, max(1, (k - 1) // 2)) + if bp > 0: + er = mask.filter(ImageFilter.MinFilter(size=bp * 2 + 1)) + border = ImageChops.subtract(mask, er) + else: + border = mask + region = base_rgb.crop((l, t, r, b)) + rp = list(region.getdata()) + mp = list(border.getdata()) + samples = [p for p, m in zip(rp, mp) if m > 0] + if len(samples) < 24: + ext = _sample_bg_color(base_rgb, rect, margin_px) + return ext + med = _median_rgba(samples) + if med: + return med[:3] + return _sample_bg_color(base_rgb, rect, margin_px) + +def _sample_bg_color(base_rgb, rect, margin_px): + W, H = base_rgb.size + l, t, r, b = rect + m = max(1, int(margin_px)) + samples = [] + + def add_strip(x0, y0, x1, y1): + x0 = max(0, min(W, 
def _pixelate(img, block_px):
    """Mosaic *img*: shrink so each block becomes ~1 pixel, then scale
    back to the original size, both steps with NEAREST resampling."""
    width, height = img.size
    if width <= 1 or height <= 1:
        return img
    block = max(1, int(block_px or 1))
    small_size = (max(1, width // block), max(1, height // block))
    small = img.resize(small_size, resample=Image.NEAREST)
    return small.resize((width, height), resample=Image.NEAREST)
def _choose_clone_rect(base, rect, gap_px, border_px):
    """Pick the best same-size donor rectangle adjacent to *rect*.

    Considers four candidates (above, below, left, right of rect,
    separated by gap_px), discards any that fall outside the image, and
    returns the one whose border strip matches rect's border most
    closely (lowest _clone_candidate_score). Returns None when no
    candidate fits fully inside the image.
    """
    W, H = base.size
    l, t, r, b = rect
    w = r - l
    h = b - t
    gap_px = max(0, int(gap_px or 0))
    cands = []
    # One same-size candidate per direction, offset by the gap.
    up = (l, t - gap_px - h, r, t - gap_px)
    down = (l, b + gap_px, r, b + gap_px + h)
    left = (l - gap_px - w, t, l - gap_px, b)
    right = (r + gap_px, t, r + gap_px + w, b)
    for direction, (cl, ct, cr, cb) in [("up", up), ("down", down), ("left", left), ("right", right)]:
        if cl < 0 or ct < 0 or cr > W or cb > H:
            continue  # donor must lie entirely inside the image
        cand_rect = (cl, ct, cr, cb)
        score = _clone_candidate_score(
            base, rect, cand_rect, direction, border_px)
        cands.append((score, cand_rect))
    if not cands:
        return None
    cands.sort(key=lambda x: x[0])  # lowest mean-abs-diff wins
    return cands[0][1]
def _erase_with_blend_patches(base, rect, mask, gap_px=3, feather_px=4):
    """Erase the masked area of *rect* by blending surrounding patches.

    Collects up to 8 same-size neighbour patches (the 4-neighbours plus
    diagonals, offset by the rect size + gap_px) that lie fully inside
    the image, averages them, and composites the average over the masked
    region, optionally feathering the mask edge with a Gaussian blur.

    Returns True on success; False when the rect is too small or no
    neighbour patch fits inside the image (caller falls back to another
    erase mode).
    """
    l, t, r, b = rect
    W, H = base.size
    w = r - l
    h = b - t
    if w <= 2 or h <= 2:
        return False
    gap = int(max(0, gap_px))
    candidates = []
    dirs = [(0, -(h + gap)), (0, (h + gap)), (-(w + gap), 0), ((w + gap), 0),
            (-(w + gap), -(h + gap)), ((w + gap), -(h + gap)), (-(w + gap), (h + gap)), ((w + gap), (h + gap))]
    for dx, dy in dirs:
        ll = l + dx
        tt = t + dy
        rr = ll + w
        bb = tt + h
        if ll < 0 or tt < 0 or rr > W or bb > H:
            continue  # patch must lie entirely inside the image
        candidates.append(base.crop((ll, tt, rr, bb)).convert("RGB"))
    if not candidates:
        return False
    # BUGFIX: previously the patches were summed with ImageChops.add and
    # divided afterwards; add() clips each channel at 255 per step, so the
    # "average" was wrong (darkened/flattened) wherever the running sum
    # saturated. Average in float with numpy instead.
    stack = np.stack([np.asarray(c, dtype=np.float32)
                      for c in candidates], axis=0)
    blended = Image.fromarray(
        np.mean(stack, axis=0).astype(np.uint8), mode="RGB")
    m = mask
    fp = int(max(0, feather_px))
    if fp > 0:
        m = m.filter(ImageFilter.GaussianBlur(radius=fp))
    region = base.crop((l, t, r, b)).convert("RGB")
    merged = Image.composite(blended, region, m)
    base.paste(merged, (l, t))
    return True
def erase_text_with_boxes(img, box_tokens, pad_px=2, sample_margin_px=6, mode=None, mosaic_block_px=None):
    """Remove detected text regions from *img*.

    Parameters:
        img: source PIL image (not modified; a RGB copy is edited).
        box_tokens: token dicts carrying box/baseline geometry.
        pad_px: padding applied around each token's quad.
        sample_margin_px: margin used when sampling the background colour.
        mode: erase strategy — "inpaint"/"cv2"/"opencv", "blend_patch"
            (and aliases), "clone", "mosaic", or "solid"; defaults to the
            module-level ERASE_MODE, then "solid".
        mosaic_block_px: block size for the "mosaic" strategy.

    Returns the edited RGB image (or *img* unchanged if no tokens).
    """
    if not box_tokens:
        return img
    mode = (mode or ERASE_MODE or "solid").strip().lower()
    mosaic_block_px = int(mosaic_block_px or ERASE_MOSAIC_BLOCK_PX or 10)
    base = img.convert("RGB").copy()
    if mode in ("inpaint", "cv2", "opencv"):
        # Inpainting processes every token in one pass.
        return _erase_with_inpaint(base, box_tokens, pad_px=pad_px)
    W, H = base.size
    for t in box_tokens:
        # Best available geometry: rotated box quad -> baseline quad ->
        # plain axis-aligned box.
        quad = _token_box_quad_px(t, W, H, pad_px=pad_px)
        if not quad:
            quad = _token_quad_px(t, W, H, pad_px=pad_px,
                                  apply_baseline_shift=True)
        if not quad:
            rect = _token_box_px(t, W, H, pad_px=pad_px)
            if not rect:
                continue
            l, tt, r, bb = rect
            quad = [(l, tt), (r, tt), (r, bb), (l, bb)]

        rect = _quad_bbox(quad, W, H)
        if not rect:
            continue

        l, tt, r, bb = rect
        region = base.crop((l, tt, r, bb))
        mask = Image.new("L", (r - l, bb - tt), 0)
        mdraw = ImageDraw.Draw(mask)
        qrel = [(x - l, y - tt) for x, y in quad]
        mdraw.polygon(qrel, fill=255)

        # BUGFIX: a failed blend/clone used to rebind the loop-wide
        # `mode` to "solid", silently downgrading every LATER token as
        # well. Fall back per token instead.
        tok_mode = mode

        if tok_mode in ("blend_patch", "blend", "avg_patch", "patch"):
            if _erase_with_blend_patches(
                    base, rect, mask, ERASE_BLEND_GAP_PX, ERASE_BLEND_FEATHER_PX):
                continue
            tok_mode = "solid"

        if tok_mode == "clone":
            if _erase_with_clone(
                    base, rect, mask, ERASE_CLONE_GAP_PX, ERASE_CLONE_BORDER_PX, ERASE_CLONE_FEATHER_PX):
                continue
            tok_mode = "solid"

        if tok_mode == "mosaic":
            pixelated = _pixelate(region, mosaic_block_px)
            merged = Image.composite(pixelated, region, mask)
            base.paste(merged, (l, tt))
        else:
            # "solid": fill the quad with the median background colour
            # sampled just outside it.
            color = _sample_bg_color_from_quad(
                base, quad, rect, BG_SAMPLE_BORDER_PX, sample_margin_px)
            region.paste(color, mask=mask)
            base.paste(region, (l, tt))
    return base
_sanitize_draw_text(t.get("text") or "") + if text.strip() == "": + continue + + p1 = t["baseline_p1"] + p2 = t["baseline_p2"] + x1 = float(p1["x"]) * W + y1 = float(p1["y"]) * H + x2 = float(p2["x"]) * W + y2 = float(p2["y"]) * H + + dx = x2 - x1 + dy = y2 - y1 + if dx < 0 or (abs(dx) < 1e-12 and dy < 0): + x1, y1, x2, y2 = x2, y2, x1, y1 + dx = x2 - x1 + dy = y2 - y1 + + L = math.hypot(dx, dy) + if L <= 1e-9: + continue + + ux = dx / L + uy = dy / L + + t0 = float(t.get("t0_raw") if t.get("t0_raw") is not None else 0.0) + t1 = float(t.get("t1_raw") if t.get("t1_raw") is not None else 1.0) + + sx = x1 + ux * (t0 * L) + sy = y1 + uy * (t0 * L) + ex = x1 + ux * (t1 * L) + ey = y1 + uy * (t1 * L) + + avail_w = box_w + avail_h = box_h + + if BASELINE_SHIFT and (not use_box_center): + nx, ny = -uy, ux + shift = avail_h * BASELINE_SHIFT_FACTOR + sx += nx * shift + sy += ny * shift + + angle_deg = float(b.get("rotation_deg", 0.0)) + + forced_size = t.get("font_size_px") + if forced_size is not None: + final_size = int( + max(10, round(float(forced_size) * float(font_scale)))) + font = pick_font(text, thai_path, latin_path, final_size) + + if fit_to_box: + tmpc = Image.new("RGBA", (10, 10), (0, 0, 0, 0)) + dc = ImageDraw.Draw(tmpc) + try: + bbc = dc.textbbox((0, 0), text, font=font, anchor="ls") + twc = float(bbc[2] - bbc[0]) + thc = float(bbc[3] - bbc[1]) + except Exception: + twc, thc = dc.textsize(text, font=font) + twc = float(twc) + thc = float(thc) + + if twc > 0 and thc > 0 and (twc > avail_w or thc > avail_h): + s = min(avail_w / twc, avail_h / thc) + if s < 1.0: + final_size = max(10, int(final_size * s)) + font = pick_font( + text, thai_path, latin_path, final_size) + else: + base_size = 96 + font0 = pick_font(text, thai_path, latin_path, base_size) + + tmp = Image.new("RGBA", (10, 10), (0, 0, 0, 0)) + dtmp = ImageDraw.Draw(tmp) + try: + bb = dtmp.textbbox((0, 0), text, font=font0, anchor="ls") + tw = bb[2] - bb[0] + th = bb[3] - bb[1] + except Exception: + tw, 
th = dtmp.textsize(text, font=font0) + + if tw <= 0 or th <= 0: + continue + + scale = min(avail_w / tw, avail_h / th) + final_size = max(10, int(base_size * scale)) + if not fit_to_box: + final_size = max(10, int(final_size * float(font_scale))) + font = pick_font(text, thai_path, latin_path, final_size) + + tmp2 = Image.new("RGBA", (10, 10), (0, 0, 0, 0)) + d2 = ImageDraw.Draw(tmp2) + try: + bb2 = d2.textbbox((0, 0), text, font=font, anchor="ls") + tw2 = bb2[2] - bb2[0] + th2 = bb2[3] - bb2[1] + except Exception: + tw2, th2 = d2.textsize(text, font=font) + + side = int(max(tw2, th2, avail_h, avail_w) * 2.2 + 40) + side = min(side, int(max(W, H) * 4)) + if side < 128: + side = 128 + + canvas = Image.new("RGBA", (side, side), (0, 0, 0, 0)) + dc = ImageDraw.Draw(canvas) + + fill = TEXT_COLOR + if AUTO_TEXT_COLOR: + q = _token_box_quad_px(t, W, H, pad_px=0) + if q: + rr = _quad_bbox(q, W, H) + if rr: + bg = _sample_bg_color_from_quad_ring( + base_rgb, q, rr, ring_px=max(2, BG_SAMPLE_BORDER_PX)) + if bg is None: + bg = _sample_bg_color_from_quad( + base_rgb, q, rr, BG_SAMPLE_BORDER_PX, ERASE_SAMPLE_MARGIN_PX) + fill = _pick_bw_text_color(bg) + else: + rr = _token_box_px(t, W, H, pad_px=0) + if rr: + bg = _sample_bg_color(base_rgb, rr, ERASE_SAMPLE_MARGIN_PX) + fill = _pick_bw_text_color(bg) + + origin = (side // 2, side // 2) + + p1 = t.get("baseline_p1") or {} + p2 = t.get("baseline_p2") or {} + has_baseline = ("x" in p1 and "y" in p1 and "x" in p2 and "y" in p2) + + if has_baseline: + x1 = float(p1.get("x") or 0.0) * float(W) + y1 = float(p1.get("y") or 0.0) * float(H) + x2 = float(p2.get("x") or 0.0) * float(W) + y2 = float(p2.get("y") or 0.0) * float(H) + dx = x2 - x1 + dy = y2 - y1 + Lb = float(math.hypot(dx, dy)) + if Lb <= 1e-6: + Lb = 1.0 + ux = dx / Lb + uy = dy / Lb + nx = -uy + ny = ux + + bb = t.get("box") or {} + cx = (float(bb.get("left") or 0.0) + + float(bb.get("width") or 0.0) / 2.0) * float(W) + cy = (float(bb.get("top") or 0.0) + + 
float(bb.get("height") or 0.0) / 2.0) * float(H) + + tt = _sanitize_draw_text(text) + if not tt: + continue + font_m = pick_font(tt, thai_path, latin_path, final_size) + try: + tw = float(font_m.getlength(tt)) + except Exception: + tmp = Image.new("RGBA", (10, 10), (0, 0, 0, 0)) + dtmp = ImageDraw.Draw(tmp) + try: + bbm = dtmp.textbbox((0, 0), tt, font=font_m, anchor="ls") + tw = float(bbm[2] - bbm[0]) + except Exception: + tw, _ = dtmp.textsize(tt, font=font_m) + tw = float(tw) + + f_th, f_lat = _get_font_pair(thai_path, latin_path, final_size) + try: + a_th, d_th = f_th.getmetrics() + except Exception: + a_th, d_th = final_size, int(final_size * 0.25) + try: + a_lat, d_lat = f_lat.getmetrics() + except Exception: + a_lat, d_lat = final_size, int(final_size * 0.25) + ascent = float(max(a_th, a_lat)) + descent = float(max(d_th, d_lat)) + center_y_rel = (-ascent + descent) / 2.0 + + bx = cx - ux * (tw / 2.0) - nx * center_y_rel + by = cy - uy * (tw / 2.0) - ny * center_y_rel + + angle_deg = float(math.degrees(math.atan2(dy, dx))) + + _draw_text_baseline_fallback( + dc, origin, text, thai_path, latin_path, final_size, fill) + rotated = canvas.rotate(-angle_deg, resample=Image.BICUBIC, + expand=False, center=origin) + paste_x = int(round(bx - origin[0])) + paste_y = int(round(by - origin[1])) + overlay.alpha_composite(rotated, dest=(paste_x, paste_y)) + else: + _draw_text_centered_fallback( + dc, origin, text, thai_path, latin_path, final_size, fill) + rotated = canvas.rotate(-angle_deg, resample=Image.BICUBIC, + expand=False, center=origin) + paste_x = int(round(box_cx - origin[0])) + paste_y = int(round(box_cy - origin[1])) + overlay.alpha_composite(rotated, dest=(paste_x, paste_y)) + + out = Image.alpha_composite(base, overlay).convert("RGB") + out.save(out_path) + +def get_lens_data_from_image(image_path, firebase_url, lang): + ck = _get_firebase_cookie(firebase_url) + + with open(image_path, "rb") as f: + img_bytes = f.read() + + hdr = {"User-Agent": 
"Mozilla/5.0", "Referer": "https://lens.google.com/"}
    # Upload without following redirects: the Lens endpoint answers the
    # upload with a 302/303 whose Location carries the result page URL.
    with httpx.Client(cookies=ck, headers=hdr, follow_redirects=False, timeout=60) as c:
        r = c.post(
            "https://lens.google.com/v3/upload",
            files={"encoded_image": ("file.jpg", img_bytes, "image/jpeg")},
        )
        if r.status_code not in (302, 303):
            raise Exception(f"Upload failed: {r.status_code}\n{r.text}")
        redirect = r.headers["location"]

    # Rewrite the redirect URL for the requested translation language, then
    # fetch the JSON payload.
    u = to_translated(redirect, lang=lang)
    with httpx.Client(cookies=ck, headers=hdr, timeout=60) as c:
        j = c.get(u).text

    # Strip Google's anti-XSSI prefix ()]}' plus newline) before parsing.
    data = json.loads(j[5:] if j.startswith(")]}'") else j)
    return data


def _get_firebase_cookie(firebase_url: str):
    """Fetch the Lens session cookie JSON from Firebase, with a TTL cache.

    The cache lives in the module-level _FIREBASE_COOKIE_CACHE dict and is
    keyed by URL; entries expire after FIREBASE_COOKIE_TTL_SEC seconds.
    NOTE(review): not thread-safe — the cache dict is mutated without a lock.
    """
    u = (firebase_url or '').strip()
    now = time.time()
    cache = _FIREBASE_COOKIE_CACHE
    if cache.get('data') and cache.get('url') == u and (now - float(cache.get('ts') or 0)) < float(FIREBASE_COOKIE_TTL_SEC):
        return cache.get('data')
    r = httpx.get(u, timeout=30)
    ck = r.json()
    cache['ts'] = now
    cache['url'] = u
    cache['data'] = ck
    return ck


def warmup(lang: str = "th") -> dict:
    """Pre-fetch the session cookie and pre-load fonts for ``lang``.

    Best-effort: a cookie failure is swallowed and reported via the
    ``cookie_ok`` flag in the returned status dict.
    """
    l = _normalize_lang(lang)
    cookie_ok = False
    try:
        _get_firebase_cookie(FIREBASE_URL)
        cookie_ok = True
    except Exception:
        # Deliberate best-effort: warmup must not fail on network errors.
        pass
    # Pick the CJK companion font for Japanese/Chinese targets; otherwise the
    # default Latin font is used alongside the Thai font.
    thai_font = FONT_THAI_PATH
    latin_font = FONT_LATIN_PATH
    if l == "ja":
        latin_font = FONT_JA_PATH
    elif l in ("zh", "zh-hans", "zh_cn", "zh-cn", "zh_hans"):
        latin_font = FONT_ZH_SC_PATH
    elif l in ("zh-hant", "zh_tw", "zh-tw", "zh_hant"):
        latin_font = FONT_ZH_TC_PATH

    if FONT_DOWNLOD:
        # ensure_font downloads the file if missing and returns a usable path.
        thai_font = ensure_font(thai_font, FONT_THAI_URLS)
        if l == "ja":
            latin_font = ensure_font(latin_font, FONT_JA_URLS)
        elif l in ("zh", "zh-hans", "zh_cn", "zh-cn", "zh_hans"):
            latin_font = ensure_font(latin_font, FONT_ZH_SC_URLS)
        elif l in ("zh-hant", "zh_tw", "zh-tw", "zh_hant"):
            latin_font = ensure_font(latin_font, FONT_ZH_TC_URLS)
        else:
            latin_font = ensure_font(latin_font, FONT_LATIN_URLS)

    # Warm the font cache at the two sizes used by the renderers.
    _get_font_pair(thai_font or "", latin_font or "", 22)
    _get_font_pair(thai_font or "", latin_font
or "", 28)
    return {"ok": True, "lang": l, "thai_font": thai_font or "", "latin_font": latin_font or "", "cookie_ok": cookie_ok}


def main():
    """End-to-end pipeline: upload IMAGE_PATH to Lens, decode the original /
    translated / AI trees, render HTML (and optional overlays), and write the
    combined result to OUT_JSON.

    Behavior is driven entirely by the module-level DO_* / DRAW_* flags.
    """
    data = get_lens_data_from_image(IMAGE_PATH, FIREBASE_URL, LANG)

    img = Image.open(IMAGE_PATH).convert("RGB")
    W, H = img.size

    # Font selection mirrors warmup(): swap in a CJK font for ja/zh targets.
    thai_font = FONT_THAI_PATH
    latin_font = FONT_LATIN_PATH

    lang = _normalize_lang(LANG)

    if lang == "ja":
        latin_font = FONT_JA_PATH
    elif lang in ("zh", "zh-hans", "zh_cn", "zh-cn", "zh_hans"):
        latin_font = FONT_ZH_SC_PATH
    elif lang in ("zh-hant", "zh_tw", "zh-tw", "zh_hant"):
        latin_font = FONT_ZH_TC_PATH

    if FONT_DOWNLOD:
        thai_font = ensure_font(thai_font, FONT_THAI_URLS)
        if lang == "ja":
            latin_font = ensure_font(latin_font, FONT_JA_URLS)
        elif lang in ("zh", "zh-hans", "zh_cn", "zh-cn", "zh_hans"):
            latin_font = ensure_font(latin_font, FONT_ZH_SC_URLS)
        elif lang in ("zh-hant", "zh_tw", "zh-tw", "zh_hant"):
            latin_font = ensure_font(latin_font, FONT_ZH_TC_URLS)
        else:
            latin_font = ensure_font(latin_font, FONT_LATIN_URLS)

    image_url = data.get("imageUrl") if isinstance(data, dict) else None
    image_datauri = ""
    if DECODE_IMAGEURL_TO_DATAURI and image_url:
        image_datauri = decode_imageurl_to_datauri(image_url)

    # Output skeleton; sections are filled in below per the DO_* flags.
    out = {
        "imageUrl": image_url,
        "imageDataUri": image_datauri,
        "originalContentLanguage": data.get("originalContentLanguage"),
        "originalTextFull": data.get("originalTextFull"),
        "translatedTextFull": data.get("translatedTextFull"),
        "AiTextFull": "",
        "originalParagraphs": data.get("originalParagraphs") or [],
        "translatedParagraphs": data.get("translatedParagraphs") or [],
        "original": {},
        "translated": {},
        "Ai": {},
    }
    original_span_tokens = None
    original_tree = None
    translated_tree = None

    def _base_img_for_overlay() -> Image.Image:
        # Overlays are drawn on a copy with the original text erased, unless
        # erasing is disabled or no original tokens were decoded.
        if not (ERASE_OLD_TEXT_WITH_ORIGINAL_BOXES and original_span_tokens):
            return img
        return erase_text_with_boxes(
            img,
            original_span_tokens,
            pad_px=ERASE_PADDING_PX,
            sample_margin_px=ERASE_SAMPLE_MARGIN_PX,
        )

    if DO_ORIGINAL:
        tree, _ = decode_tree(
            data.get("originalParagraphs") or [],
            data.get("originalTextFull") or "",
            "original",
            W,
            H,
            want_raw=False,
        )
        original_tree = tree
        original_span_tokens = flatten_tree_spans(tree)
        out["original"] = {"originalTree": tree}
        if DO_ORIGINAL_HTML:
            out["original"]["originalhtml"] = tokens_to_html(
                original_span_tokens)

        if DRAW_OVERLAY_ORIGINAL:
            base_img = _base_img_for_overlay()
            draw_overlay(
                base_img,
                original_span_tokens,
                OVERLAY_ORIGINAL_PATH,
                thai_font or "",
                latin_font or "",
                level_outlines=build_level_outlines(original_tree, W, H),
            )

    # The AI stage needs an original tree even when DO_ORIGINAL is off.
    if DO_AI and original_tree is None:
        tree0, _ = decode_tree(
            data.get("originalParagraphs") or [],
            data.get("originalTextFull") or "",
            "original",
            W,
            H,
            want_raw=False,
        )
        original_tree = tree0

    if DO_TRANSLATED:
        tree, _ = decode_tree(
            data.get("translatedParagraphs") or [],
            data.get("translatedTextFull") or "",
            "translated",
            W,
            H,
            want_raw=False,
        )
        translated_tree = tree
        out["translated"] = {"translatedTree": tree}
        translated_span_tokens = flatten_tree_spans(tree)
        if DO_TRANSLATED_HTML:
            out["translated"]["translatedhtml"] = tokens_to_html(
                translated_span_tokens)

        if DRAW_OVERLAY_TRANSLATED:
            base_img = _base_img_for_overlay()
            draw_overlay(
                base_img,
                translated_span_tokens,
                OVERLAY_TRANSLATED_PATH,
                thai_font or "",
                latin_font or "",
                level_outlines=build_level_outlines(tree, W, H),
                font_scale=TRANSLATED_OVERLAY_FONT_SCALE,
                fit_to_box=TRANSLATED_OVERLAY_FIT_TO_BOX,
            )

    ai = None
    if DO_AI:
        src_text = out.get("originalTextFull") or ""
        if not src_text:
            src_text = data.get("originalTextFull") or ""

        # Ensure at least one tree exists to carry box geometry for the AI
        # text (translated preferred, original as fallback).
        tree_for_boxes = translated_tree or original_tree
        if tree_for_boxes is None:
            tree_for_boxes, _ = decode_tree(
                data.get("originalParagraphs") or [],
                data.get("originalTextFull") or "",
                "original",
                W,
                H,
                want_raw=False,
            )
            original_tree = tree_for_boxes

        ai = ai_translate_original_text(
            src_text,
            LANG,
        )

        # Re-flow the AI text into the translated tree's layout.
        # NOTE(review): patch() receives translated_tree (may be None when
        # DO_TRANSLATED is off), not tree_for_boxes — confirm intended.
        template_tree = translated_tree
        patched = patch({"Ai": {"aiTextFull": str(ai.get(
            "aiTextFull") or ""), "aiTree": template_tree}}, W, H, thai_font, latin_font)
        ai_tree = (patched.get("Ai") or {}).get("aiTree") or {}

        ai["aiTree"] = ai_tree

        # Harmonize per-paragraph font sizes across all three trees so the
        # original/translated/AI renderings line up visually.
        shared_para_sizes = _compute_shared_para_sizes(
            [original_tree or {}, translated_tree or {}, ai_tree or {}],
            thai_font or "",
            latin_font or "",
            W,
            H,
        )
        _apply_para_font_size(original_tree or {}, shared_para_sizes)
        _apply_para_font_size(translated_tree or {}, shared_para_sizes)
        _apply_para_font_size(ai_tree or {}, shared_para_sizes)

        _rebuild_ai_spans_after_font_resize(
            ai_tree or {}, W, H, thai_font or "", latin_font or "")

        out["AiTextFull"] = str(ai.get("aiTextFull") or "")
        out["Ai"] = {
            "aiTextFull": str(ai.get("aiTextFull") or ""),
            "aiTree": ai_tree,
        }

        if DO_AI_HTML:
            if AI_OVERLAY_FIT_TO_BOX:
                fit_tree_font_sizes_for_tp_html(
                    ai_tree or {}, thai_font or "", latin_font or "", W, H)
            out["Ai"]["aihtml"] = ai_tree_to_tp_html(ai_tree, W, H)
            out["Ai"]["aihtmlCss"] = tp_overlay_css()
            out["Ai"]["aihtmlMeta"] = {
                "baseW": int(W),
                "baseH": int(H),
                "format": "tp",
            }

        if DO_AI_OVERLAY and translated_tree is not None:
            base_img = _base_img_for_overlay()
            tokens_for_draw = flatten_tree_spans(ai_tree)
            draw_overlay(
                base_img,
                tokens_for_draw,
                AI_PATH_OVERLAY,
                thai_font or "",
                latin_font or "",
                level_outlines=build_level_outlines(ai_tree, W, H),
                font_scale=AI_OVERLAY_FONT_SCALE,
                fit_to_box=AI_OVERLAY_FIT_TO_BOX,
            )

    if HTML_INCLUDE_CSS and (DO_ORIGINAL_HTML or DO_TRANSLATED_HTML or DO_AI_HTML):
        out["htmlCss"] = overlay_css()
        out["htmlMeta"] = {
            "containerClass": "RTMDre",
            "tokenClass": "IwqbBf",
            "sourceWidth": int(W),
            "sourceHeight": int(H),
        }

    # Consumers rely on htmlMeta being present even when CSS export is off.
    if "htmlMeta" not in out:
        out["htmlMeta"] = {
            "containerClass": "RTMDre",
            "tokenClass": "IwqbBf",
            "sourceWidth": int(W),
            "sourceHeight": int(H),
        }

    if WRITE_OUT_JSON:
        with open(OUT_JSON, "w", encoding="utf-8") as f:
            json.dump(out, f, ensure_ascii=False, indent=2)


if __name__ == "__main__":
    main()