# Image-translation pipeline configuration (inputs, outputs, overlay/erase tuning).
import base64, copy, hashlib, json, math, os, re, struct, time, unicodedata, cv2, httpx, numpy as np, budoux
from urllib.parse import parse_qs, urlencode, urlparse
from PIL import Image, ImageChops, ImageDraw, ImageFilter, ImageFont

# --- Input / output ---------------------------------------------------------
IMAGE_PATH = "33.jpg"          # source image to process
OUT_JSON = "output.json"       # where the result JSON is written
LANG = "th"                    # target translation language (normalized later by _normalize_lang)
AI_API_KEY = os.getenv("AI_API_KEY", "").strip()  # explicit key wins; other env vars are probed later
FIREBASE_URL = "https://cookie-6e1cd-default-rtdb.asia-southeast1.firebasedatabase.app/lens/cookie.json"

# --- Feature toggles --------------------------------------------------------
WRITE_OUT_JSON = True
DECODE_IMAGEURL_TO_DATAURI = True
DO_ORIGINAL = True
DO_TRANSLATED = True
DO_ORIGINAL_HTML = True
DO_TRANSLATED_HTML = True
DO_AI_HTML = True
HTML_INCLUDE_CSS = True

# --- Overlay rendering ------------------------------------------------------
DRAW_OVERLAY_ORIGINAL = False
DRAW_OVERLAY_TRANSLATED = False
OVERLAY_ORIGINAL_PATH = "overlay_original.png"
OVERLAY_TRANSLATED_PATH = "overlay_translated.png"
TRANSLATED_OVERLAY_FONT_SCALE = 1.0
TRANSLATED_OVERLAY_FIT_TO_BOX = True
AI_OVERLAY_FONT_SCALE = 1.5
AI_OVERLAY_FIT_TO_BOX = True

# --- AI rewrite pass --------------------------------------------------------
DO_AI = True
DO_AI_JSON = False             # False => AI returns plain text (see _active_ai_contract)
DO_AI_OVERLAY = False
AI_CACHE = False
AI_CACHE_PATH = "ai_cache.json"
AI_PATH_OVERLAY = "overlay_ai.png"
AI_PROVIDER = "auto"           # "auto" => detect provider from key prefix
AI_MODEL = "auto"              # "auto" => provider default model
AI_BASE_URL = "auto"           # "auto" => provider default base URL
AI_TEMPERATURE = 0.2
AI_MAX_TOKENS = 1200
AI_TIMEOUT_SEC = 120

# --- Debug box/outline drawing ---------------------------------------------
DRAW_BOX_OUTLINE = True
AUTO_TEXT_COLOR = True         # pick dark/light text based on background
TEXT_COLOR = (0, 0, 0, 255)
TEXT_COLOR_DARK = (0, 0, 0, 255)
TEXT_COLOR_LIGHT = (255, 255, 255, 255)
BOX_OUTLINE = (0, 255, 0, 255)
BOX_OUTLINE_WIDTH = 2
DRAW_OUTLINE_PARA = False
DRAW_OUTLINE_ITEM = False
DRAW_OUTLINE_SPAN = False
PARA_OUTLINE = (0, 0, 255, 255)
ITEM_OUTLINE = (255, 0, 0, 255)
SPAN_OUTLINE = BOX_OUTLINE
PARA_OUTLINE_WIDTH = 3
ITEM_OUTLINE_WIDTH = 2
SPAN_OUTLINE_WIDTH = BOX_OUTLINE_WIDTH

# --- Original-text erasure (inpaint/mosaic/clone/blend) ---------------------
ERASE_OLD_TEXT_WITH_ORIGINAL_BOXES = True
ERASE_PADDING_PX = 2
ERASE_SAMPLE_MARGIN_PX = 6
ERASE_MODE = "inpaint"         # erase strategy selector
ERASE_MOSAIC_BLOCK_PX = 10
ERASE_CLONE_GAP_PX = 4
ERASE_CLONE_BORDER_PX = 6
ERASE_CLONE_FEATHER_PX = 3
ERASE_BLEND_GAP_PX = 3
ERASE_BLEND_FEATHER_PX = 4
INPAINT_RADIUS = 3
INPAINT_METHOD = "telea"       # cv2 inpainting algorithm name
INPAINT_DILATE_PX = 1
BG_SAMPLE_BORDER_PX = 3
BASELINE_SHIFT = True BASELINE_SHIFT_FACTOR = 0.40 FONT_DOWNLOD = True FONT_THAI_PATH = "NotoSansThai-Regular.ttf" FONT_LATIN_PATH = "NotoSans-Regular.ttf" FONT_THAI_URLS = [ "https://github.com/google/fonts/raw/main/ofl/notosansthai/NotoSansThai-Regular.ttf", "https://github.com/google/fonts/raw/main/ofl/notosansthaiui/NotoSansThaiUI-Regular.ttf", ] FONT_LATIN_URLS = [ "https://github.com/google/fonts/raw/main/ofl/notosans/NotoSans-Regular.ttf", ] FONT_JA_PATH = "NotoSansCJKjp-Regular.otf" FONT_JA_URLS = [ "https://raw.githubusercontent.com/googlefonts/noto-cjk/main/Sans/OTF/Japanese/NotoSansCJKjp-Regular.otf", "https://github.com/googlefonts/noto-cjk/raw/main/Sans/OTF/Japanese/NotoSansCJKjp-Regular.otf", ] FONT_ZH_SC_PATH = "NotoSansCJKsc-Regular.otf" FONT_ZH_SC_URLS = [ "https://raw.githubusercontent.com/googlefonts/noto-cjk/main/Sans/OTF/SimplifiedChinese/NotoSansCJKsc-Regular.otf", "https://github.com/googlefonts/noto-cjk/raw/main/Sans/OTF/SimplifiedChinese/NotoSansCJKsc-Regular.otf", ] FONT_ZH_TC_PATH = "NotoSansCJKtc-Regular.otf" FONT_ZH_TC_URLS = [ "https://raw.githubusercontent.com/googlefonts/noto-cjk/main/Sans/OTF/TraditionalChinese/NotoSansCJKtc-Regular.otf", "https://github.com/googlefonts/noto-cjk/raw/main/Sans/OTF/TraditionalChinese/NotoSansCJKtc-Regular.otf", ] UI_LANGUAGES = [ {"code": "en", "name": "English"}, {"code": "th", "name": "Thai"}, {"code": "ja", "name": "Japanese"}, {"code": "ko", "name": "Korean"}, {"code": "zh-CN", "name": "Chinese (Simplified)"}, {"code": "vi", "name": "Vietnamese"}, {"code": "es", "name": "Spanish"}, {"code": "de", "name": "German"}, {"code": "fr", "name": "French"}, ] AI_PROVIDER_DEFAULTS = { "gemini": { "model": "gemini-2.5-flash", "base_url": "", }, "openai": { "model": "gpt-4o-mini", "base_url": "https://api.openai.com/v1", }, "openrouter": { "model": "openai/o4-mini", "base_url": "https://openrouter.ai/api/v1", }, "huggingface": { "model": "google/gemma-2-2b-it", "base_url": "https://router.huggingface.co/v1", 
}, "featherless": { "model": "Qwen/Qwen2.5-7B-Instruct", "base_url": "https://api.featherless.ai/v1", }, "groq": { "model": "openai/gpt-oss-20b", "base_url": "https://api.groq.com/openai/v1", }, "together": { "model": "openai/gpt-oss-20b", "base_url": "https://api.together.xyz/v1", }, "deepseek": { "model": "deepseek-chat", "base_url": "https://api.deepseek.com/v1", }, "anthropic": { "model": "claude-sonnet-4-20250514", "base_url": "https://api.anthropic.com", }, } AI_PROVIDER_ALIASES = { "hf": "huggingface", "huggingface_router": "huggingface", "hf_router": "huggingface", "openai_compat": "openai", "openai-compatible": "openai", "gemini3": "gemini", "gemini-3": "gemini", "google": "gemini", } AI_MODEL_ALIASES = { "gemini": { "flash-lite": "gemini-2.5-flash-lite", "flash": "gemini-2.5-flash", "pro": "gemini-2.5-pro", "3-flash": "gemini-3-flash-preview", "3-pro": "gemini-3-pro-preview", "3-pro-image": "gemini-3-pro-image-preview", "flash-image": "gemini-2.5-flash-image", } } AI_PROMPT_SYSTEM_BASE = ( "You are a professional manga translator and dialogue localizer.\n" "Rewrite each paragraph as natural dialogue in the target language while preserving meaning, tone, intent, and character voice.\n" "Keep lines concise for speech bubbles. Do not add new information. Do not omit meaning. Do not explain.\n" "Preserve emphasis (… ! ?). Avoid excessive punctuation.\n" "If the input is already in the target language, improve it (dialogue polish) without changing meaning." ) AI_LANG_STYLE = { "th": ( "Target language: Thai\\n" "Write Thai manga dialogue that reads like a high-quality Thai scanlation: natural, concise, and in-character.\\n" "Keep lines short for speech bubbles; avoid stiff, literal phrasing.\\n" "Default: omit pronouns and omit gendered polite sentence-final particles unless the source line clearly requires them.\\n" "Never use the word 'ฉัน'. Prefer omitting the subject.\\n" "Never use a male-coded second-person pronoun. 
When addressing someone by name, do not add a second-person pronoun after the name; prefer NAME + clause.\\n" "If a second-person reference is unavoidable, use a neutral/casual form appropriate to tone, but keep it gender-neutral and consistent with the line.\\n" "Use particles/interjections sparingly to match tone; do not overuse.\\n" "Keep names/terms consistent; transliterate when appropriate.\\n" "Output only the translated text." ), "en": ( "Target language: English\n" "Write natural English manga dialogue: concise, conversational, with contractions where natural.\n" "Localize tone and character voice; keep emotion and emphasis.\n" "Keep proper nouns consistent; do not over-explain." ), "ja": ( "Target language: Japanese\n" "Write natural Japanese manga dialogue: concise, spoken.\n" "Choose 丁寧語/タメ口 to match context; keep emotion and emphasis.\n" "Keep proper nouns consistent; keep SFX natural in Japanese." ), "default": ( "Write natural manga dialogue in the target language: concise, spoken, faithful to meaning and tone." ), } AI_PROMPT_RESPONSE_CONTRACT_JSON = ( "Return ONLY valid JSON (no markdown, no extra text).\n" "Output JSON MUST have exactly one key: \"aiTextFull\".\n" "\"aiTextFull\" MUST be a single JSON string WITHOUT raw newlines.\n" "Use literal \\n and \\n\\n to represent line breaks.\n" "You MUST preserve paragraph boundaries and order. Paragraphs are separated by a blank line (\\n\\n).\n" "Do NOT add extra paragraphs. Do NOT remove paragraphs.\n" "Never include code fences or XML/HTML tags.\n" "All string values MUST NOT contain raw newlines." ) AI_PROMPT_RESPONSE_CONTRACT_TEXT = ( "Return ONLY the translated text (no JSON, no markdown, no commentary).\n" "You MUST preserve paragraph boundaries and order. Paragraphs are separated by a blank line.\n" "Use actual newlines for line breaks.\n" "Do NOT add extra paragraphs. Do NOT remove paragraphs.\n" "Never include code fences or XML/HTML tags." 
) AI_PROMPT_DATA_TEMPLATE = ( "Input JSON:\n{input_json}\n\n" "Output JSON schema (MUST match exactly):\n{output_schema}" ) AI_PROMPT_DATA_TEMPLATE_TEXT = ( "Input JSON:\n{input_json}\n\n" "Return the translation as plain text only." ) FIREBASE_COOKIE_TTL_SEC = int(os.getenv("FIREBASE_COOKIE_TTL_SEC", "900")) _FIREBASE_COOKIE_CACHE = {"ts": 0.0, "url": "", "data": None} _FONT_RESOLVE_CACHE = {} _HF_MODELS_CACHE = {} _FONT_PAIR_CACHE = {} _TP_HTML_EPS_PX = 0.0 ZWSP = "\u200b" def _active_ai_contract() -> str: return AI_PROMPT_RESPONSE_CONTRACT_JSON if DO_AI_JSON else AI_PROMPT_RESPONSE_CONTRACT_TEXT def _active_ai_data_template() -> str: return AI_PROMPT_DATA_TEMPLATE if DO_AI_JSON else AI_PROMPT_DATA_TEMPLATE_TEXT def _canonical_provider(provider: str) -> str: p = (provider or "").strip().lower() return AI_PROVIDER_ALIASES.get(p, p) def _resolve_model(provider: str, model: str) -> str: m = (model or "").strip() if not m or m.lower() == "auto": d = AI_PROVIDER_DEFAULTS.get(provider) or {} return (d.get("model") or "").strip() or AI_PROVIDER_DEFAULTS["openai"]["model"] key = m.lower() aliases = AI_MODEL_ALIASES.get(provider) or {} return aliases.get(key) or m def _normalize_lang(lang: str) -> str: t = (lang or "").strip().lower() if t in ("jp", "jpn", "japanese"): return "ja" if t in ("thai",): return "th" if t in ("eng", "english"): return "en" if t.startswith("zh"): return t if len(t) >= 2: return t[:2] return t def _sha1(s: str) -> str: return hashlib.sha1(s.encode("utf-8")).hexdigest() def _hf_router_available_models(api_key: str, base_url: str) -> list[str]: if not api_key or not base_url: return [] key = _sha1(f"{_sha1(api_key)}|{base_url}") now = time.time() cached = _HF_MODELS_CACHE.get(key) or {} if cached.get("ts") and now - float(cached["ts"]) < 3600 and isinstance(cached.get("models"), list): return cached["models"] url = base_url.rstrip("/") + "/models" headers = {"Authorization": f"Bearer {api_key}"} try: with httpx.Client(timeout=float(AI_TIMEOUT_SEC)) 
as client: r = client.get(url, headers=headers) r.raise_for_status() data = r.json() except Exception: return [] models = [] for m in (data.get("data") or []): mid = (m.get("id") if isinstance(m, dict) else None) if isinstance(mid, str) and mid.strip(): models.append(mid.strip()) _HF_MODELS_CACHE[key] = {"ts": now, "models": models} return models def _pick_hf_fallback_model(models: list[str]) -> str: if not models: return "" priority_substrings = ( "gemma-3", "gemma-2", "llama-3.1", "llama-3", "mistral", "qwen", "glm", ) lowered = [(m, m.lower()) for m in models] for sub in priority_substrings: for m, ml in lowered: if sub in ml and ("instruct" in ml or ml.endswith("-it") or ":" in ml): return m for m, ml in lowered: if "instruct" in ml or ml.endswith("-it") or ":" in ml: return m return models[0] def _load_ai_cache(path: str): if not path: return {} if not os.path.exists(path): return {} try: with open(path, "r", encoding="utf-8") as f: d = json.load(f) return d if isinstance(d, dict) else {} except Exception: return {} def _save_ai_cache(path: str, cache: dict): if not path: return tmp = path + ".tmp" with open(tmp, "w", encoding="utf-8") as f: json.dump(cache, f, ensure_ascii=False) os.replace(tmp, path) def _build_ai_prompt_packet(target_lang: str, original_text_full: str): lang = _normalize_lang(target_lang) input_json = json.dumps( {"target_lang": lang, "originalTextFull": original_text_full}, ensure_ascii=False) output_schema = json.dumps({"aiTextFull": "..."}, ensure_ascii=False) data_template = _active_ai_data_template() if DO_AI_JSON: data_text = data_template.format( input_json=input_json, output_schema=output_schema) else: data_text = data_template.format(input_json=input_json) style = AI_LANG_STYLE.get(lang) or AI_LANG_STYLE.get("default") or "" system_parts = [AI_PROMPT_SYSTEM_BASE] if style: system_parts.append(style) system_parts.append(_active_ai_contract()) system_text = "\n\n".join([p for p in system_parts if p]) user_parts = [] 
user_parts.append(data_text) return system_text, user_parts def _gemini_generate_json(api_key: str, model: str, system_text: str, user_parts: list[str]): url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}" parts = [{"text": p} for p in user_parts if (p or "").strip()] payload = { "systemInstruction": {"parts": [{"text": system_text}]}, "contents": [{"role": "user", "parts": parts}], "generationConfig": { "temperature": float(AI_TEMPERATURE), "maxOutputTokens": int(AI_MAX_TOKENS), "responseMimeType": "text/plain", }, } with httpx.Client(timeout=float(AI_TIMEOUT_SEC)) as client: r = client.post(url, json=payload) try: r.raise_for_status() except httpx.HTTPStatusError as e: raise Exception(f"Gemini HTTP {r.status_code}: {r.text}") from e data = r.json() candidates = data.get("candidates") or [] if not candidates: raise Exception("Gemini returned no candidates") c = (candidates[0].get("content") or {}) out_parts = c.get("parts") or [] if not out_parts: raise Exception("Gemini returned empty content parts") txt = "".join([str(p.get("text") or "") for p in out_parts]).strip() if not txt: raise Exception("Gemini returned empty text") return txt def _read_first_env(*names: str) -> str: for n in names: v = (os.environ.get(n) or "").strip() if v: return v return "" def _detect_ai_provider_from_key(api_key: str) -> str: k = (api_key or "").strip() if k.startswith("AIza"): return "gemini" if k.startswith("hf_"): return "huggingface" if k.startswith("sk-or-"): return "openrouter" if k.startswith("sk-ant-"): return "anthropic" if k.startswith("gsk_"): return "groq" return "openai" def _resolve_ai_config(): api_key = (AI_API_KEY or _read_first_env( "AI_API_KEY", "OPENAI_API_KEY", "HF_TOKEN", "HUGGINGFACEHUB_API_TOKEN", "GEMINI_API_KEY", "OPENROUTER_API_KEY", "FEATHERLESS_API_KEY", "GROQ_API_KEY", "TOGETHER_API_KEY", "DEEPSEEK_API_KEY", "ANTHROPIC_API_KEY", )).strip() provider = _canonical_provider((AI_PROVIDER or "auto")) model = 
(AI_MODEL or "auto").strip() base_url = (AI_BASE_URL or "auto").strip() if provider in ("", "auto"): provider = _canonical_provider(_detect_ai_provider_from_key(api_key)) preset = AI_PROVIDER_DEFAULTS.get(provider) or {} model = _resolve_model(provider, model) if base_url in ("", "auto"): base_url = (preset.get("base_url") or "").strip() if provider not in ("gemini", "anthropic"): if not base_url: base_url = (AI_PROVIDER_DEFAULTS.get("openai") or {}).get( "base_url") or "https://api.openai.com/v1" return provider, api_key, model, base_url def _openai_compat_generate_json(api_key: str, base_url: str, model: str, system_text: str, user_parts: list[str]): url = (base_url.rstrip("/") + "/chat/completions") messages = [{"role": "system", "content": system_text}] for p in user_parts: if (p or "").strip(): messages.append({"role": "user", "content": p}) payload = { "model": model, "messages": messages, "temperature": float(AI_TEMPERATURE), "max_tokens": int(AI_MAX_TOKENS), } headers = { "Authorization": f"Bearer {api_key}", "Content-Type": "application/json", } used_model = model with httpx.Client(timeout=float(AI_TIMEOUT_SEC)) as client: r = client.post(url, json=payload, headers=headers) try: r.raise_for_status() data = r.json() except httpx.HTTPStatusError as e: if ( r.status_code == 400 and "router.huggingface.co" in (base_url or "") and ((AI_MODEL or "").strip().lower() in ("", "auto") or model == (AI_PROVIDER_DEFAULTS.get("huggingface") or {}).get("model")) ): try: err = r.json().get("error") or {} except Exception: err = {} if (err.get("code") or "") == "model_not_supported": models = _hf_router_available_models(api_key, base_url) fallback = _pick_hf_fallback_model(models) if fallback and fallback != model: payload["model"] = fallback used_model = fallback r2 = client.post(url, json=payload, headers=headers) try: r2.raise_for_status() except httpx.HTTPStatusError as e2: raise Exception( f"AI HTTP {r2.status_code}: {r2.text}") from e2 data = r2.json() else: preview 
= ", ".join(models[:8]) hint = f"\nAvailable models (first 8): {preview}" if preview else "" raise Exception( f"AI HTTP {r.status_code}: {r.text}{hint}") from e else: raise Exception( f"AI HTTP {r.status_code}: {r.text}") from e else: raise Exception(f"AI HTTP {r.status_code}: {r.text}") from e choices = data.get("choices") or [] if not choices: raise Exception("AI returned no choices") msg = (choices[0].get("message") or {}) txt = (msg.get("content") or "").strip() if not txt: raise Exception("AI returned empty text") return txt, used_model def _anthropic_generate_json(api_key: str, model: str, system_text: str, user_parts: list[str]): url = "https://api.anthropic.com/v1/messages" messages = [] for p in user_parts: if (p or "").strip(): messages.append({"role": "user", "content": p}) payload = { "model": model, "max_tokens": int(AI_MAX_TOKENS), "temperature": float(AI_TEMPERATURE), "system": system_text, "messages": messages, } headers = { "x-api-key": api_key, "content-type": "application/json", } with httpx.Client(timeout=float(AI_TIMEOUT_SEC)) as client: r = client.post(url, json=payload, headers=headers) try: r.raise_for_status() except httpx.HTTPStatusError as e: raise Exception(f"Anthropic HTTP {r.status_code}: {r.text}") from e data = r.json() content = data.get("content") or [] txt = "".join([(c.get("text") or "") for c in content if isinstance( c, dict) and c.get("type") == "text"]).strip() if not txt: raise Exception("Anthropic returned empty text") return txt def _strip_wrappers(s: str) -> str: t = (s or "").strip() if not t: return "" t = t.replace("\r\n", "\n").replace("\r", "\n") if "```" in t: t = re.sub(r"```[a-zA-Z0-9_-]*", "", t) t = t.replace("```", "") t = re.sub(r"", "", t, flags=re.IGNORECASE).strip() return t def _sanitize_json_like_text(raw: str) -> str: t = _strip_wrappers(raw) if not t: return "" out = [] in_str = False esc = False run_ch = "" run_len = 0 def _flush_run(): nonlocal run_ch, run_len if run_len: out.append(run_ch * 
min(run_len, 3)) run_ch = "" run_len = 0 for ch in t: if in_str: if esc: _flush_run() out.append(ch) esc = False continue if ch == "\\": _flush_run() out.append(ch) esc = True continue if ch == '"': _flush_run() out.append(ch) in_str = False continue if ch == "\n": _flush_run() out.append("\\n") continue if ch == "\t": _flush_run() out.append("\\t") continue if ch == run_ch: run_len += 1 continue _flush_run() run_ch = ch run_len = 1 continue _flush_run() if ch == '"': out.append(ch) in_str = True esc = False continue out.append(ch) _flush_run() return "".join(out) def _extract_first_json(raw: str): t = _sanitize_json_like_text(raw) if not t: raise Exception("AI returned empty text") start = t.find("{") if start < 0: raise Exception("AI returned no JSON object") in_str = False esc = False depth = 0 json_start = None for i in range(start, len(t)): ch = t[i] if in_str: if esc: esc = False elif ch == "\\": esc = True elif ch == '"': in_str = False continue if ch == '"': in_str = True continue if ch == "{": if depth == 0: json_start = i depth += 1 continue if ch == "}": if depth > 0: depth -= 1 if depth == 0 and json_start is not None: cand = t[json_start: i + 1] return json.loads(cand) raise Exception("Failed to parse AI JSON") def _parse_ai_textfull_only(raw: str) -> str: obj = _extract_first_json(raw) if not isinstance(obj, dict): raise Exception("AI JSON is not an object") txt = obj.get("aiTextFull") if txt is None: txt = obj.get("textFull") if txt is None: raise Exception("AI JSON missing aiTextFull") t = str(txt) if "\\n" in t and "\n" not in t: t = t.replace("\\n", "\n") t = t.replace("\r\n", "\n").replace("\r", "\n").strip() return t def _parse_ai_textfull_text_only(raw: str) -> str: t = _strip_wrappers(raw) if not t: raise Exception("AI returned empty text") if t.lstrip().startswith("{"): return _parse_ai_textfull_only(t) if "\\n" in t and "\n" not in t: t = t.replace("\\n", "\n") t = re.sub(r"^aiTextFull\s*[:=]\s*", "", t, flags=re.IGNORECASE).strip() return t 
def _budoux_parser_for_lang(lang: str): lang = _normalize_lang(lang) if not budoux: return None if lang == "th": return budoux.load_default_thai_parser() if lang == "ja": return budoux.load_default_japanese_parser() if lang in ("zh", "zh-hans", "zh_cn", "zh-cn", "zh_hans"): return budoux.load_default_simplified_chinese_parser() if lang in ("zh-hant", "zh_tw", "zh-tw", "zh_hant"): return budoux.load_default_traditional_chinese_parser() model_path = os.environ.get("BUDOUX_MODEL_PATH") if not model_path: return None with open(model_path, "r", encoding="utf-8") as f: model = json.load(f) return budoux.Parser(model) def _ensure_box_fields(box: dict): if not isinstance(box, dict): return {} b = copy.deepcopy(box) if "rotation_deg" not in b: b["rotation_deg"] = 0.0 if "rotation_deg_css" not in b: b["rotation_deg_css"] = 0.0 if "center" not in b and all(k in b for k in ("left", "top", "width", "height")): b["center"] = {"x": b["left"] + b["width"] / 2.0, "y": b["top"] + b["height"]/2.0} if all(k in b for k in ("left", "top", "width", "height")): if "left_pct" not in b: b["left_pct"] = b["left"] * 100.0 if "top_pct" not in b: b["top_pct"] = b["top"] * 100.0 if "width_pct" not in b: b["width_pct"] = b["width"] * 100.0 if "height_pct" not in b: b["height_pct"] = b["height"] * 100.0 return b def _tokens_with_spaces(text: str, parser, lang: str): t = (text or "") if not t: return [] out = [] parts = re.findall(r"\s+|\S+", t) for part in parts: if not part: continue if part.isspace(): out.append(("space", part)) continue segs = parser.parse(part) if parser else [part] for seg in segs: if seg: out.append(("word", seg)) return out def _line_cap_px_for_item(item: dict, img_w: int, img_h: int) -> float: p1 = item.get("baseline_p1") or {} p2 = item.get("baseline_p2") or {} dx = (float(p2.get("x") or 0.0) - float(p1.get("x") or 0.0)) * float(img_w) dy = (float(p2.get("y") or 0.0) - float(p1.get("y") or 0.0)) * float(img_h) cap = float(math.hypot(dx, dy)) if cap > 1e-6: return cap b = 
_ensure_box_fields(item.get("box") or {}) return float(b.get("width") or 0.0) * float(img_w) def _wrap_tokens_to_lines_px(tokens, items, img_w: int, img_h: int, thai_font: str, latin_font: str, font_size: int, min_lines: int): max_lines = len(items) if max_lines <= 0: return [] caps = [_line_cap_px_for_item(it, img_w, img_h) for it in items] desired = max(1, min(int(min_lines), max_lines)) soft_factor = 0.90 if desired > 1 else 1.0 lines = [[]] cur_w = 0.0 li = 0 last_word_hint = "" pending_space = "" tmp = Image.new("RGBA", (10, 10), (0, 0, 0, 0)) dtmp = ImageDraw.Draw(tmp) def _measure_w(font, txt: str) -> float: try: return float(font.getlength(txt)) except Exception: try: bb = dtmp.textbbox((0, 0), txt, font=font, anchor="ls") return float(bb[2] - bb[0]) except Exception: w, _ = dtmp.textsize(txt, font=font) return float(w) def _cap_for_line(idx: int) -> float: return float(caps[min(idx, max_lines - 1)]) for k, s in (tokens or []): if k == "space": if not lines[-1]: continue pending_space += str(s) continue if k != "word": continue txt = str(s) if not txt: continue font = pick_font(txt, thai_font, latin_font, int(font_size)) w = _measure_w(font, txt) sw = 0.0 if pending_space: hint = last_word_hint or txt font_s = pick_font(hint, thai_font, latin_font, int(font_size)) sw = _measure_w(font_s, pending_space) cap = _cap_for_line(li) soft_cap = cap * soft_factor if (li < desired and cap > 0.0) else cap need_w = cur_w + sw + w if lines[-1] and li < max_lines - 1: if cap > 0.0 and need_w > cap: lines.append([]) li += 1 cur_w = 0.0 pending_space = "" sw = 0.0 elif soft_cap > 0.0 and need_w > soft_cap: lines.append([]) li += 1 cur_w = 0.0 pending_space = "" sw = 0.0 if pending_space and lines[-1]: lines[-1].append(("space", pending_space, sw)) cur_w += sw pending_space = "" lines[-1].append(("word", txt, w)) cur_w += w last_word_hint = txt if len(lines) > max_lines: head = lines[: max_lines - 1] tail = [] for seg in lines[max_lines - 1:]: tail.extend(seg) lines = head 
+ [tail] for i in range(len(lines)): while lines[i] and lines[i][0][0] == "space": lines[i] = lines[i][1:] while lines[i] and lines[i][-1][0] == "space": lines[i] = lines[i][:-1] return lines def _ensure_min_lines_by_split(lines, min_lines: int, max_lines: int): if not lines: return [] min_lines = int(min_lines) max_lines = int(max_lines) if min_lines <= 1: return lines target = min(min_lines, max_lines) lines = [list(seg) for seg in (lines or [])] def _trim(seg): while seg and seg[0][0] == "space": seg.pop(0) while seg and seg[-1][0] == "space": seg.pop() return seg while len(lines) < target: idx = None best = 0 for i, seg in enumerate(lines): n_words = sum(1 for k, s, _ in seg if k == "word" and s != ZWSP) if n_words > best and n_words > 1: best = n_words idx = i if idx is None: break seg = lines[idx] word_pos = [i for i, (k, s, _) in enumerate(seg) if k == "word" and s != ZWSP] if len(word_pos) <= 1: break cut_word = len(word_pos) // 2 cut_pos = word_pos[cut_word] left = _trim(seg[:cut_pos]) right = _trim(seg[cut_pos:]) lines[idx] = left lines.insert(idx + 1, right) if len(lines) >= max_lines: break return lines def _fit_para_size_and_lines(ptext: str, parser, items, img_w: int, img_h: int, thai_font: str, latin_font: str, base_size: int, min_lines: int, lang: str): tokens2 = _tokens_with_spaces(ptext, parser, lang) if not tokens2 or not items: return int(base_size), [[] for _ in range(len(items))] max_lines = len(items) n_words = 0 for k, s in tokens2: if k == "word" and str(s): n_words += 1 desired_lines = max(1, min(max_lines, n_words)) size = max(10, int(base_size)) heights = [] for it in items: b = _ensure_box_fields(it.get("box") or {}) heights.append(float(b.get("height") or 0.0) * float(img_h)) while size >= 10: lines = _wrap_tokens_to_lines_px( tokens2, items, img_w, img_h, thai_font, latin_font, size, min_lines=desired_lines) lines = _ensure_min_lines_by_split( lines, min_lines=desired_lines, max_lines=max_lines) if len(lines) <= max_lines: ok = True 
for ii, seg in enumerate(lines): words = [s for k, s, _ in seg if k == "word" and s != ZWSP] if not words: continue line_text = "".join(words) mline = _line_metrics_px( line_text, thai_font, latin_font, size) if mline is None: continue _, th, _ = mline if ii < len(heights) and heights[ii] > 0.0 and th > heights[ii] * 1.01: ok = False break if ok: return size, lines size -= 1 lines10 = _wrap_tokens_to_lines_px( tokens2, items, img_w, img_h, thai_font, latin_font, 10, min_lines=desired_lines) lines10 = _ensure_min_lines_by_split( lines10, min_lines=desired_lines, max_lines=max_lines) return 10, lines10 def _pad_lines(lines, max_lines: int): max_lines = int(max_lines) if max_lines <= 0: return [] lines = list(lines or []) if len(lines) > max_lines: return lines[:max_lines] if len(lines) < max_lines: lines.extend([[] for _ in range(max_lines - len(lines))]) return lines def _contains_thai(text: str) -> bool: for ch in (text or ""): if _is_thai_char(ch): return True return False def _apply_line_to_item( item: dict, line_tokens, para_index: int, item_index: int, abs_line_start_raw: int, W: int, H: int, thai_path: str, latin_path: str, forced_size_px: int | None, apply_baseline_shift: bool = True, kerning_adjust: bool = False, ): tokens = [] for t in (line_tokens or []): if not isinstance(t, (list, tuple)) or len(t) < 2: continue k = str(t[0]) s = str(t[1]) w = float(t[2]) if len(t) > 2 and isinstance( t[2], (int, float)) else 0.0 tokens.append((k, s, w)) words = [s for k, s, _ in tokens if k == "word" and s != ZWSP] item_text = "".join(s for _, s, _ in tokens if s != ZWSP).strip() item["text"] = item_text item["valid_text"] = bool(item_text) b = _ensure_box_fields(item.get("box") or {}) item["box"] = b base_left = float(b.get("left") or 0.0) base_top = float(b.get("top") or 0.0) base_w = float(b.get("width") or 0.0) base_h = float(b.get("height") or 0.0) if not words or base_w <= 0.0 or base_h <= 0.0 or W <= 0 or H <= 0: item["spans"] = [] return p1 = 
item.get("baseline_p1") or {} p2 = item.get("baseline_p2") or {} x1 = float(p1.get("x") or 0.0) * float(W) y1 = float(p1.get("y") or 0.0) * float(H) x2 = float(p2.get("x") or 0.0) * float(W) y2 = float(p2.get("y") or 0.0) * float(H) dx = x2 - x1 dy = y2 - y1 L = float(math.hypot(dx, dy)) if L <= 1e-9: item["spans"] = [] return ux = dx / L uy = dy / L nx = -uy ny = ux if ny < 0: nx, ny = -nx, -ny base_w_px = L base_h_px = base_h * float(H) base_size = 96 widths_px = [] max_ascent = 0 max_descent = 0 layout_units = [] for k, s, _ in tokens: if s == ZWSP: continue if k == "space": layout_units.append(("space", _sanitize_draw_text(s))) elif k == "word": layout_units.append(("word", _sanitize_draw_text(s))) def _measure_len_px(font, text: str) -> float: try: return float(font.getlength(text)) except Exception: tmp = Image.new("RGBA", (10, 10), (0, 0, 0, 0)) dtmp = ImageDraw.Draw(tmp) try: bb = dtmp.textbbox((0, 0), text, font=font, anchor="ls") return float(bb[2] - bb[0]) except Exception: w, _ = dtmp.textsize(text, font=font) return float(w) for i, (k, t) in enumerate(layout_units): if k == "space": hint = "" for j in range(i - 1, -1, -1): if layout_units[j][0] == "word": hint = layout_units[j][1] break if not hint: for j in range(i + 1, len(layout_units)): if layout_units[j][0] == "word": hint = layout_units[j][1] break font0 = pick_font(hint or "a", thai_path, latin_path, base_size) widths_px.append(max(0.0, _measure_len_px(font0, t))) continue font0 = pick_font(t, thai_path, latin_path, base_size) try: ascent, descent = font0.getmetrics() except Exception: ascent, descent = base_size, int(base_size * 0.25) if ascent > max_ascent: max_ascent = ascent if descent > max_descent: max_descent = descent if kerning_adjust and (i + 1) < len(layout_units) and layout_units[i + 1][0] == "word": nxt = layout_units[i + 1][1] nxt1 = nxt[:1] if nxt else "" if nxt1 and (_contains_thai(t) == _contains_thai(nxt1)): tw = _measure_len_px(font0, t + nxt1) - \ _measure_len_px(font0, nxt1) 
else: tw = _measure_len_px(font0, t) else: tw = _measure_len_px(font0, t) widths_px.append(max(0.0, tw)) line_tw = sum(widths_px) bo_base = _baseline_offset_px_for_text( item_text, thai_path, latin_path, base_size) if bo_base is not None: _, total_h_base = bo_base line_th = float(total_h_base) else: line_th = float(max_ascent + max_descent) if line_tw <= 1e-9 or line_th <= 1e-9: item["spans"] = [] return if forced_size_px is None: scale_line = min((base_w_px * 1.0) / line_tw, (base_h_px * 0.995) / line_th) if scale_line <= 0.0: item["spans"] = [] return final_size = max(10, int(base_size * scale_line)) else: final_size = int(max(10, forced_size_px)) scale_line = float(final_size) / float(base_size) item["font_size_px"] = final_size w_scaled = [w * scale_line for w in widths_px] total_scaled = sum(w_scaled) margin_px = (base_w_px - total_scaled) / \ 2.0 if total_scaled < base_w_px else 0.0 bo = _baseline_offset_px_for_text( item_text, thai_path, latin_path, final_size) if apply_baseline_shift and bo is not None: baseline_offset_px, _ = bo cx = (base_left + (base_w / 2.0)) * float(W) cy = (base_top + (base_h / 2.0)) * float(H) target = (cx + (baseline_offset_px * nx), cy + (baseline_offset_px * ny)) s = ((target[0] - x1) * nx) + ((target[1] - y1) * ny) x1 += nx * s y1 += ny * s x2 += nx * s y2 += ny * s item["baseline_p1"] = {"x": x1 / float(W), "y": y1 / float(H)} item["baseline_p2"] = {"x": x2 / float(W), "y": y2 / float(H)} raw_pos = 0 span_i = 0 unit_i = 0 cum_px = 0.0 spans = [] for kind, s, _ in tokens: if s == ZWSP: continue start_raw = abs_line_start_raw + raw_pos raw_pos += len(s) end_raw = abs_line_start_raw + raw_pos if unit_i >= len(w_scaled): break wpx = w_scaled[unit_i] t0 = (margin_px + cum_px) / base_w_px cum_px += wpx t1 = (margin_px + cum_px) / base_w_px if kind == "space": unit_i += 1 continue span_box = _ensure_box_fields({ "left": base_left + (base_w * t0), "top": base_top, "width": base_w * (t1 - t0), "height": base_h, "rotation_deg": 
float(b.get("rotation_deg") or 0.0), "rotation_deg_css": float(b.get("rotation_deg_css") or 0.0), }) spans.append({ "side": "Ai", "para_index": para_index, "item_index": item_index, "span_index": span_i, "text": s, "valid_text": True, "start_raw": start_raw, "end_raw": end_raw, "t0_raw": t0, "t1_raw": t1, "box": span_box, "height_raw": item.get("height_raw"), "baseline_p1": item.get("baseline_p1"), "baseline_p2": item.get("baseline_p2"), "font_size_px": final_size, }) span_i += 1 unit_i += 1 item["spans"] = spans def patch(payload: dict, img_w: int, img_h: int, thai_font: str, latin_font: str, lang: str | None = None) -> dict: ai = payload.get("Ai") or {} ai_text_full = str(ai.get("aiTextFull") or "") template_tree = ai.get("aiTree") or {} if not isinstance(template_tree, dict): raise ValueError("Ai.aiTree template must be a dict") lang_norm = _normalize_lang(lang or LANG) parser = _budoux_parser_for_lang(lang_norm) out_tree = copy.deepcopy(template_tree) out_tree["side"] = "Ai" paragraphs = out_tree.get("paragraphs") or [] ai_text_full_clean = ai_text_full def _extract_paras_by_markers(txt: str, expected: int) -> tuple[list[str], str, int] | None: if not txt or expected <= 0 or "<>", txt)) if not matches: return None out: list[str] = [""] * expected for mi, m in enumerate(matches): try: idx = int(m.group(1)) except Exception: continue seg_start = m.end() seg_end = matches[mi + 1].start() if (mi + 1) < len(matches) else len(txt) seg = (txt[seg_start:seg_end] or "").lstrip("\r\n").strip() if 0 <= idx < expected and not out[idx]: out[idx] = seg clean = "\n\n".join(out) return out, clean, len(matches) marked = _extract_paras_by_markers(ai_text_full, len(paragraphs)) if marked is not None: ai_paras, ai_text_full_clean, _marker_count = marked else: ai_paras = ai_text_full.split("\n\n") if ai_text_full else [] if len(ai_paras) < len(paragraphs): ai_paras = ai_paras + [""] * (len(paragraphs) - len(ai_paras)) if len(ai_paras) > len(paragraphs): ai_paras = 
ai_paras[:len(paragraphs)] ai_text_full_clean = "\n\n".join(ai_paras) raw_cursor = 0 for pi, (p, ptext) in enumerate(zip(paragraphs, ai_paras)): p["side"] = "Ai" p["para_index"] = int(p.get("para_index", pi)) items = p.get("items") or [] max_lines = len(items) if max_lines <= 0: continue base_size_ref = None if isinstance(p.get("para_font_size_px"), int) and int(p.get("para_font_size_px")) > 0: base_size_ref = int(p.get("para_font_size_px")) else: ref_sizes = [] for it in items: fs = it.get("font_size_px") if isinstance(fs, int) and fs > 0: ref_sizes.append(fs) if ref_sizes: base_size_ref = min(ref_sizes) base_size = int(base_size_ref or 96) min_lines = int(max_lines) para_size, lines = _fit_para_size_and_lines( ptext, parser, items, img_w, img_h, thai_font, latin_font, base_size, min_lines=min_lines, lang=lang_norm, ) lines = _pad_lines(lines, max_lines) p["para_font_size_px"] = int(para_size) p["text"] = ptext p["valid_text"] = bool(ptext) p["start_raw"] = raw_cursor p["end_raw"] = raw_cursor + len(ptext) line_start = raw_cursor for ii in range(max_lines): it = items[ii] it["side"] = "Ai" it["para_index"] = pi it["item_index"] = ii _apply_line_to_item( it, (lines[ii] if ii < len(lines) else []), pi, ii, line_start, img_w, img_h, thai_font, latin_font, para_size, apply_baseline_shift=True, kerning_adjust=True, ) line_raw_len = sum(len(s) for k, s, w in ( lines[ii] if ii < len(lines) else []) if s != ZWSP) line_start += line_raw_len raw_cursor = p["end_raw"] + 2 return {"Ai": {"aiTextFull": ai_text_full_clean, "aiTree": out_tree}} def _uniformize_ai_item_span_font_size(item: dict, img_w: int, img_h: int, thai_font: str, latin_font: str): spans = item.get("spans") or [] if not spans or img_w <= 0 or img_h <= 0: return base_size = item.get("font_size_px") try: base_size = int(base_size) if base_size is not None else None except Exception: base_size = None if not base_size: for sp in spans: fs = sp.get("font_size_px") if isinstance(sp, dict) else None if 
isinstance(fs, int) and fs > 0: base_size = fs break if not base_size or base_size <= 0: return tmp = Image.new("RGBA", (10, 10), (0, 0, 0, 0)) dtmp = ImageDraw.Draw(tmp) font_cache = {} def _font_for(text: str, size: int): key = (int(size), 1 if _contains_thai(text) else 0) f = font_cache.get(key) if f: return f f = pick_font(text, thai_font, latin_font, int(size)) font_cache[key] = f return f min_size = int(base_size) for sp in spans: if not isinstance(sp, dict): continue txt = _sanitize_draw_text(sp.get("text") or "") if txt.strip() == "": continue b = sp.get("box") or {} aw = float(b.get("width") or 0.0) * float(img_w) ah = float(b.get("height") or 0.0) * float(img_h) if aw <= 0.0 or ah <= 0.0: continue font = _font_for(txt, base_size) try: bb = dtmp.textbbox((0, 0), txt, font=font, anchor="ls") tw = float(bb[2] - bb[0]) th = float(bb[3] - bb[1]) except Exception: tw, th = dtmp.textsize(txt, font=font) tw = float(tw) th = float(th) if tw <= 0.0 or th <= 0.0: continue s = min((aw * 0.995) / tw, (ah * 0.995) / th) if s < 1.0: req = max(10, int(base_size * s)) if req < min_size: min_size = req if min_size != base_size: item["font_size_px"] = int(min_size) for sp in spans: if isinstance(sp, dict): sp["font_size_px"] = int(min_size) def _rebuild_ai_spans_after_font_resize(ai_tree: dict, img_w: int, img_h: int, thai_font: str, latin_font: str, lang: str | None = None): if not ai_tree or img_w <= 0 or img_h <= 0: return lang_norm = _normalize_lang(lang or LANG) parser = _budoux_parser_for_lang(lang_norm) for pi, p in _iter_paragraphs(ai_tree): items = p.get("items") or [] for ii, it in enumerate(items): txt = _item_line_text(it) if not str(txt).strip(): it["spans"] = [] continue tokens = _tokens_with_spaces(str(txt), parser, lang_norm) line_tokens = [(k, s, 0.0) for k, s in tokens] forced = it.get("font_size_px") or p.get("para_font_size_px") if isinstance(forced, float): forced = int(forced) elif isinstance(forced, str) and forced.strip().isdigit(): forced = 
int(forced.strip()) _apply_line_to_item( it, line_tokens, int(p.get("para_index", pi)), int(it.get("item_index", ii)), int(it.get("start_raw", 0)), img_w, img_h, thai_font, latin_font, forced, apply_baseline_shift=False, kerning_adjust=True, ) _uniformize_ai_item_span_font_size( it, img_w, img_h, thai_font, latin_font) def ai_translate_original_text(original_text_full: str, target_lang: str): provider, api_key, model, base_url = _resolve_ai_config() if not api_key: raise Exception("AI_API_KEY is required for AI translation") lang = _normalize_lang(target_lang) prompt_sig = _sha1( json.dumps( { "sys": AI_PROMPT_SYSTEM_BASE, "contract": _active_ai_contract(), "data": _active_ai_data_template(), "style": AI_LANG_STYLE.get(lang) or AI_LANG_STYLE.get("default") or "", }, ensure_ascii=False, ) ) cache = None cache_key = None if AI_CACHE: cache = _load_ai_cache(AI_CACHE_PATH) cache_key = _sha1( json.dumps( {"provider": provider, "m": model, "u": base_url, "l": lang, "p": prompt_sig, "t": original_text_full}, ensure_ascii=False, ) ) if cache_key in cache: cached = cache[cache_key] if lang == "th" and cached: t = str(cached.get("aiTextFull") or "") if t: t2 = re.sub( r"(?:(?<=^)|(?<=[\s\"'“”‘’()\[\]{}<>]))\u0e19\u0e32\u0e22(?=(?:\s|$))", "", t) t2 = re.sub(r"[ \t]{2,}", " ", t2) t2 = re.sub(r"^[ \t]+", "", t2, flags=re.MULTILINE) if t2 != t: cached = dict(cached) cached["aiTextFull"] = t2 cache[cache_key] = cached _save_ai_cache(AI_CACHE_PATH, cache) return cached system_text, user_parts = _build_ai_prompt_packet(lang, original_text_full) started = time.time() used_model = model if provider == "gemini": raw = _gemini_generate_json(api_key, model, system_text, user_parts) elif provider == "anthropic": raw = _anthropic_generate_json(api_key, model, system_text, user_parts) else: raw, used_model = _openai_compat_generate_json( api_key, base_url, model, system_text, user_parts) ai_text_full = _parse_ai_textfull_only( raw) if DO_AI_JSON else _parse_ai_textfull_text_only(raw) if 
lang == "th" and ai_text_full: ai_text_full = re.sub( r"(?:(?<=^)|(?<=[\s\"'“”‘’()\[\]{}<>]))\u0e19\u0e32\u0e22(?=(?:\s|$))", "", ai_text_full) ai_text_full = re.sub(r"[ \t]{2,}", " ", ai_text_full) ai_text_full = re.sub(r"^[ \t]+", "", ai_text_full, flags=re.MULTILINE) result = { "aiTextFull": ai_text_full, "meta": {"model": used_model, "provider": provider, "base_url": base_url, "latency_sec": round(time.time() - started, 3)}, } if AI_CACHE and cache is not None and cache_key is not None: cache[cache_key] = result _save_ai_cache(AI_CACHE_PATH, cache) return result def to_translated(u, lang="th"): q = parse_qs(urlparse(u).query) return "https://lens.google.com/translatedimage?" + urlencode( dict( vsrid=q["vsrid"][0], gsessionid=q["gsessionid"][0], sl="auto", tl=lang, se=1, ib="1", ) ) def _b64pad(s: str) -> str: return s + "=" * ((4 - (len(s) % 4)) % 4) def decode_imageurl_to_datauri(imageUrl: str): if not imageUrl: return None if isinstance(imageUrl, str) and imageUrl.startswith("data:image") and "base64," in imageUrl: return imageUrl for fn in (base64.b64decode, base64.urlsafe_b64decode): try: b = fn(_b64pad(imageUrl)) try: t = b.decode("utf-8") except Exception: t = b.decode("utf-8", errors="ignore") if "data:image" in t and "base64," in t: i = t.find("data:image") return t[i:].strip() if i >= 0 else t.strip() except Exception: pass return None def read_varint(buf, i): shift = 0 result = 0 while True: if i >= len(buf): raise ValueError("eof varint") b = buf[i] i += 1 result |= ((b & 0x7F) << shift) if (b & 0x80) == 0: return result, i shift += 7 if shift > 70: raise ValueError("varint too long") def parse_proto(buf, start=0, end=None): if end is None: end = len(buf) i = start out = [] while i < end: key, i = read_varint(buf, i) field = key >> 3 wire = key & 7 if wire == 0: val, i = read_varint(buf, i) out.append((field, wire, val)) elif wire == 1: val = buf[i: i + 8] i += 8 out.append((field, wire, val)) elif wire == 2: l, i = read_varint(buf, i) val = buf[i: i 
+ l] i += l out.append((field, wire, val)) elif wire == 5: val = buf[i: i + 4] i += 4 out.append((field, wire, val)) else: raise ValueError(f"wiretype {wire}") return out def b2f(b4): return struct.unpack("= 2 and height is not None: return pts[0], pts[1], height return None, None, None def _looks_like_geom(geom_bytes): geom_fields = parse_proto(geom_bytes) pts = 0 has_height = False for f, w, v in geom_fields: if f == 1 and w == 2: p_fields = parse_proto(v) if _get_float_field(p_fields, 1) is not None and _get_float_field(p_fields, 2) is not None: pts += 1 elif f == 3 and w == 5: has_height = True return pts >= 2 and has_height def _looks_like_span(span_bytes): span_fields = parse_proto(span_bytes) has_t = False has_range = False for f, w, v in span_fields: if f in (3, 4) and w == 5: has_t = True elif f in (1, 2) and w == 0: has_range = True return has_t and has_range def _is_item_message(msg_bytes): fields = parse_proto(msg_bytes) geom_ok = False span_ok = 0 for f, w, v in fields: if f == 1 and w == 2 and not geom_ok: geom_ok = _looks_like_geom(v) elif f == 2 and w == 2: if _looks_like_span(v): span_ok += 1 return geom_ok and span_ok > 0 def _extract_items_from_paragraph(par_bytes): top = parse_proto(par_bytes) items = [] for _, w, v in top: if w == 2 and _is_item_message(v): items.append(v) if items: return items items = [] seen = set() nodes = 0 def walk(buf, depth): nonlocal nodes if depth >= 4 or nodes > 20000: return for _, w, v in parse_proto(buf): if w != 2: continue nodes += 1 if nodes > 20000: return if _is_item_message(v): if v in seen: continue seen.add(v) items.append(v) else: walk(v, depth + 1) walk(par_bytes, 0) return items def _extract_item_geom_spans(item_bytes): fields = parse_proto(item_bytes) geom_bytes = None spans_bytes = [] for f, w, v in fields: if f == 1 and w == 2: geom_bytes = v if f == 2 and w == 2: spans_bytes.append(v) return geom_bytes, spans_bytes def _extract_span(span_bytes): span_fields = parse_proto(span_bytes) start = None end 
= None t0 = None t1 = None for f, w, v in span_fields: if f == 1 and w == 0: start = int(v) elif f == 2 and w == 0: end = int(v) elif f == 3 and w == 5: t0 = b2f(v) elif f == 4 and w == 5: t1 = b2f(v) return start, end, t0, t1, span_fields def _normalize_angle_deg(angle_deg): while angle_deg <= -180.0: angle_deg += 360.0 while angle_deg > 180.0: angle_deg -= 360.0 if angle_deg < -90.0: angle_deg += 180.0 if angle_deg > 90.0: angle_deg -= 180.0 return angle_deg def _slice_text(full_text, start, end): if start is None or end is None: return "" if start < 0 or end < 0 or start > end or end > len(full_text): return "" return full_text[start:end] def _range_min_max(ranges): if not ranges: return None, None s = min(r[0] for r in ranges) e = max(r[1] for r in ranges) return s, e def decode_tree(paragraphs_b64, full_text, side, img_w, img_h, want_raw=True): raw_dump = [] paragraphs = [] cursor = 0 for para_index, b64s in enumerate(paragraphs_b64): par_bytes = base64.b64decode(b64s) if want_raw: raw_dump.append({"para_index": para_index, "b64": b64s, "bytes_hex": b2hex(par_bytes)}) item_msgs = _extract_items_from_paragraph(par_bytes) items = [] para_ranges = [] para_bounds = None for item_index, item_bytes in enumerate(item_msgs): geom_bytes, spans_bytes = _extract_item_geom_spans(item_bytes) if geom_bytes is None: continue p1, p2, height_norm = _get_points_from_geom(geom_bytes) if p1 is None or p2 is None or height_norm is None: continue x1n, y1n = p1 x2n, y2n = p2 x1 = x1n * img_w y1 = y1n * img_h x2 = x2n * img_w y2 = y2n * img_h dx = x2 - x1 dy = y2 - y1 if dx < 0 or (abs(dx) < 1e-12 and dy < 0): x1, y1, x2, y2 = x2, y2, x1, y1 x1n, y1n, x2n, y2n = x2n, y2n, x1n, y1n dx = x2 - x1 dy = y2 - y1 L = math.hypot(dx, dy) if L <= 1e-12: continue ux = dx / L uy = dy / L angle_deg_raw = math.degrees(math.atan2(dy, dx)) angle_deg = _normalize_angle_deg(angle_deg_raw) angle_deg_css = angle_deg height_px = height_norm * img_h item_spans = [] item_ranges = [] item_bounds = None for 
span_index, sb in enumerate(spans_bytes): start, end, t0, t1, _ = _extract_span(sb) if start is None: start = cursor else: cursor = max(cursor, start) if end is None: continue cursor = max(cursor, end) if t0 is None and t1 is None: continue if t0 is None: t0 = 0.0 if t1 is None: t1 = 1.0 valid_text = False span_text = "" if start is not None and end is not None and 0 <= start <= end <= len(full_text): span_text = full_text[start:end] valid_text = span_text.strip() != "" if valid_text: item_ranges.append((start, end)) e1x = x1 + ux * (t0 * L) e1y = y1 + uy * (t0 * L) e2x = x1 + ux * (t1 * L) e2y = y1 + uy * (t1 * L) cx = (e1x + e2x) / 2.0 cy = (e1y + e2y) / 2.0 width_px = abs(t1 - t0) * L left_px = cx - width_px / 2.0 top_px = cy - height_px / 2.0 left = left_px / img_w top = top_px / img_h width = width_px / img_w height = height_px / img_h span_node = { "side": side, "para_index": para_index, "item_index": item_index, "span_index": span_index, "start_raw": start, "end_raw": end, "t0_raw": t0, "t1_raw": t1, "height_raw": height_norm, "baseline_p1": {"x": x1n, "y": y1n}, "baseline_p2": {"x": x2n, "y": y2n}, "box": { "left": left, "top": top, "width": width, "height": height, "rotation_deg": angle_deg, "rotation_deg_css": angle_deg_css, "center": {"x": cx / img_w, "y": cy / img_h}, "left_pct": left * 100.0, "top_pct": top * 100.0, "width_pct": width * 100.0, "height_pct": height * 100.0, }, "text": span_text, "valid_text": valid_text, } quad = _token_box_quad_px(span_node, img_w, img_h, pad_px=0) if quad: xs = [p[0] for p in quad] ys = [p[1] for p in quad] b = (min(xs), min(ys), max(xs), max(ys)) item_bounds = b if item_bounds is None else (min(item_bounds[0], b[0]), min( item_bounds[1], b[1]), max(item_bounds[2], b[2]), max(item_bounds[3], b[3])) item_bounds = item_bounds item_spans.append(span_node) s0, s1 = _range_min_max(item_ranges) item_text = _slice_text( full_text, s0, s1).strip() if s0 is not None else "" item_valid_text = item_text.strip() != "" if s0 is 
not None: para_ranges.append((s0, s1)) cx = (x1 + x2) / 2.0 cy = (y1 + y2) / 2.0 left_px = cx - L / 2.0 top_px = cy - height_px / 2.0 item_box = { "left": left_px / img_w, "top": top_px / img_h, "width": L / img_w, "height": height_px / img_h, "rotation_deg": angle_deg, "rotation_deg_css": angle_deg_css, "center": {"x": cx / img_w, "y": cy / img_h}, } if item_bounds is not None: para_bounds = item_bounds if para_bounds is None else (min(para_bounds[0], item_bounds[0]), min( para_bounds[1], item_bounds[1]), max(para_bounds[2], item_bounds[2]), max(para_bounds[3], item_bounds[3])) items.append( { "side": side, "para_index": para_index, "item_index": item_index, "start_raw": s0, "end_raw": s1, "text": item_text, "valid_text": item_valid_text, "height_raw": height_norm, "baseline_p1": {"x": x1n, "y": y1n}, "baseline_p2": {"x": x2n, "y": y2n}, "box": item_box, "bounds_px": item_bounds, "spans": item_spans, } ) p0, p1 = _range_min_max(para_ranges) para_text = _slice_text( full_text, p0, p1).strip() if p0 is not None else "" para_valid_text = para_text.strip() != "" paragraphs.append( { "side": side, "para_index": para_index, "start_raw": p0, "end_raw": p1, "text": para_text, "valid_text": para_valid_text, "bounds_px": para_bounds, "items": items, } ) tree = {"side": side, "paragraphs": paragraphs} return tree, raw_dump def flatten_tree_spans(tree): spans = [] for p in tree.get("paragraphs") or []: for it in p.get("items") or []: for sp in it.get("spans") or []: spans.append(sp) return spans def flatten_tree_items_as_tokens(tree, img_w, img_h): toks = [] for p in tree.get("paragraphs") or []: for it in p.get("items") or []: t = { "side": it["side"], "para_index": it["para_index"], "item_index": it["item_index"], "span_index": -1, "start_raw": it.get("start_raw"), "end_raw": it.get("end_raw"), "t0_raw": 0.0, "t1_raw": 1.0, "height_raw": it.get("height_raw"), "baseline_p1": it.get("baseline_p1"), "baseline_p2": it.get("baseline_p2"), "box": it.get("box"), "text": 
it.get("text") or "", "valid_text": it.get("valid_text", False), } toks.append(t) return toks def _mean_angle_deg(angles_deg): vals = [a for a in (angles_deg or []) if a is not None] if not vals: return 0.0 xs = [math.cos(math.radians(a)) for a in vals] ys = [math.sin(math.radians(a)) for a in vals] return math.degrees(math.atan2(sum(ys) / len(ys), sum(xs) / len(xs))) def _rotate_xy(x, y, cos_a, sin_a): return (x * cos_a - y * sin_a, x * sin_a + y * cos_a) def _para_obb_quad_px(para_node, W, H): items = para_node.get("items") or [] if not items: return None angles = [] pts = [] for it in items: b = (it.get("box") or {}) angles.append(b.get("rotation_deg", 0.0)) q = _token_box_quad_px(it, W, H, pad_px=0) if q: pts.extend(q) if len(pts) < 4: return None ang = _mean_angle_deg(angles) cos_a = math.cos(math.radians(ang)) sin_a = math.sin(math.radians(ang)) cos_n = cos_a sin_n = -sin_a rpts = [_rotate_xy(x, y, cos_n, sin_n) for (x, y) in pts] xs = [p[0] for p in rpts] ys = [p[1] for p in rpts] minx, maxx = min(xs), max(xs) miny, maxy = min(ys), max(ys) corners = [(minx, miny), (maxx, miny), (maxx, maxy), (minx, maxy)] return [_rotate_xy(x, y, cos_a, sin_a) for (x, y) in corners] def build_level_outlines(tree, W, H): outlines = [] if not tree: return outlines if DRAW_OUTLINE_PARA: for para in tree.get("paragraphs") or []: q = _para_obb_quad_px(para, W, H) if q: outlines.append( {"quad": q, "color": PARA_OUTLINE, "width": PARA_OUTLINE_WIDTH}) if DRAW_OUTLINE_ITEM: for itok in flatten_tree_items_as_tokens(tree, W, H): q = _token_box_quad_px(itok, W, H, pad_px=0) if q: outlines.append( {"quad": q, "color": ITEM_OUTLINE, "width": ITEM_OUTLINE_WIDTH}) return outlines def tokens_to_html(tokens, container_class="RTMDre"): parts = [] parts.append(f'
') for t in tokens: if not t.get("valid_text"): continue b = t["box"] aria = (t.get("text") or "").replace('"', """).replace("\n", " ") wi = t.get("wi", 0) rot = b.get("rotation_deg_css", b.get("rotation_deg", 0.0)) fs = t.get("font_size_px") or b.get("font_size_px") lh = None if fs: try: lh = max(1, int(round(float(fs) * 1.05))) except Exception: lh = None style = ( f'top: calc({b["top_pct"]}%); ' f'left: calc({b["left_pct"]}%); ' f'width: calc({b["width_pct"]}%); ' f'height: calc({b["height_pct"]}%); ' f"transform: rotate({rot}deg);" ) if fs: style += f" font-size: {float(fs):.4g}px;" if lh: style += f" line-height: {lh}px;" parts.append( f'
' ) parts.append("
") return "".join(parts) def tp_overlay_css(): return ( ".tp-draw-root{position:absolute;inset:0;pointer-events:none;}" ".tp-draw-scope{position:absolute;left:0;top:0;transform-origin:0 0;}" ".tp-para{position:absolute;left:0;top:0;}" ".tp-item{position:absolute;left:0;top:0;display:flex;align-items:center;justify-content:center;" "white-space:pre;pointer-events:none;box-sizing:border-box;overflow:visible;" "font-family:var(--tp-font,system-ui);font-weight:500;" "color:var(--tp-fg,rgba(20,20,20,.98));" "text-shadow:0 0 2px rgba(255,255,255,.90),0 0 2px rgba(0,0,0,.60),0 1px 1px rgba(0,0,0,.35);}" ".tp-item>span{display:inline-block;white-space:pre;transform-origin:center;" "padding:0;border-radius:3px;" "background:var(--tp-bg,rgba(255,255,255,.65));" "box-decoration-break:clone;-webkit-box-decoration-break:clone;}" ".tp-item[data-wrap='1'],.tp-item[data-wrap='1']>span{white-space:pre-wrap;word-break:break-word;}" ".tp-item[data-wrap='1']>span{text-align:center;}" ) def _tp_norm_list(v): if isinstance(v, list): return v if isinstance(v, dict): try: return [v[k] for k in sorted(v.keys(), key=lambda x: int(x) if str(x).isdigit() else str(x))] except Exception: return list(v.values()) return [] def _tp_num(x): try: n = float(x) return n if math.isfinite(n) else None except Exception: return None def _tp_escape_text(s: str) -> str: if not s: return "" s = s.replace("\r", "") s = s.replace("&", "&").replace("<", "<").replace(">", ">") return s def _tp_get_rect(obj: dict, base_w: float, base_h: float): if not isinstance(obj, dict): return None box = obj.get("box") if isinstance(obj.get("box"), dict) else {} l0 = _tp_num(box.get("left")) t0 = _tp_num(box.get("top")) w0 = _tp_num(box.get("width")) h0 = _tp_num(box.get("height")) if None not in (l0, t0, w0, h0) and w0 > 0 and h0 > 0: l = l0 * base_w t = t0 * base_h r = (l0 + w0) * base_w b = (t0 + h0) * base_h deg = _tp_num(box.get("rotation_deg_css")) if deg is None: deg = _tp_num(box.get("rotation_deg")) return {"l": l, 
"t": t, "r": r, "b": b, "deg": deg or 0.0} lp = _tp_num(box.get("left_pct")) tp = _tp_num(box.get("top_pct")) wp = _tp_num(box.get("width_pct")) hp = _tp_num(box.get("height_pct")) if None not in (lp, tp, wp, hp) and wp > 0 and hp > 0: l0p = lp / 100.0 t0p = tp / 100.0 w0p = wp / 100.0 h0p = hp / 100.0 l = l0p * base_w t = t0p * base_h r = (l0p + w0p) * base_w b = (t0p + h0p) * base_h deg = _tp_num(box.get("rotation_deg_css")) if deg is None: deg = _tp_num(box.get("rotation_deg")) return {"l": l, "t": t, "r": r, "b": b, "deg": deg or 0.0} bpx = obj.get("bounds_px") if isinstance(bpx, list) and len(bpx) == 4: l = _tp_num(bpx[0]) t = _tp_num(bpx[1]) r = _tp_num(bpx[2]) bb = _tp_num(bpx[3]) if None not in (l, t, r, bb) and r > l and bb > t: return {"l": l, "t": t, "r": r, "b": bb, "deg": 0.0} return None def _tp_union_rect(items: list, base_w: float, base_h: float): l = float("inf") t = float("inf") r = float("-inf") b = float("-inf") for it in items: bx = _tp_get_rect(it, base_w, base_h) if not bx: continue l = min(l, bx["l"]) t = min(t, bx["t"]) r = max(r, bx["r"]) b = max(b, bx["b"]) if not math.isfinite(l) or not math.isfinite(t) or not math.isfinite(r) or not math.isfinite(b): return None return {"l": l, "t": t, "r": r, "b": b, "deg": 0.0} def _tp_mean_item_deg(items: list, base_w: float, base_h: float) -> float: angles = [] for it in items or []: bx = _tp_get_rect(it, base_w, base_h) if not bx: continue a = _tp_num(bx.get("deg")) if a is None: continue angles.append(float(a)) if not angles: return 0.0 return float(_mean_angle_deg(angles)) def _tp_oriented_rect_from_points(pts: list, para_deg: float) -> dict | None: if len(pts) < 2: return None ang = float(para_deg or 0.0) if not math.isfinite(ang): ang = 0.0 rad_n = math.radians(-ang) cn = math.cos(rad_n) sn = math.sin(rad_n) rpts = [(x * cn - y * sn, x * sn + y * cn) for x, y in pts] xs = [p[0] for p in rpts] ys = [p[1] for p in rpts] minx, maxx = min(xs), max(xs) miny, maxy = min(ys), max(ys) w = float(maxx - 
minx) h = float(maxy - miny) if w <= 0.0 or h <= 0.0: return None cx0 = float((minx + maxx) / 2.0) cy0 = float((miny + maxy) / 2.0) rad_a = math.radians(ang) ca = math.cos(rad_a) sa = math.sin(rad_a) cx = (cx0 * ca) - (cy0 * sa) cy = (cx0 * sa) + (cy0 * ca) l = cx - (w / 2.0) t = cy - (h / 2.0) return {"l": float(l), "t": float(t), "r": float(l + w), "b": float(t + h), "deg": float(ang)} def _tp_rect_corners(l: float, t: float, r: float, b: float, deg: float) -> list: w = float(r - l) h = float(b - t) if w <= 0.0 or h <= 0.0: return [] cx = float((l + r) / 2.0) cy = float((t + b) / 2.0) hw = w / 2.0 hh = h / 2.0 rad = math.radians(float(deg or 0.0)) c = math.cos(rad) s = math.sin(rad) out = [] for x, y in ((-hw, -hh), (hw, -hh), (hw, hh), (-hw, hh)): rx = (x * c) - (y * s) ry = (x * s) + (y * c) out.append((cx + rx, cy + ry)) return out def _tp_para_rect_from_items(items: list, base_w: float, base_h: float, para_deg: float) -> dict | None: if not items: return None pts = [] for it in items: ibx = _tp_get_rect(it, base_w, base_h) if not ibx: continue w = float(ibx["r"] - ibx["l"]) h = float(ibx["b"] - ibx["t"]) if w <= 0.0 or h <= 0.0: continue deg = float(ibx.get("deg") or 0.0) cx = float(ibx["l"] + w / 2.0) cy = float(ibx["t"] + h / 2.0) hw = w / 2.0 hh = h / 2.0 rad = math.radians(deg) c = math.cos(rad) s = math.sin(rad) for x, y in ((-hw, -hh), (hw, -hh), (hw, hh), (-hw, hh)): rx = (x * c) - (y * s) ry = (x * s) + (y * c) pts.append((cx + rx, cy + ry)) return _tp_oriented_rect_from_points(pts, para_deg) def _tp_extract_item_text(it: dict) -> str: if not isinstance(it, dict): return "" for k in ( "text", "translated_text", "translatedText", "ai_text", "aiText", "display_text", "displayText", ): v = it.get(k) if isinstance(v, str) and v: return v spans = _tp_norm_list(it.get("spans")) if spans: return "".join(s.get("text") if isinstance(s, dict) and isinstance(s.get("text"), str) else "" for s in spans) return "" def ai_tree_to_tp_html(tree: dict, base_w: int, 
base_h: int) -> str: base_w = int(base_w or 0) base_h = int(base_h or 0) if base_w <= 0 or base_h <= 0: return "" paras = _tp_norm_list(tree.get("paragraphs") if isinstance(tree, dict) else None) if not paras: return "" parts = [ f'
'] for pi, p in enumerate(paras): if not isinstance(p, dict): continue items = _tp_norm_list(p.get("items")) if len(items) > 1 and any(isinstance(x, dict) and _tp_num(x.get("item_index")) is not None for x in items): items = sorted( items, key=lambda x: _tp_num( x.get("item_index")) if isinstance(x, dict) else 0.0, ) para_idx = int(_tp_num(p.get("para_index")) or pi) pbx = _tp_get_rect(p, base_w, base_h) or _tp_union_rect( items, base_w, base_h) if not pbx: continue para_deg = float(pbx.get("deg") or 0.0) if abs(para_deg) <= 0.01: derived = _tp_mean_item_deg(items, base_w, base_h) if abs(derived) > 0.01: pbx2 = _tp_para_rect_from_items(items, base_w, base_h, derived) if pbx2: pbx = pbx2 para_deg = float(pbx.get("deg") or 0.0) pbx_items = _tp_para_rect_from_items(items, base_w, base_h, para_deg) if pbx_items: pts = _tp_rect_corners( pbx["l"], pbx["t"], pbx["r"], pbx["b"], para_deg) pts += _tp_rect_corners(pbx_items["l"], pbx_items["t"], pbx_items["r"], pbx_items["b"], para_deg) merged = _tp_oriented_rect_from_points(pts, para_deg) if merged: pbx = merged eps = float(_TP_HTML_EPS_PX or 0.0) if eps > 0.0: pbx = { "l": float(pbx["l"] - eps), "t": float(pbx["t"] - eps), "r": float(pbx["r"] + eps), "b": float(pbx["b"] + eps), "deg": float(pbx.get("deg") or para_deg or 0.0), } pw = max(0.0, pbx["r"] - pbx["l"]) ph = max(0.0, pbx["b"] - pbx["t"]) para_style = ( f'left: {pbx["l"]:.6f}px; ' f'top: {pbx["t"]:.6f}px; ' f'width: {pw:.6f}px; ' f'height: {ph:.6f}px;' ) if abs(para_deg) > 0.01: para_style += f' transform: rotate({para_deg:.6g}deg); transform-origin: center center;' parts.append( f'
' ) para_cx = (pbx["l"] + pbx["r"]) / 2.0 para_cy = (pbx["t"] + pbx["b"]) / 2.0 inv_c = inv_s = None if abs(para_deg) > 0.01: rad_inv = math.radians(-para_deg) inv_c = math.cos(rad_inv) inv_s = math.sin(rad_inv) raw_texts = [_tp_extract_item_text(it) for it in items] mapped = list(raw_texts) p_text = p.get("text") if isinstance(p.get("text"), str) else "" non_empty = sum( 1 for t in raw_texts if isinstance(t, str) and t.strip()) any_nl = any(isinstance(t, str) and re.search(r"\r?\n", t) for t in raw_texts) first_nl = bool(raw_texts and isinstance( raw_texts[0], str) and re.search(r"\r?\n", raw_texts[0])) lines = None if p_text and re.search(r"\r?\n", p_text) and (non_empty <= 1 or any_nl): lines = [s.rstrip() for s in re.split(r"\r?\n+", p_text) if s.strip()] elif first_nl and (non_empty <= 1 or all(not (t or "").strip() for t in raw_texts[1:])): lines = [s.rstrip() for s in re.split( r"\r?\n+", raw_texts[0]) if s.strip()] if lines: mapped = [lines[i] if i < len(lines) else ( raw_texts[i] if i < len(raw_texts) else "") for i in range(len(items))] for ii, it in enumerate(items): if not isinstance(it, dict): continue text = (mapped[ii] if ii < len(mapped) else "") or "" if not text.strip(): continue ibx = _tp_get_rect(it, base_w, base_h) if not ibx: continue w0 = max(0.0, ibx["r"] - ibx["l"]) h0 = max(0.0, ibx["b"] - ibx["t"]) if w0 <= 0 or h0 <= 0: continue w = float(w0 + (2.0 * eps)) if eps > 0.0 else float(w0) h = float(h0 + (2.0 * eps)) if eps > 0.0 else float(h0) item_idx = int(_tp_num(it.get("item_index")) or ii) fs_raw = _tp_num(it.get("font_size_px")) fs = int(round(fs_raw)) if fs_raw and fs_raw > 0 else max( 10, int(round(h0 * 0.85))) fs = max(6, min(fs, max(6, int(math.floor(h0 * 0.95))))) lh = max(1, min(int(round(h0)), int(round(fs * 1.12)))) if inv_c is not None and inv_s is not None: icx = (ibx["l"] + ibx["r"]) / 2.0 icy = (ibx["t"] + ibx["b"]) / 2.0 dx = icx - para_cx dy = icy - para_cy rcx = para_cx + (dx * inv_c - dy * inv_s) rcy = para_cy + (dx * 
inv_s + dy * inv_c) left = (rcx - (w / 2.0)) - pbx["l"] top = (rcy - (h / 2.0)) - pbx["t"] else: left = (ibx["l"] - pbx["l"]) - eps top = (ibx["t"] - pbx["t"]) - eps style = ( f'left: {left:.6f}px; ' f'top: {top:.6f}px; ' f'width: {w:.6f}px; ' f'height: {h:.6f}px; ' f'font-size: {fs}px; ' f'line-height: {lh}px; ' 'padding-bottom: 0px;' ) deg = float(ibx.get("deg") or 0.0) if inv_c is not None: deg = deg - para_deg if abs(deg) > 0.01: style += f' transform: rotate({deg:.6g}deg); transform-origin: center center;' wrap_attr = ' data-wrap="1"' if it.get("_tp_wrap") else "" parts.append( f'
' f'{_tp_escape_text(text)}
' ) parts.append("
") parts.append("
") return "".join(parts) def overlay_css(container_class="RTMDre", token_class="IwqbBf"): c = container_class t = token_class return ( f".{c}{{" "position:absolute!important;" "inset:0!important;" "width:100%!important;" "height:100%!important;" "display:block!important;" "opacity:1!important;" "visibility:visible!important;" "pointer-events:none!important;" "overflow:visible!important;" "z-index:2147483647!important;" "transform:none!important;" "contain:layout style paint!important;" "--lens-text-color:#fff;" "--lens-font-family:\"Noto Sans Thai\",\"Noto Sans Thai UI\",\"Noto Sans\",system-ui,-apple-system,BlinkMacSystemFont,\"Segoe UI\",Roboto,Arial,sans-serif;" "--lens-text-shadow:0 1px 2px rgba(0,0,0,.85),0 0 1px rgba(0,0,0,.85);" "}}" f".{c} *{{box-sizing:border-box!important;}}" f".{c} .{t}{{" "position:absolute!important;" "display:flex!important;" "align-items:center!important;" "justify-content:center!important;" "opacity:1!important;" "visibility:visible!important;" "pointer-events:none!important;" "user-select:none!important;" "overflow:visible!important;" "white-space:pre!important;" "transform-origin:top left!important;" "filter:none!important;" "mix-blend-mode:normal!important;" "text-transform:none!important;" "letter-spacing:normal!important;" "}}" f".{c} .{t}::before{{" "content:attr(aria-label)!important;" "display:block!important;" "white-space:pre!important;" "color:var(--lens-text-color)!important;" "font-family:var(--lens-font-family)!important;" "text-shadow:var(--lens-text-shadow)!important;" "font-weight:400!important;" "font-style:normal!important;" "line-height:inherit!important;" "text-rendering:geometricPrecision!important;" "}}" ) def ensure_font(path, urls): key = str(path or "") cached = _FONT_RESOLVE_CACHE.get(key) if cached is not None: return cached or None if path and os.path.isfile(path): _FONT_RESOLVE_CACHE[key] = path return path candidates = [] for root in ("/usr/share/fonts", "/usr/local/share/fonts", 
os.path.expanduser("~/.fonts")): if os.path.isdir(root): for p in os.walk(root): for fn in p[2]: if fn.lower() == os.path.basename(path).lower(): candidates.append(os.path.join(p[0], fn)) if candidates: _FONT_RESOLVE_CACHE[key] = candidates[0] return candidates[0] for url in urls: try: r = httpx.get(url, timeout=30) if r.status_code == 200 and len(r.content) > 10000: with open(path, "wb") as f: f.write(r.content) if os.path.isfile(path): _FONT_RESOLVE_CACHE[key] = path return path except Exception: pass _FONT_RESOLVE_CACHE[key] = "" return None def pick_font(text, thai_path, latin_path, size): def has_thai(s): for ch in s: o = ord(ch) if 0x0E00 <= o <= 0x0E7F: return True return False fp = thai_path if has_thai(text) else latin_path if fp and os.path.isfile(fp): try: return ImageFont.truetype(fp, size=size, layout_engine=getattr(ImageFont, "LAYOUT_RAQM", 0)) except Exception: try: return ImageFont.truetype(fp, size=size) except Exception: pass return ImageFont.load_default() def _get_font_pair(thai_path, latin_path, size): key = (str(thai_path or ""), str(latin_path or ""), int(size)) v = _FONT_PAIR_CACHE.get(key) if v: return v f_th = pick_font("ก", thai_path, latin_path, size) f_lat = pick_font("A", thai_path, latin_path, size) _FONT_PAIR_CACHE[key] = (f_th, f_lat) return f_th, f_lat def _is_thai_char(ch: str) -> bool: if not ch: return False o = ord(ch) return 0x0E00 <= o <= 0x0E7F def _split_runs_for_fallback(text: str): runs = [] cur = [] cur_is_th = None for ch in text: if ch == "\n": if cur: runs.append(("".join(cur), cur_is_th)) cur = [] runs.append(("\n", None)) cur_is_th = None continue is_th = _is_thai_char(ch) if ch.isspace() and cur_is_th is not None: is_th = cur_is_th if cur_is_th is None: cur_is_th = is_th cur = [ch] continue if is_th == cur_is_th: cur.append(ch) else: runs.append(("".join(cur), cur_is_th)) cur = [ch] cur_is_th = is_th if cur: runs.append(("".join(cur), cur_is_th)) return runs def _draw_text_centered_fallback(draw_ctx, center_xy, 
text, thai_path, latin_path, size, fill): t = _sanitize_draw_text(text) if not t: return f_th, f_lat = _get_font_pair(thai_path, latin_path, size) runs = _split_runs_for_fallback(t) x = 0.0 min_t = 0.0 max_b = 0.0 for run, is_th in runs: if run == "\n": continue f = f_th if is_th else f_lat try: bb = draw_ctx.textbbox((x, 0), run, font=f, anchor="ls") min_t = min(min_t, float(bb[1])) max_b = max(max_b, float(bb[3])) x = float(bb[2]) except Exception: try: w, h = draw_ctx.textsize(run, font=f) except Exception: w, h = (len(run) * size * 0.5, size) min_t = min(min_t, -float(h) * 0.8) max_b = max(max_b, float(h) * 0.2) x += float(w) total_w = max(1.0, x) total_h = max(1.0, max_b - min_t) cx, cy = center_xy start_x = float(cx) - (total_w / 2.0) baseline_y = float(cy) - (total_h / 2.0) - min_t x = start_x for run, is_th in runs: if run == "\n": continue f = f_th if is_th else f_lat draw_ctx.text((x, baseline_y), run, font=f, fill=fill, anchor="ls") try: x += float(draw_ctx.textlength(run, font=f)) except Exception: try: w, _ = draw_ctx.textsize(run, font=f) except Exception: w = len(run) * size * 0.5 x += float(w) def _draw_text_baseline_fallback(draw, pos, text, thai_path, latin_path, size, fill): t = _sanitize_draw_text(text) if not t: return 0.0, 0.0 f_th, f_lat = _get_font_pair(thai_path, latin_path, size) runs = _split_runs_for_fallback(t) x0, y0 = pos x = float(x0) max_ascent = 0 max_descent = 0 for run, is_th in runs: if run == "\n": continue f = f_th if is_th else f_lat try: ascent, descent = f.getmetrics() except Exception: ascent, descent = size, int(size * 0.25) max_ascent = max(max_ascent, ascent) max_descent = max(max_descent, descent) draw.text((x, y0), run, font=f, fill=fill, anchor="ls") try: adv = float(f.getlength(run)) except Exception: tmp = Image.new("RGBA", (10, 10), (0, 0, 0, 0)) dtmp = ImageDraw.Draw(tmp) try: bb = dtmp.textbbox((0, 0), run, font=f, anchor="ls") adv = float(bb[2] - bb[0]) except Exception: w, _ = dtmp.textsize(run, font=f) adv = 
float(w) x += adv return float(x - x0), float(max_ascent + max_descent) def _baseline_offset_px_for_text(text: str, thai_path: str, latin_path: str, size: int): t = _sanitize_draw_text(text) if not t: return None f_th, f_lat = _get_font_pair(thai_path, latin_path, size) runs = _split_runs_for_fallback(t) tmp = Image.new("RGBA", (16, 16), (0, 0, 0, 0)) dtmp = ImageDraw.Draw(tmp) x = 0.0 min_t = 0.0 max_b = 0.0 for run, is_th in runs: if run == "\n": continue f = f_th if is_th else f_lat try: bb = dtmp.textbbox((x, 0), run, font=f, anchor="ls") min_t = min(min_t, float(bb[1])) max_b = max(max_b, float(bb[3])) x = float(bb[2]) except Exception: try: w, h = dtmp.textsize(run, font=f) except Exception: w, h = (len(run) * size * 0.5, size) min_t = min(min_t, -float(h) * 0.8) max_b = max(max_b, float(h) * 0.2) x += float(w) total_h = max(1.0, max_b - min_t) baseline_offset = -(total_h / 2.0) - min_t return baseline_offset, total_h def _line_metrics_px(text: str, thai_path: str, latin_path: str, size: int): t = _sanitize_draw_text(text) if not t: return None f_th, f_lat = _get_font_pair(thai_path, latin_path, size) runs = _split_runs_for_fallback(t) tmp = Image.new("RGBA", (16, 16), (0, 0, 0, 0)) dtmp = ImageDraw.Draw(tmp) x = 0.0 min_t = 0.0 max_b = 0.0 for run, is_th in runs: if run == "\n": continue f = f_th if is_th else f_lat try: bb = dtmp.textbbox((x, 0), run, font=f, anchor="ls") min_t = min(min_t, float(bb[1])) max_b = max(max_b, float(bb[3])) x = float(bb[2]) except Exception: try: w, h = dtmp.textsize(run, font=f) except Exception: w, h = (len(run) * size * 0.5, size) min_t = min(min_t, -float(h) * 0.8) max_b = max(max_b, float(h) * 0.2) x += float(w) width = max(1.0, x) total_h = max(1.0, max_b - min_t) baseline_to_center = -((min_t + max_b) / 2.0) return width, total_h, baseline_to_center def _item_avail_w_px(item: dict, W: int, H: int) -> float: b = item.get("box") or {} w_box = float(b.get("width") or 0.0) * float(W) L = 0.0 p1 = item.get("baseline_p1") or 
{} p2 = item.get("baseline_p2") or {} if ("x" in p1 and "y" in p1 and "x" in p2 and "y" in p2): dx = (float(p2.get("x") or 0.0) - float(p1.get("x") or 0.0)) * float(W) dy = (float(p2.get("y") or 0.0) - float(p1.get("y") or 0.0)) * float(H) L = float(math.hypot(dx, dy)) avail = max(w_box, L) return max(1.0, float(avail)) def _item_avail_h_px(item: dict, H: int) -> float: b = item.get("box") or {} return max(1.0, (float(b.get("height") or 0.0) * float(H)) - 2.0) def _item_line_text(item: dict) -> str: t = str(item.get("text") or "") if t.strip(): return t spans = item.get("spans") or [] return "".join(str(s.get("text") or "") for s in spans) def _compute_fit_size_px_for_item(item: dict, thai_path: str, latin_path: str, W: int, H: int, base_size: int = 96) -> int | None: item.pop("_tp_wrap", None) text = _item_line_text(item) if not text.strip(): return None m = _line_metrics_px(text, thai_path, latin_path, base_size) if m is None: return None tw, th, _ = m avail_w = _item_avail_w_px(item, W, H) avail_h = _item_avail_h_px(item, H) if tw <= 1e-6 or th <= 1e-6: return None is_thai = any(_is_thai_char(ch) for ch in text) scale_w = (avail_w * 0.98) / tw scale_h = (avail_h * (0.90 if is_thai else 0.94)) / th scale = min(scale_w, scale_h) if scale <= 0: return None size = max(10, int(base_size * scale)) while size > 10: mm = _line_metrics_px(text, thai_path, latin_path, size) if mm is None: return None tw2, th2, _ = mm if (tw2 <= avail_w * 0.999) and (th2 <= avail_h * 0.999): break size -= 1 if size <= 12 and avail_h >= 24: tw0, th0, _ = m if tw0 > (avail_w * 1.2): def _wrap_fits(s: int) -> bool: if s <= 0: return False k = float(s) / float(base_size) tw = float(tw0) * k th = float(th0) * k lines = int(math.ceil(max(1.0, tw) / max(1.0, avail_w))) return (float(lines) * th) <= float(avail_h) hi = int(min(max(16, avail_h), base_size * 3)) lo = int(size) best = int(size) while lo <= hi: mid = (lo + hi) // 2 if _wrap_fits(mid): best = int(mid) lo = mid + 1 else: hi = mid - 1 if 
best >= int(size * 1.25): item["_tp_wrap"] = True size = int(best) return int(size) def fit_tree_font_sizes_for_tp_html(tree: dict, thai_path: str, latin_path: str, W: int, H: int) -> dict: paras = tree.get("paragraphs") or [] for p in paras: items = p.get("items") or [] if not items: continue per_item_fit: dict[int, int] = {} fits: list[int] = [] for i, it in enumerate(items): s = _compute_fit_size_px_for_item(it, thai_path, latin_path, W, H) if s is None: continue per_item_fit[i] = int(s) fits.append(int(s)) if not fits: continue fits.sort() p["para_font_size_px"] = int(fits[len(fits) // 2]) for i, it in enumerate(items): fs = per_item_fit.get(i) if fs is None: continue it["font_size_px"] = int(fs) for sp in (it.get("spans") or []): sp["font_size_px"] = int(fs) return tree def _iter_paragraphs(tree: dict): ps = (tree or {}).get("paragraphs") or [] for i, p in enumerate(ps): yield i, p def _apply_para_font_size(tree: dict, para_sizes: dict[int, int]): if not tree: return for pi, p in _iter_paragraphs(tree): sz = para_sizes.get(pi) if not sz: continue p["para_font_size_px"] = int(sz) for it in (p.get("items") or []): it["font_size_px"] = int(sz) for sp in (it.get("spans") or []): sp["font_size_px"] = int(sz) def _compute_shared_para_sizes(trees: list[dict], thai_path: str, latin_path: str, W: int, H: int) -> dict[int, int]: sizes: dict[int, int] = {} for tree in trees: if not tree: continue for pi, p in _iter_paragraphs(tree): for it in (p.get("items") or []): fit = _compute_fit_size_px_for_item( it, thai_path, latin_path, W, H) if fit is None: continue cur = sizes.get(pi) sizes[pi] = fit if cur is None else min(cur, fit) vals = [v for v in sizes.values() if isinstance(v, int) and v > 0] if not vals: return sizes vals.sort() mid = len(vals) // 2 target = vals[mid] if (len(vals) % 2 == 1) else int( round((vals[mid - 1] + vals[mid]) / 2)) for k in list(sizes.keys()): try: sizes[k] = int(min(int(sizes[k]), int(target))) except Exception: pass return sizes def 
_sanitize_draw_text(s: str) -> str: t = (s or "").replace("\r\n", "\n").replace("\r", "\n") t = t.replace("\u200b", "").replace("\ufeff", "") t = "".join(ch for ch in t if (ch == "\n") or ( unicodedata.category(ch)[0] != "C")) return t def _token_box_px(t, W, H, pad_px=0): b = t.get("box") or {} left = int(round(float(b.get("left", 0.0)) * W)) - pad_px top = int(round(float(b.get("top", 0.0)) * H)) - pad_px right = int(round((float(b.get("left", 0.0)) + float(b.get("width", 0.0))) * W)) + pad_px bottom = int( round((float(b.get("top", 0.0)) + float(b.get("height", 0.0))) * H)) + pad_px left = max(0, min(W, left)) top = max(0, min(H, top)) right = max(0, min(W, right)) bottom = max(0, min(H, bottom)) if right <= left or bottom <= top: return None return left, top, right, bottom def _token_quad_px(t, W, H, pad_px=0, apply_baseline_shift=True): if not t.get("valid_text"): return None p1 = t.get("baseline_p1") or {} p2 = t.get("baseline_p2") or {} x1 = float(p1.get("x", 0.0)) * W y1 = float(p1.get("y", 0.0)) * H x2 = float(p2.get("x", 0.0)) * W y2 = float(p2.get("y", 0.0)) * H dx = x2 - x1 dy = y2 - y1 if dx < 0 or (abs(dx) < 1e-12 and dy < 0): x1, y1, x2, y2 = x2, y2, x1, y1 dx = x2 - x1 dy = y2 - y1 L = math.hypot(dx, dy) if L <= 1e-9: return None ux = dx / L uy = dy / L nx = -uy ny = ux if ny < 0: nx, ny = -nx, -ny t0 = float(t.get("t0_raw") if t.get("t0_raw") is not None else 0.0) t1 = float(t.get("t1_raw") if t.get("t1_raw") is not None else 1.0) sx = x1 + ux * (t0 * L) sy = y1 + uy * (t0 * L) ex = x1 + ux * (t1 * L) ey = y1 + uy * (t1 * L) h = max(1.0, float(t.get("height_raw") or 0.0) * H) if apply_baseline_shift and BASELINE_SHIFT: shift = h * BASELINE_SHIFT_FACTOR sx += nx * shift sy += ny * shift ex += nx * shift ey += ny * shift pad = max(0.0, float(pad_px)) sx -= ux * pad sy -= uy * pad ex += ux * pad ey += uy * pad hh = (h / 2.0) + pad ox = nx * hh oy = ny * hh return [(sx - ox, sy - oy), (ex - ox, ey - oy), (ex + ox, ey + oy), (sx + ox, sy + oy)] def 
_token_box_quad_px(t, W, H, pad_px=0): b = t.get("box") or {} w = float(b.get("width", 0.0)) * W h = float(b.get("height", 0.0)) * H if w <= 0.0 or h <= 0.0: return None left = float(b.get("left", 0.0)) * W top = float(b.get("top", 0.0)) * H cx = left + (w / 2.0) cy = top + (h / 2.0) hw = (w / 2.0) + float(pad_px) hh = (h / 2.0) + float(pad_px) angle_deg = float(b.get("rotation_deg", 0.0)) rad = math.radians(angle_deg) c = math.cos(rad) s = math.sin(rad) corners = [(-hw, -hh), (hw, -hh), (hw, hh), (-hw, hh)] out = [] for x, y in corners: rx = (x * c) - (y * s) ry = (x * s) + (y * c) out.append((cx + rx, cy + ry)) return out def _quad_bbox(quad, W, H): xs = [p[0] for p in quad] ys = [p[1] for p in quad] l = max(0, min(W, int(math.floor(min(xs))))) t = max(0, min(H, int(math.floor(min(ys))))) r = max(0, min(W, int(math.ceil(max(xs))))) b = max(0, min(H, int(math.ceil(max(ys))))) if r <= l or b <= t: return None return l, t, r, b def _median_rgba(pixels): if not pixels: return None rs = sorted(p[0] for p in pixels) gs = sorted(p[1] for p in pixels) bs = sorted(p[2] for p in pixels) a = 255 mid = len(rs) // 2 return (rs[mid], gs[mid], bs[mid], a) def _rel_luminance(rgb): r, g, b = rgb def lin(c): c = c / 255.0 return c / 12.92 if c <= 0.04045 else ((c + 0.055) / 1.055) ** 2.4 return 0.2126 * lin(r) + 0.7152 * lin(g) + 0.0722 * lin(b) def _contrast_ratio(l1, l2): a = max(l1, l2) + 0.05 b = min(l1, l2) + 0.05 return a / b def _pick_bw_text_color(bg_rgb): Lb = _rel_luminance(bg_rgb) c_black = _contrast_ratio(Lb, 0.0) c_white = _contrast_ratio(Lb, 1.0) return TEXT_COLOR_LIGHT if c_white >= c_black else TEXT_COLOR_DARK def _sample_bg_color_from_quad(base_rgb, quad, rect, border_px=3, margin_px=6): l, t, r, b = rect w = r - l h = b - t if w <= 0 or h <= 0: return _sample_bg_color(base_rgb, rect, margin_px) mask = Image.new("L", (w, h), 0) d = ImageDraw.Draw(mask) qrel = [(x - l, y - t) for x, y in quad] d.polygon(qrel, fill=255) bp = int(max(0, border_px or 0)) if bp > 0: k 
= min(w, h) bp = min(bp, max(1, (k - 1) // 2)) if bp > 0: er = mask.filter(ImageFilter.MinFilter(size=bp * 2 + 1)) border = ImageChops.subtract(mask, er) else: border = mask region = base_rgb.crop((l, t, r, b)) rp = list(region.getdata()) mp = list(border.getdata()) samples = [p for p, m in zip(rp, mp) if m > 0] if len(samples) < 24: ext = _sample_bg_color(base_rgb, rect, margin_px) return ext med = _median_rgba(samples) if med: return med[:3] return _sample_bg_color(base_rgb, rect, margin_px) def _sample_bg_color(base_rgb, rect, margin_px): W, H = base_rgb.size l, t, r, b = rect m = max(1, int(margin_px)) samples = [] def add_strip(x0, y0, x1, y1): x0 = max(0, min(W, x0)) y0 = max(0, min(H, y0)) x1 = max(0, min(W, x1)) y1 = max(0, min(H, y1)) if x1 <= x0 or y1 <= y0: return samples.extend(list(base_rgb.crop((x0, y0, x1, y1)).getdata())) add_strip(l, t - m, r, t) add_strip(l, b, r, b + m) add_strip(l - m, t, l, b) add_strip(r, t, r + m, b) med = _median_rgba(samples) if med: return med[:3] return base_rgb.getpixel((max(0, min(W - 1, l)), max(0, min(H - 1, t)))) def _sample_bg_color_from_quad_ring(base_rgb, quad, rect, ring_px=4): W, H = base_rgb.size l, t, r, b = rect w = r - l h = b - t if w <= 0 or h <= 0: return None mask = np.zeros((h, w), dtype=np.uint8) pts = np.array([[(x - l, y - t) for x, y in quad]], dtype=np.int32) cv2.fillPoly(mask, pts, 255) rp = int(max(1, ring_px or 1)) k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (rp * 2 + 1, rp * 2 + 1)) dil = cv2.dilate(mask, k, iterations=1) ring = cv2.bitwise_and(dil, cv2.bitwise_not(mask)) rgb = np.array(base_rgb.crop((l, t, r, b)).convert("RGB"), dtype=np.uint8) sel = rgb[ring > 0] if sel.size < 24: return None med = np.median(sel, axis=0) return (int(med[0]), int(med[1]), int(med[2])) def _pixelate(img, block_px): w, h = img.size if w <= 1 or h <= 1: return img block_px = int(block_px or 1) if block_px < 1: block_px = 1 sw = max(1, w // block_px) sh = max(1, h // block_px) return img.resize((sw, sh), 
resample=Image.NEAREST).resize((w, h), resample=Image.NEAREST) def _mean_abs_diff(a, b): if a.size != b.size: return 1e18 a = a.convert("RGB") b = b.convert("RGB") da = list(a.getdata()) db = list(b.getdata()) if not da: return 1e18 s = 0 for (ar, ag, ab), (br, bg, bb) in zip(da, db): s += abs(ar - br) + abs(ag - bg) + abs(ab - bb) return s / (len(da) * 3) def _resize_small(img, max_w=64, max_h=64): w, h = img.size if w <= 0 or h <= 0: return img scale = min(max_w / w, max_h / h, 1.0) nw = max(1, int(w * scale)) nh = max(1, int(h * scale)) return img.resize((nw, nh), resample=Image.BILINEAR) def _clone_candidate_score(base, rect, cand_rect, direction, border_px): W, H = base.size l, t, r, b = rect cl, ct, cr, cb = cand_rect w = r - l h = b - t if w <= 1 or h <= 1: return 1e18 border_px = max(1, int(border_px or 1)) if direction == "up": a = base.crop((l, max(0, t - border_px), r, t)) d = base.crop((cl, max(0, cb - border_px), cr, cb)) elif direction == "down": a = base.crop((l, b, r, min(H, b + border_px))) d = base.crop((cl, ct, cr, min(H, ct + border_px))) elif direction == "left": a = base.crop((max(0, l - border_px), t, l, b)) d = base.crop((max(0, cr - border_px), ct, cr, cb)) else: a = base.crop((r, t, min(W, r + border_px), b)) d = base.crop((cl, ct, min(W, cl + border_px), cb)) a = _resize_small(a, 64, 16) d = _resize_small(d, 64, 16) return _mean_abs_diff(a, d) def _choose_clone_rect(base, rect, gap_px, border_px): W, H = base.size l, t, r, b = rect w = r - l h = b - t gap_px = max(0, int(gap_px or 0)) cands = [] up = (l, t - gap_px - h, r, t - gap_px) down = (l, b + gap_px, r, b + gap_px + h) left = (l - gap_px - w, t, l - gap_px, b) right = (r + gap_px, t, r + gap_px + w, b) for direction, (cl, ct, cr, cb) in [("up", up), ("down", down), ("left", left), ("right", right)]: if cl < 0 or ct < 0 or cr > W or cb > H: continue cand_rect = (cl, ct, cr, cb) score = _clone_candidate_score( base, rect, cand_rect, direction, border_px) cands.append((score, 
cand_rect)) if not cands: return None cands.sort(key=lambda x: x[0]) return cands[0][1] def _erase_with_clone(base, rect, mask, gap_px, border_px, feather_px): l, t, r, b = rect cand = _choose_clone_rect(base, rect, gap_px, border_px) if not cand: return False cl, ct, cr, cb = cand donor = base.crop((cl, ct, cr, cb)) region = base.crop((l, t, r, b)) feather_px = max(0, int(feather_px or 0)) if feather_px > 0: m = mask.filter(ImageFilter.GaussianBlur(radius=feather_px)) else: m = mask merged = Image.composite(donor, region, m) base.paste(merged, (l, t)) return True def _erase_with_blend_patches(base, rect, mask, gap_px=3, feather_px=4): l, t, r, b = rect W, H = base.size w = r - l h = b - t if w <= 2 or h <= 2: return False gap = int(max(0, gap_px)) candidates = [] dirs = [(0, -(h + gap)), (0, (h + gap)), (-(w + gap), 0), ((w + gap), 0), (-(w + gap), -(h + gap)), ((w + gap), -(h + gap)), (-(w + gap), (h + gap)), ((w + gap), (h + gap))] for dx, dy in dirs: ll = l + dx tt = t + dy rr = ll + w bb = tt + h if ll < 0 or tt < 0 or rr > W or bb > H: continue candidates.append(base.crop((ll, tt, rr, bb)).convert("RGB")) if not candidates: return False acc = candidates[0] for c in candidates[1:]: acc = ImageChops.add(acc, c, scale=1.0, offset=0) n = len(candidates) blended = acc.point(lambda p: int(p / n)) m = mask fp = int(max(0, feather_px)) if fp > 0: m = m.filter(ImageFilter.GaussianBlur(radius=fp)) region = base.crop((l, t, r, b)).convert("RGB") merged = Image.composite(blended, region, m) base.paste(merged, (l, t)) return True def _erase_with_inpaint(base, box_tokens, pad_px=2): if not box_tokens: return base rgb = base.convert("RGB") W, H = rgb.size mask = Image.new("L", (W, H), 0) d = ImageDraw.Draw(mask) for t in box_tokens: quad = _token_box_quad_px(t, W, H, pad_px=pad_px) if not quad: quad = _token_quad_px(t, W, H, pad_px=pad_px, apply_baseline_shift=True) if not quad: rect = _token_box_px(t, W, H, pad_px=pad_px) if not rect: continue l, tt, r, bb = rect quad = 
[(l, tt), (r, tt), (r, bb), (l, bb)] d.polygon(quad, fill=255) m = np.array(mask, dtype=np.uint8) ys, xs = np.where(m > 0) if xs.size == 0 or ys.size == 0: return rgb l = int(max(0, xs.min() - 8)) t = int(max(0, ys.min() - 8)) r = int(min(W, xs.max() + 1 + 8)) b = int(min(H, ys.max() + 1 + 8)) if r <= l or b <= t: return rgb crop_rgb = np.array(rgb.crop((l, t, r, b)), dtype=np.uint8) crop_m = m[t:b, l:r] dpx = int(max(0, INPAINT_DILATE_PX or 0)) if dpx > 0: k = cv2.getStructuringElement( cv2.MORPH_ELLIPSE, (dpx * 2 + 1, dpx * 2 + 1)) crop_m = cv2.dilate(crop_m, k, iterations=1) bgr = cv2.cvtColor(crop_rgb, cv2.COLOR_RGB2BGR) method = (INPAINT_METHOD or "telea").strip().lower() flag = cv2.INPAINT_TELEA if method in ("telea", "t") else cv2.INPAINT_NS radius = float(INPAINT_RADIUS or 3) out_bgr = cv2.inpaint(bgr, crop_m, radius, flag) out_rgb = cv2.cvtColor(out_bgr, cv2.COLOR_BGR2RGB) out = rgb.copy() out.paste(Image.fromarray(out_rgb), (l, t)) return out def erase_text_with_boxes(img, box_tokens, pad_px=2, sample_margin_px=6, mode=None, mosaic_block_px=None): if not box_tokens: return img mode = (mode or ERASE_MODE or "solid").strip().lower() mosaic_block_px = int(mosaic_block_px or ERASE_MOSAIC_BLOCK_PX or 10) base = img.convert("RGB").copy() if mode in ("inpaint", "cv2", "opencv"): return _erase_with_inpaint(base, box_tokens, pad_px=pad_px) W, H = base.size for t in box_tokens: quad = _token_box_quad_px(t, W, H, pad_px=pad_px) if not quad: quad = _token_quad_px(t, W, H, pad_px=pad_px, apply_baseline_shift=True) if not quad: rect = _token_box_px(t, W, H, pad_px=pad_px) if not rect: continue l, tt, r, bb = rect quad = [(l, tt), (r, tt), (r, bb), (l, bb)] rect = _quad_bbox(quad, W, H) if not rect: continue l, tt, r, bb = rect region = base.crop((l, tt, r, bb)) mask = Image.new("L", (r - l, bb - tt), 0) mdraw = ImageDraw.Draw(mask) qrel = [(x - l, y - tt) for x, y in quad] mdraw.polygon(qrel, fill=255) if mode in ("blend_patch", "blend", "avg_patch", "patch"): ok = 
_erase_with_blend_patches( base, rect, mask, ERASE_BLEND_GAP_PX, ERASE_BLEND_FEATHER_PX) if ok: continue mode = "solid" if mode == "clone": ok = _erase_with_clone( base, rect, mask, ERASE_CLONE_GAP_PX, ERASE_CLONE_BORDER_PX, ERASE_CLONE_FEATHER_PX) if ok: continue mode = "solid" if mode == "mosaic": pixelated = _pixelate(region, mosaic_block_px) merged = Image.composite(pixelated, region, mask) base.paste(merged, (l, tt)) else: color = _sample_bg_color_from_quad( base, quad, rect, BG_SAMPLE_BORDER_PX, sample_margin_px) region.paste(color, mask=mask) base.paste(region, (l, tt)) return base def draw_overlay(img, tokens, out_path, thai_path, latin_path, level_outlines=None, font_scale: float = 1.0, fit_to_box: bool = True): base = img.convert("RGBA") base_rgb = img.convert("RGB") overlay = Image.new("RGBA", base.size, (0, 0, 0, 0)) draw = ImageDraw.Draw(overlay) for ol in (level_outlines or []): q = ol.get("quad") if not q: continue col = ol.get("color", BOX_OUTLINE) w = int(ol.get("width", 2)) draw.line(q + [q[0]], fill=col, width=w) W, H = base.size for t in tokens: b = t.get("box") or {} box_quad = _token_box_quad_px(t, W, H, pad_px=0) use_box_center = False if box_quad: lq, tq, rq, bq = _quad_bbox(box_quad, W, H) box_cx = (lq + rq) / 2.0 box_cy = (tq + bq) / 2.0 box_w = max(1.0, float(rq - lq)) box_h = max(1.0, float(bq - tq)) use_box_center = True else: left0 = float(b.get("left", 0.0)) * W top0 = float(b.get("top", 0.0)) * H box_w = max(1.0, float(b.get("width", 0.0)) * W) box_h = max(1.0, float(b.get("height", 0.0)) * H) box_cx = left0 + (box_w / 2.0) box_cy = top0 + (box_h / 2.0) if DRAW_OUTLINE_SPAN and DRAW_BOX_OUTLINE: quad = _token_box_quad_px(t, W, H, pad_px=0) if quad: draw.line(quad + [quad[0]], fill=SPAN_OUTLINE, width=SPAN_OUTLINE_WIDTH) else: left = b["left"] * W top = b["top"] * H width = b["width"] * W height = b["height"] * H draw.rectangle([left, top, left + width, top + height], outline=SPAN_OUTLINE, width=SPAN_OUTLINE_WIDTH) text = 
_sanitize_draw_text(t.get("text") or "") if text.strip() == "": continue p1 = t["baseline_p1"] p2 = t["baseline_p2"] x1 = float(p1["x"]) * W y1 = float(p1["y"]) * H x2 = float(p2["x"]) * W y2 = float(p2["y"]) * H dx = x2 - x1 dy = y2 - y1 if dx < 0 or (abs(dx) < 1e-12 and dy < 0): x1, y1, x2, y2 = x2, y2, x1, y1 dx = x2 - x1 dy = y2 - y1 L = math.hypot(dx, dy) if L <= 1e-9: continue ux = dx / L uy = dy / L t0 = float(t.get("t0_raw") if t.get("t0_raw") is not None else 0.0) t1 = float(t.get("t1_raw") if t.get("t1_raw") is not None else 1.0) sx = x1 + ux * (t0 * L) sy = y1 + uy * (t0 * L) ex = x1 + ux * (t1 * L) ey = y1 + uy * (t1 * L) avail_w = box_w avail_h = box_h if BASELINE_SHIFT and (not use_box_center): nx, ny = -uy, ux shift = avail_h * BASELINE_SHIFT_FACTOR sx += nx * shift sy += ny * shift angle_deg = float(b.get("rotation_deg", 0.0)) forced_size = t.get("font_size_px") if forced_size is not None: final_size = int( max(10, round(float(forced_size) * float(font_scale)))) font = pick_font(text, thai_path, latin_path, final_size) if fit_to_box: tmpc = Image.new("RGBA", (10, 10), (0, 0, 0, 0)) dc = ImageDraw.Draw(tmpc) try: bbc = dc.textbbox((0, 0), text, font=font, anchor="ls") twc = float(bbc[2] - bbc[0]) thc = float(bbc[3] - bbc[1]) except Exception: twc, thc = dc.textsize(text, font=font) twc = float(twc) thc = float(thc) if twc > 0 and thc > 0 and (twc > avail_w or thc > avail_h): s = min(avail_w / twc, avail_h / thc) if s < 1.0: final_size = max(10, int(final_size * s)) font = pick_font( text, thai_path, latin_path, final_size) else: base_size = 96 font0 = pick_font(text, thai_path, latin_path, base_size) tmp = Image.new("RGBA", (10, 10), (0, 0, 0, 0)) dtmp = ImageDraw.Draw(tmp) try: bb = dtmp.textbbox((0, 0), text, font=font0, anchor="ls") tw = bb[2] - bb[0] th = bb[3] - bb[1] except Exception: tw, th = dtmp.textsize(text, font=font0) if tw <= 0 or th <= 0: continue scale = min(avail_w / tw, avail_h / th) final_size = max(10, int(base_size * scale)) if 
not fit_to_box: final_size = max(10, int(final_size * float(font_scale))) font = pick_font(text, thai_path, latin_path, final_size) tmp2 = Image.new("RGBA", (10, 10), (0, 0, 0, 0)) d2 = ImageDraw.Draw(tmp2) try: bb2 = d2.textbbox((0, 0), text, font=font, anchor="ls") tw2 = bb2[2] - bb2[0] th2 = bb2[3] - bb2[1] except Exception: tw2, th2 = d2.textsize(text, font=font) side = int(max(tw2, th2, avail_h, avail_w) * 2.2 + 40) side = min(side, int(max(W, H) * 4)) if side < 128: side = 128 canvas = Image.new("RGBA", (side, side), (0, 0, 0, 0)) dc = ImageDraw.Draw(canvas) fill = TEXT_COLOR if AUTO_TEXT_COLOR: q = _token_box_quad_px(t, W, H, pad_px=0) if q: rr = _quad_bbox(q, W, H) if rr: bg = _sample_bg_color_from_quad_ring( base_rgb, q, rr, ring_px=max(2, BG_SAMPLE_BORDER_PX)) if bg is None: bg = _sample_bg_color_from_quad( base_rgb, q, rr, BG_SAMPLE_BORDER_PX, ERASE_SAMPLE_MARGIN_PX) fill = _pick_bw_text_color(bg) else: rr = _token_box_px(t, W, H, pad_px=0) if rr: bg = _sample_bg_color(base_rgb, rr, ERASE_SAMPLE_MARGIN_PX) fill = _pick_bw_text_color(bg) origin = (side // 2, side // 2) p1 = t.get("baseline_p1") or {} p2 = t.get("baseline_p2") or {} has_baseline = ("x" in p1 and "y" in p1 and "x" in p2 and "y" in p2) if has_baseline: x1 = float(p1.get("x") or 0.0) * float(W) y1 = float(p1.get("y") or 0.0) * float(H) x2 = float(p2.get("x") or 0.0) * float(W) y2 = float(p2.get("y") or 0.0) * float(H) dx = x2 - x1 dy = y2 - y1 Lb = float(math.hypot(dx, dy)) if Lb <= 1e-6: Lb = 1.0 ux = dx / Lb uy = dy / Lb nx = -uy ny = ux bb = t.get("box") or {} cx = (float(bb.get("left") or 0.0) + float(bb.get("width") or 0.0) / 2.0) * float(W) cy = (float(bb.get("top") or 0.0) + float(bb.get("height") or 0.0) / 2.0) * float(H) tt = _sanitize_draw_text(text) if not tt: continue font_m = pick_font(tt, thai_path, latin_path, final_size) try: tw = float(font_m.getlength(tt)) except Exception: tmp = Image.new("RGBA", (10, 10), (0, 0, 0, 0)) dtmp = ImageDraw.Draw(tmp) try: bbm = 
dtmp.textbbox((0, 0), tt, font=font_m, anchor="ls") tw = float(bbm[2] - bbm[0]) except Exception: tw, _ = dtmp.textsize(tt, font=font_m) tw = float(tw) f_th, f_lat = _get_font_pair(thai_path, latin_path, final_size) try: a_th, d_th = f_th.getmetrics() except Exception: a_th, d_th = final_size, int(final_size * 0.25) try: a_lat, d_lat = f_lat.getmetrics() except Exception: a_lat, d_lat = final_size, int(final_size * 0.25) ascent = float(max(a_th, a_lat)) descent = float(max(d_th, d_lat)) center_y_rel = (-ascent + descent) / 2.0 bx = cx - ux * (tw / 2.0) - nx * center_y_rel by = cy - uy * (tw / 2.0) - ny * center_y_rel angle_deg = float(math.degrees(math.atan2(dy, dx))) _draw_text_baseline_fallback( dc, origin, text, thai_path, latin_path, final_size, fill) rotated = canvas.rotate(-angle_deg, resample=Image.BICUBIC, expand=False, center=origin) paste_x = int(round(bx - origin[0])) paste_y = int(round(by - origin[1])) overlay.alpha_composite(rotated, dest=(paste_x, paste_y)) else: _draw_text_centered_fallback( dc, origin, text, thai_path, latin_path, final_size, fill) rotated = canvas.rotate(-angle_deg, resample=Image.BICUBIC, expand=False, center=origin) paste_x = int(round(box_cx - origin[0])) paste_y = int(round(box_cy - origin[1])) overlay.alpha_composite(rotated, dest=(paste_x, paste_y)) out = Image.alpha_composite(base, overlay).convert("RGB") out.save(out_path) def get_lens_data_from_image(image_path, firebase_url, lang): ck = _get_firebase_cookie(firebase_url) with open(image_path, "rb") as f: img_bytes = f.read() hdr = {"User-Agent": "Mozilla/5.0", "Referer": "https://lens.google.com/"} with httpx.Client(cookies=ck, headers=hdr, follow_redirects=False, timeout=60) as c: r = c.post( "https://lens.google.com/v3/upload", files={"encoded_image": ("file.jpg", img_bytes, "image/jpeg")}, ) if r.status_code not in (302, 303): raise Exception(f"Upload failed: {r.status_code}\n{r.text}") redirect = r.headers["location"] u = to_translated(redirect, lang=lang) with 
httpx.Client(cookies=ck, headers=hdr, timeout=60) as c: j = c.get(u).text data = json.loads(j[5:] if j.startswith(")]}'") else j) return data def _get_firebase_cookie(firebase_url: str): u = (firebase_url or '').strip() now = time.time() cache = _FIREBASE_COOKIE_CACHE if cache.get('data') and cache.get('url') == u and (now - float(cache.get('ts') or 0)) < float(FIREBASE_COOKIE_TTL_SEC): return cache.get('data') r = httpx.get(u, timeout=30) ck = r.json() cache['ts'] = now cache['url'] = u cache['data'] = ck return ck def warmup(lang: str = "th") -> dict: l = _normalize_lang(lang) cookie_ok = False try: _get_firebase_cookie(FIREBASE_URL) cookie_ok = True except Exception: pass thai_font = FONT_THAI_PATH latin_font = FONT_LATIN_PATH if l == "ja": latin_font = FONT_JA_PATH elif l in ("zh", "zh-hans", "zh_cn", "zh-cn", "zh_hans"): latin_font = FONT_ZH_SC_PATH elif l in ("zh-hant", "zh_tw", "zh-tw", "zh_hant"): latin_font = FONT_ZH_TC_PATH if FONT_DOWNLOD: thai_font = ensure_font(thai_font, FONT_THAI_URLS) if l == "ja": latin_font = ensure_font(latin_font, FONT_JA_URLS) elif l in ("zh", "zh-hans", "zh_cn", "zh-cn", "zh_hans"): latin_font = ensure_font(latin_font, FONT_ZH_SC_URLS) elif l in ("zh-hant", "zh_tw", "zh-tw", "zh_hant"): latin_font = ensure_font(latin_font, FONT_ZH_TC_URLS) else: latin_font = ensure_font(latin_font, FONT_LATIN_URLS) _get_font_pair(thai_font or "", latin_font or "", 22) _get_font_pair(thai_font or "", latin_font or "", 28) return {"ok": True, "lang": l, "thai_font": thai_font or "", "latin_font": latin_font or "", "cookie_ok": cookie_ok} def main(): data = get_lens_data_from_image(IMAGE_PATH, FIREBASE_URL, LANG) img = Image.open(IMAGE_PATH).convert("RGB") W, H = img.size thai_font = FONT_THAI_PATH latin_font = FONT_LATIN_PATH lang = _normalize_lang(LANG) if lang == "ja": latin_font = FONT_JA_PATH elif lang in ("zh", "zh-hans", "zh_cn", "zh-cn", "zh_hans"): latin_font = FONT_ZH_SC_PATH elif lang in ("zh-hant", "zh_tw", "zh-tw", "zh_hant"): 
latin_font = FONT_ZH_TC_PATH if FONT_DOWNLOD: thai_font = ensure_font(thai_font, FONT_THAI_URLS) if lang == "ja": latin_font = ensure_font(latin_font, FONT_JA_URLS) elif lang in ("zh", "zh-hans", "zh_cn", "zh-cn", "zh_hans"): latin_font = ensure_font(latin_font, FONT_ZH_SC_URLS) elif lang in ("zh-hant", "zh_tw", "zh-tw", "zh_hant"): latin_font = ensure_font(latin_font, FONT_ZH_TC_URLS) else: latin_font = ensure_font(latin_font, FONT_LATIN_URLS) image_url = data.get("imageUrl") if isinstance(data, dict) else None image_datauri = "" if DECODE_IMAGEURL_TO_DATAURI and image_url: image_datauri = decode_imageurl_to_datauri(image_url) out = { "imageUrl": image_url, "imageDataUri": image_datauri, "originalContentLanguage": data.get("originalContentLanguage"), "originalTextFull": data.get("originalTextFull"), "translatedTextFull": data.get("translatedTextFull"), "AiTextFull": "", "originalParagraphs": data.get("originalParagraphs") or [], "translatedParagraphs": data.get("translatedParagraphs") or [], "original": {}, "translated": {}, "Ai": {}, } original_span_tokens = None original_tree = None translated_tree = None def _base_img_for_overlay() -> Image.Image: if not (ERASE_OLD_TEXT_WITH_ORIGINAL_BOXES and original_span_tokens): return img return erase_text_with_boxes( img, original_span_tokens, pad_px=ERASE_PADDING_PX, sample_margin_px=ERASE_SAMPLE_MARGIN_PX, ) if DO_ORIGINAL: tree, _ = decode_tree( data.get("originalParagraphs") or [], data.get("originalTextFull") or "", "original", W, H, want_raw=False, ) original_tree = tree original_span_tokens = flatten_tree_spans(tree) out["original"] = {"originalTree": tree} if DO_ORIGINAL_HTML: out["original"]["originalhtml"] = tokens_to_html( original_span_tokens) if DRAW_OVERLAY_ORIGINAL: base_img = _base_img_for_overlay() draw_overlay( base_img, original_span_tokens, OVERLAY_ORIGINAL_PATH, thai_font or "", latin_font or "", level_outlines=build_level_outlines(original_tree, W, H), ) if DO_AI and original_tree is None: tree0, _ = 
decode_tree( data.get("originalParagraphs") or [], data.get("originalTextFull") or "", "original", W, H, want_raw=False, ) original_tree = tree0 if DO_TRANSLATED: tree, _ = decode_tree( data.get("translatedParagraphs") or [], data.get("translatedTextFull") or "", "translated", W, H, want_raw=False, ) translated_tree = tree out["translated"] = {"translatedTree": tree} translated_span_tokens = flatten_tree_spans(tree) if DO_TRANSLATED_HTML: out["translated"]["translatedhtml"] = tokens_to_html( translated_span_tokens) if DRAW_OVERLAY_TRANSLATED: base_img = _base_img_for_overlay() draw_overlay( base_img, translated_span_tokens, OVERLAY_TRANSLATED_PATH, thai_font or "", latin_font or "", level_outlines=build_level_outlines(tree, W, H), font_scale=TRANSLATED_OVERLAY_FONT_SCALE, fit_to_box=TRANSLATED_OVERLAY_FIT_TO_BOX, ) ai = None if DO_AI: src_text = out.get("originalTextFull") or "" if not src_text: src_text = data.get("originalTextFull") or "" tree_for_boxes = translated_tree or original_tree if tree_for_boxes is None: tree_for_boxes, _ = decode_tree( data.get("originalParagraphs") or [], data.get("originalTextFull") or "", "original", W, H, want_raw=False, ) original_tree = tree_for_boxes ai = ai_translate_original_text( src_text, LANG, ) template_tree = translated_tree patched = patch({"Ai": {"aiTextFull": str(ai.get( "aiTextFull") or ""), "aiTree": template_tree}}, W, H, thai_font, latin_font) ai_tree = (patched.get("Ai") or {}).get("aiTree") or {} ai["aiTree"] = ai_tree shared_para_sizes = _compute_shared_para_sizes( [original_tree or {}, translated_tree or {}, ai_tree or {}], thai_font or "", latin_font or "", W, H, ) _apply_para_font_size(original_tree or {}, shared_para_sizes) _apply_para_font_size(translated_tree or {}, shared_para_sizes) _apply_para_font_size(ai_tree or {}, shared_para_sizes) _rebuild_ai_spans_after_font_resize( ai_tree or {}, W, H, thai_font or "", latin_font or "") out["AiTextFull"] = str(ai.get("aiTextFull") or "") out["Ai"] = { 
"aiTextFull": str(ai.get("aiTextFull") or ""), "aiTree": ai_tree, } if DO_AI_HTML: if AI_OVERLAY_FIT_TO_BOX: fit_tree_font_sizes_for_tp_html( ai_tree or {}, thai_font or "", latin_font or "", W, H) out["Ai"]["aihtml"] = ai_tree_to_tp_html(ai_tree, W, H) out["Ai"]["aihtmlCss"] = tp_overlay_css() out["Ai"]["aihtmlMeta"] = { "baseW": int(W), "baseH": int(H), "format": "tp", } if DO_AI_OVERLAY and translated_tree is not None: base_img = _base_img_for_overlay() tokens_for_draw = flatten_tree_spans(ai_tree) draw_overlay( base_img, tokens_for_draw, AI_PATH_OVERLAY, thai_font or "", latin_font or "", level_outlines=build_level_outlines(ai_tree, W, H), font_scale=AI_OVERLAY_FONT_SCALE, fit_to_box=AI_OVERLAY_FIT_TO_BOX, ) if HTML_INCLUDE_CSS and (DO_ORIGINAL_HTML or DO_TRANSLATED_HTML or DO_AI_HTML): out["htmlCss"] = overlay_css() out["htmlMeta"] = { "containerClass": "RTMDre", "tokenClass": "IwqbBf", "sourceWidth": int(W), "sourceHeight": int(H), } if "htmlMeta" not in out: out["htmlMeta"] = { "containerClass": "RTMDre", "tokenClass": "IwqbBf", "sourceWidth": int(W), "sourceHeight": int(H), } if WRITE_OUT_JSON: with open(OUT_JSON, "w", encoding="utf-8") as f: json.dump(out, f, ensure_ascii=False, indent=2) if __name__ == "__main__": main()