# Source: backend/lens_core.py (commit ab84582)
import base64, copy, hashlib, json, math, os, re, struct, time, unicodedata, cv2, httpx, numpy as np, budoux
from urllib.parse import parse_qs, urlencode, urlparse
from PIL import Image, ImageChops, ImageDraw, ImageFilter, ImageFont
# --- Input / output --------------------------------------------------------
IMAGE_PATH = "33.jpg"
OUT_JSON = "output.json"
LANG = "th"
AI_API_KEY = os.getenv("AI_API_KEY", "").strip()
FIREBASE_URL = "https://cookie-6e1cd-default-rtdb.asia-southeast1.firebasedatabase.app/lens/cookie.json"
WRITE_OUT_JSON = True
DECODE_IMAGEURL_TO_DATAURI = True
# --- Which render passes to produce ----------------------------------------
DO_ORIGINAL = True
DO_TRANSLATED = True
DO_ORIGINAL_HTML = True
DO_TRANSLATED_HTML = True
DO_AI_HTML = True
HTML_INCLUDE_CSS = True
# --- Overlay rendering -----------------------------------------------------
DRAW_OVERLAY_ORIGINAL = False
DRAW_OVERLAY_TRANSLATED = False
OVERLAY_ORIGINAL_PATH = "overlay_original.png"
OVERLAY_TRANSLATED_PATH = "overlay_translated.png"
TRANSLATED_OVERLAY_FONT_SCALE = 1.0
TRANSLATED_OVERLAY_FIT_TO_BOX = True
AI_OVERLAY_FONT_SCALE = 1.5
AI_OVERLAY_FIT_TO_BOX = True
# --- AI translation pass ---------------------------------------------------
DO_AI = True
DO_AI_JSON = False
DO_AI_OVERLAY = False
AI_CACHE = False
AI_CACHE_PATH = "ai_cache.json"
AI_PATH_OVERLAY = "overlay_ai.png"
AI_PROVIDER = "auto"
AI_MODEL = "auto"
AI_BASE_URL = "auto"
AI_TEMPERATURE = 0.2
AI_MAX_TOKENS = 1200
AI_TIMEOUT_SEC = 120
# --- Debug box drawing -----------------------------------------------------
DRAW_BOX_OUTLINE = True
AUTO_TEXT_COLOR = True
TEXT_COLOR = (0, 0, 0, 255)
TEXT_COLOR_DARK = (0, 0, 0, 255)
TEXT_COLOR_LIGHT = (255, 255, 255, 255)
BOX_OUTLINE = (0, 255, 0, 255)
BOX_OUTLINE_WIDTH = 2
DRAW_OUTLINE_PARA = False
DRAW_OUTLINE_ITEM = False
DRAW_OUTLINE_SPAN = False
PARA_OUTLINE = (0, 0, 255, 255)
ITEM_OUTLINE = (255, 0, 0, 255)
SPAN_OUTLINE = BOX_OUTLINE
PARA_OUTLINE_WIDTH = 3
ITEM_OUTLINE_WIDTH = 2
SPAN_OUTLINE_WIDTH = BOX_OUTLINE_WIDTH
# --- Erasing the original text before re-drawing ---------------------------
ERASE_OLD_TEXT_WITH_ORIGINAL_BOXES = True
ERASE_PADDING_PX = 2
ERASE_SAMPLE_MARGIN_PX = 6
ERASE_MODE = "inpaint"
ERASE_MOSAIC_BLOCK_PX = 10
ERASE_CLONE_GAP_PX = 4
ERASE_CLONE_BORDER_PX = 6
ERASE_CLONE_FEATHER_PX = 3
ERASE_BLEND_GAP_PX = 3
ERASE_BLEND_FEATHER_PX = 4
INPAINT_RADIUS = 3
INPAINT_METHOD = "telea"
INPAINT_DILATE_PX = 1
BG_SAMPLE_BORDER_PX = 3
# --- Baseline placement ----------------------------------------------------
BASELINE_SHIFT = True
BASELINE_SHIFT_FACTOR = 0.40
# --- Fonts (downloaded on demand when missing) -----------------------------
# NOTE(review): "FONT_DOWNLOD" looks like a typo for FONT_DOWNLOAD; kept
# as-is since other code may reference this exact name - confirm before
# renaming.
FONT_DOWNLOD = True
FONT_THAI_PATH = "NotoSansThai-Regular.ttf"
FONT_LATIN_PATH = "NotoSans-Regular.ttf"
FONT_THAI_URLS = [
    "https://github.com/google/fonts/raw/main/ofl/notosansthai/NotoSansThai-Regular.ttf",
    "https://github.com/google/fonts/raw/main/ofl/notosansthaiui/NotoSansThaiUI-Regular.ttf",
]
FONT_LATIN_URLS = [
    "https://github.com/google/fonts/raw/main/ofl/notosans/NotoSans-Regular.ttf",
]
FONT_JA_PATH = "NotoSansCJKjp-Regular.otf"
FONT_JA_URLS = [
    "https://raw.githubusercontent.com/googlefonts/noto-cjk/main/Sans/OTF/Japanese/NotoSansCJKjp-Regular.otf",
    "https://github.com/googlefonts/noto-cjk/raw/main/Sans/OTF/Japanese/NotoSansCJKjp-Regular.otf",
]
FONT_ZH_SC_PATH = "NotoSansCJKsc-Regular.otf"
FONT_ZH_SC_URLS = [
    "https://raw.githubusercontent.com/googlefonts/noto-cjk/main/Sans/OTF/SimplifiedChinese/NotoSansCJKsc-Regular.otf",
    "https://github.com/googlefonts/noto-cjk/raw/main/Sans/OTF/SimplifiedChinese/NotoSansCJKsc-Regular.otf",
]
FONT_ZH_TC_PATH = "NotoSansCJKtc-Regular.otf"
FONT_ZH_TC_URLS = [
    "https://raw.githubusercontent.com/googlefonts/noto-cjk/main/Sans/OTF/TraditionalChinese/NotoSansCJKtc-Regular.otf",
    "https://github.com/googlefonts/noto-cjk/raw/main/Sans/OTF/TraditionalChinese/NotoSansCJKtc-Regular.otf",
]
# --- UI language choices ---------------------------------------------------
UI_LANGUAGES = [
    {"code": "en", "name": "English"},
    {"code": "th", "name": "Thai"},
    {"code": "ja", "name": "Japanese"},
    {"code": "ko", "name": "Korean"},
    {"code": "zh-CN", "name": "Chinese (Simplified)"},
    {"code": "vi", "name": "Vietnamese"},
    {"code": "es", "name": "Spanish"},
    {"code": "de", "name": "German"},
    {"code": "fr", "name": "French"},
]
# --- Per-provider defaults (model and OpenAI-compatible base_url) ----------
AI_PROVIDER_DEFAULTS = {
    "gemini": {
        "model": "gemini-2.5-flash",
        "base_url": "",
    },
    "openai": {
        "model": "gpt-4o-mini",
        "base_url": "https://api.openai.com/v1",
    },
    "openrouter": {
        "model": "openai/o4-mini",
        "base_url": "https://openrouter.ai/api/v1",
    },
    "huggingface": {
        "model": "google/gemma-2-2b-it",
        "base_url": "https://router.huggingface.co/v1",
    },
    "featherless": {
        "model": "Qwen/Qwen2.5-7B-Instruct",
        "base_url": "https://api.featherless.ai/v1",
    },
    "groq": {
        "model": "openai/gpt-oss-20b",
        "base_url": "https://api.groq.com/openai/v1",
    },
    "together": {
        "model": "openai/gpt-oss-20b",
        "base_url": "https://api.together.xyz/v1",
    },
    "deepseek": {
        "model": "deepseek-chat",
        "base_url": "https://api.deepseek.com/v1",
    },
    "anthropic": {
        "model": "claude-sonnet-4-20250514",
        "base_url": "https://api.anthropic.com",
    },
}
# Short / alternate provider names mapped to the canonical keys above.
AI_PROVIDER_ALIASES = {
    "hf": "huggingface",
    "huggingface_router": "huggingface",
    "hf_router": "huggingface",
    "openai_compat": "openai",
    "openai-compatible": "openai",
    "gemini3": "gemini",
    "gemini-3": "gemini",
    "google": "gemini",
}
# Short model names mapped to full model ids, per provider.
AI_MODEL_ALIASES = {
    "gemini": {
        "flash-lite": "gemini-2.5-flash-lite",
        "flash": "gemini-2.5-flash",
        "pro": "gemini-2.5-pro",
        "3-flash": "gemini-3-flash-preview",
        "3-pro": "gemini-3-pro-preview",
        "3-pro-image": "gemini-3-pro-image-preview",
        "flash-image": "gemini-2.5-flash-image",
    }
}
# --- Prompt templates ------------------------------------------------------
AI_PROMPT_SYSTEM_BASE = (
    "You are a professional manga translator and dialogue localizer.\n"
    "Rewrite each paragraph as natural dialogue in the target language while preserving meaning, tone, intent, and character voice.\n"
    "Keep lines concise for speech bubbles. Do not add new information. Do not omit meaning. Do not explain.\n"
    "Preserve emphasis (… ! ?). Avoid excessive punctuation.\n"
    "If the input is already in the target language, improve it (dialogue polish) without changing meaning."
)
AI_LANG_STYLE = {
    "th": (
        # NOTE(review): this entry uses "\\n" (a literal backslash-n in the
        # prompt text) while "en"/"ja" use real newlines - confirm intended.
        "Target language: Thai\\n"
        "Write Thai manga dialogue that reads like a high-quality Thai scanlation: natural, concise, and in-character.\\n"
        "Keep lines short for speech bubbles; avoid stiff, literal phrasing.\\n"
        "Default: omit pronouns and omit gendered polite sentence-final particles unless the source line clearly requires them.\\n"
        "Never use the word 'ฉัน'. Prefer omitting the subject.\\n"
        "Never use a male-coded second-person pronoun. When addressing someone by name, do not add a second-person pronoun after the name; prefer NAME + clause.\\n"
        "If a second-person reference is unavoidable, use a neutral/casual form appropriate to tone, but keep it gender-neutral and consistent with the line.\\n"
        "Use particles/interjections sparingly to match tone; do not overuse.\\n"
        "Keep names/terms consistent; transliterate when appropriate.\\n"
        "Output only the translated text."
    ),
    "en": (
        "Target language: English\n"
        "Write natural English manga dialogue: concise, conversational, with contractions where natural.\n"
        "Localize tone and character voice; keep emotion and emphasis.\n"
        "Keep proper nouns consistent; do not over-explain."
    ),
    "ja": (
        "Target language: Japanese\n"
        "Write natural Japanese manga dialogue: concise, spoken.\n"
        "Choose 丁寧語/タメ口 to match context; keep emotion and emphasis.\n"
        "Keep proper nouns consistent; keep SFX natural in Japanese."
    ),
    "default": (
        "Write natural manga dialogue in the target language: concise, spoken, faithful to meaning and tone."
    ),
}
# Response-format contract appended to the system prompt (JSON mode).
AI_PROMPT_RESPONSE_CONTRACT_JSON = (
    "Return ONLY valid JSON (no markdown, no extra text).\n"
    "Output JSON MUST have exactly one key: \"aiTextFull\".\n"
    "\"aiTextFull\" MUST be a single JSON string WITHOUT raw newlines.\n"
    "Use literal \\n and \\n\\n to represent line breaks.\n"
    "You MUST preserve paragraph boundaries and order. Paragraphs are separated by a blank line (\\n\\n).\n"
    "Do NOT add extra paragraphs. Do NOT remove paragraphs.\n"
    "Never include code fences or XML/HTML tags.\n"
    "All string values MUST NOT contain raw newlines."
)
# Response-format contract appended to the system prompt (plain-text mode).
AI_PROMPT_RESPONSE_CONTRACT_TEXT = (
    "Return ONLY the translated text (no JSON, no markdown, no commentary).\n"
    "You MUST preserve paragraph boundaries and order. Paragraphs are separated by a blank line.\n"
    "Use actual newlines for line breaks.\n"
    "Do NOT add extra paragraphs. Do NOT remove paragraphs.\n"
    "Never include code fences or XML/HTML tags."
)
AI_PROMPT_DATA_TEMPLATE = (
    "Input JSON:\n{input_json}\n\n"
    "Output JSON schema (MUST match exactly):\n{output_schema}"
)
AI_PROMPT_DATA_TEMPLATE_TEXT = (
    "Input JSON:\n{input_json}\n\n"
    "Return the translation as plain text only."
)
# --- Runtime caches / misc --------------------------------------------------
FIREBASE_COOKIE_TTL_SEC = int(os.getenv("FIREBASE_COOKIE_TTL_SEC", "900"))
# Cached Firebase cookie payload: fetch timestamp, source url, and data.
_FIREBASE_COOKIE_CACHE = {"ts": 0.0, "url": "", "data": None}
_FONT_RESOLVE_CACHE = {}
# Router model listings keyed by hashed api-key|base_url (1 hour TTL).
_HF_MODELS_CACHE = {}
_FONT_PAIR_CACHE = {}
_TP_HTML_EPS_PX = 0.0
# Zero-width space: used as a word-boundary placeholder in token streams.
ZWSP = "\u200b"
def _active_ai_contract() -> str:
    """Response-format contract matching the current output mode (JSON vs text)."""
    if DO_AI_JSON:
        return AI_PROMPT_RESPONSE_CONTRACT_JSON
    return AI_PROMPT_RESPONSE_CONTRACT_TEXT
def _active_ai_data_template() -> str:
    """User-prompt data template matching the current output mode (JSON vs text)."""
    if DO_AI_JSON:
        return AI_PROMPT_DATA_TEMPLATE
    return AI_PROMPT_DATA_TEMPLATE_TEXT
def _canonical_provider(provider: str) -> str:
    """Lower-case *provider* and resolve known aliases to canonical names."""
    name = (provider or "").strip().lower()
    return AI_PROVIDER_ALIASES.get(name, name)
def _resolve_model(provider: str, model: str) -> str:
    """Resolve *model* for *provider*.

    Empty / "auto" expands to the provider's default (openai default as a
    last resort); short alias names are mapped to full model ids.
    """
    requested = (model or "").strip()
    if not requested or requested.lower() == "auto":
        defaults = AI_PROVIDER_DEFAULTS.get(provider) or {}
        resolved = (defaults.get("model") or "").strip()
        return resolved or AI_PROVIDER_DEFAULTS["openai"]["model"]
    alias_map = AI_MODEL_ALIASES.get(provider) or {}
    return alias_map.get(requested.lower()) or requested
def _normalize_lang(lang: str) -> str:
t = (lang or "").strip().lower()
if t in ("jp", "jpn", "japanese"):
return "ja"
if t in ("thai",):
return "th"
if t in ("eng", "english"):
return "en"
if t.startswith("zh"):
return t
if len(t) >= 2:
return t[:2]
return t
def _sha1(s: str) -> str:
return hashlib.sha1(s.encode("utf-8")).hexdigest()
def _hf_router_available_models(api_key: str, base_url: str) -> list[str]:
    """Fetch the model ids available on an OpenAI-compatible router.

    Results are memoized in _HF_MODELS_CACHE for one hour, keyed by a hash
    of the api key and base url. Returns [] on any failure (best-effort).
    """
    if not api_key or not base_url:
        return []
    # Hash the api key so the raw secret is not used as a dict key.
    key = _sha1(f"{_sha1(api_key)}|{base_url}")
    now = time.time()
    cached = _HF_MODELS_CACHE.get(key) or {}
    # Reuse a cached listing for up to one hour.
    if cached.get("ts") and now - float(cached["ts"]) < 3600 and isinstance(cached.get("models"), list):
        return cached["models"]
    url = base_url.rstrip("/") + "/models"
    headers = {"Authorization": f"Bearer {api_key}"}
    try:
        with httpx.Client(timeout=float(AI_TIMEOUT_SEC)) as client:
            r = client.get(url, headers=headers)
            r.raise_for_status()
            data = r.json()
    except Exception:
        # Listing models is advisory only; swallow network/parse errors.
        return []
    models = []
    for m in (data.get("data") or []):
        mid = (m.get("id") if isinstance(m, dict) else None)
        if isinstance(mid, str) and mid.strip():
            models.append(mid.strip())
    _HF_MODELS_CACHE[key] = {"ts": now, "models": models}
    return models
def _pick_hf_fallback_model(models: list[str]) -> str:
if not models:
return ""
priority_substrings = (
"gemma-3",
"gemma-2",
"llama-3.1",
"llama-3",
"mistral",
"qwen",
"glm",
)
lowered = [(m, m.lower()) for m in models]
for sub in priority_substrings:
for m, ml in lowered:
if sub in ml and ("instruct" in ml or ml.endswith("-it") or ":" in ml):
return m
for m, ml in lowered:
if "instruct" in ml or ml.endswith("-it") or ":" in ml:
return m
return models[0]
def _load_ai_cache(path: str):
if not path:
return {}
if not os.path.exists(path):
return {}
try:
with open(path, "r", encoding="utf-8") as f:
d = json.load(f)
return d if isinstance(d, dict) else {}
except Exception:
return {}
def _save_ai_cache(path: str, cache: dict):
if not path:
return
tmp = path + ".tmp"
with open(tmp, "w", encoding="utf-8") as f:
json.dump(cache, f, ensure_ascii=False)
os.replace(tmp, path)
def _build_ai_prompt_packet(target_lang: str, original_text_full: str):
    """Assemble (system_text, user_parts) for the translation request.

    The system prompt is the base instructions + the per-language style
    guide + the active response contract, joined by blank lines; the user
    part carries the input JSON (and, in JSON mode, the output schema).
    """
    lang = _normalize_lang(target_lang)
    input_json = json.dumps(
        {"target_lang": lang, "originalTextFull": original_text_full}, ensure_ascii=False)
    output_schema = json.dumps({"aiTextFull": "..."}, ensure_ascii=False)
    data_template = _active_ai_data_template()
    if DO_AI_JSON:
        # JSON mode also pins the exact output schema in the user prompt.
        data_text = data_template.format(
            input_json=input_json, output_schema=output_schema)
    else:
        data_text = data_template.format(input_json=input_json)
    style = AI_LANG_STYLE.get(lang) or AI_LANG_STYLE.get("default") or ""
    system_parts = [AI_PROMPT_SYSTEM_BASE]
    if style:
        system_parts.append(style)
    system_parts.append(_active_ai_contract())
    system_text = "\n\n".join([p for p in system_parts if p])
    user_parts = []
    user_parts.append(data_text)
    return system_text, user_parts
def _gemini_generate_json(api_key: str, model: str, system_text: str, user_parts: list[str]):
    """Call the Gemini generateContent API and return the reply text.

    Raises Exception on HTTP errors or when the response carries no usable
    text (no candidates / empty parts).
    """
    url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}"
    # Drop blank user parts; each remaining part becomes a text part.
    parts = [{"text": p} for p in user_parts if (p or "").strip()]
    payload = {
        "systemInstruction": {"parts": [{"text": system_text}]},
        "contents": [{"role": "user", "parts": parts}],
        "generationConfig": {
            "temperature": float(AI_TEMPERATURE),
            "maxOutputTokens": int(AI_MAX_TOKENS),
            "responseMimeType": "text/plain",
        },
    }
    with httpx.Client(timeout=float(AI_TIMEOUT_SEC)) as client:
        r = client.post(url, json=payload)
        try:
            r.raise_for_status()
        except httpx.HTTPStatusError as e:
            # Surface the response body for easier debugging of API errors.
            raise Exception(f"Gemini HTTP {r.status_code}: {r.text}") from e
        data = r.json()
    candidates = data.get("candidates") or []
    if not candidates:
        raise Exception("Gemini returned no candidates")
    c = (candidates[0].get("content") or {})
    out_parts = c.get("parts") or []
    if not out_parts:
        raise Exception("Gemini returned empty content parts")
    # Concatenate all text parts of the first candidate.
    txt = "".join([str(p.get("text") or "") for p in out_parts]).strip()
    if not txt:
        raise Exception("Gemini returned empty text")
    return txt
def _read_first_env(*names: str) -> str:
for n in names:
v = (os.environ.get(n) or "").strip()
if v:
return v
return ""
def _detect_ai_provider_from_key(api_key: str) -> str:
k = (api_key or "").strip()
if k.startswith("AIza"):
return "gemini"
if k.startswith("hf_"):
return "huggingface"
if k.startswith("sk-or-"):
return "openrouter"
if k.startswith("sk-ant-"):
return "anthropic"
if k.startswith("gsk_"):
return "groq"
return "openai"
def _resolve_ai_config():
    """Resolve (provider, api_key, model, base_url) from config + environment.

    "auto" values are filled in: the api key comes from the first non-empty
    env var in the list below, the provider is guessed from the key prefix,
    the model from provider defaults/aliases, and the base_url from the
    provider preset. Every non-Gemini/Anthropic provider is assumed
    OpenAI-compatible and always gets a base_url.
    """
    api_key = (AI_API_KEY or _read_first_env(
        "AI_API_KEY",
        "OPENAI_API_KEY",
        "HF_TOKEN",
        "HUGGINGFACEHUB_API_TOKEN",
        "GEMINI_API_KEY",
        "OPENROUTER_API_KEY",
        "FEATHERLESS_API_KEY",
        "GROQ_API_KEY",
        "TOGETHER_API_KEY",
        "DEEPSEEK_API_KEY",
        "ANTHROPIC_API_KEY",
    )).strip()
    provider = _canonical_provider((AI_PROVIDER or "auto"))
    model = (AI_MODEL or "auto").strip()
    base_url = (AI_BASE_URL or "auto").strip()
    if provider in ("", "auto"):
        # Auto-detect the provider from the api-key prefix.
        provider = _canonical_provider(_detect_ai_provider_from_key(api_key))
    preset = AI_PROVIDER_DEFAULTS.get(provider) or {}
    model = _resolve_model(provider, model)
    if base_url in ("", "auto"):
        base_url = (preset.get("base_url") or "").strip()
    # Gemini/Anthropic use dedicated endpoints; everyone else talks the
    # OpenAI chat-completions protocol and must have a base_url.
    if provider not in ("gemini", "anthropic"):
        if not base_url:
            base_url = (AI_PROVIDER_DEFAULTS.get("openai") or {}).get(
                "base_url") or "https://api.openai.com/v1"
    return provider, api_key, model, base_url
def _openai_compat_generate_json(api_key: str, base_url: str, model: str, system_text: str, user_parts: list[str]):
    """Call an OpenAI-compatible /chat/completions endpoint.

    Returns (reply_text, used_model). On a HuggingFace-router HTTP 400
    "model_not_supported" error - and only when the configured model was
    "auto"/empty or the huggingface default - retries once with a fallback
    model discovered via the router's /models listing.
    """
    url = (base_url.rstrip("/") + "/chat/completions")
    messages = [{"role": "system", "content": system_text}]
    for p in user_parts:
        if (p or "").strip():
            messages.append({"role": "user", "content": p})
    payload = {
        "model": model,
        "messages": messages,
        "temperature": float(AI_TEMPERATURE),
        "max_tokens": int(AI_MAX_TOKENS),
    }
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    used_model = model
    with httpx.Client(timeout=float(AI_TIMEOUT_SEC)) as client:
        r = client.post(url, json=payload, headers=headers)
        try:
            r.raise_for_status()
            data = r.json()
        except httpx.HTTPStatusError as e:
            # Fallback only applies to the HF router when the caller did
            # not explicitly pin a model.
            if (
                r.status_code == 400
                and "router.huggingface.co" in (base_url or "")
                and ((AI_MODEL or "").strip().lower() in ("", "auto") or model == (AI_PROVIDER_DEFAULTS.get("huggingface") or {}).get("model"))
            ):
                try:
                    err = r.json().get("error") or {}
                except Exception:
                    err = {}
                if (err.get("code") or "") == "model_not_supported":
                    models = _hf_router_available_models(api_key, base_url)
                    fallback = _pick_hf_fallback_model(models)
                    if fallback and fallback != model:
                        # Retry the identical request with the fallback model.
                        payload["model"] = fallback
                        used_model = fallback
                        r2 = client.post(url, json=payload, headers=headers)
                        try:
                            r2.raise_for_status()
                        except httpx.HTTPStatusError as e2:
                            raise Exception(
                                f"AI HTTP {r2.status_code}: {r2.text}") from e2
                        data = r2.json()
                    else:
                        # No usable fallback: report with a hint of what exists.
                        preview = ", ".join(models[:8])
                        hint = f"\nAvailable models (first 8): {preview}" if preview else ""
                        raise Exception(
                            f"AI HTTP {r.status_code}: {r.text}{hint}") from e
                else:
                    raise Exception(
                        f"AI HTTP {r.status_code}: {r.text}") from e
            else:
                raise Exception(f"AI HTTP {r.status_code}: {r.text}") from e
    choices = data.get("choices") or []
    if not choices:
        raise Exception("AI returned no choices")
    msg = (choices[0].get("message") or {})
    txt = (msg.get("content") or "").strip()
    if not txt:
        raise Exception("AI returned empty text")
    return txt, used_model
def _anthropic_generate_json(api_key: str, model: str, system_text: str, user_parts: list[str]):
    """Call the Anthropic Messages API and return the reply text.

    Raises Exception on HTTP errors or when the reply contains no text
    blocks.
    """
    url = "https://api.anthropic.com/v1/messages"
    messages = []
    for p in user_parts:
        if (p or "").strip():
            messages.append({"role": "user", "content": p})
    payload = {
        "model": model,
        "max_tokens": int(AI_MAX_TOKENS),
        "temperature": float(AI_TEMPERATURE),
        "system": system_text,
        "messages": messages,
    }
    headers = {
        "x-api-key": api_key,
        "content-type": "application/json",
        # Required by the Messages API; requests without it are rejected.
        "anthropic-version": "2023-06-01",
    }
    with httpx.Client(timeout=float(AI_TIMEOUT_SEC)) as client:
        r = client.post(url, json=payload, headers=headers)
        try:
            r.raise_for_status()
        except httpx.HTTPStatusError as e:
            raise Exception(f"Anthropic HTTP {r.status_code}: {r.text}") from e
        data = r.json()
    content = data.get("content") or []
    # Join every text-type block of the reply.
    txt = "".join([(c.get("text") or "") for c in content if isinstance(
        c, dict) and c.get("type") == "text"]).strip()
    if not txt:
        raise Exception("Anthropic returned empty text")
    return txt
def _strip_wrappers(s: str) -> str:
t = (s or "").strip()
if not t:
return ""
t = t.replace("\r\n", "\n").replace("\r", "\n")
if "```" in t:
t = re.sub(r"```[a-zA-Z0-9_-]*", "", t)
t = t.replace("```", "")
t = re.sub(r"</?AiTextFull>", "", t, flags=re.IGNORECASE).strip()
return t
def _sanitize_json_like_text(raw: str) -> str:
    """Repair common model glitches so the text can be parsed as JSON.

    Inside string literals: raw newlines/tabs become \\n / \\t escapes and
    runs of a single repeated character are clamped to at most three
    occurrences. Text outside string literals passes through unchanged.
    """
    t = _strip_wrappers(raw)
    if not t:
        return ""
    out = []
    in_str = False   # currently inside a JSON string literal
    esc = False      # previous in-string char was a backslash
    run_ch = ""      # character of the pending repeated run (in-string only)
    run_len = 0
    def _flush_run():
        # Emit the pending repeated-char run, clamped to 3 characters.
        nonlocal run_ch, run_len
        if run_len:
            out.append(run_ch * min(run_len, 3))
        run_ch = ""
        run_len = 0
    for ch in t:
        if in_str:
            if esc:
                # Char after a backslash is emitted verbatim.
                _flush_run()
                out.append(ch)
                esc = False
                continue
            if ch == "\\":
                _flush_run()
                out.append(ch)
                esc = True
                continue
            if ch == '"':
                # Closing quote ends the string literal.
                _flush_run()
                out.append(ch)
                in_str = False
                continue
            if ch == "\n":
                # Raw newline inside a string -> escaped form.
                _flush_run()
                out.append("\\n")
                continue
            if ch == "\t":
                _flush_run()
                out.append("\\t")
                continue
            if ch == run_ch:
                # Extend the current repeated run.
                run_len += 1
                continue
            _flush_run()
            run_ch = ch
            run_len = 1
            continue
        _flush_run()
        if ch == '"':
            out.append(ch)
            in_str = True
            esc = False
            continue
        out.append(ch)
    _flush_run()
    return "".join(out)
def _extract_first_json(raw: str):
    """Parse and return the first balanced top-level JSON object in *raw*.

    The text is sanitized first (_sanitize_json_like_text); string-aware
    brace matching then locates the object span. Raises Exception when no
    object is found (json.loads errors propagate as-is).
    """
    t = _sanitize_json_like_text(raw)
    if not t:
        raise Exception("AI returned empty text")
    start = t.find("{")
    if start < 0:
        raise Exception("AI returned no JSON object")
    in_str = False
    esc = False
    depth = 0
    json_start = None
    for i in range(start, len(t)):
        ch = t[i]
        if in_str:
            # Skip string contents; honor backslash escapes.
            if esc:
                esc = False
            elif ch == "\\":
                esc = True
            elif ch == '"':
                in_str = False
            continue
        if ch == '"':
            in_str = True
            continue
        if ch == "{":
            if depth == 0:
                json_start = i
            depth += 1
            continue
        if ch == "}":
            if depth > 0:
                depth -= 1
            if depth == 0 and json_start is not None:
                # First balanced object found: parse and return it.
                cand = t[json_start: i + 1]
                return json.loads(cand)
    raise Exception("Failed to parse AI JSON")
def _parse_ai_textfull_only(raw: str) -> str:
    """Extract the "aiTextFull" string from an AI JSON reply.

    Falls back to "textFull"; raises when neither key exists or the reply
    is not a JSON object. Escaped "\\n" sequences become real newlines when
    the string carries no raw newlines.
    """
    obj = _extract_first_json(raw)
    if not isinstance(obj, dict):
        raise Exception("AI JSON is not an object")
    value = obj.get("aiTextFull")
    if value is None:
        value = obj.get("textFull")
    if value is None:
        raise Exception("AI JSON missing aiTextFull")
    text = str(value)
    if "\\n" in text and "\n" not in text:
        text = text.replace("\\n", "\n")
    return text.replace("\r\n", "\n").replace("\r", "\n").strip()
def _parse_ai_textfull_text_only(raw: str) -> str:
    """Parse a plain-text AI reply, tolerating accidental JSON output."""
    text = _strip_wrappers(raw)
    if not text:
        raise Exception("AI returned empty text")
    if text.lstrip().startswith("{"):
        # Model ignored the plain-text contract and sent JSON anyway.
        return _parse_ai_textfull_only(text)
    if "\\n" in text and "\n" not in text:
        text = text.replace("\\n", "\n")
    # Drop a leading "aiTextFull:"-style label if the model added one.
    return re.sub(r"^aiTextFull\s*[:=]\s*", "", text, flags=re.IGNORECASE).strip()
def _budoux_parser_for_lang(lang: str):
    """Return a BudouX phrase-break parser for *lang*, or None.

    Thai / Japanese / Chinese get the bundled default parsers; any other
    language falls back to a custom model loaded from $BUDOUX_MODEL_PATH
    (or None when unset).
    """
    lang = _normalize_lang(lang)
    if not budoux:
        return None
    if lang == "th":
        return budoux.load_default_thai_parser()
    if lang == "ja":
        return budoux.load_default_japanese_parser()
    if lang in ("zh", "zh-hans", "zh_cn", "zh-cn", "zh_hans"):
        return budoux.load_default_simplified_chinese_parser()
    if lang in ("zh-hant", "zh_tw", "zh-tw", "zh_hant"):
        return budoux.load_default_traditional_chinese_parser()
    model_path = os.environ.get("BUDOUX_MODEL_PATH")
    if not model_path:
        return None
    # Custom model: a BudouX model JSON on disk.
    with open(model_path, "r", encoding="utf-8") as f:
        model = json.load(f)
    return budoux.Parser(model)
def _ensure_box_fields(box: dict):
if not isinstance(box, dict):
return {}
b = copy.deepcopy(box)
if "rotation_deg" not in b:
b["rotation_deg"] = 0.0
if "rotation_deg_css" not in b:
b["rotation_deg_css"] = 0.0
if "center" not in b and all(k in b for k in ("left", "top", "width", "height")):
b["center"] = {"x": b["left"] + b["width"] /
2.0, "y": b["top"] + b["height"]/2.0}
if all(k in b for k in ("left", "top", "width", "height")):
if "left_pct" not in b:
b["left_pct"] = b["left"] * 100.0
if "top_pct" not in b:
b["top_pct"] = b["top"] * 100.0
if "width_pct" not in b:
b["width_pct"] = b["width"] * 100.0
if "height_pct" not in b:
b["height_pct"] = b["height"] * 100.0
return b
def _tokens_with_spaces(text: str, parser, lang: str):
t = (text or "")
if not t:
return []
out = []
parts = re.findall(r"\s+|\S+", t)
for part in parts:
if not part:
continue
if part.isspace():
out.append(("space", part))
continue
segs = parser.parse(part) if parser else [part]
for seg in segs:
if seg:
out.append(("word", seg))
return out
def _line_cap_px_for_item(item: dict, img_w: int, img_h: int) -> float:
    """Pixel budget for one text line: baseline length, else box width."""
    p1 = item.get("baseline_p1") or {}
    p2 = item.get("baseline_p2") or {}
    run = (float(p2.get("x") or 0.0) - float(p1.get("x") or 0.0)) * float(img_w)
    rise = (float(p2.get("y") or 0.0) - float(p1.get("y") or 0.0)) * float(img_h)
    baseline_len = float(math.hypot(run, rise))
    if baseline_len > 1e-6:
        return baseline_len
    # Degenerate baseline: fall back to the normalized box width in pixels.
    box = _ensure_box_fields(item.get("box") or {})
    return float(box.get("width") or 0.0) * float(img_w)
def _wrap_tokens_to_lines_px(tokens, items, img_w: int, img_h: int, thai_font: str, latin_font: str, font_size: int, min_lines: int):
    """Greedily wrap word/space tokens into at most len(items) lines.

    Each line's pixel budget comes from the matching item's baseline length
    (_line_cap_px_for_item). The first *min_lines* lines use a 90% "soft"
    cap so text spreads across the desired number of lines. Returns a list
    of lines, each a list of (kind, text, width_px) tuples; overflow past
    the line limit is merged into the last line.
    """
    max_lines = len(items)
    if max_lines <= 0:
        return []
    caps = [_line_cap_px_for_item(it, img_w, img_h) for it in items]
    desired = max(1, min(int(min_lines), max_lines))
    soft_factor = 0.90 if desired > 1 else 1.0
    lines = [[]]
    cur_w = 0.0
    li = 0
    last_word_hint = ""
    pending_space = ""
    # Scratch canvas used only by the text-measurement fallbacks.
    tmp = Image.new("RGBA", (10, 10), (0, 0, 0, 0))
    dtmp = ImageDraw.Draw(tmp)
    def _measure_w(font, txt: str) -> float:
        # Pixel advance of txt; falls back for older Pillow versions.
        try:
            return float(font.getlength(txt))
        except Exception:
            try:
                bb = dtmp.textbbox((0, 0), txt, font=font, anchor="ls")
                return float(bb[2] - bb[0])
            except Exception:
                w, _ = dtmp.textsize(txt, font=font)
                return float(w)
    def _cap_for_line(idx: int) -> float:
        # Lines past the last item reuse the last item's cap.
        return float(caps[min(idx, max_lines - 1)])
    for k, s in (tokens or []):
        if k == "space":
            # Never start a line with whitespace; defer until a word lands.
            if not lines[-1]:
                continue
            pending_space += str(s)
            continue
        if k != "word":
            continue
        txt = str(s)
        if not txt:
            continue
        font = pick_font(txt, thai_font, latin_font, int(font_size))
        w = _measure_w(font, txt)
        sw = 0.0
        if pending_space:
            # Measure the deferred space with the font of a neighboring word.
            hint = last_word_hint or txt
            font_s = pick_font(hint, thai_font, latin_font, int(font_size))
            sw = _measure_w(font_s, pending_space)
        cap = _cap_for_line(li)
        soft_cap = cap * soft_factor if (li < desired and cap > 0.0) else cap
        need_w = cur_w + sw + w
        # Break to the next line when the word would overflow the cap
        # (hard cap first, then the soft cap for early lines).
        if lines[-1] and li < max_lines - 1:
            if cap > 0.0 and need_w > cap:
                lines.append([])
                li += 1
                cur_w = 0.0
                pending_space = ""
                sw = 0.0
            elif soft_cap > 0.0 and need_w > soft_cap:
                lines.append([])
                li += 1
                cur_w = 0.0
                pending_space = ""
                sw = 0.0
        if pending_space and lines[-1]:
            lines[-1].append(("space", pending_space, sw))
            cur_w += sw
            pending_space = ""
        lines[-1].append(("word", txt, w))
        cur_w += w
        last_word_hint = txt
    if len(lines) > max_lines:
        # Too many lines: fold everything past the limit into the last one.
        head = lines[: max_lines - 1]
        tail = []
        for seg in lines[max_lines - 1:]:
            tail.extend(seg)
        lines = head + [tail]
    for i in range(len(lines)):
        # Trim leading/trailing space tokens from every line.
        while lines[i] and lines[i][0][0] == "space":
            lines[i] = lines[i][1:]
        while lines[i] and lines[i][-1][0] == "space":
            lines[i] = lines[i][:-1]
    return lines
def _ensure_min_lines_by_split(lines, min_lines: int, max_lines: int):
    """Split the word-richest lines until min(min_lines, max_lines) lines exist.

    Splits happen at the middle word boundary of the chosen line; ZWSP
    placeholders do not count as words. Stops early when no line has two
    or more words or when max_lines is reached.
    """
    if not lines:
        return []
    min_lines = int(min_lines)
    max_lines = int(max_lines)
    if min_lines <= 1:
        return lines
    target = min(min_lines, max_lines)
    # Work on copies so the caller's line lists are not mutated.
    lines = [list(seg) for seg in (lines or [])]
    def _trim(seg):
        # Strip leading/trailing space tokens from a line segment.
        while seg and seg[0][0] == "space":
            seg.pop(0)
        while seg and seg[-1][0] == "space":
            seg.pop()
        return seg
    while len(lines) < target:
        # Pick the line with the most (non-placeholder) words.
        idx = None
        best = 0
        for i, seg in enumerate(lines):
            n_words = sum(1 for k, s, _ in seg if k == "word" and s != ZWSP)
            if n_words > best and n_words > 1:
                best = n_words
                idx = i
        if idx is None:
            break
        seg = lines[idx]
        word_pos = [i for i, (k, s, _) in enumerate(seg)
                    if k == "word" and s != ZWSP]
        if len(word_pos) <= 1:
            break
        # Split at the middle word boundary.
        cut_word = len(word_pos) // 2
        cut_pos = word_pos[cut_word]
        left = _trim(seg[:cut_pos])
        right = _trim(seg[cut_pos:])
        lines[idx] = left
        lines.insert(idx + 1, right)
        if len(lines) >= max_lines:
            break
    return lines
def _fit_para_size_and_lines(ptext: str, parser, items, img_w: int, img_h: int, thai_font: str, latin_font: str, base_size: int, min_lines: int, lang: str):
    """Search downward from *base_size* for the largest font size whose
    wrapped lines fit the paragraph's items (line count and item height).

    Returns (size_px, lines). Size 10 is the hard floor and is returned
    with its wrapping even when that still overflows.
    """
    tokens2 = _tokens_with_spaces(ptext, parser, lang)
    if not tokens2 or not items:
        return int(base_size), [[] for _ in range(len(items))]
    max_lines = len(items)
    n_words = 0
    for k, s in tokens2:
        if k == "word" and str(s):
            n_words += 1
    # Aim for one line per word, capped at the number of items.
    desired_lines = max(1, min(max_lines, n_words))
    size = max(10, int(base_size))
    # Per-item pixel heights: the vertical fit constraint per line.
    heights = []
    for it in items:
        b = _ensure_box_fields(it.get("box") or {})
        heights.append(float(b.get("height") or 0.0) * float(img_h))
    while size >= 10:
        lines = _wrap_tokens_to_lines_px(
            tokens2, items, img_w, img_h, thai_font, latin_font, size, min_lines=desired_lines)
        lines = _ensure_min_lines_by_split(
            lines, min_lines=desired_lines, max_lines=max_lines)
        if len(lines) <= max_lines:
            ok = True
            for ii, seg in enumerate(lines):
                words = [s for k, s, _ in seg if k == "word" and s != ZWSP]
                if not words:
                    continue
                line_text = "".join(words)
                mline = _line_metrics_px(
                    line_text, thai_font, latin_font, size)
                if mline is None:
                    continue
                _, th, _ = mline
                # Allow 1% slack over the measured item height.
                if ii < len(heights) and heights[ii] > 0.0 and th > heights[ii] * 1.01:
                    ok = False
                    break
            if ok:
                return size, lines
        size -= 1
    # Nothing fit: return the floor size with its wrapping.
    lines10 = _wrap_tokens_to_lines_px(
        tokens2, items, img_w, img_h, thai_font, latin_font, 10, min_lines=desired_lines)
    lines10 = _ensure_min_lines_by_split(
        lines10, min_lines=desired_lines, max_lines=max_lines)
    return 10, lines10
def _pad_lines(lines, max_lines: int):
max_lines = int(max_lines)
if max_lines <= 0:
return []
lines = list(lines or [])
if len(lines) > max_lines:
return lines[:max_lines]
if len(lines) < max_lines:
lines.extend([[] for _ in range(max_lines - len(lines))])
return lines
def _contains_thai(text: str) -> bool:
    """True when any character of *text* is Thai (per _is_thai_char)."""
    return any(_is_thai_char(ch) for ch in (text or ""))
def _apply_line_to_item(
    item: dict,
    line_tokens,
    para_index: int,
    item_index: int,
    abs_line_start_raw: int,
    W: int,
    H: int,
    thai_path: str,
    latin_path: str,
    forced_size_px: int | None,
    apply_baseline_shift: bool = True,
    kerning_adjust: bool = False,
):
    """Lay one wrapped line of tokens onto *item*, producing per-word spans.

    Mutates *item* in place: sets "text", "valid_text", "box",
    "font_size_px" and "spans", and (when the baseline shift applies)
    re-centers "baseline_p1"/"baseline_p2". Token widths are measured at a
    96px reference size and scaled to fit the item's baseline length and
    box height, unless *forced_size_px* pins the size. W/H are the image
    dimensions in pixels; box/baseline coordinates are 0..1 fractions.
    """
    # Normalize incoming tokens to (kind, text, width) triples.
    tokens = []
    for t in (line_tokens or []):
        if not isinstance(t, (list, tuple)) or len(t) < 2:
            continue
        k = str(t[0])
        s = str(t[1])
        w = float(t[2]) if len(t) > 2 and isinstance(
            t[2], (int, float)) else 0.0
        tokens.append((k, s, w))
    words = [s for k, s, _ in tokens if k == "word" and s != ZWSP]
    item_text = "".join(s for _, s, _ in tokens if s != ZWSP).strip()
    item["text"] = item_text
    item["valid_text"] = bool(item_text)
    b = _ensure_box_fields(item.get("box") or {})
    item["box"] = b
    base_left = float(b.get("left") or 0.0)
    base_top = float(b.get("top") or 0.0)
    base_w = float(b.get("width") or 0.0)
    base_h = float(b.get("height") or 0.0)
    if not words or base_w <= 0.0 or base_h <= 0.0 or W <= 0 or H <= 0:
        # Nothing drawable: clear the spans and bail out.
        item["spans"] = []
        return
    # Baseline endpoints converted to pixel space.
    p1 = item.get("baseline_p1") or {}
    p2 = item.get("baseline_p2") or {}
    x1 = float(p1.get("x") or 0.0) * float(W)
    y1 = float(p1.get("y") or 0.0) * float(H)
    x2 = float(p2.get("x") or 0.0) * float(W)
    y2 = float(p2.get("y") or 0.0) * float(H)
    dx = x2 - x1
    dy = y2 - y1
    L = float(math.hypot(dx, dy))
    if L <= 1e-9:
        # Degenerate baseline: no direction to lay text along.
        item["spans"] = []
        return
    # Unit direction (ux, uy) along the baseline and its normal (nx, ny),
    # flipped so the normal always points downward on screen (ny >= 0).
    ux = dx / L
    uy = dy / L
    nx = -uy
    ny = ux
    if ny < 0:
        nx, ny = -nx, -ny
    base_w_px = L
    base_h_px = base_h * float(H)
    # Reference size for measuring; the result is scaled to fit below.
    base_size = 96
    widths_px = []
    max_ascent = 0
    max_descent = 0
    # Layout units: sanitized words and spaces, ZWSP placeholders dropped.
    layout_units = []
    for k, s, _ in tokens:
        if s == ZWSP:
            continue
        if k == "space":
            layout_units.append(("space", _sanitize_draw_text(s)))
        elif k == "word":
            layout_units.append(("word", _sanitize_draw_text(s)))
    def _measure_len_px(font, text: str) -> float:
        # Pixel advance of text; falls back for older Pillow versions.
        try:
            return float(font.getlength(text))
        except Exception:
            tmp = Image.new("RGBA", (10, 10), (0, 0, 0, 0))
            dtmp = ImageDraw.Draw(tmp)
            try:
                bb = dtmp.textbbox((0, 0), text, font=font, anchor="ls")
                return float(bb[2] - bb[0])
            except Exception:
                w, _ = dtmp.textsize(text, font=font)
                return float(w)
    for i, (k, t) in enumerate(layout_units):
        if k == "space":
            # Measure spaces with the font of the nearest word (before,
            # else after) so the space width matches its surroundings.
            hint = ""
            for j in range(i - 1, -1, -1):
                if layout_units[j][0] == "word":
                    hint = layout_units[j][1]
                    break
            if not hint:
                for j in range(i + 1, len(layout_units)):
                    if layout_units[j][0] == "word":
                        hint = layout_units[j][1]
                        break
            font0 = pick_font(hint or "a", thai_path, latin_path, base_size)
            widths_px.append(max(0.0, _measure_len_px(font0, t)))
            continue
        font0 = pick_font(t, thai_path, latin_path, base_size)
        try:
            ascent, descent = font0.getmetrics()
        except Exception:
            ascent, descent = base_size, int(base_size * 0.25)
        if ascent > max_ascent:
            max_ascent = ascent
        if descent > max_descent:
            max_descent = descent
        if kerning_adjust and (i + 1) < len(layout_units) and layout_units[i + 1][0] == "word":
            # Approximate kerning against the next word's first character,
            # but only for same-script pairs (Thai vs non-Thai).
            nxt = layout_units[i + 1][1]
            nxt1 = nxt[:1] if nxt else ""
            if nxt1 and (_contains_thai(t) == _contains_thai(nxt1)):
                tw = _measure_len_px(font0, t + nxt1) - \
                    _measure_len_px(font0, nxt1)
            else:
                tw = _measure_len_px(font0, t)
        else:
            tw = _measure_len_px(font0, t)
        widths_px.append(max(0.0, tw))
    line_tw = sum(widths_px)
    bo_base = _baseline_offset_px_for_text(
        item_text, thai_path, latin_path, base_size)
    if bo_base is not None:
        # Prefer the measured total text height when available.
        _, total_h_base = bo_base
        line_th = float(total_h_base)
    else:
        line_th = float(max_ascent + max_descent)
    if line_tw <= 1e-9 or line_th <= 1e-9:
        item["spans"] = []
        return
    if forced_size_px is None:
        # Scale the 96px measurement so the line fits both the baseline
        # length and (almost all of) the box height.
        scale_line = min((base_w_px * 1.0) / line_tw,
                         (base_h_px * 0.995) / line_th)
        if scale_line <= 0.0:
            item["spans"] = []
            return
        final_size = max(10, int(base_size * scale_line))
    else:
        final_size = int(max(10, forced_size_px))
        scale_line = float(final_size) / float(base_size)
    item["font_size_px"] = final_size
    w_scaled = [w * scale_line for w in widths_px]
    total_scaled = sum(w_scaled)
    # Center the line horizontally inside the box when it is narrower.
    margin_px = (base_w_px - total_scaled) / \
        2.0 if total_scaled < base_w_px else 0.0
    bo = _baseline_offset_px_for_text(
        item_text, thai_path, latin_path, final_size)
    if apply_baseline_shift and bo is not None:
        # Translate the baseline along its normal so the text's visual
        # center lands on the box center, then store normalized endpoints.
        baseline_offset_px, _ = bo
        cx = (base_left + (base_w / 2.0)) * float(W)
        cy = (base_top + (base_h / 2.0)) * float(H)
        target = (cx + (baseline_offset_px * nx),
                  cy + (baseline_offset_px * ny))
        s = ((target[0] - x1) * nx) + ((target[1] - y1) * ny)
        x1 += nx * s
        y1 += ny * s
        x2 += nx * s
        y2 += ny * s
        item["baseline_p1"] = {"x": x1 / float(W), "y": y1 / float(H)}
        item["baseline_p2"] = {"x": x2 / float(W), "y": y2 / float(H)}
    raw_pos = 0
    span_i = 0
    unit_i = 0
    cum_px = 0.0
    spans = []
    for kind, s, _ in tokens:
        if s == ZWSP:
            continue
        # Raw-character offsets of this token within the full text.
        start_raw = abs_line_start_raw + raw_pos
        raw_pos += len(s)
        end_raw = abs_line_start_raw + raw_pos
        if unit_i >= len(w_scaled):
            break
        wpx = w_scaled[unit_i]
        # t0/t1: fractional horizontal extent of this unit inside the box.
        t0 = (margin_px + cum_px) / base_w_px
        cum_px += wpx
        t1 = (margin_px + cum_px) / base_w_px
        if kind == "space":
            # Spaces advance the cursor but produce no span.
            unit_i += 1
            continue
        span_box = _ensure_box_fields({
            "left": base_left + (base_w * t0),
            "top": base_top,
            "width": base_w * (t1 - t0),
            "height": base_h,
            "rotation_deg": float(b.get("rotation_deg") or 0.0),
            "rotation_deg_css": float(b.get("rotation_deg_css") or 0.0),
        })
        spans.append({
            "side": "Ai",
            "para_index": para_index,
            "item_index": item_index,
            "span_index": span_i,
            "text": s,
            "valid_text": True,
            "start_raw": start_raw,
            "end_raw": end_raw,
            "t0_raw": t0,
            "t1_raw": t1,
            "box": span_box,
            "height_raw": item.get("height_raw"),
            "baseline_p1": item.get("baseline_p1"),
            "baseline_p2": item.get("baseline_p2"),
            "font_size_px": final_size,
        })
        span_i += 1
        unit_i += 1
    item["spans"] = spans
def patch(payload: dict, img_w: int, img_h: int, thai_font: str, latin_font: str, lang: str | None = None) -> dict:
    """Re-flow the AI-translated text back onto the original layout tree.

    Takes payload["Ai"]["aiTextFull"] (the translated full text, either
    split by "<<TP_Pn>>" paragraph markers or by blank lines) and the
    template tree payload["Ai"]["aiTree"], fits each paragraph's text into
    its existing item (line) boxes via _fit_para_size_and_lines, and writes
    the resulting line text/spans back with _apply_line_to_item.

    Returns {"Ai": {"aiTextFull": <cleaned text>, "aiTree": <patched copy>}}.
    Raises ValueError when the aiTree template is not a dict.
    """
    ai = payload.get("Ai") or {}
    ai_text_full = str(ai.get("aiTextFull") or "")
    template_tree = ai.get("aiTree") or {}
    if not isinstance(template_tree, dict):
        raise ValueError("Ai.aiTree template must be a dict")
    lang_norm = _normalize_lang(lang or LANG)
    parser = _budoux_parser_for_lang(lang_norm)
    # Work on a deep copy so the caller's template stays untouched.
    out_tree = copy.deepcopy(template_tree)
    out_tree["side"] = "Ai"
    paragraphs = out_tree.get("paragraphs") or []
    ai_text_full_clean = ai_text_full

    def _extract_paras_by_markers(txt: str, expected: int) -> tuple[list[str], str, int] | None:
        # Split txt on explicit "<<TP_Pn>>" markers into `expected` slots;
        # returns (paragraphs, cleaned text, marker count) or None when
        # markers are absent so the caller can fall back to "\n\n" splitting.
        if not txt or expected <= 0 or "<<TP_P" not in txt:
            return None
        matches = list(re.finditer(r"<<TP_P(\d+)>>", txt))
        if not matches:
            return None
        out: list[str] = [""] * expected
        for mi, m in enumerate(matches):
            try:
                idx = int(m.group(1))
            except Exception:
                continue
            seg_start = m.end()
            # segment runs up to the next marker (or end of text)
            seg_end = matches[mi + 1].start() if (mi + 1) < len(matches) else len(txt)
            seg = (txt[seg_start:seg_end] or "").lstrip("\r\n").strip()
            # first occurrence of each index wins
            if 0 <= idx < expected and not out[idx]:
                out[idx] = seg
        clean = "\n\n".join(out)
        return out, clean, len(matches)

    marked = _extract_paras_by_markers(ai_text_full, len(paragraphs))
    if marked is not None:
        ai_paras, ai_text_full_clean, _marker_count = marked
    else:
        ai_paras = ai_text_full.split("\n\n") if ai_text_full else []
    # Pad/truncate so there is exactly one text chunk per template paragraph.
    if len(ai_paras) < len(paragraphs):
        ai_paras = ai_paras + [""] * (len(paragraphs) - len(ai_paras))
    if len(ai_paras) > len(paragraphs):
        ai_paras = ai_paras[:len(paragraphs)]
    ai_text_full_clean = "\n\n".join(ai_paras)
    raw_cursor = 0  # running character offset into the cleaned full text
    for pi, (p, ptext) in enumerate(zip(paragraphs, ai_paras)):
        p["side"] = "Ai"
        p["para_index"] = int(p.get("para_index", pi))
        items = p.get("items") or []
        max_lines = len(items)
        if max_lines <= 0:
            continue
        # Reference font size: explicit paragraph size, else the smallest
        # item size, else a 96px default.
        base_size_ref = None
        if isinstance(p.get("para_font_size_px"), int) and int(p.get("para_font_size_px")) > 0:
            base_size_ref = int(p.get("para_font_size_px"))
        else:
            ref_sizes = []
            for it in items:
                fs = it.get("font_size_px")
                if isinstance(fs, int) and fs > 0:
                    ref_sizes.append(fs)
            if ref_sizes:
                base_size_ref = min(ref_sizes)
        base_size = int(base_size_ref or 96)
        min_lines = int(max_lines)
        para_size, lines = _fit_para_size_and_lines(
            ptext,
            parser,
            items,
            img_w,
            img_h,
            thai_font,
            latin_font,
            base_size,
            min_lines=min_lines,
            lang=lang_norm,
        )
        lines = _pad_lines(lines, max_lines)
        p["para_font_size_px"] = int(para_size)
        p["text"] = ptext
        p["valid_text"] = bool(ptext)
        p["start_raw"] = raw_cursor
        p["end_raw"] = raw_cursor + len(ptext)
        line_start = raw_cursor
        for ii in range(max_lines):
            it = items[ii]
            it["side"] = "Ai"
            it["para_index"] = pi
            it["item_index"] = ii
            _apply_line_to_item(
                it,
                (lines[ii] if ii < len(lines) else []),
                pi,
                ii,
                line_start,
                img_w,
                img_h,
                thai_font,
                latin_font,
                para_size,
                apply_baseline_shift=True,
                kerning_adjust=True,
            )
            # Advance the raw cursor by the visible characters of this line
            # (ZWSP separators do not occupy raw-text positions).
            line_raw_len = sum(len(s) for k, s, w in (lines[ii] if ii < len(lines) else []) if s != ZWSP)
            line_start += line_raw_len
        raw_cursor = p["end_raw"] + 2  # +2 accounts for the "\n\n" separator
    return {"Ai": {"aiTextFull": ai_text_full_clean, "aiTree": out_tree}}
def _uniformize_ai_item_span_font_size(item: dict, img_w: int, img_h: int, thai_font: str, latin_font: str):
    """Shrink an item's font size so every span's text fits inside its box.

    Measures each span's text at the item's base size; if any span would
    overflow its box (with a 0.5% tolerance), the whole item — and all of
    its spans — is downscaled to the smallest required size (floor 10px),
    keeping the line visually uniform.

    Mutates `item` in place; returns None. No-op when there are no spans,
    the image dimensions are invalid, or no usable base size can be found.

    Bug fix: the measurement fallback used ImageDraw.textsize, which was
    removed in Pillow 10 and would raise AttributeError; it now measures
    via the font's own getbbox.
    """
    spans = item.get("spans") or []
    if not spans or img_w <= 0 or img_h <= 0:
        return
    base_size = item.get("font_size_px")
    try:
        base_size = int(base_size) if base_size is not None else None
    except Exception:
        base_size = None
    if not base_size:
        # fall back to the first positive span-level size
        for sp in spans:
            fs = sp.get("font_size_px") if isinstance(sp, dict) else None
            if isinstance(fs, int) and fs > 0:
                base_size = fs
                break
    if not base_size or base_size <= 0:
        return
    # throwaway canvas used only for text measurement
    tmp = Image.new("RGBA", (10, 10), (0, 0, 0, 0))
    dtmp = ImageDraw.Draw(tmp)
    font_cache = {}

    def _font_for(text: str, size: int):
        # cache fonts by (size, is-thai) so repeated spans reuse one object
        key = (int(size), 1 if _contains_thai(text) else 0)
        f = font_cache.get(key)
        if f:
            return f
        f = pick_font(text, thai_font, latin_font, int(size))
        font_cache[key] = f
        return f

    min_size = int(base_size)
    for sp in spans:
        if not isinstance(sp, dict):
            continue
        txt = _sanitize_draw_text(sp.get("text") or "")
        if txt.strip() == "":
            continue
        b = sp.get("box") or {}
        aw = float(b.get("width") or 0.0) * float(img_w)
        ah = float(b.get("height") or 0.0) * float(img_h)
        if aw <= 0.0 or ah <= 0.0:
            continue
        font = _font_for(txt, base_size)
        try:
            bb = dtmp.textbbox((0, 0), txt, font=font, anchor="ls")
            tw = float(bb[2] - bb[0])
            th = float(bb[3] - bb[1])
        except Exception:
            # Pillow >= 10 removed ImageDraw.textsize; measure with the
            # font's bounding box instead.
            gb = font.getbbox(txt)
            tw = float(gb[2] - gb[0])
            th = float(gb[3] - gb[1])
        if tw <= 0.0 or th <= 0.0:
            continue
        # scale needed for this span to fit its box (0.5% safety margin)
        s = min((aw * 0.995) / tw, (ah * 0.995) / th)
        if s < 1.0:
            req = max(10, int(base_size * s))
            if req < min_size:
                min_size = req
    if min_size != base_size:
        item["font_size_px"] = int(min_size)
        for sp in spans:
            if isinstance(sp, dict):
                sp["font_size_px"] = int(min_size)
def _rebuild_ai_spans_after_font_resize(ai_tree: dict, img_w: int, img_h: int, thai_font: str, latin_font: str, lang: str | None = None):
    """Re-tokenize and re-lay-out every item's spans after font sizes changed.

    For each item in the AI tree, re-splits its line text with the
    language-appropriate BudouX parser, forces the item's (or paragraph's)
    current font size, rebuilds the span boxes via _apply_line_to_item, and
    finally re-uniformizes span font sizes so nothing overflows.

    Mutates ai_tree in place; returns None. No-op on an empty tree or
    invalid image dimensions.
    """
    if not ai_tree or img_w <= 0 or img_h <= 0:
        return
    lang_norm = _normalize_lang(lang or LANG)
    parser = _budoux_parser_for_lang(lang_norm)
    for pi, p in _iter_paragraphs(ai_tree):
        items = p.get("items") or []
        for ii, it in enumerate(items):
            txt = _item_line_text(it)
            if not str(txt).strip():
                it["spans"] = []
                continue
            tokens = _tokens_with_spaces(str(txt), parser, lang_norm)
            # widths are recomputed downstream, so seed each token with 0.0
            line_tokens = [(k, s, 0.0) for k, s in tokens]
            # forced size may arrive as float or numeric string; normalize to int
            forced = it.get("font_size_px") or p.get("para_font_size_px")
            if isinstance(forced, float):
                forced = int(forced)
            elif isinstance(forced, str) and forced.strip().isdigit():
                forced = int(forced.strip())
            _apply_line_to_item(
                it,
                line_tokens,
                int(p.get("para_index", pi)),
                int(it.get("item_index", ii)),
                int(it.get("start_raw", 0)),
                img_w,
                img_h,
                thai_font,
                latin_font,
                forced,
                apply_baseline_shift=False,
                kerning_adjust=True,
            )
            _uniformize_ai_item_span_font_size(
                it, img_w, img_h, thai_font, latin_font)
def ai_translate_original_text(original_text_full: str, target_lang: str):
    """Translate the OCR'd full text via the configured AI provider.

    Resolves provider/model/endpoint from _resolve_ai_config, optionally
    serves the result from a JSON file cache (keyed on provider, model,
    endpoint, language, prompt signature and input text), and otherwise
    calls the Gemini / Anthropic / OpenAI-compatible generation helper.

    Returns {"aiTextFull": <translated text>, "meta": {...}} where meta
    records the model, provider, base_url and request latency. Raises when
    AI_API_KEY is missing.
    """
    provider, api_key, model, base_url = _resolve_ai_config()
    if not api_key:
        raise Exception("AI_API_KEY is required for AI translation")
    lang = _normalize_lang(target_lang)
    # Signature of the full prompt configuration: a prompt change must
    # invalidate cached translations.
    prompt_sig = _sha1(
        json.dumps(
            {
                "sys": AI_PROMPT_SYSTEM_BASE,
                "contract": _active_ai_contract(),
                "data": _active_ai_data_template(),
                "style": AI_LANG_STYLE.get(lang) or AI_LANG_STYLE.get("default") or "",
            },
            ensure_ascii=False,
        )
    )
    cache = None
    cache_key = None
    if AI_CACHE:
        cache = _load_ai_cache(AI_CACHE_PATH)
        cache_key = _sha1(
            json.dumps(
                {"provider": provider, "m": model, "u": base_url,
                 "l": lang, "p": prompt_sig, "t": original_text_full},
                ensure_ascii=False,
            )
        )
        if cache_key in cache:
            cached = cache[cache_key]
            if lang == "th" and cached:
                # Thai cleanup even on cache hits: strip the standalone
                # honorific "นาย" (U+0E19 U+0E32 U+0E22) when it appears as
                # its own token, then collapse runs of spaces/tabs and
                # leading indentation.
                t = str(cached.get("aiTextFull") or "")
                if t:
                    t2 = re.sub(
                        r"(?:(?<=^)|(?<=[\s\"'“”‘’()\[\]{}<>]))\u0e19\u0e32\u0e22(?=(?:\s|$))", "", t)
                    t2 = re.sub(r"[ \t]{2,}", " ", t2)
                    t2 = re.sub(r"^[ \t]+", "", t2, flags=re.MULTILINE)
                    if t2 != t:
                        # persist the cleaned text back into the cache
                        cached = dict(cached)
                        cached["aiTextFull"] = t2
                        cache[cache_key] = cached
                        _save_ai_cache(AI_CACHE_PATH, cache)
            return cached
    system_text, user_parts = _build_ai_prompt_packet(lang, original_text_full)
    started = time.time()
    used_model = model
    if provider == "gemini":
        raw = _gemini_generate_json(api_key, model, system_text, user_parts)
    elif provider == "anthropic":
        raw = _anthropic_generate_json(api_key, model, system_text, user_parts)
    else:
        # OpenAI-compatible endpoints may substitute the model; keep the one
        # actually used for the meta block.
        raw, used_model = _openai_compat_generate_json(
            api_key, base_url, model, system_text, user_parts)
    ai_text_full = _parse_ai_textfull_only(
        raw) if DO_AI_JSON else _parse_ai_textfull_text_only(raw)
    if lang == "th" and ai_text_full:
        # Same Thai cleanup as above, applied to fresh responses.
        ai_text_full = re.sub(
            r"(?:(?<=^)|(?<=[\s\"'“”‘’()\[\]{}<>]))\u0e19\u0e32\u0e22(?=(?:\s|$))", "", ai_text_full)
        ai_text_full = re.sub(r"[ \t]{2,}", " ", ai_text_full)
        ai_text_full = re.sub(r"^[ \t]+", "", ai_text_full, flags=re.MULTILINE)
    result = {
        "aiTextFull": ai_text_full,
        "meta": {"model": used_model, "provider": provider, "base_url": base_url, "latency_sec": round(time.time() - started, 3)},
    }
    if AI_CACHE and cache is not None and cache_key is not None:
        cache[cache_key] = result
        _save_ai_cache(AI_CACHE_PATH, cache)
    return result
def to_translated(u, lang="th"):
    """Build the Google Lens translated-image URL for *u*.

    Carries over the vsrid/gsessionid session parameters from the source
    URL and requests auto-detected source language translated into *lang*.
    """
    params = parse_qs(urlparse(u).query)
    query = {
        "vsrid": params["vsrid"][0],
        "gsessionid": params["gsessionid"][0],
        "sl": "auto",
        "tl": lang,
        "se": 1,
        "ib": "1",
    }
    return "https://lens.google.com/translatedimage?" + urlencode(query)
def _b64pad(s: str) -> str:
return s + "=" * ((4 - (len(s) % 4)) % 4)
def decode_imageurl_to_datauri(imageUrl: str):
    """Recover a data-URI from *imageUrl*.

    A literal "data:image...;base64,..." string passes through unchanged.
    Otherwise the value is treated as (standard or URL-safe) base64; when
    the decoded payload embeds a data URI, the substring from "data:image"
    onward is returned. Returns None when nothing can be recovered.
    """
    if not imageUrl:
        return None
    if isinstance(imageUrl, str) and imageUrl.startswith("data:image") and "base64," in imageUrl:
        return imageUrl
    padded = imageUrl + "=" * ((4 - (len(imageUrl) % 4)) % 4)
    for decoder in (base64.b64decode, base64.urlsafe_b64decode):
        try:
            raw = decoder(padded)
        except Exception:
            continue
        try:
            text = raw.decode("utf-8")
        except Exception:
            text = raw.decode("utf-8", errors="ignore")
        if "data:image" in text and "base64," in text:
            pos = text.find("data:image")
            return text[pos:].strip() if pos >= 0 else text.strip()
    return None
def read_varint(buf, i):
    """Decode a protobuf varint from *buf* starting at index *i*.

    Returns (value, next_index). Raises ValueError on truncated input or
    a varint longer than 10 continuation groups.
    """
    value = 0
    shift = 0
    while True:
        if i >= len(buf):
            raise ValueError("eof varint")
        byte = buf[i]
        i += 1
        value |= (byte & 0x7F) << shift
        if not (byte & 0x80):
            return value, i
        shift += 7
        if shift > 70:
            raise ValueError("varint too long")
def parse_proto(buf, start=0, end=None):
    """Parse protobuf wire format in buf[start:end] into field tuples.

    Returns a list of (field_number, wire_type, value) where value is an
    int for varints (wire 0) and raw bytes for 64-bit (1), length-delimited
    (2) and 32-bit (5) fields. Raises ValueError on unsupported wire types
    or truncated varints.
    """
    def _varint(i):
        # local varint decoder, identical semantics to read_varint
        shift = 0
        result = 0
        while True:
            if i >= len(buf):
                raise ValueError("eof varint")
            b = buf[i]
            i += 1
            result |= (b & 0x7F) << shift
            if (b & 0x80) == 0:
                return result, i
            shift += 7
            if shift > 70:
                raise ValueError("varint too long")

    if end is None:
        end = len(buf)
    pos = start
    fields = []
    while pos < end:
        key, pos = _varint(pos)
        field, wire = key >> 3, key & 7
        if wire == 0:  # varint
            val, pos = _varint(pos)
        elif wire == 1:  # 64-bit
            val = buf[pos: pos + 8]
            pos += 8
        elif wire == 2:  # length-delimited
            length, pos = _varint(pos)
            val = buf[pos: pos + length]
            pos += length
        elif wire == 5:  # 32-bit
            val = buf[pos: pos + 4]
            pos += 4
        else:
            raise ValueError(f"wiretype {wire}")
        fields.append((field, wire, val))
    return fields
def b2f(b4):
    """Interpret 4 bytes as a little-endian IEEE-754 single-precision float."""
    (value,) = struct.unpack("<f", b4)
    return value
def b2hex(b):
    """Return the lowercase hexadecimal encoding of a bytes-like value."""
    return bytes(b).hex()
def _get_float_field(msg_fields, field_num):
for f, w, v in msg_fields:
if f == field_num and w == 5:
return b2f(v)
return None
def _get_points_from_geom(geom_bytes):
    """Extract baseline geometry from a geometry message.

    Field 1 sub-messages carry float x/y points; field 3 carries a float
    height. Returns (p1, p2, height) from the first two points, or
    (None, None, None) when the message is incomplete.
    """
    points = []
    height = None
    for field, wire, payload in parse_proto(geom_bytes):
        if field == 1 and wire == 2:
            sub = parse_proto(payload)
            px = _get_float_field(sub, 1)
            py = _get_float_field(sub, 2)
            if px is not None and py is not None:
                points.append((px, py))
        elif field == 3 and wire == 5:
            height = b2f(payload)
    if len(points) >= 2 and height is not None:
        return points[0], points[1], height
    return None, None, None
def _looks_like_geom(geom_bytes):
    """Heuristically decide whether *geom_bytes* is a geometry message:
    at least two point sub-messages (field 1) plus a float height (field 3).

    Bug fix: this probe is invoked on arbitrary nested payloads (e.g. plain
    string fields) while scanning for items, where parse_proto raises
    ValueError; such payloads now answer False instead of crashing.
    """
    try:
        geom_fields = parse_proto(geom_bytes)
    except ValueError:
        return False
    pts = 0
    has_height = False
    for f, w, v in geom_fields:
        if f == 1 and w == 2:
            try:
                p_fields = parse_proto(v)
            except ValueError:
                continue  # nested payload is not a message; not a point
            if _get_float_field(p_fields, 1) is not None and _get_float_field(p_fields, 2) is not None:
                pts += 1
        elif f == 3 and w == 5:
            has_height = True
    return pts >= 2 and has_height
def _looks_like_span(span_bytes):
    """Heuristically decide whether *span_bytes* is a text-span message:
    it must carry at least one float t-coordinate (fields 3/4, wire 5) and
    at least one varint range bound (fields 1/2, wire 0).

    Bug fix: probes run on arbitrary nested payloads; malformed bytes now
    answer False instead of letting parse_proto's ValueError escape.
    """
    try:
        span_fields = parse_proto(span_bytes)
    except ValueError:
        return False
    has_t = False
    has_range = False
    for f, w, v in span_fields:
        if f in (3, 4) and w == 5:
            has_t = True
        elif f in (1, 2) and w == 0:
            has_range = True
    return has_t and has_range
def _is_item_message(msg_bytes):
    """Decide whether *msg_bytes* is an OCR item message: field 1 must hold
    a plausible geometry and at least one field 2 a plausible span.

    Bug fix: this is called while walking arbitrary nested payloads (often
    plain string fields that are not valid protobuf); those now answer
    False instead of raising ValueError from parse_proto.
    """
    try:
        fields = parse_proto(msg_bytes)
    except ValueError:
        return False
    geom_ok = False
    span_ok = 0
    for f, w, v in fields:
        if f == 1 and w == 2 and not geom_ok:
            geom_ok = _looks_like_geom(v)
        elif f == 2 and w == 2:
            if _looks_like_span(v):
                span_ok += 1
    return geom_ok and span_ok > 0
def _extract_items_from_paragraph(par_bytes):
    """Collect item messages from a paragraph message.

    First checks the paragraph's direct length-delimited children; when
    none look like items, walks nested payloads (depth < 4, at most 20000
    nodes) probing each candidate with _is_item_message, de-duplicating
    identical byte payloads.

    Bug fix: nested length-delimited payloads are frequently plain strings
    rather than sub-messages; parse failures during the walk are now
    skipped instead of aborting the whole paragraph with ValueError.
    """
    top = parse_proto(par_bytes)
    items = []
    for _, w, v in top:
        if w == 2 and _is_item_message(v):
            items.append(v)
    if items:
        return items
    items = []
    seen = set()
    nodes = 0

    def walk(buf, depth):
        nonlocal nodes
        if depth >= 4 or nodes > 20000:
            return
        try:
            fields = parse_proto(buf)
        except ValueError:
            return  # payload is not a message; nothing to descend into
        for _, w, v in fields:
            if w != 2:
                continue
            nodes += 1
            if nodes > 20000:
                return
            if _is_item_message(v):
                if v in seen:
                    continue
                seen.add(v)
                items.append(v)
            else:
                walk(v, depth + 1)

    walk(par_bytes, 0)
    return items
def _extract_item_geom_spans(item_bytes):
    """Split an item message into its geometry payload (field 1, last one
    wins) and the list of span payloads (field 2, in order)."""
    geom_bytes = None
    spans_bytes = []
    for field, wire, payload in parse_proto(item_bytes):
        if wire != 2:
            continue
        if field == 1:
            geom_bytes = payload
        elif field == 2:
            spans_bytes.append(payload)
    return geom_bytes, spans_bytes
def _extract_span(span_bytes):
    """Decode one span message into (start, end, t0, t1, raw_fields).

    start/end are varint text offsets (fields 1/2); t0/t1 are float
    fractions along the baseline (fields 3/4). Missing fields stay None.
    """
    span_fields = parse_proto(span_bytes)
    start = end = t0 = t1 = None
    for field, wire, payload in span_fields:
        if wire == 0 and field == 1:
            start = int(payload)
        elif wire == 0 and field == 2:
            end = int(payload)
        elif wire == 5 and field == 3:
            t0 = b2f(payload)
        elif wire == 5 and field == 4:
            t1 = b2f(payload)
    return start, end, t0, t1, span_fields
def _normalize_angle_deg(angle_deg):
while angle_deg <= -180.0:
angle_deg += 360.0
while angle_deg > 180.0:
angle_deg -= 360.0
if angle_deg < -90.0:
angle_deg += 180.0
if angle_deg > 90.0:
angle_deg -= 180.0
return angle_deg
def _slice_text(full_text, start, end):
if start is None or end is None:
return ""
if start < 0 or end < 0 or start > end or end > len(full_text):
return ""
return full_text[start:end]
def _range_min_max(ranges):
if not ranges:
return None, None
s = min(r[0] for r in ranges)
e = max(r[1] for r in ranges)
return s, e
def decode_tree(paragraphs_b64, full_text, side, img_w, img_h, want_raw=True):
    """Decode base64 protobuf paragraph blobs into a paragraph/item/span tree.

    Each blob is scanned for item messages (a baseline geometry plus text
    spans). Baselines in normalized coordinates become pixel boxes; span
    text is sliced out of *full_text* using the [start, end) offsets in the
    span messages.

    Returns (tree, raw_dump): tree = {"side": side, "paragraphs": [...]}
    with nested items/spans; raw_dump holds per-paragraph raw bytes (b64 and
    hex) when want_raw is True, else stays empty.
    """
    raw_dump = []
    paragraphs = []
    cursor = 0  # monotonically advancing offset into full_text
    for para_index, b64s in enumerate(paragraphs_b64):
        par_bytes = base64.b64decode(b64s)
        if want_raw:
            raw_dump.append({"para_index": para_index,
                             "b64": b64s, "bytes_hex": b2hex(par_bytes)})
        item_msgs = _extract_items_from_paragraph(par_bytes)
        items = []
        para_ranges = []
        para_bounds = None
        for item_index, item_bytes in enumerate(item_msgs):
            geom_bytes, spans_bytes = _extract_item_geom_spans(item_bytes)
            if geom_bytes is None:
                continue
            p1, p2, height_norm = _get_points_from_geom(geom_bytes)
            if p1 is None or p2 is None or height_norm is None:
                continue
            x1n, y1n = p1
            x2n, y2n = p2
            # normalized -> pixel baseline endpoints
            x1 = x1n * img_w
            y1 = y1n * img_h
            x2 = x2n * img_w
            y2 = y2n * img_h
            dx = x2 - x1
            dy = y2 - y1
            # orient the baseline left-to-right (top-to-bottom for verticals)
            if dx < 0 or (abs(dx) < 1e-12 and dy < 0):
                x1, y1, x2, y2 = x2, y2, x1, y1
                x1n, y1n, x2n, y2n = x2n, y2n, x1n, y1n
                dx = x2 - x1
                dy = y2 - y1
            L = math.hypot(dx, dy)
            if L <= 1e-12:
                continue  # degenerate (zero-length) baseline
            ux = dx / L  # unit vector along the baseline
            uy = dy / L
            angle_deg_raw = math.degrees(math.atan2(dy, dx))
            angle_deg = _normalize_angle_deg(angle_deg_raw)
            angle_deg_css = angle_deg
            height_px = height_norm * img_h
            item_spans = []
            item_ranges = []
            item_bounds = None
            for span_index, sb in enumerate(spans_bytes):
                start, end, t0, t1, _ = _extract_span(sb)
                if start is None:
                    start = cursor  # missing start: assume current position
                else:
                    cursor = max(cursor, start)
                if end is None:
                    continue
                cursor = max(cursor, end)
                if t0 is None and t1 is None:
                    continue  # no geometry fraction at all: skip the span
                if t0 is None:
                    t0 = 0.0
                if t1 is None:
                    t1 = 1.0
                valid_text = False
                span_text = ""
                if start is not None and end is not None and 0 <= start <= end <= len(full_text):
                    span_text = full_text[start:end]
                    valid_text = span_text.strip() != ""
                if valid_text:
                    item_ranges.append((start, end))
                # span endpoints along the baseline, then a centered box
                e1x = x1 + ux * (t0 * L)
                e1y = y1 + uy * (t0 * L)
                e2x = x1 + ux * (t1 * L)
                e2y = y1 + uy * (t1 * L)
                cx = (e1x + e2x) / 2.0
                cy = (e1y + e2y) / 2.0
                width_px = abs(t1 - t0) * L
                left_px = cx - width_px / 2.0
                top_px = cy - height_px / 2.0
                # back to normalized coordinates for the node
                left = left_px / img_w
                top = top_px / img_h
                width = width_px / img_w
                height = height_px / img_h
                span_node = {
                    "side": side,
                    "para_index": para_index,
                    "item_index": item_index,
                    "span_index": span_index,
                    "start_raw": start,
                    "end_raw": end,
                    "t0_raw": t0,
                    "t1_raw": t1,
                    "height_raw": height_norm,
                    "baseline_p1": {"x": x1n, "y": y1n},
                    "baseline_p2": {"x": x2n, "y": y2n},
                    "box": {
                        "left": left,
                        "top": top,
                        "width": width,
                        "height": height,
                        "rotation_deg": angle_deg,
                        "rotation_deg_css": angle_deg_css,
                        "center": {"x": cx / img_w, "y": cy / img_h},
                        "left_pct": left * 100.0,
                        "top_pct": top * 100.0,
                        "width_pct": width * 100.0,
                        "height_pct": height * 100.0,
                    },
                    "text": span_text,
                    "valid_text": valid_text,
                }
                # accumulate pixel-space bounds over rotated span quads
                quad = _token_box_quad_px(span_node, img_w, img_h, pad_px=0)
                if quad:
                    xs = [p[0] for p in quad]
                    ys = [p[1] for p in quad]
                    b = (min(xs), min(ys), max(xs), max(ys))
                    item_bounds = b if item_bounds is None else (min(item_bounds[0], b[0]), min(
                        item_bounds[1], b[1]), max(item_bounds[2], b[2]), max(item_bounds[3], b[3]))
                item_bounds = item_bounds  # no-op kept from original
                item_spans.append(span_node)
            s0, s1 = _range_min_max(item_ranges)
            item_text = _slice_text(
                full_text, s0, s1).strip() if s0 is not None else ""
            item_valid_text = item_text.strip() != ""
            if s0 is not None:
                para_ranges.append((s0, s1))
            # item box: the full baseline extent, centered on its midpoint
            cx = (x1 + x2) / 2.0
            cy = (y1 + y2) / 2.0
            left_px = cx - L / 2.0
            top_px = cy - height_px / 2.0
            item_box = {
                "left": left_px / img_w,
                "top": top_px / img_h,
                "width": L / img_w,
                "height": height_px / img_h,
                "rotation_deg": angle_deg,
                "rotation_deg_css": angle_deg_css,
                "center": {"x": cx / img_w, "y": cy / img_h},
            }
            if item_bounds is not None:
                para_bounds = item_bounds if para_bounds is None else (min(para_bounds[0], item_bounds[0]), min(
                    para_bounds[1], item_bounds[1]), max(para_bounds[2], item_bounds[2]), max(para_bounds[3], item_bounds[3]))
            items.append(
                {
                    "side": side,
                    "para_index": para_index,
                    "item_index": item_index,
                    "start_raw": s0,
                    "end_raw": s1,
                    "text": item_text,
                    "valid_text": item_valid_text,
                    "height_raw": height_norm,
                    "baseline_p1": {"x": x1n, "y": y1n},
                    "baseline_p2": {"x": x2n, "y": y2n},
                    "box": item_box,
                    "bounds_px": item_bounds,
                    "spans": item_spans,
                }
            )
        p0, p1 = _range_min_max(para_ranges)
        para_text = _slice_text(
            full_text, p0, p1).strip() if p0 is not None else ""
        para_valid_text = para_text.strip() != ""
        paragraphs.append(
            {
                "side": side,
                "para_index": para_index,
                "start_raw": p0,
                "end_raw": p1,
                "text": para_text,
                "valid_text": para_valid_text,
                "bounds_px": para_bounds,
                "items": items,
            }
        )
    tree = {"side": side, "paragraphs": paragraphs}
    return tree, raw_dump
def flatten_tree_spans(tree):
    """Flatten every span node of the tree into one list, preserving
    paragraph/item/span order."""
    return [
        span
        for para in tree.get("paragraphs") or []
        for item in para.get("items") or []
        for span in item.get("spans") or []
    ]
def flatten_tree_items_as_tokens(tree, img_w, img_h):
    """Project each item of the tree into a span-shaped token dict
    (span_index -1, full t-range 0..1). img_w/img_h are accepted for
    signature parity with other flatteners but are not used here."""
    tokens = []
    for para in tree.get("paragraphs") or []:
        for item in para.get("items") or []:
            tokens.append({
                "side": item["side"],
                "para_index": item["para_index"],
                "item_index": item["item_index"],
                "span_index": -1,
                "start_raw": item.get("start_raw"),
                "end_raw": item.get("end_raw"),
                "t0_raw": 0.0,
                "t1_raw": 1.0,
                "height_raw": item.get("height_raw"),
                "baseline_p1": item.get("baseline_p1"),
                "baseline_p2": item.get("baseline_p2"),
                "box": item.get("box"),
                "text": item.get("text") or "",
                "valid_text": item.get("valid_text", False),
            })
    return tokens
def _mean_angle_deg(angles_deg):
vals = [a for a in (angles_deg or []) if a is not None]
if not vals:
return 0.0
xs = [math.cos(math.radians(a)) for a in vals]
ys = [math.sin(math.radians(a)) for a in vals]
return math.degrees(math.atan2(sum(ys) / len(ys), sum(xs) / len(xs)))
def _rotate_xy(x, y, cos_a, sin_a):
return (x * cos_a - y * sin_a, x * sin_a + y * cos_a)
def _para_obb_quad_px(para_node, W, H):
    """Oriented bounding quad (4 pixel-space corners) around all items of a
    paragraph, aligned to the items' circular-mean rotation. Returns None
    when there are no items or fewer than 4 corner points."""
    items = para_node.get("items") or []
    if not items:
        return None
    angles = []
    corner_pts = []
    for item in items:
        box = item.get("box") or {}
        angles.append(box.get("rotation_deg", 0.0))
        quad = _token_box_quad_px(item, W, H, pad_px=0)
        if quad:
            corner_pts.extend(quad)
    if len(corner_pts) < 4:
        return None
    ang = _mean_angle_deg(angles)
    cos_a = math.cos(math.radians(ang))
    sin_a = math.sin(math.radians(ang))
    # un-rotate all points, take the axis-aligned bbox, rotate corners back
    unrot = [_rotate_xy(x, y, cos_a, -sin_a) for (x, y) in corner_pts]
    xs = [p[0] for p in unrot]
    ys = [p[1] for p in unrot]
    corners = [(min(xs), min(ys)), (max(xs), min(ys)),
               (max(xs), max(ys)), (min(xs), max(ys))]
    return [_rotate_xy(x, y, cos_a, sin_a) for (x, y) in corners]
def build_level_outlines(tree, W, H):
    """Collect debug outline quads for the paragraph and item levels,
    honoring the DRAW_OUTLINE_PARA / DRAW_OUTLINE_ITEM module flags.
    Returns a list of {"quad", "color", "width"} dicts."""
    outlines = []
    if not tree:
        return outlines
    if DRAW_OUTLINE_PARA:
        for para in tree.get("paragraphs") or []:
            quad = _para_obb_quad_px(para, W, H)
            if quad:
                outlines.append(
                    {"quad": quad, "color": PARA_OUTLINE, "width": PARA_OUTLINE_WIDTH})
    if DRAW_OUTLINE_ITEM:
        for token in flatten_tree_items_as_tokens(tree, W, H):
            quad = _token_box_quad_px(token, W, H, pad_px=0)
            if quad:
                outlines.append(
                    {"quad": quad, "color": ITEM_OUTLINE, "width": ITEM_OUTLINE_WIDTH})
    return outlines
def tokens_to_html(tokens, container_class="RTMDre"):
    """Render token boxes as absolutely-positioned, percent-based HTML divs.

    Each valid token becomes an empty div whose text travels in the
    aria-label attribute (rendered via a CSS ::before rule); geometry comes
    from the token box's *_pct fields and rotation, optionally with a
    font-size and a derived line-height (1.05x, min 1px).

    Bug fix: the aria-label value is now fully attribute-escaped (&, <, >
    and double quotes, ampersand first) instead of only double quotes, so
    text containing '&' or markup can no longer emit malformed or
    injectable HTML.
    """
    parts = [f'<div class="{container_class}">']
    for t in tokens:
        if not t.get("valid_text"):
            continue
        b = t["box"]
        # escape & before the other entities to avoid double-escaping
        raw = (t.get("text") or "").replace("\n", " ")
        aria = (raw.replace("&", "&amp;")
                   .replace("<", "&lt;")
                   .replace(">", "&gt;")
                   .replace('"', "&quot;"))
        wi = t.get("wi", 0)
        rot = b.get("rotation_deg_css", b.get("rotation_deg", 0.0))
        fs = t.get("font_size_px") or b.get("font_size_px")
        lh = None
        if fs:
            try:
                lh = max(1, int(round(float(fs) * 1.05)))
            except Exception:
                lh = None
        style = (
            f'top: calc({b["top_pct"]}%); '
            f'left: calc({b["left_pct"]}%); '
            f'width: calc({b["width_pct"]}%); '
            f'height: calc({b["height_pct"]}%); '
            f"transform: rotate({rot}deg);"
        )
        if fs:
            style += f" font-size: {float(fs):.4g}px;"
        if lh:
            style += f" line-height: {lh}px;"
        parts.append(
            f'<div class="IwqbBf" aria-label="{aria}" data-wi="{wi}" role="button" tabindex="-1" style="{style}"></div>'
        )
    parts.append("</div>")
    return "".join(parts)
def tp_overlay_css():
    """CSS for the tp-* translated-text overlay produced by ai_tree_to_tp_html.

    Defines the draw root/scope containers, absolutely-positioned paragraph
    and item boxes (flex-centered, non-interactive), the item text pill
    (background, shadow, rounded corners via CSS variables --tp-font/--tp-fg/
    --tp-bg), and the data-wrap='1' variant that allows wrapping.
    """
    return (
        ".tp-draw-root{position:absolute;inset:0;pointer-events:none;}"
        ".tp-draw-scope{position:absolute;left:0;top:0;transform-origin:0 0;}"
        ".tp-para{position:absolute;left:0;top:0;}"
        ".tp-item{position:absolute;left:0;top:0;display:flex;align-items:center;justify-content:center;"
        "white-space:pre;pointer-events:none;box-sizing:border-box;overflow:visible;"
        "font-family:var(--tp-font,system-ui);font-weight:500;"
        "color:var(--tp-fg,rgba(20,20,20,.98));"
        "text-shadow:0 0 2px rgba(255,255,255,.90),0 0 2px rgba(0,0,0,.60),0 1px 1px rgba(0,0,0,.35);}"
        ".tp-item>span{display:inline-block;white-space:pre;transform-origin:center;"
        "padding:0;border-radius:3px;"
        "background:var(--tp-bg,rgba(255,255,255,.65));"
        "box-decoration-break:clone;-webkit-box-decoration-break:clone;}"
        ".tp-item[data-wrap='1'],.tp-item[data-wrap='1']>span{white-space:pre-wrap;word-break:break-word;}"
        ".tp-item[data-wrap='1']>span{text-align:center;}"
    )
def _tp_norm_list(v):
if isinstance(v, list):
return v
if isinstance(v, dict):
try:
return [v[k] for k in sorted(v.keys(), key=lambda x: int(x) if str(x).isdigit() else str(x))]
except Exception:
return list(v.values())
return []
def _tp_num(x):
try:
n = float(x)
return n if math.isfinite(n) else None
except Exception:
return None
def _tp_escape_text(s: str) -> str:
if not s:
return ""
s = s.replace("\r", "")
s = s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
return s
def _tp_get_rect(obj: dict, base_w: float, base_h: float):
if not isinstance(obj, dict):
return None
box = obj.get("box") if isinstance(obj.get("box"), dict) else {}
l0 = _tp_num(box.get("left"))
t0 = _tp_num(box.get("top"))
w0 = _tp_num(box.get("width"))
h0 = _tp_num(box.get("height"))
if None not in (l0, t0, w0, h0) and w0 > 0 and h0 > 0:
l = l0 * base_w
t = t0 * base_h
r = (l0 + w0) * base_w
b = (t0 + h0) * base_h
deg = _tp_num(box.get("rotation_deg_css"))
if deg is None:
deg = _tp_num(box.get("rotation_deg"))
return {"l": l, "t": t, "r": r, "b": b, "deg": deg or 0.0}
lp = _tp_num(box.get("left_pct"))
tp = _tp_num(box.get("top_pct"))
wp = _tp_num(box.get("width_pct"))
hp = _tp_num(box.get("height_pct"))
if None not in (lp, tp, wp, hp) and wp > 0 and hp > 0:
l0p = lp / 100.0
t0p = tp / 100.0
w0p = wp / 100.0
h0p = hp / 100.0
l = l0p * base_w
t = t0p * base_h
r = (l0p + w0p) * base_w
b = (t0p + h0p) * base_h
deg = _tp_num(box.get("rotation_deg_css"))
if deg is None:
deg = _tp_num(box.get("rotation_deg"))
return {"l": l, "t": t, "r": r, "b": b, "deg": deg or 0.0}
bpx = obj.get("bounds_px")
if isinstance(bpx, list) and len(bpx) == 4:
l = _tp_num(bpx[0])
t = _tp_num(bpx[1])
r = _tp_num(bpx[2])
bb = _tp_num(bpx[3])
if None not in (l, t, r, bb) and r > l and bb > t:
return {"l": l, "t": t, "r": r, "b": bb, "deg": 0.0}
return None
def _tp_union_rect(items: list, base_w: float, base_h: float):
    """Axis-aligned union (deg 0.0) of the resolved rects of all items;
    None when no item yields a usable rect."""
    left = top = float("inf")
    right = bottom = float("-inf")
    for it in items:
        rect = _tp_get_rect(it, base_w, base_h)
        if not rect:
            continue
        left = min(left, rect["l"])
        top = min(top, rect["t"])
        right = max(right, rect["r"])
        bottom = max(bottom, rect["b"])
    if not all(math.isfinite(v) for v in (left, top, right, bottom)):
        return None
    return {"l": left, "t": top, "r": right, "b": bottom, "deg": 0.0}
def _tp_mean_item_deg(items: list, base_w: float, base_h: float) -> float:
    """Circular mean of the rotation angles of the items' resolved rects;
    0.0 when no item has a usable rect/angle."""
    angles = []
    for it in items or []:
        rect = _tp_get_rect(it, base_w, base_h)
        if not rect:
            continue
        deg = _tp_num(rect.get("deg"))
        if deg is not None:
            angles.append(float(deg))
    return float(_mean_angle_deg(angles)) if angles else 0.0
def _tp_oriented_rect_from_points(pts: list, para_deg: float) -> dict | None:
if len(pts) < 2:
return None
ang = float(para_deg or 0.0)
if not math.isfinite(ang):
ang = 0.0
rad_n = math.radians(-ang)
cn = math.cos(rad_n)
sn = math.sin(rad_n)
rpts = [(x * cn - y * sn, x * sn + y * cn) for x, y in pts]
xs = [p[0] for p in rpts]
ys = [p[1] for p in rpts]
minx, maxx = min(xs), max(xs)
miny, maxy = min(ys), max(ys)
w = float(maxx - minx)
h = float(maxy - miny)
if w <= 0.0 or h <= 0.0:
return None
cx0 = float((minx + maxx) / 2.0)
cy0 = float((miny + maxy) / 2.0)
rad_a = math.radians(ang)
ca = math.cos(rad_a)
sa = math.sin(rad_a)
cx = (cx0 * ca) - (cy0 * sa)
cy = (cx0 * sa) + (cy0 * ca)
l = cx - (w / 2.0)
t = cy - (h / 2.0)
return {"l": float(l), "t": float(t), "r": float(l + w), "b": float(t + h), "deg": float(ang)}
def _tp_rect_corners(l: float, t: float, r: float, b: float, deg: float) -> list:
w = float(r - l)
h = float(b - t)
if w <= 0.0 or h <= 0.0:
return []
cx = float((l + r) / 2.0)
cy = float((t + b) / 2.0)
hw = w / 2.0
hh = h / 2.0
rad = math.radians(float(deg or 0.0))
c = math.cos(rad)
s = math.sin(rad)
out = []
for x, y in ((-hw, -hh), (hw, -hh), (hw, hh), (-hw, hh)):
rx = (x * c) - (y * s)
ry = (x * s) + (y * c)
out.append((cx + rx, cy + ry))
return out
def _tp_para_rect_from_items(items: list, base_w: float, base_h: float, para_deg: float) -> dict | None:
    """Oriented paragraph rect (angle para_deg) enclosing the rotated corner
    points of every item's resolved rect; None when items is empty or no
    usable corner points are found."""
    if not items:
        return None
    pts = []
    for it in items:
        rect = _tp_get_rect(it, base_w, base_h)
        if not rect:
            continue
        w = float(rect["r"] - rect["l"])
        h = float(rect["b"] - rect["t"])
        if w <= 0.0 or h <= 0.0:
            continue
        deg = float(rect.get("deg") or 0.0)
        cx = float(rect["l"] + w / 2.0)
        cy = float(rect["t"] + h / 2.0)
        hw = w / 2.0
        hh = h / 2.0
        rad = math.radians(deg)
        cos_r = math.cos(rad)
        sin_r = math.sin(rad)
        # add the item's four rotated corners
        for ox, oy in ((-hw, -hh), (hw, -hh), (hw, hh), (-hw, hh)):
            pts.append((cx + ox * cos_r - oy * sin_r,
                        cy + ox * sin_r + oy * cos_r))
    return _tp_oriented_rect_from_points(pts, para_deg)
def _tp_extract_item_text(it: dict) -> str:
    """Best-effort display text for an item: the first non-empty string
    among several known text keys, else the concatenation of its spans'
    text, else ""."""
    if not isinstance(it, dict):
        return ""
    for key in ("text", "translated_text", "translatedText", "ai_text",
                "aiText", "display_text", "displayText"):
        value = it.get(key)
        if isinstance(value, str) and value:
            return value
    spans = _tp_norm_list(it.get("spans"))
    if not spans:
        return ""
    pieces = []
    for sp in spans:
        if isinstance(sp, dict) and isinstance(sp.get("text"), str):
            pieces.append(sp["text"])
        else:
            pieces.append("")
    return "".join(pieces)
def ai_tree_to_tp_html(tree: dict, base_w: int, base_h: int) -> str:
    """Render the AI layout tree as absolutely-positioned tp-* overlay HTML.

    Produces a .tp-draw-scope div sized to the base image, one .tp-para per
    paragraph (rotated when a paragraph angle is known or derivable from its
    items) and one .tp-item per non-empty item, each carrying inline
    left/top/width/height/font-size/line-height styles. Items inside a
    rotated paragraph are placed in the paragraph's unrotated frame so the
    paragraph-level CSS rotation positions them correctly. Rects are padded
    by _TP_HTML_EPS_PX pixels on every side when that constant is positive.

    Returns "" for invalid dimensions or an empty/invalid tree.
    """
    base_w = int(base_w or 0)
    base_h = int(base_h or 0)
    if base_w <= 0 or base_h <= 0:
        return ""
    paras = _tp_norm_list(tree.get("paragraphs")
                          if isinstance(tree, dict) else None)
    if not paras:
        return ""
    parts = [
        f'<div class="tp-draw-scope" style="width: {base_w}px; height: {base_h}px;">']
    for pi, p in enumerate(paras):
        if not isinstance(p, dict):
            continue
        items = _tp_norm_list(p.get("items"))
        # restore item order when item_index values are present
        if len(items) > 1 and any(isinstance(x, dict) and _tp_num(x.get("item_index")) is not None for x in items):
            items = sorted(
                items,
                key=lambda x: _tp_num(
                    x.get("item_index")) if isinstance(x, dict) else 0.0,
            )
        para_idx = int(_tp_num(p.get("para_index")) or pi)
        # paragraph rect: its own geometry, else the union of its items
        pbx = _tp_get_rect(p, base_w, base_h) or _tp_union_rect(
            items, base_w, base_h)
        if not pbx:
            continue
        para_deg = float(pbx.get("deg") or 0.0)
        # no paragraph rotation recorded: try the items' mean angle instead
        if abs(para_deg) <= 0.01:
            derived = _tp_mean_item_deg(items, base_w, base_h)
            if abs(derived) > 0.01:
                pbx2 = _tp_para_rect_from_items(items, base_w, base_h, derived)
                if pbx2:
                    pbx = pbx2
                    para_deg = float(pbx.get("deg") or 0.0)
        # grow the paragraph rect so all rotated item corners stay inside
        pbx_items = _tp_para_rect_from_items(items, base_w, base_h, para_deg)
        if pbx_items:
            pts = _tp_rect_corners(
                pbx["l"], pbx["t"], pbx["r"], pbx["b"], para_deg)
            pts += _tp_rect_corners(pbx_items["l"], pbx_items["t"],
                                    pbx_items["r"], pbx_items["b"], para_deg)
            merged = _tp_oriented_rect_from_points(pts, para_deg)
            if merged:
                pbx = merged
        # optional uniform padding around the paragraph rect
        eps = float(_TP_HTML_EPS_PX or 0.0)
        if eps > 0.0:
            pbx = {
                "l": float(pbx["l"] - eps),
                "t": float(pbx["t"] - eps),
                "r": float(pbx["r"] + eps),
                "b": float(pbx["b"] + eps),
                "deg": float(pbx.get("deg") or para_deg or 0.0),
            }
        pw = max(0.0, pbx["r"] - pbx["l"])
        ph = max(0.0, pbx["b"] - pbx["t"])
        para_style = (
            f'left: {pbx["l"]:.6f}px; '
            f'top: {pbx["t"]:.6f}px; '
            f'width: {pw:.6f}px; '
            f'height: {ph:.6f}px;'
        )
        if abs(para_deg) > 0.01:
            para_style += f' transform: rotate({para_deg:.6g}deg); transform-origin: center center;'
        parts.append(
            f'<div class="tp-para tp-para-{para_idx}" data-para-index="{para_idx}" style="{para_style}">'
        )
        para_cx = (pbx["l"] + pbx["r"]) / 2.0
        para_cy = (pbx["t"] + pbx["b"]) / 2.0
        # inverse rotation used to place items in the unrotated para frame
        inv_c = inv_s = None
        if abs(para_deg) > 0.01:
            rad_inv = math.radians(-para_deg)
            inv_c = math.cos(rad_inv)
            inv_s = math.sin(rad_inv)
        raw_texts = [_tp_extract_item_text(it) for it in items]
        mapped = list(raw_texts)
        p_text = p.get("text") if isinstance(p.get("text"), str) else ""
        non_empty = sum(
            1 for t in raw_texts if isinstance(t, str) and t.strip())
        any_nl = any(isinstance(t, str) and re.search(r"\r?\n", t)
                     for t in raw_texts)
        first_nl = bool(raw_texts and isinstance(
            raw_texts[0], str) and re.search(r"\r?\n", raw_texts[0]))
        lines = None
        # when text arrived as one multi-line blob, split it across items
        if p_text and re.search(r"\r?\n", p_text) and (non_empty <= 1 or any_nl):
            lines = [s.rstrip()
                     for s in re.split(r"\r?\n+", p_text) if s.strip()]
        elif first_nl and (non_empty <= 1 or all(not (t or "").strip() for t in raw_texts[1:])):
            lines = [s.rstrip() for s in re.split(
                r"\r?\n+", raw_texts[0]) if s.strip()]
        if lines:
            mapped = [lines[i] if i < len(lines) else (
                raw_texts[i] if i < len(raw_texts) else "") for i in range(len(items))]
        for ii, it in enumerate(items):
            if not isinstance(it, dict):
                continue
            text = (mapped[ii] if ii < len(mapped) else "") or ""
            if not text.strip():
                continue
            ibx = _tp_get_rect(it, base_w, base_h)
            if not ibx:
                continue
            w0 = max(0.0, ibx["r"] - ibx["l"])
            h0 = max(0.0, ibx["b"] - ibx["t"])
            if w0 <= 0 or h0 <= 0:
                continue
            # pad the item box symmetrically when eps is active
            w = float(w0 + (2.0 * eps)) if eps > 0.0 else float(w0)
            h = float(h0 + (2.0 * eps)) if eps > 0.0 else float(h0)
            item_idx = int(_tp_num(it.get("item_index")) or ii)
            # font size: stored value, else 85% of box height (>=10),
            # clamped to 95% of the box height and a 6px floor
            fs_raw = _tp_num(it.get("font_size_px"))
            fs = int(round(fs_raw)) if fs_raw and fs_raw > 0 else max(
                10, int(round(h0 * 0.85)))
            fs = max(6, min(fs, max(6, int(math.floor(h0 * 0.95)))))
            lh = max(1, min(int(round(h0)), int(round(fs * 1.12))))
            if inv_c is not None and inv_s is not None:
                # un-rotate the item center around the paragraph center so
                # the paragraph's CSS rotation restores its true position
                icx = (ibx["l"] + ibx["r"]) / 2.0
                icy = (ibx["t"] + ibx["b"]) / 2.0
                dx = icx - para_cx
                dy = icy - para_cy
                rcx = para_cx + (dx * inv_c - dy * inv_s)
                rcy = para_cy + (dx * inv_s + dy * inv_c)
                left = (rcx - (w / 2.0)) - pbx["l"]
                top = (rcy - (h / 2.0)) - pbx["t"]
            else:
                left = (ibx["l"] - pbx["l"]) - eps
                top = (ibx["t"] - pbx["t"]) - eps
            style = (
                f'left: {left:.6f}px; '
                f'top: {top:.6f}px; '
                f'width: {w:.6f}px; '
                f'height: {h:.6f}px; '
                f'font-size: {fs}px; '
                f'line-height: {lh}px; '
                'padding-bottom: 0px;'
            )
            deg = float(ibx.get("deg") or 0.0)
            if inv_c is not None:
                # item rotation is relative to the already-rotated paragraph
                deg = deg - para_deg
            if abs(deg) > 0.01:
                style += f' transform: rotate({deg:.6g}deg); transform-origin: center center;'
            wrap_attr = ' data-wrap="1"' if it.get("_tp_wrap") else ""
            parts.append(
                f'<div class="tp-item tp-item-{item_idx}" data-para-index="{para_idx}" data-item-index="{item_idx}"{wrap_attr} style="{style}">'
                f'<span>{_tp_escape_text(text)}</span></div>'
            )
        parts.append("</div>")
    parts.append("</div>")
    return "".join(parts)
def overlay_css(container_class="RTMDre", token_class="IwqbBf"):
    """Build the CSS for the lens text-overlay container and its tokens.

    `container_class` styles the absolutely positioned, full-size,
    click-through overlay layer; `token_class` styles each text token,
    whose visible text is rendered from its aria-label via a ::before
    pseudo-element so the DOM text itself stays untouched.

    Returns the CSS as a single string.
    """
    c = container_class
    t = token_class
    return (
        f".{c}{{"
        "position:absolute!important;"
        "inset:0!important;"
        "width:100%!important;"
        "height:100%!important;"
        "display:block!important;"
        "opacity:1!important;"
        "visibility:visible!important;"
        "pointer-events:none!important;"
        "overflow:visible!important;"
        "z-index:2147483647!important;"
        "transform:none!important;"
        "contain:layout style paint!important;"
        "--lens-text-color:#fff;"
        "--lens-font-family:\"Noto Sans Thai\",\"Noto Sans Thai UI\",\"Noto Sans\",system-ui,-apple-system,BlinkMacSystemFont,\"Segoe UI\",Roboto,Arial,sans-serif;"
        "--lens-text-shadow:0 1px 2px rgba(0,0,0,.85),0 0 1px rgba(0,0,0,.85);"
        # Rule closers below are plain (non-f) strings, so a single "}" is
        # the literal brace; the previous "}}" emitted a stray extra brace
        # after every rule, leaving the stylesheet unbalanced.
        "}"
        f".{c} *{{box-sizing:border-box!important;}}"
        f".{c} .{t}{{"
        "position:absolute!important;"
        "display:flex!important;"
        "align-items:center!important;"
        "justify-content:center!important;"
        "opacity:1!important;"
        "visibility:visible!important;"
        "pointer-events:none!important;"
        "user-select:none!important;"
        "overflow:visible!important;"
        "white-space:pre!important;"
        "transform-origin:top left!important;"
        "filter:none!important;"
        "mix-blend-mode:normal!important;"
        "text-transform:none!important;"
        "letter-spacing:normal!important;"
        "}"
        f".{c} .{t}::before{{"
        "content:attr(aria-label)!important;"
        "display:block!important;"
        "white-space:pre!important;"
        "color:var(--lens-text-color)!important;"
        "font-family:var(--lens-font-family)!important;"
        "text-shadow:var(--lens-text-shadow)!important;"
        "font-weight:400!important;"
        "font-style:normal!important;"
        "line-height:inherit!important;"
        "text-rendering:geometricPrecision!important;"
        "}"
    )
def ensure_font(path, urls):
    """Resolve a usable font file path, downloading it if necessary.

    Resolution order:
      1. memoised result from _FONT_RESOLVE_CACHE (empty string = known miss)
      2. `path` itself, when it exists on disk
      3. a file with the same basename under common system font directories
      4. best-effort download from each of `urls` into `path`

    Returns the resolved path, or None when nothing could be found.
    """
    key = str(path or "")
    cached = _FONT_RESOLVE_CACHE.get(key)
    if cached is not None:
        # "" memoises a failed lookup; report it as None to the caller.
        return cached or None
    if path and os.path.isfile(path):
        _FONT_RESOLVE_CACHE[key] = path
        return path
    # Scan system font directories for a file with the same basename.
    # Guard against a falsy `path`: os.path.basename(None) would raise.
    want = os.path.basename(path or "").lower()
    if want:
        for root in ("/usr/share/fonts", "/usr/local/share/fonts", os.path.expanduser("~/.fonts")):
            if not os.path.isdir(root):
                continue
            for dirpath, _dirnames, filenames in os.walk(root):
                for fn in filenames:
                    if fn.lower() == want:
                        found = os.path.join(dirpath, fn)
                        _FONT_RESOLVE_CACHE[key] = found
                        return found
    # Fall back to downloading; first URL that yields a plausible file wins.
    for url in urls:
        try:
            r = httpx.get(url, timeout=30)
            # The >10 kB size check rejects error pages served with HTTP 200.
            if r.status_code == 200 and len(r.content) > 10000:
                with open(path, "wb") as f:
                    f.write(r.content)
            if os.path.isfile(path):
                _FONT_RESOLVE_CACHE[key] = path
                return path
        except Exception:
            # Best-effort: try the next mirror on any network/write error.
            pass
    _FONT_RESOLVE_CACHE[key] = ""
    return None
def pick_font(text, thai_path, latin_path, size):
    """Load a TrueType font for `text` at `size`.

    Prefers the Thai font when the text contains any codepoint in the
    Thai block (U+0E00-U+0E7F), otherwise the Latin font; falls back to
    PIL's built-in bitmap font when neither file can be loaded.
    """
    contains_thai = any(0x0E00 <= ord(ch) <= 0x0E7F for ch in text)
    candidate = thai_path if contains_thai else latin_path
    if candidate and os.path.isfile(candidate):
        # Prefer the RAQM layout engine (proper complex-script shaping);
        # retry without it before giving up on the file entirely.
        try:
            return ImageFont.truetype(candidate, size=size, layout_engine=getattr(ImageFont, "LAYOUT_RAQM", 0))
        except Exception:
            try:
                return ImageFont.truetype(candidate, size=size)
            except Exception:
                pass
    return ImageFont.load_default()
def _get_font_pair(thai_path, latin_path, size):
    """Return (thai_font, latin_font) at `size`, memoised in _FONT_PAIR_CACHE."""
    cache_key = (str(thai_path or ""), str(latin_path or ""), int(size))
    pair = _FONT_PAIR_CACHE.get(cache_key)
    if not pair:
        # Probe pick_font with one Thai and one Latin character so each
        # slot gets the face matching its script.
        pair = (
            pick_font("ก", thai_path, latin_path, size),
            pick_font("A", thai_path, latin_path, size),
        )
        _FONT_PAIR_CACHE[cache_key] = pair
    return pair
def _is_thai_char(ch: str) -> bool:
if not ch:
return False
o = ord(ch)
return 0x0E00 <= o <= 0x0E7F
def _split_runs_for_fallback(text: str):
runs = []
cur = []
cur_is_th = None
for ch in text:
if ch == "\n":
if cur:
runs.append(("".join(cur), cur_is_th))
cur = []
runs.append(("\n", None))
cur_is_th = None
continue
is_th = _is_thai_char(ch)
if ch.isspace() and cur_is_th is not None:
is_th = cur_is_th
if cur_is_th is None:
cur_is_th = is_th
cur = [ch]
continue
if is_th == cur_is_th:
cur.append(ch)
else:
runs.append(("".join(cur), cur_is_th))
cur = [ch]
cur_is_th = is_th
if cur:
runs.append(("".join(cur), cur_is_th))
return runs
def _draw_text_centered_fallback(draw_ctx, center_xy, text, thai_path, latin_path, size, fill):
    """Draw mixed Thai/Latin text centred on `center_xy` with per-script fonts.

    Two passes over the script runs: first measure the total advance and
    the ink extents on a shared baseline (anchor "ls" puts the baseline at
    y=0), then draw each run left-to-right so the combined line is centred
    on `center_xy`. Newline runs are skipped, i.e. the text is rendered as
    a single line.
    """
    t = _sanitize_draw_text(text)
    if not t:
        return
    f_th, f_lat = _get_font_pair(thai_path, latin_path, size)
    runs = _split_runs_for_fallback(t)
    # Pass 1: measure. x accumulates the advance; min_t/max_b track the
    # topmost/bottommost ink relative to the baseline.
    x = 0.0
    min_t = 0.0
    max_b = 0.0
    for run, is_th in runs:
        if run == "\n":
            continue
        f = f_th if is_th else f_lat
        try:
            bb = draw_ctx.textbbox((x, 0), run, font=f, anchor="ls")
            min_t = min(min_t, float(bb[1]))
            max_b = max(max_b, float(bb[3]))
            x = float(bb[2])
        except Exception:
            # Older Pillow without textbbox: fall back to textsize, then
            # to a crude 0.5*size-per-character estimate.
            try:
                w, h = draw_ctx.textsize(run, font=f)
            except Exception:
                w, h = (len(run) * size * 0.5, size)
            min_t = min(min_t, -float(h) * 0.8)
            max_b = max(max_b, float(h) * 0.2)
            x += float(w)
    total_w = max(1.0, x)
    total_h = max(1.0, max_b - min_t)
    cx, cy = center_xy
    # Pass 2: draw, positioned so the measured extents centre on (cx, cy).
    start_x = float(cx) - (total_w / 2.0)
    baseline_y = float(cy) - (total_h / 2.0) - min_t
    x = start_x
    for run, is_th in runs:
        if run == "\n":
            continue
        f = f_th if is_th else f_lat
        draw_ctx.text((x, baseline_y), run, font=f, fill=fill, anchor="ls")
        # Advance by the run width; same fallback chain as the measuring pass.
        try:
            x += float(draw_ctx.textlength(run, font=f))
        except Exception:
            try:
                w, _ = draw_ctx.textsize(run, font=f)
            except Exception:
                w = len(run) * size * 0.5
            x += float(w)
def _draw_text_baseline_fallback(draw, pos, text, thai_path, latin_path, size, fill):
    """Draw mixed Thai/Latin text starting at baseline point `pos`.

    Script runs are drawn left-to-right with anchor "ls" (baseline-left),
    each with the Thai or Latin font as appropriate. Returns
    (advance_width, max_ascent + max_descent) in pixels, or (0.0, 0.0)
    for empty/control-only text.
    """
    t = _sanitize_draw_text(text)
    if not t:
        return 0.0, 0.0
    f_th, f_lat = _get_font_pair(thai_path, latin_path, size)
    runs = _split_runs_for_fallback(t)
    x0, y0 = pos
    x = float(x0)
    max_ascent = 0
    max_descent = 0
    for run, is_th in runs:
        if run == "\n":
            continue
        f = f_th if is_th else f_lat
        try:
            ascent, descent = f.getmetrics()
        except Exception:
            # The bitmap default font lacks getmetrics; estimate from size.
            ascent, descent = size, int(size * 0.25)
        max_ascent = max(max_ascent, ascent)
        max_descent = max(max_descent, descent)
        draw.text((x, y0), run, font=f, fill=fill, anchor="ls")
        # Advance: font.getlength, else textbbox on a scratch image, else
        # legacy textsize.
        try:
            adv = float(f.getlength(run))
        except Exception:
            tmp = Image.new("RGBA", (10, 10), (0, 0, 0, 0))
            dtmp = ImageDraw.Draw(tmp)
            try:
                bb = dtmp.textbbox((0, 0), run, font=f, anchor="ls")
                adv = float(bb[2] - bb[0])
            except Exception:
                w, _ = dtmp.textsize(run, font=f)
                adv = float(w)
        x += adv
    return float(x - x0), float(max_ascent + max_descent)
def _baseline_offset_px_for_text(text: str, thai_path: str, latin_path: str, size: int):
    """Measure where the baseline sits relative to the text's vertical centre.

    Returns (baseline_offset, total_height) in pixels, where
    baseline_offset is the y distance from the ink box's vertical centre
    down to the baseline; returns None for empty/control-only text.
    Near-duplicate of _line_metrics_px, differing only in the returned tuple.
    """
    t = _sanitize_draw_text(text)
    if not t:
        return None
    f_th, f_lat = _get_font_pair(thai_path, latin_path, size)
    runs = _split_runs_for_fallback(t)
    # Scratch draw context; textbbox needs one but nothing is painted.
    tmp = Image.new("RGBA", (16, 16), (0, 0, 0, 0))
    dtmp = ImageDraw.Draw(tmp)
    x = 0.0
    min_t = 0.0  # topmost ink y (negative = above baseline)
    max_b = 0.0  # bottommost ink y (positive = below baseline)
    for run, is_th in runs:
        if run == "\n":
            continue
        f = f_th if is_th else f_lat
        try:
            bb = dtmp.textbbox((x, 0), run, font=f, anchor="ls")
            min_t = min(min_t, float(bb[1]))
            max_b = max(max_b, float(bb[3]))
            x = float(bb[2])
        except Exception:
            # Legacy Pillow fallback: approximate extents from textsize.
            try:
                w, h = dtmp.textsize(run, font=f)
            except Exception:
                w, h = (len(run) * size * 0.5, size)
            min_t = min(min_t, -float(h) * 0.8)
            max_b = max(max_b, float(h) * 0.2)
            x += float(w)
    total_h = max(1.0, max_b - min_t)
    baseline_offset = -(total_h / 2.0) - min_t
    return baseline_offset, total_h
def _line_metrics_px(text: str, thai_path: str, latin_path: str, size: int):
    """Measure one line of mixed Thai/Latin text at font `size`.

    Lays the script runs out left-to-right on a shared baseline (y=0,
    anchor "ls") and returns (width, total_height, baseline_to_center)
    in pixels, or None for empty/control-only text. Newline runs are
    skipped, so multi-line input is measured as one concatenated line.
    """
    t = _sanitize_draw_text(text)
    if not t:
        return None
    f_th, f_lat = _get_font_pair(thai_path, latin_path, size)
    runs = _split_runs_for_fallback(t)
    # Scratch image: textbbox only needs a draw context, not real canvas space.
    tmp = Image.new("RGBA", (16, 16), (0, 0, 0, 0))
    dtmp = ImageDraw.Draw(tmp)
    x = 0.0
    min_t = 0.0  # topmost ink y (negative = above baseline)
    max_b = 0.0  # bottommost ink y (positive = below baseline)
    for run, is_th in runs:
        if run == "\n":
            continue
        f = f_th if is_th else f_lat
        try:
            bb = dtmp.textbbox((x, 0), run, font=f, anchor="ls")
            min_t = min(min_t, float(bb[1]))
            max_b = max(max_b, float(bb[3]))
            x = float(bb[2])
        except Exception:
            # Legacy Pillow fallback: estimate from textsize, or a crude
            # 0.5*size-per-character guess.
            try:
                w, h = dtmp.textsize(run, font=f)
            except Exception:
                w, h = (len(run) * size * 0.5, size)
            min_t = min(min_t, -float(h) * 0.8)
            max_b = max(max_b, float(h) * 0.2)
            x += float(w)
    width = max(1.0, x)
    total_h = max(1.0, max_b - min_t)
    # Distance from the baseline up to the vertical middle of the ink box.
    baseline_to_center = -((min_t + max_b) / 2.0)
    return width, total_h, baseline_to_center
def _item_avail_w_px(item: dict, W: int, H: int) -> float:
b = item.get("box") or {}
w_box = float(b.get("width") or 0.0) * float(W)
L = 0.0
p1 = item.get("baseline_p1") or {}
p2 = item.get("baseline_p2") or {}
if ("x" in p1 and "y" in p1 and "x" in p2 and "y" in p2):
dx = (float(p2.get("x") or 0.0) - float(p1.get("x") or 0.0)) * float(W)
dy = (float(p2.get("y") or 0.0) - float(p1.get("y") or 0.0)) * float(H)
L = float(math.hypot(dx, dy))
avail = max(w_box, L)
return max(1.0, float(avail))
def _item_avail_h_px(item: dict, H: int) -> float:
b = item.get("box") or {}
return max(1.0, (float(b.get("height") or 0.0) * float(H)) - 2.0)
def _item_line_text(item: dict) -> str:
t = str(item.get("text") or "")
if t.strip():
return t
spans = item.get("spans") or []
return "".join(str(s.get("text") or "") for s in spans)
def _compute_fit_size_px_for_item(item: dict, thai_path: str, latin_path: str, W: int, H: int, base_size: int = 96) -> int | None:
    """Find the largest font size (px) whose single-line rendering of the
    item's text fits the item's available width/height.

    Measures once at `base_size`, scales down proportionally, then
    fine-tunes by decrementing until it truly fits. When even a small
    size would need >20% extra width, probes whether wrapping onto
    multiple lines allows a noticeably (>=25%) larger size; if so, marks
    the item with the `_tp_wrap` flag. Returns None when the item has no
    measurable text.
    """
    item.pop("_tp_wrap", None)
    text = _item_line_text(item)
    if not text.strip():
        return None
    m = _line_metrics_px(text, thai_path, latin_path, base_size)
    if m is None:
        return None
    tw, th, _ = m
    avail_w = _item_avail_w_px(item, W, H)
    avail_h = _item_avail_h_px(item, H)
    if tw <= 1e-6 or th <= 1e-6:
        return None
    is_thai = any(_is_thai_char(ch) for ch in text)
    # Thai gets a smaller height budget (0.90) to leave headroom for
    # stacked vowel/tone marks.
    scale_w = (avail_w * 0.98) / tw
    scale_h = (avail_h * (0.90 if is_thai else 0.94)) / th
    scale = min(scale_w, scale_h)
    if scale <= 0:
        return None
    size = max(10, int(base_size * scale))
    # Font metrics are not perfectly linear in size: step down until the
    # re-measured line actually fits (with a small tolerance).
    while size > 10:
        mm = _line_metrics_px(text, thai_path, latin_path, size)
        if mm is None:
            return None
        tw2, th2, _ = mm
        if (tw2 <= avail_w * 0.999) and (th2 <= avail_h * 0.999):
            break
        size -= 1
    # Tiny result in a tall box: consider wrapping to regain size.
    if size <= 12 and avail_h >= 24:
        tw0, th0, _ = m
        if tw0 > (avail_w * 1.2):
            def _wrap_fits(s: int) -> bool:
                # Would `s` px text, wrapped to avail_w, fit within avail_h?
                # Uses linearly scaled base_size metrics as an estimate.
                if s <= 0:
                    return False
                k = float(s) / float(base_size)
                tw = float(tw0) * k
                th = float(th0) * k
                lines = int(math.ceil(max(1.0, tw) / max(1.0, avail_w)))
                return (float(lines) * th) <= float(avail_h)
            # Binary search for the largest size that fits when wrapped.
            hi = int(min(max(16, avail_h), base_size * 3))
            lo = int(size)
            best = int(size)
            while lo <= hi:
                mid = (lo + hi) // 2
                if _wrap_fits(mid):
                    best = int(mid)
                    lo = mid + 1
                else:
                    hi = mid - 1
            # Only accept wrapping when it buys at least a 25% larger size.
            if best >= int(size * 1.25):
                item["_tp_wrap"] = True
                size = int(best)
    return int(size)
def fit_tree_font_sizes_for_tp_html(tree: dict, thai_path: str, latin_path: str, W: int, H: int) -> dict:
    """Assign per-item fitted font sizes (px) for the translated-paragraph HTML.

    For each paragraph: computes the best-fit size of every item via
    _compute_fit_size_px_for_item, stores the per-paragraph upper median
    as `para_font_size_px`, and writes each item's own fitted size onto
    the item and all of its spans. Returns the same tree, mutated in place.
    """
    for para in (tree.get("paragraphs") or []):
        items = para.get("items") or []
        if not items:
            continue
        fit_by_index: dict[int, int] = {}
        for idx, item in enumerate(items):
            fitted = _compute_fit_size_px_for_item(item, thai_path, latin_path, W, H)
            if fitted is not None:
                fit_by_index[idx] = int(fitted)
        if not fit_by_index:
            continue
        ordered = sorted(fit_by_index.values())
        para["para_font_size_px"] = int(ordered[len(ordered) // 2])
        for idx, size in fit_by_index.items():
            item = items[idx]
            item["font_size_px"] = size
            for span in (item.get("spans") or []):
                span["font_size_px"] = size
    return tree
def _iter_paragraphs(tree: dict):
ps = (tree or {}).get("paragraphs") or []
for i, p in enumerate(ps):
yield i, p
def _apply_para_font_size(tree: dict, para_sizes: dict[int, int]):
if not tree:
return
for pi, p in _iter_paragraphs(tree):
sz = para_sizes.get(pi)
if not sz:
continue
p["para_font_size_px"] = int(sz)
for it in (p.get("items") or []):
it["font_size_px"] = int(sz)
for sp in (it.get("spans") or []):
sp["font_size_px"] = int(sz)
def _compute_shared_para_sizes(trees: list[dict], thai_path: str, latin_path: str, W: int, H: int) -> dict[int, int]:
    """Compute one shared font size per paragraph index across several trees.

    Each paragraph index gets the minimum fitted size over all of its
    items in all trees; every size is then additionally capped at the
    median of those per-paragraph minima so the page looks uniform.
    Returns a mapping {paragraph_index: size_px}.
    """
    sizes: dict[int, int] = {}
    for tree in trees:
        if not tree:
            continue
        for pi, para in _iter_paragraphs(tree):
            for item in (para.get("items") or []):
                fit = _compute_fit_size_px_for_item(item, thai_path, latin_path, W, H)
                if fit is None:
                    continue
                prev = sizes.get(pi)
                sizes[pi] = fit if prev is None else min(prev, fit)
    positive = sorted(v for v in sizes.values() if isinstance(v, int) and v > 0)
    if not positive:
        return sizes
    # Median of the per-paragraph minima (mean of the middle two when even).
    mid = len(positive) // 2
    if len(positive) % 2 == 1:
        target = positive[mid]
    else:
        target = int(round((positive[mid - 1] + positive[mid]) / 2))
    for key in list(sizes):
        try:
            sizes[key] = int(min(int(sizes[key]), int(target)))
        except Exception:
            pass
    return sizes
def _sanitize_draw_text(s: str) -> str:
t = (s or "").replace("\r\n", "\n").replace("\r", "\n")
t = t.replace("\u200b", "").replace("\ufeff", "")
t = "".join(ch for ch in t if (ch == "\n") or (
unicodedata.category(ch)[0] != "C"))
return t
def _token_box_px(t, W, H, pad_px=0):
b = t.get("box") or {}
left = int(round(float(b.get("left", 0.0)) * W)) - pad_px
top = int(round(float(b.get("top", 0.0)) * H)) - pad_px
right = int(round((float(b.get("left", 0.0)) +
float(b.get("width", 0.0))) * W)) + pad_px
bottom = int(
round((float(b.get("top", 0.0)) + float(b.get("height", 0.0))) * H)) + pad_px
left = max(0, min(W, left))
top = max(0, min(H, top))
right = max(0, min(W, right))
bottom = max(0, min(H, bottom))
if right <= left or bottom <= top:
return None
return left, top, right, bottom
def _token_quad_px(t, W, H, pad_px=0, apply_baseline_shift=True):
    """Build a 4-corner pixel quad for a token from its baseline segment.

    The baseline p1->p2 is flipped if needed so it runs left-to-right,
    clipped to the token's t0_raw..t1_raw fractions, optionally shifted
    along the baseline normal (module-level BASELINE_SHIFT /
    BASELINE_SHIFT_FACTOR), padded by `pad_px` and expanded to the
    token's height_raw. Returns None for tokens without valid text or a
    degenerate baseline. Corner order: start-top, end-top, end-bottom,
    start-bottom.
    """
    if not t.get("valid_text"):
        return None
    p1 = t.get("baseline_p1") or {}
    p2 = t.get("baseline_p2") or {}
    x1 = float(p1.get("x", 0.0)) * W
    y1 = float(p1.get("y", 0.0)) * H
    x2 = float(p2.get("x", 0.0)) * W
    y2 = float(p2.get("y", 0.0)) * H
    dx = x2 - x1
    dy = y2 - y1
    # Normalise direction: make the segment point rightward (or downward
    # when it is perfectly vertical).
    if dx < 0 or (abs(dx) < 1e-12 and dy < 0):
        x1, y1, x2, y2 = x2, y2, x1, y1
        dx = x2 - x1
        dy = y2 - y1
    L = math.hypot(dx, dy)
    if L <= 1e-9:
        return None
    ux = dx / L
    uy = dy / L
    # Unit normal (nx, ny), flipped so it points "down" in image coords.
    nx = -uy
    ny = ux
    if ny < 0:
        nx, ny = -nx, -ny
    # t0/t1 trim the baseline to the token's sub-segment (default full span).
    t0 = float(t.get("t0_raw") if t.get("t0_raw") is not None else 0.0)
    t1 = float(t.get("t1_raw") if t.get("t1_raw") is not None else 1.0)
    sx = x1 + ux * (t0 * L)
    sy = y1 + uy * (t0 * L)
    ex = x1 + ux * (t1 * L)
    ey = y1 + uy * (t1 * L)
    h = max(1.0, float(t.get("height_raw") or 0.0) * H)
    if apply_baseline_shift and BASELINE_SHIFT:
        shift = h * BASELINE_SHIFT_FACTOR
        sx += nx * shift
        sy += ny * shift
        ex += nx * shift
        ey += ny * shift
    # Pad along the baseline direction, then offset half-height (+pad)
    # along the normal to form the quad corners.
    pad = max(0.0, float(pad_px))
    sx -= ux * pad
    sy -= uy * pad
    ex += ux * pad
    ey += uy * pad
    hh = (h / 2.0) + pad
    ox = nx * hh
    oy = ny * hh
    return [(sx - ox, sy - oy), (ex - ox, ey - oy), (ex + ox, ey + oy), (sx + ox, sy + oy)]
def _token_box_quad_px(t, W, H, pad_px=0):
b = t.get("box") or {}
w = float(b.get("width", 0.0)) * W
h = float(b.get("height", 0.0)) * H
if w <= 0.0 or h <= 0.0:
return None
left = float(b.get("left", 0.0)) * W
top = float(b.get("top", 0.0)) * H
cx = left + (w / 2.0)
cy = top + (h / 2.0)
hw = (w / 2.0) + float(pad_px)
hh = (h / 2.0) + float(pad_px)
angle_deg = float(b.get("rotation_deg", 0.0))
rad = math.radians(angle_deg)
c = math.cos(rad)
s = math.sin(rad)
corners = [(-hw, -hh), (hw, -hh), (hw, hh), (-hw, hh)]
out = []
for x, y in corners:
rx = (x * c) - (y * s)
ry = (x * s) + (y * c)
out.append((cx + rx, cy + ry))
return out
def _quad_bbox(quad, W, H):
xs = [p[0] for p in quad]
ys = [p[1] for p in quad]
l = max(0, min(W, int(math.floor(min(xs)))))
t = max(0, min(H, int(math.floor(min(ys)))))
r = max(0, min(W, int(math.ceil(max(xs)))))
b = max(0, min(H, int(math.ceil(max(ys)))))
if r <= l or b <= t:
return None
return l, t, r, b
def _median_rgba(pixels):
if not pixels:
return None
rs = sorted(p[0] for p in pixels)
gs = sorted(p[1] for p in pixels)
bs = sorted(p[2] for p in pixels)
a = 255
mid = len(rs) // 2
return (rs[mid], gs[mid], bs[mid], a)
def _rel_luminance(rgb):
r, g, b = rgb
def lin(c):
c = c / 255.0
return c / 12.92 if c <= 0.04045 else ((c + 0.055) / 1.055) ** 2.4
return 0.2126 * lin(r) + 0.7152 * lin(g) + 0.0722 * lin(b)
def _contrast_ratio(l1, l2):
a = max(l1, l2) + 0.05
b = min(l1, l2) + 0.05
return a / b
def _pick_bw_text_color(bg_rgb):
    """Choose the configured light or dark text colour, whichever has the
    higher WCAG contrast against the sampled background colour (ties go
    to the light colour)."""
    bg_lum = _rel_luminance(bg_rgb)
    if _contrast_ratio(bg_lum, 1.0) >= _contrast_ratio(bg_lum, 0.0):
        return TEXT_COLOR_LIGHT
    return TEXT_COLOR_DARK
def _sample_bg_color_from_quad(base_rgb, quad, rect, border_px=3, margin_px=6):
    """Estimate the background colour around text inside `quad`.

    Samples a `border_px`-thick ring just INSIDE the quad (the quad mask
    minus its erosion) and returns the per-channel median as (r, g, b).
    Falls back to _sample_bg_color's outside strips when the rect is
    degenerate or fewer than 24 ring pixels are available.
    """
    l, t, r, b = rect
    w = r - l
    h = b - t
    if w <= 0 or h <= 0:
        return _sample_bg_color(base_rgb, rect, margin_px)
    mask = Image.new("L", (w, h), 0)
    d = ImageDraw.Draw(mask)
    # Quad in rect-local coordinates.
    qrel = [(x - l, y - t) for x, y in quad]
    d.polygon(qrel, fill=255)
    bp = int(max(0, border_px or 0))
    if bp > 0:
        # Cap the erosion so the MinFilter kernel still fits inside the mask.
        k = min(w, h)
        bp = min(bp, max(1, (k - 1) // 2))
    if bp > 0:
        # Ring = mask minus its erosion: pixels just inside the quad edge.
        er = mask.filter(ImageFilter.MinFilter(size=bp * 2 + 1))
        border = ImageChops.subtract(mask, er)
    else:
        border = mask
    region = base_rgb.crop((l, t, r, b))
    rp = list(region.getdata())
    mp = list(border.getdata())
    samples = [p for p, m in zip(rp, mp) if m > 0]
    if len(samples) < 24:
        # Too few ring pixels to trust the median; use outside strips.
        ext = _sample_bg_color(base_rgb, rect, margin_px)
        return ext
    med = _median_rgba(samples)
    if med:
        return med[:3]
    return _sample_bg_color(base_rgb, rect, margin_px)
def _sample_bg_color(base_rgb, rect, margin_px):
    """Median colour of the four `margin_px`-thick strips directly outside
    `rect`, clamped to the image.

    Falls back to the single pixel at the rect's clamped top-left corner
    when every strip is off-image. Returns an (r, g, b) tuple.
    """
    W, H = base_rgb.size
    l, t, r, b = rect
    m = max(1, int(margin_px))
    samples = []

    def add_strip(x0, y0, x1, y1):
        # Clamp the strip to the image; skip it when empty after clamping.
        x0 = max(0, min(W, x0))
        y0 = max(0, min(H, y0))
        x1 = max(0, min(W, x1))
        y1 = max(0, min(H, y1))
        if x1 <= x0 or y1 <= y0:
            return
        samples.extend(list(base_rgb.crop((x0, y0, x1, y1)).getdata()))

    add_strip(l, t - m, r, t)      # strip above
    add_strip(l, b, r, b + m)      # strip below
    add_strip(l - m, t, l, b)      # strip to the left
    add_strip(r, t, r + m, b)      # strip to the right
    med = _median_rgba(samples)
    if med:
        return med[:3]
    return base_rgb.getpixel((max(0, min(W - 1, l)), max(0, min(H - 1, t))))
def _sample_bg_color_from_quad_ring(base_rgb, quad, rect, ring_px=4):
    """Median colour of a `ring_px`-thick ring just OUTSIDE the quad.

    Builds the ring with OpenCV as (dilated mask) minus (mask) and
    returns (r, g, b), or None when the rect is degenerate or the ring
    holds too few samples — callers then fall back to other samplers.
    """
    W, H = base_rgb.size
    l, t, r, b = rect
    w = r - l
    h = b - t
    if w <= 0 or h <= 0:
        return None
    mask = np.zeros((h, w), dtype=np.uint8)
    pts = np.array([[(x - l, y - t) for x, y in quad]], dtype=np.int32)
    cv2.fillPoly(mask, pts, 255)
    rp = int(max(1, ring_px or 1))
    k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (rp * 2 + 1, rp * 2 + 1))
    dil = cv2.dilate(mask, k, iterations=1)
    # Ring = dilation minus the original mask: pixels just outside the quad.
    ring = cv2.bitwise_and(dil, cv2.bitwise_not(mask))
    rgb = np.array(base_rgb.crop((l, t, r, b)).convert("RGB"), dtype=np.uint8)
    sel = rgb[ring > 0]
    # sel is (n, 3); ndarray.size counts scalars, so this requires >= 8 pixels.
    if sel.size < 24:
        return None
    med = np.median(sel, axis=0)
    return (int(med[0]), int(med[1]), int(med[2]))
def _pixelate(img, block_px):
    """Return a mosaic version of `img` with square blocks of roughly
    `block_px` pixels.

    Downscales with nearest-neighbour and upscales back, so each block
    becomes a flat colour. Images of 1 px or smaller are returned as-is;
    a falsy or sub-1 block size is treated as 1.
    """
    w, h = img.size
    if w <= 1 or h <= 1:
        return img
    block = max(1, int(block_px or 1))
    small = (max(1, w // block), max(1, h // block))
    return img.resize(small, resample=Image.NEAREST).resize((w, h), resample=Image.NEAREST)
def _mean_abs_diff(a, b):
    """Mean absolute per-channel difference between two equal-size images.

    Result is in [0, 255]; a huge sentinel (1e18) is returned for
    mismatched sizes or empty images so the caller's minimum search
    simply skips them.
    """
    if a.size != b.size:
        return 1e18
    pa = list(a.convert("RGB").getdata())
    pb = list(b.convert("RGB").getdata())
    if not pa:
        return 1e18
    total = sum(
        abs(r1 - r2) + abs(g1 - g2) + abs(b1 - b2)
        for (r1, g1, b1), (r2, g2, b2) in zip(pa, pb)
    )
    return total / (len(pa) * 3)
def _resize_small(img, max_w=64, max_h=64):
    """Downscale `img` (bilinear) to fit within max_w x max_h; never upscales.

    Degenerate zero-size images are returned unchanged.
    """
    w, h = img.size
    if w <= 0 or h <= 0:
        return img
    scale = min(max_w / w, max_h / h, 1.0)
    target = (max(1, int(w * scale)), max(1, int(h * scale)))
    return img.resize(target, resample=Image.BILINEAR)
def _clone_candidate_score(base, rect, cand_rect, direction, border_px):
    """Score a donor patch for clone-erasing; lower is better.

    Compares the `border_px`-thick strip just outside `rect` on the side
    facing the donor (`direction`: "up"/"down"/"left", anything else =
    right) against the donor's matching edge strip, both downscaled, via
    mean absolute difference. Returns a huge sentinel (1e18) for
    degenerate rects so min() skips them.
    """
    W, H = base.size
    l, t, r, b = rect
    cl, ct, cr, cb = cand_rect
    w = r - l
    h = b - t
    if w <= 1 or h <= 1:
        return 1e18
    border_px = max(1, int(border_px or 1))
    if direction == "up":
        # Strip above the rect vs the donor's bottom edge.
        a = base.crop((l, max(0, t - border_px), r, t))
        d = base.crop((cl, max(0, cb - border_px), cr, cb))
    elif direction == "down":
        # Strip below the rect vs the donor's top edge.
        a = base.crop((l, b, r, min(H, b + border_px)))
        d = base.crop((cl, ct, cr, min(H, ct + border_px)))
    elif direction == "left":
        # Strip left of the rect vs the donor's right edge.
        a = base.crop((max(0, l - border_px), t, l, b))
        d = base.crop((max(0, cr - border_px), ct, cr, cb))
    else:
        # Strip right of the rect vs the donor's left edge.
        a = base.crop((r, t, min(W, r + border_px), b))
        d = base.crop((cl, ct, min(W, cl + border_px), cb))
    # Downscale both strips so the pure-Python pixel diff stays cheap.
    a = _resize_small(a, 64, 16)
    d = _resize_small(d, 64, 16)
    return _mean_abs_diff(a, d)
def _choose_clone_rect(base, rect, gap_px, border_px):
    """Pick the best donor rectangle for clone-erasing `rect`.

    Considers same-size patches above, below, left and right of the rect
    (separated by `gap_px`), discards any falling outside the image and
    returns the candidate whose border strip best matches the rect's
    surroundings (lowest _clone_candidate_score), or None when no
    candidate fits.
    """
    W, H = base.size
    l, t, r, b = rect
    w = r - l
    h = b - t
    gap = max(0, int(gap_px or 0))
    placements = {
        "up": (l, t - gap - h, r, t - gap),
        "down": (l, b + gap, r, b + gap + h),
        "left": (l - gap - w, t, l - gap, b),
        "right": (r + gap, t, r + gap + w, b),
    }
    scored = []
    for direction, cand in placements.items():
        cl, ct, cr, cb = cand
        if cl < 0 or ct < 0 or cr > W or cb > H:
            continue
        scored.append((_clone_candidate_score(base, rect, cand, direction, border_px), cand))
    if not scored:
        return None
    return min(scored, key=lambda sc: sc[0])[1]
def _erase_with_clone(base, rect, mask, gap_px, border_px, feather_px):
    """Erase `rect` in `base` by pasting the best-matching neighbour patch.

    The donor is chosen via _choose_clone_rect; `mask` (rect-sized, mode
    "L") selects the pixels to replace and is feathered by `feather_px`
    for a soft edge. Mutates `base` in place; returns False when no donor
    patch fits inside the image.
    """
    donor_rect = _choose_clone_rect(base, rect, gap_px, border_px)
    if not donor_rect:
        return False
    l, t, r, b = rect
    donor = base.crop(donor_rect)
    region = base.crop((l, t, r, b))
    soft = max(0, int(feather_px or 0))
    m = mask.filter(ImageFilter.GaussianBlur(radius=soft)) if soft > 0 else mask
    base.paste(Image.composite(donor, region, m), (l, t))
    return True
def _erase_with_blend_patches(base, rect, mask, gap_px=3, feather_px=4):
    """Erase `rect` in `base` by overwriting it with the average of the
    neighbouring same-size patches (4 sides + 4 diagonals, `gap_px` away).

    `mask` (rect-sized, mode "L") selects which pixels are replaced; it
    is feathered with a Gaussian blur of `feather_px` so the patch blends
    smoothly. Mutates `base` in place. Returns True on success, False
    when the rect is too small or no neighbouring patch fits the image.
    """
    l, t, r, b = rect
    W, H = base.size
    w = r - l
    h = b - t
    if w <= 2 or h <= 2:
        return False
    gap = int(max(0, gap_px))
    candidates = []
    # Offsets to the 8 surrounding patches, separated by `gap` pixels.
    dirs = [(0, -(h + gap)), (0, (h + gap)), (-(w + gap), 0), ((w + gap), 0),
            (-(w + gap), -(h + gap)), ((w + gap), -(h + gap)), (-(w + gap), (h + gap)), ((w + gap), (h + gap))]
    for dx, dy in dirs:
        ll = l + dx
        tt = t + dy
        rr = ll + w
        bb = tt + h
        if ll < 0 or tt < 0 or rr > W or bb > H:
            continue
        candidates.append(base.crop((ll, tt, rr, bb)).convert("RGB"))
    if not candidates:
        return False
    # Average the patches in float. The previous implementation summed
    # with ImageChops.add(scale=1.0), which clips each partial sum at 255
    # and therefore darkened bright (e.g. white) backgrounds before the
    # final divide-by-n.
    stack = np.stack([np.asarray(c, dtype=np.float32) for c in candidates])
    blended = Image.fromarray(stack.mean(axis=0).astype(np.uint8), "RGB")
    m = mask
    fp = int(max(0, feather_px))
    if fp > 0:
        m = m.filter(ImageFilter.GaussianBlur(radius=fp))
    region = base.crop((l, t, r, b)).convert("RGB")
    merged = Image.composite(blended, region, m)
    base.paste(merged, (l, t))
    return True
def _erase_with_inpaint(base, box_tokens, pad_px=2):
    """Remove the tokens' text from `base` using OpenCV inpainting.

    Builds one full-image mask from every token's quad (rotated box quad,
    else baseline quad, else plain rect), crops to the mask's bounding
    box plus an 8 px context margin for speed, optionally dilates the
    mask by INPAINT_DILATE_PX (module-level constant — defined outside
    this view), then inpaints with INPAINT_METHOD / INPAINT_RADIUS.
    Returns a new RGB image (or the RGB conversion unchanged when there
    is nothing to erase).
    """
    if not box_tokens:
        return base
    rgb = base.convert("RGB")
    W, H = rgb.size
    mask = Image.new("L", (W, H), 0)
    d = ImageDraw.Draw(mask)
    for t in box_tokens:
        # Geometry preference: rotated box quad > baseline quad > raw rect.
        quad = _token_box_quad_px(t, W, H, pad_px=pad_px)
        if not quad:
            quad = _token_quad_px(t, W, H, pad_px=pad_px,
                                  apply_baseline_shift=True)
        if not quad:
            rect = _token_box_px(t, W, H, pad_px=pad_px)
            if not rect:
                continue
            l, tt, r, bb = rect
            quad = [(l, tt), (r, tt), (r, bb), (l, bb)]
        d.polygon(quad, fill=255)
    m = np.array(mask, dtype=np.uint8)
    ys, xs = np.where(m > 0)
    if xs.size == 0 or ys.size == 0:
        return rgb
    # Restrict the expensive inpaint to the masked area + 8 px margin.
    l = int(max(0, xs.min() - 8))
    t = int(max(0, ys.min() - 8))
    r = int(min(W, xs.max() + 1 + 8))
    b = int(min(H, ys.max() + 1 + 8))
    if r <= l or b <= t:
        return rgb
    crop_rgb = np.array(rgb.crop((l, t, r, b)), dtype=np.uint8)
    crop_m = m[t:b, l:r]
    dpx = int(max(0, INPAINT_DILATE_PX or 0))
    if dpx > 0:
        # Grow the mask slightly so anti-aliased glyph edges are covered too.
        k = cv2.getStructuringElement(
            cv2.MORPH_ELLIPSE, (dpx * 2 + 1, dpx * 2 + 1))
        crop_m = cv2.dilate(crop_m, k, iterations=1)
    bgr = cv2.cvtColor(crop_rgb, cv2.COLOR_RGB2BGR)
    method = (INPAINT_METHOD or "telea").strip().lower()
    flag = cv2.INPAINT_TELEA if method in ("telea", "t") else cv2.INPAINT_NS
    radius = float(INPAINT_RADIUS or 3)
    out_bgr = cv2.inpaint(bgr, crop_m, radius, flag)
    out_rgb = cv2.cvtColor(out_bgr, cv2.COLOR_BGR2RGB)
    out = rgb.copy()
    out.paste(Image.fromarray(out_rgb), (l, t))
    return out
def erase_text_with_boxes(img, box_tokens, pad_px=2, sample_margin_px=6, mode=None, mosaic_block_px=None):
    """Erase the detected text regions from `img` using the configured strategy.

    `mode` (default from ERASE_MODE) selects the eraser:
      - "inpaint"/"cv2"/"opencv": whole image via _erase_with_inpaint
      - "blend_patch"/"blend"/"avg_patch"/"patch": neighbour-patch average
      - "clone": best-matching neighbour patch
      - "mosaic": pixelation
      - anything else / "solid": flat fill with the sampled background colour
    Returns a new RGB image; the input is not modified.

    NOTE(review): when blend/clone fails for one token, `mode` is
    reassigned to "solid" and stays "solid" for ALL remaining tokens —
    reads as a deliberate sticky fallback, but confirm that is intended.
    """
    if not box_tokens:
        return img
    mode = (mode or ERASE_MODE or "solid").strip().lower()
    mosaic_block_px = int(mosaic_block_px or ERASE_MOSAIC_BLOCK_PX or 10)
    base = img.convert("RGB").copy()
    if mode in ("inpaint", "cv2", "opencv"):
        return _erase_with_inpaint(base, box_tokens, pad_px=pad_px)
    W, H = base.size
    for t in box_tokens:
        # Geometry preference: rotated box quad > baseline quad > raw rect.
        quad = _token_box_quad_px(t, W, H, pad_px=pad_px)
        if not quad:
            quad = _token_quad_px(t, W, H, pad_px=pad_px,
                                  apply_baseline_shift=True)
        if not quad:
            rect = _token_box_px(t, W, H, pad_px=pad_px)
            if not rect:
                continue
            l, tt, r, bb = rect
            quad = [(l, tt), (r, tt), (r, bb), (l, bb)]
        rect = _quad_bbox(quad, W, H)
        if not rect:
            continue
        l, tt, r, bb = rect
        region = base.crop((l, tt, r, bb))
        # Rect-local mask of the (possibly rotated) quad.
        mask = Image.new("L", (r - l, bb - tt), 0)
        mdraw = ImageDraw.Draw(mask)
        qrel = [(x - l, y - tt) for x, y in quad]
        mdraw.polygon(qrel, fill=255)
        if mode in ("blend_patch", "blend", "avg_patch", "patch"):
            ok = _erase_with_blend_patches(
                base, rect, mask, ERASE_BLEND_GAP_PX, ERASE_BLEND_FEATHER_PX)
            if ok:
                continue
            mode = "solid"
        if mode == "clone":
            ok = _erase_with_clone(
                base, rect, mask, ERASE_CLONE_GAP_PX, ERASE_CLONE_BORDER_PX, ERASE_CLONE_FEATHER_PX)
            if ok:
                continue
            mode = "solid"
        if mode == "mosaic":
            pixelated = _pixelate(region, mosaic_block_px)
            merged = Image.composite(pixelated, region, mask)
            base.paste(merged, (l, tt))
        else:
            # Solid fill with the median background colour sampled around the quad.
            color = _sample_bg_color_from_quad(
                base, quad, rect, BG_SAMPLE_BORDER_PX, sample_margin_px)
            region.paste(color, mask=mask)
            base.paste(region, (l, tt))
    return base
def draw_overlay(img, tokens, out_path, thai_path, latin_path, level_outlines=None, font_scale: float = 1.0, fit_to_box: bool = True):
    """Render token texts over `img` and save the composite to `out_path`.

    For each token: choose a font size (the token's forced `font_size_px`
    scaled by `font_scale`, optionally shrunk to the box when
    `fit_to_box`, else a size fitted to the box), pick black/white text
    for contrast when AUTO_TEXT_COLOR is set, draw the text on its own
    transparent square canvas, rotate that canvas to the baseline angle
    and alpha-composite it at the token's position. `level_outlines`
    optionally draws debug quads first.
    """
    base = img.convert("RGBA")
    base_rgb = img.convert("RGB")
    overlay = Image.new("RGBA", base.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(overlay)
    # Debug outlines for paragraph/item/span quads, when provided.
    for ol in (level_outlines or []):
        q = ol.get("quad")
        if not q:
            continue
        col = ol.get("color", BOX_OUTLINE)
        w = int(ol.get("width", 2))
        draw.line(q + [q[0]], fill=col, width=w)
    W, H = base.size
    for t in tokens:
        b = t.get("box") or {}
        # Token centre and available size, preferring the rotated box
        # quad's bounding box over the raw normalised box.
        box_quad = _token_box_quad_px(t, W, H, pad_px=0)
        use_box_center = False
        if box_quad:
            lq, tq, rq, bq = _quad_bbox(box_quad, W, H)
            box_cx = (lq + rq) / 2.0
            box_cy = (tq + bq) / 2.0
            box_w = max(1.0, float(rq - lq))
            box_h = max(1.0, float(bq - tq))
            use_box_center = True
        else:
            left0 = float(b.get("left", 0.0)) * W
            top0 = float(b.get("top", 0.0)) * H
            box_w = max(1.0, float(b.get("width", 0.0)) * W)
            box_h = max(1.0, float(b.get("height", 0.0)) * H)
            box_cx = left0 + (box_w / 2.0)
            box_cy = top0 + (box_h / 2.0)
        if DRAW_OUTLINE_SPAN and DRAW_BOX_OUTLINE:
            quad = _token_box_quad_px(t, W, H, pad_px=0)
            if quad:
                draw.line(quad + [quad[0]], fill=SPAN_OUTLINE,
                          width=SPAN_OUTLINE_WIDTH)
            else:
                left = b["left"] * W
                top = b["top"] * H
                width = b["width"] * W
                height = b["height"] * H
                draw.rectangle([left, top, left + width, top + height],
                               outline=SPAN_OUTLINE, width=SPAN_OUTLINE_WIDTH)
        text = _sanitize_draw_text(t.get("text") or "")
        if text.strip() == "":
            continue
        # Baseline geometry, normalised so the segment runs left-to-right.
        p1 = t["baseline_p1"]
        p2 = t["baseline_p2"]
        x1 = float(p1["x"]) * W
        y1 = float(p1["y"]) * H
        x2 = float(p2["x"]) * W
        y2 = float(p2["y"]) * H
        dx = x2 - x1
        dy = y2 - y1
        if dx < 0 or (abs(dx) < 1e-12 and dy < 0):
            x1, y1, x2, y2 = x2, y2, x1, y1
            dx = x2 - x1
            dy = y2 - y1
        L = math.hypot(dx, dy)
        if L <= 1e-9:
            continue
        ux = dx / L
        uy = dy / L
        # t0/t1 trim the baseline to the token's sub-segment.
        t0 = float(t.get("t0_raw") if t.get("t0_raw") is not None else 0.0)
        t1 = float(t.get("t1_raw") if t.get("t1_raw") is not None else 1.0)
        sx = x1 + ux * (t0 * L)
        sy = y1 + uy * (t0 * L)
        ex = x1 + ux * (t1 * L)
        ey = y1 + uy * (t1 * L)
        avail_w = box_w
        avail_h = box_h
        # BASELINE_SHIFT / BASELINE_SHIFT_FACTOR are module-level settings
        # (defined outside this view).
        if BASELINE_SHIFT and (not use_box_center):
            nx, ny = -uy, ux
            shift = avail_h * BASELINE_SHIFT_FACTOR
            sx += nx * shift
            sy += ny * shift
        angle_deg = float(b.get("rotation_deg", 0.0))
        # Font sizing: honour a forced per-token size, else fit to the box.
        forced_size = t.get("font_size_px")
        if forced_size is not None:
            final_size = int(
                max(10, round(float(forced_size) * float(font_scale))))
            font = pick_font(text, thai_path, latin_path, final_size)
            if fit_to_box:
                # Shrink (never grow) the forced size when the rendered
                # text would overflow the available box.
                tmpc = Image.new("RGBA", (10, 10), (0, 0, 0, 0))
                dc = ImageDraw.Draw(tmpc)
                try:
                    bbc = dc.textbbox((0, 0), text, font=font, anchor="ls")
                    twc = float(bbc[2] - bbc[0])
                    thc = float(bbc[3] - bbc[1])
                except Exception:
                    twc, thc = dc.textsize(text, font=font)
                    twc = float(twc)
                    thc = float(thc)
                if twc > 0 and thc > 0 and (twc > avail_w or thc > avail_h):
                    s = min(avail_w / twc, avail_h / thc)
                    if s < 1.0:
                        final_size = max(10, int(final_size * s))
                        font = pick_font(
                            text, thai_path, latin_path, final_size)
        else:
            # Measure at a large base size and scale down proportionally.
            base_size = 96
            font0 = pick_font(text, thai_path, latin_path, base_size)
            tmp = Image.new("RGBA", (10, 10), (0, 0, 0, 0))
            dtmp = ImageDraw.Draw(tmp)
            try:
                bb = dtmp.textbbox((0, 0), text, font=font0, anchor="ls")
                tw = bb[2] - bb[0]
                th = bb[3] - bb[1]
            except Exception:
                tw, th = dtmp.textsize(text, font=font0)
            if tw <= 0 or th <= 0:
                continue
            scale = min(avail_w / tw, avail_h / th)
            final_size = max(10, int(base_size * scale))
            if not fit_to_box:
                final_size = max(10, int(final_size * float(font_scale)))
            font = pick_font(text, thai_path, latin_path, final_size)
        # Size a square scratch canvas large enough to hold the text at
        # any rotation, capped at 4x the image's larger dimension.
        tmp2 = Image.new("RGBA", (10, 10), (0, 0, 0, 0))
        d2 = ImageDraw.Draw(tmp2)
        try:
            bb2 = d2.textbbox((0, 0), text, font=font, anchor="ls")
            tw2 = bb2[2] - bb2[0]
            th2 = bb2[3] - bb2[1]
        except Exception:
            tw2, th2 = d2.textsize(text, font=font)
        side = int(max(tw2, th2, avail_h, avail_w) * 2.2 + 40)
        side = min(side, int(max(W, H) * 4))
        if side < 128:
            side = 128
        canvas = Image.new("RGBA", (side, side), (0, 0, 0, 0))
        dc = ImageDraw.Draw(canvas)
        # Choose black or white text by contrast against the background,
        # sampled from the quad's outer ring, then inner border, then strips.
        fill = TEXT_COLOR
        if AUTO_TEXT_COLOR:
            q = _token_box_quad_px(t, W, H, pad_px=0)
            if q:
                rr = _quad_bbox(q, W, H)
                if rr:
                    bg = _sample_bg_color_from_quad_ring(
                        base_rgb, q, rr, ring_px=max(2, BG_SAMPLE_BORDER_PX))
                    if bg is None:
                        bg = _sample_bg_color_from_quad(
                            base_rgb, q, rr, BG_SAMPLE_BORDER_PX, ERASE_SAMPLE_MARGIN_PX)
                    fill = _pick_bw_text_color(bg)
            else:
                rr = _token_box_px(t, W, H, pad_px=0)
                if rr:
                    bg = _sample_bg_color(base_rgb, rr, ERASE_SAMPLE_MARGIN_PX)
                    fill = _pick_bw_text_color(bg)
        origin = (side // 2, side // 2)
        p1 = t.get("baseline_p1") or {}
        p2 = t.get("baseline_p2") or {}
        has_baseline = ("x" in p1 and "y" in p1 and "x" in p2 and "y" in p2)
        if has_baseline:
            # Baseline placement: compute where the baseline start must be
            # so the text's metric centre lands on the box centre, measured
            # along the baseline direction (ux,uy) and its normal (nx,ny).
            x1 = float(p1.get("x") or 0.0) * float(W)
            y1 = float(p1.get("y") or 0.0) * float(H)
            x2 = float(p2.get("x") or 0.0) * float(W)
            y2 = float(p2.get("y") or 0.0) * float(H)
            dx = x2 - x1
            dy = y2 - y1
            Lb = float(math.hypot(dx, dy))
            if Lb <= 1e-6:
                Lb = 1.0
            ux = dx / Lb
            uy = dy / Lb
            nx = -uy
            ny = ux
            bb = t.get("box") or {}
            cx = (float(bb.get("left") or 0.0) +
                  float(bb.get("width") or 0.0) / 2.0) * float(W)
            cy = (float(bb.get("top") or 0.0) +
                  float(bb.get("height") or 0.0) / 2.0) * float(H)
            tt = _sanitize_draw_text(text)
            if not tt:
                continue
            font_m = pick_font(tt, thai_path, latin_path, final_size)
            # Text advance width: getlength, else textbbox, else textsize.
            try:
                tw = float(font_m.getlength(tt))
            except Exception:
                tmp = Image.new("RGBA", (10, 10), (0, 0, 0, 0))
                dtmp = ImageDraw.Draw(tmp)
                try:
                    bbm = dtmp.textbbox((0, 0), tt, font=font_m, anchor="ls")
                    tw = float(bbm[2] - bbm[0])
                except Exception:
                    tw, _ = dtmp.textsize(tt, font=font_m)
                    tw = float(tw)
            # Worst-case ascent/descent across the Thai and Latin faces.
            f_th, f_lat = _get_font_pair(thai_path, latin_path, final_size)
            try:
                a_th, d_th = f_th.getmetrics()
            except Exception:
                a_th, d_th = final_size, int(final_size * 0.25)
            try:
                a_lat, d_lat = f_lat.getmetrics()
            except Exception:
                a_lat, d_lat = final_size, int(final_size * 0.25)
            ascent = float(max(a_th, a_lat))
            descent = float(max(d_th, d_lat))
            # Vertical offset from baseline to the line's metric centre.
            center_y_rel = (-ascent + descent) / 2.0
            bx = cx - ux * (tw / 2.0) - nx * center_y_rel
            by = cy - uy * (tw / 2.0) - ny * center_y_rel
            angle_deg = float(math.degrees(math.atan2(dy, dx)))
            _draw_text_baseline_fallback(
                dc, origin, text, thai_path, latin_path, final_size, fill)
            # Rotate about the canvas origin, then paste so that origin
            # lands on the computed baseline start (bx, by).
            rotated = canvas.rotate(-angle_deg, resample=Image.BICUBIC,
                                    expand=False, center=origin)
            paste_x = int(round(bx - origin[0]))
            paste_y = int(round(by - origin[1]))
            overlay.alpha_composite(rotated, dest=(paste_x, paste_y))
        else:
            # No baseline data: centre the text on the box centre instead.
            _draw_text_centered_fallback(
                dc, origin, text, thai_path, latin_path, final_size, fill)
            rotated = canvas.rotate(-angle_deg, resample=Image.BICUBIC,
                                    expand=False, center=origin)
            paste_x = int(round(box_cx - origin[0]))
            paste_y = int(round(box_cy - origin[1]))
            overlay.alpha_composite(rotated, dest=(paste_x, paste_y))
    out = Image.alpha_composite(base, overlay).convert("RGB")
    out.save(out_path)
def get_lens_data_from_image(image_path, firebase_url, lang):
    """Upload an image to Google Lens and return the parsed response data.

    Flow: fetch session cookies from Firebase (_get_firebase_cookie),
    POST the image bytes to the Lens v3 upload endpoint without following
    redirects, take the redirect Location, rewrite it for the target
    language via to_translated (defined elsewhere in this module), fetch
    that URL and json-decode the body after stripping the 5-character
    `)]}'`+newline anti-JSON-hijacking prefix when present.

    Raises Exception when the upload does not answer with a 302/303 redirect.
    """
    ck = _get_firebase_cookie(firebase_url)
    with open(image_path, "rb") as f:
        img_bytes = f.read()
    hdr = {"User-Agent": "Mozilla/5.0", "Referer": "https://lens.google.com/"}
    # follow_redirects=False: the redirect Location itself is the result URL.
    with httpx.Client(cookies=ck, headers=hdr, follow_redirects=False, timeout=60) as c:
        r = c.post(
            "https://lens.google.com/v3/upload",
            files={"encoded_image": ("file.jpg", img_bytes, "image/jpeg")},
        )
        if r.status_code not in (302, 303):
            raise Exception(f"Upload failed: {r.status_code}\n{r.text}")
        redirect = r.headers["location"]
    u = to_translated(redirect, lang=lang)
    with httpx.Client(cookies=ck, headers=hdr, timeout=60) as c:
        j = c.get(u).text
    data = json.loads(j[5:] if j.startswith(")]}'") else j)
    return data
def _get_firebase_cookie(firebase_url: str):
    """Fetch Lens session cookies from Firebase, with a TTL'd module cache.

    Returns the cached cookie payload when it was fetched for the same URL
    within FIREBASE_COOKIE_TTL_SEC seconds; otherwise refetches and caches.
    """
    url = (firebase_url or '').strip()
    now = time.time()
    cache = _FIREBASE_COOKIE_CACHE
    if (
        cache.get('data')
        and cache.get('url') == url
        and (now - float(cache.get('ts') or 0)) < float(FIREBASE_COOKIE_TTL_SEC)
    ):
        return cache.get('data')
    cookie_payload = httpx.get(url, timeout=30).json()
    cache.update(ts=now, url=url, data=cookie_payload)
    return cookie_payload
def warmup(lang: str = "th") -> dict:
    """Pre-fetch session cookies and pre-load fonts for the given language.

    Best-effort warm-up so the first real request does not pay the cookie
    fetch / font download / font cache cost. Never raises on cookie failure;
    the outcome is reported in the returned status dict.
    """
    norm = _normalize_lang(lang)
    cookie_ok = False
    try:
        _get_firebase_cookie(FIREBASE_URL)
        cookie_ok = True
    except Exception:
        # Warm-up must not fail just because the cookie host is unreachable.
        pass
    zh_simplified = ("zh", "zh-hans", "zh_cn", "zh-cn", "zh_hans")
    zh_traditional = ("zh-hant", "zh_tw", "zh-tw", "zh_hant")
    # CJK target languages reuse the "latin" slot for their own font.
    thai_font = FONT_THAI_PATH
    if norm == "ja":
        latin_font = FONT_JA_PATH
    elif norm in zh_simplified:
        latin_font = FONT_ZH_SC_PATH
    elif norm in zh_traditional:
        latin_font = FONT_ZH_TC_PATH
    else:
        latin_font = FONT_LATIN_PATH
    if FONT_DOWNLOD:
        thai_font = ensure_font(thai_font, FONT_THAI_URLS)
        if norm == "ja":
            latin_font = ensure_font(latin_font, FONT_JA_URLS)
        elif norm in zh_simplified:
            latin_font = ensure_font(latin_font, FONT_ZH_SC_URLS)
        elif norm in zh_traditional:
            latin_font = ensure_font(latin_font, FONT_ZH_TC_URLS)
        else:
            latin_font = ensure_font(latin_font, FONT_LATIN_URLS)
    # Warm the font-pair cache at the two commonly used sizes.
    for size in (22, 28):
        _get_font_pair(thai_font or "", latin_font or "", size)
    return {
        "ok": True,
        "lang": norm,
        "thai_font": thai_font or "",
        "latin_font": latin_font or "",
        "cookie_ok": cookie_ok,
    }
def main():
    """Run the end-to-end Lens pipeline for IMAGE_PATH and write OUT_JSON.

    Steps, each gated by the module-level DO_* / DRAW_* flags:
      1. Upload the image and fetch the Lens OCR/translation payload.
      2. Resolve language-appropriate fonts (optionally downloading them).
      3. Decode the original and translated paragraph trees; optionally
         emit HTML and PNG overlays for each.
      4. Optionally run the AI translation pass, patch its text into the
         translated tree layout, and harmonize per-paragraph font sizes
         across the original/translated/AI trees.
      5. Dump everything into a single JSON document (OUT_JSON).
    """
    data = get_lens_data_from_image(IMAGE_PATH, FIREBASE_URL, LANG)
    img = Image.open(IMAGE_PATH).convert("RGB")
    W, H = img.size
    # Font selection: CJK target languages reuse the "latin" slot for
    # their own font; Thai keeps a dedicated font.
    thai_font = FONT_THAI_PATH
    latin_font = FONT_LATIN_PATH
    lang = _normalize_lang(LANG)
    if lang == "ja":
        latin_font = FONT_JA_PATH
    elif lang in ("zh", "zh-hans", "zh_cn", "zh-cn", "zh_hans"):
        latin_font = FONT_ZH_SC_PATH
    elif lang in ("zh-hant", "zh_tw", "zh-tw", "zh_hant"):
        latin_font = FONT_ZH_TC_PATH
    if FONT_DOWNLOD:
        # ensure_font downloads the file when missing and returns a usable path.
        thai_font = ensure_font(thai_font, FONT_THAI_URLS)
        if lang == "ja":
            latin_font = ensure_font(latin_font, FONT_JA_URLS)
        elif lang in ("zh", "zh-hans", "zh_cn", "zh-cn", "zh_hans"):
            latin_font = ensure_font(latin_font, FONT_ZH_SC_URLS)
        elif lang in ("zh-hant", "zh_tw", "zh-tw", "zh_hant"):
            latin_font = ensure_font(latin_font, FONT_ZH_TC_URLS)
        else:
            latin_font = ensure_font(latin_font, FONT_LATIN_URLS)
    image_url = data.get("imageUrl") if isinstance(data, dict) else None
    image_datauri = ""
    if DECODE_IMAGEURL_TO_DATAURI and image_url:
        image_datauri = decode_imageurl_to_datauri(image_url)
    # Output skeleton; the original/translated/Ai sections are filled below.
    out = {
        "imageUrl": image_url,
        "imageDataUri": image_datauri,
        "originalContentLanguage": data.get("originalContentLanguage"),
        "originalTextFull": data.get("originalTextFull"),
        "translatedTextFull": data.get("translatedTextFull"),
        "AiTextFull": "",
        "originalParagraphs": data.get("originalParagraphs") or [],
        "translatedParagraphs": data.get("translatedParagraphs") or [],
        "original": {},
        "translated": {},
        "Ai": {},
    }
    original_span_tokens = None
    original_tree = None
    translated_tree = None

    def _base_img_for_overlay() -> Image.Image:
        # Background for overlays: optionally erase the original text first
        # using the original span boxes; otherwise draw on the raw image.
        if not (ERASE_OLD_TEXT_WITH_ORIGINAL_BOXES and original_span_tokens):
            return img
        return erase_text_with_boxes(
            img,
            original_span_tokens,
            pad_px=ERASE_PADDING_PX,
            sample_margin_px=ERASE_SAMPLE_MARGIN_PX,
        )
    if DO_ORIGINAL:
        tree, _ = decode_tree(
            data.get("originalParagraphs") or [],
            data.get("originalTextFull") or "",
            "original",
            W,
            H,
            want_raw=False,
        )
        original_tree = tree
        original_span_tokens = flatten_tree_spans(tree)
        out["original"] = {"originalTree": tree}
        if DO_ORIGINAL_HTML:
            out["original"]["originalhtml"] = tokens_to_html(
                original_span_tokens)
        if DRAW_OVERLAY_ORIGINAL:
            base_img = _base_img_for_overlay()
            draw_overlay(
                base_img,
                original_span_tokens,
                OVERLAY_ORIGINAL_PATH,
                thai_font or "",
                latin_font or "",
                level_outlines=build_level_outlines(original_tree, W, H),
            )
    # The AI pass needs the original tree for box layout even when
    # DO_ORIGINAL is off, so decode it here as a fallback.
    if DO_AI and original_tree is None:
        tree0, _ = decode_tree(
            data.get("originalParagraphs") or [],
            data.get("originalTextFull") or "",
            "original",
            W,
            H,
            want_raw=False,
        )
        original_tree = tree0
    if DO_TRANSLATED:
        tree, _ = decode_tree(
            data.get("translatedParagraphs") or [],
            data.get("translatedTextFull") or "",
            "translated",
            W,
            H,
            want_raw=False,
        )
        translated_tree = tree
        out["translated"] = {"translatedTree": tree}
        translated_span_tokens = flatten_tree_spans(tree)
        if DO_TRANSLATED_HTML:
            out["translated"]["translatedhtml"] = tokens_to_html(
                translated_span_tokens)
        if DRAW_OVERLAY_TRANSLATED:
            base_img = _base_img_for_overlay()
            draw_overlay(
                base_img,
                translated_span_tokens,
                OVERLAY_TRANSLATED_PATH,
                thai_font or "",
                latin_font or "",
                level_outlines=build_level_outlines(tree, W, H),
                font_scale=TRANSLATED_OVERLAY_FONT_SCALE,
                fit_to_box=TRANSLATED_OVERLAY_FIT_TO_BOX,
            )
    ai = None
    if DO_AI:
        src_text = out.get("originalTextFull") or ""
        if not src_text:
            # NOTE(review): redundant — out["originalTextFull"] was set from
            # this same field above; kept for safety.
            src_text = data.get("originalTextFull") or ""
        # Prefer the translated tree's boxes for AI layout; fall back to
        # (or lazily decode) the original tree.
        tree_for_boxes = translated_tree or original_tree
        if tree_for_boxes is None:
            tree_for_boxes, _ = decode_tree(
                data.get("originalParagraphs") or [],
                data.get("originalTextFull") or "",
                "original",
                W,
                H,
                want_raw=False,
            )
            original_tree = tree_for_boxes
        ai = ai_translate_original_text(
            src_text,
            LANG,
        )
        # NOTE(review): template_tree is None when DO_TRANSLATED is False —
        # presumably patch() tolerates that; confirm.
        template_tree = translated_tree
        patched = patch({"Ai": {"aiTextFull": str(ai.get(
            "aiTextFull") or ""), "aiTree": template_tree}}, W, H, thai_font, latin_font)
        ai_tree = (patched.get("Ai") or {}).get("aiTree") or {}
        ai["aiTree"] = ai_tree
        # Harmonize per-paragraph font sizes across all three trees so the
        # original/translated/AI renderings line up visually.
        shared_para_sizes = _compute_shared_para_sizes(
            [original_tree or {}, translated_tree or {}, ai_tree or {}],
            thai_font or "",
            latin_font or "",
            W,
            H,
        )
        _apply_para_font_size(original_tree or {}, shared_para_sizes)
        _apply_para_font_size(translated_tree or {}, shared_para_sizes)
        _apply_para_font_size(ai_tree or {}, shared_para_sizes)
        _rebuild_ai_spans_after_font_resize(
            ai_tree or {}, W, H, thai_font or "", latin_font or "")
        out["AiTextFull"] = str(ai.get("aiTextFull") or "")
        out["Ai"] = {
            "aiTextFull": str(ai.get("aiTextFull") or ""),
            "aiTree": ai_tree,
        }
        if DO_AI_HTML:
            if AI_OVERLAY_FIT_TO_BOX:
                fit_tree_font_sizes_for_tp_html(
                    ai_tree or {}, thai_font or "", latin_font or "", W, H)
            out["Ai"]["aihtml"] = ai_tree_to_tp_html(ai_tree, W, H)
            out["Ai"]["aihtmlCss"] = tp_overlay_css()
            out["Ai"]["aihtmlMeta"] = {
                "baseW": int(W),
                "baseH": int(H),
                "format": "tp",
            }
        if DO_AI_OVERLAY and translated_tree is not None:
            base_img = _base_img_for_overlay()
            tokens_for_draw = flatten_tree_spans(ai_tree)
            draw_overlay(
                base_img,
                tokens_for_draw,
                AI_PATH_OVERLAY,
                thai_font or "",
                latin_font or "",
                level_outlines=build_level_outlines(ai_tree, W, H),
                font_scale=AI_OVERLAY_FONT_SCALE,
                fit_to_box=AI_OVERLAY_FIT_TO_BOX,
            )
    if HTML_INCLUDE_CSS and (DO_ORIGINAL_HTML or DO_TRANSLATED_HTML or DO_AI_HTML):
        out["htmlCss"] = overlay_css()
        out["htmlMeta"] = {
            "containerClass": "RTMDre",
            "tokenClass": "IwqbBf",
            "sourceWidth": int(W),
            "sourceHeight": int(H),
        }
    # Fallback: always ship htmlMeta even when the CSS branch above was
    # skipped. NOTE(review): duplicates the literal above — could be unified.
    if "htmlMeta" not in out:
        out["htmlMeta"] = {
            "containerClass": "RTMDre",
            "tokenClass": "IwqbBf",
            "sourceWidth": int(W),
            "sourceHeight": int(H),
        }
    if WRITE_OUT_JSON:
        with open(OUT_JSON, "w", encoding="utf-8") as f:
            json.dump(out, f, ensure_ascii=False, indent=2)
# Script entry point: run the full pipeline when executed directly.
if __name__ == "__main__":
    main()