plan291037 committed on
Commit
ef44fe4
·
verified ·
1 Parent(s): 2e2d57d

Update backend/lens_core.py

Browse files
Files changed (1) hide show
  1. backend/lens_core.py +100 -168
backend/lens_core.py CHANGED
@@ -187,115 +187,71 @@ AI_MODEL_ALIASES = {
187
  }
188
  }
189
 
190
- AI_PROMPT_SYSTEM_BASE = ""
191
-
192
- AI_LANG_STYLE = {"default": ""}
193
-
194
- AI_PROMPT_USER_BY_LANG = {"default": ""}
195
-
196
- TP_REMOTE_DEFAULTS_URL = (
197
- os.environ.get("TP_REMOTE_DEFAULTS_URL")
198
- or "https://raw.githubusercontent.com/Kuju29/TextPhantomOCR_Overlay/refs/heads/main/defaults_api.json"
199
- ).strip()
200
- TP_REMOTE_DEFAULTS_TIMEOUT_SEC = float(os.environ.get("TP_REMOTE_DEFAULTS_TIMEOUT_SEC", "2"))
201
-
202
-
203
- def _remote_defaults() -> dict:
204
- url = TP_REMOTE_DEFAULTS_URL
205
- if not url:
206
- raise RuntimeError("TP_REMOTE_DEFAULTS_URL is required")
207
-
208
- if url.startswith("file://"):
209
- with open(url[len("file://"):], "r", encoding="utf-8") as f:
210
- raw = f.read()
211
- else:
212
- with httpx.Client(timeout=TP_REMOTE_DEFAULTS_TIMEOUT_SEC) as client:
213
- r = client.get(
214
- url,
215
- headers={"accept": "application/json"},
216
- follow_redirects=True,
217
- )
218
- r.raise_for_status()
219
- raw = r.text
220
-
221
- data = json.loads((raw or "").strip() or "{}")
222
- if not isinstance(data, dict) or not data:
223
- raise RuntimeError("Remote defaults is empty")
224
- return data
225
-
226
-
227
- def _remote_first_str(data: dict, *keys: str) -> str:
228
- if not data:
229
- return ""
230
- for k in keys:
231
- v = data.get(k)
232
- if isinstance(v, str) and v.strip():
233
- return v.strip()
234
- return ""
235
-
236
-
237
- def _remote_first_map(data: dict, *keys: str) -> dict:
238
- if not data:
239
- return {}
240
- for k in keys:
241
- v = data.get(k)
242
- if isinstance(v, dict) and v:
243
- return v
244
- return {}
245
-
246
-
247
- def ai_prompt_system_base(data: dict | None = None) -> str:
248
- d = data if isinstance(data, dict) else _remote_defaults()
249
- v = _remote_first_str(
250
- d,
251
- "AI_PROMPT_SYSTEM_BASE",
252
- "aiPromptSystemBase",
253
- "promptSystemBase",
254
- "systemBase",
255
- )
256
- if not v:
257
- raise RuntimeError("Missing AI_PROMPT_SYSTEM_BASE in remote defaults")
258
- return v
259
-
260
-
261
- def ai_lang_style_map(data: dict | None = None) -> dict[str, str]:
262
- d = data if isinstance(data, dict) else _remote_defaults()
263
- remote = _remote_first_map(d, "AI_LANG_STYLE", "aiLangStyle", "langStyle")
264
- if not remote:
265
- raise RuntimeError("Missing AI_LANG_STYLE in remote defaults")
266
- out: dict[str, str] = {}
267
- for k, v in remote.items():
268
- if not isinstance(k, str) or not isinstance(v, str):
269
- continue
270
- kk = _normalize_lang(k)
271
- if not kk:
272
- continue
273
- out[kk] = v.strip()
274
- out.setdefault("default", "")
275
- return out
276
-
277
 
278
- def ai_prompt_user_by_lang_map(data: dict | None = None) -> dict[str, str]:
279
- d = data if isinstance(data, dict) else _remote_defaults()
280
- remote = _remote_first_map(
281
- d,
282
- "AI_PROMPT_USER_BY_LANG",
283
- "aiPromptUserByLang",
284
- "promptUserByLang",
285
- )
286
- if not remote:
287
- raise RuntimeError("Missing AI_PROMPT_USER_BY_LANG in remote defaults")
288
- out: dict[str, str] = {}
289
- for k, v in remote.items():
290
- if not isinstance(k, str) or not isinstance(v, str):
291
- continue
292
- kk = _normalize_lang(k)
293
- if not kk:
294
- continue
295
- out[kk] = v.strip()
296
- out.setdefault("default", "")
297
- return out
 
 
 
 
 
 
 
 
 
298
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
 
300
  AI_PROMPT_RESPONSE_CONTRACT_JSON = (
301
  "Return ONLY valid JSON (no markdown, no extra text).\n"
@@ -333,10 +289,9 @@ _FONT_PAIR_CACHE = {}
333
  _TP_HTML_EPS_PX = 0.0
334
  ZWSP = "\u200b"
335
 
336
- def ai_prompt_user_default(lang: str, data: dict | None = None) -> str:
337
  l = _normalize_lang(lang)
338
- m = ai_prompt_user_by_lang_map(data)
339
- return (m.get(l) or m.get("default") or "").strip()
340
 
341
  def _active_ai_contract() -> str:
342
  return AI_PROMPT_RESPONSE_CONTRACT_JSON if DO_AI_JSON else AI_PROMPT_RESPONSE_CONTRACT_TEXT
@@ -443,22 +398,22 @@ def _save_ai_cache(path: str, cache: dict):
443
  json.dump(cache, f, ensure_ascii=False)
444
  os.replace(tmp, path)
445
 
446
- def _build_ai_prompt_packet(target_lang: str, original_text_full: str, defaults: dict | None = None):
447
  lang = _normalize_lang(target_lang)
448
- d = defaults if isinstance(defaults, dict) else _remote_defaults()
449
- input_json = json.dumps({"target_lang": lang, "originalTextFull": original_text_full}, ensure_ascii=False)
450
  output_schema = json.dumps({"aiTextFull": "..."}, ensure_ascii=False)
451
  data_template = _active_ai_data_template()
452
  if DO_AI_JSON:
453
- data_text = data_template.format(input_json=input_json, output_schema=output_schema)
 
454
  else:
455
  data_text = data_template.format(input_json=input_json)
456
 
457
- styles = ai_lang_style_map(d)
458
- style = styles.get(lang) or styles.get("default") or ""
459
- editable = (ai_prompt_user_default(lang, d) or "").strip()
460
 
461
- system_parts = [ai_prompt_system_base(d)]
462
  if style:
463
  system_parts.append(style)
464
  system_parts.append(_active_ai_contract())
@@ -470,7 +425,6 @@ def _build_ai_prompt_packet(target_lang: str, original_text_full: str, defaults:
470
  user_parts.append(data_text)
471
  return system_text, user_parts
472
 
473
-
474
  def _gemini_generate_json(api_key: str, model: str, system_text: str, user_parts: list[str]):
475
  url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}"
476
  parts = [{"text": p} for p in user_parts if (p or "").strip()]
@@ -502,7 +456,6 @@ def _gemini_generate_json(api_key: str, model: str, system_text: str, user_parts
502
  raise Exception("Gemini returned empty text")
503
  return txt
504
 
505
-
506
  def _read_first_env(*names: str) -> str:
507
  for n in names:
508
  v = (os.environ.get(n) or "").strip()
@@ -562,29 +515,10 @@ def _resolve_ai_config():
562
 
563
  def _openai_compat_generate_json(api_key: str, base_url: str, model: str, system_text: str, user_parts: list[str]):
564
  url = (base_url.rstrip("/") + "/chat/completions")
565
-
566
- def _user_only_prompt_for_model(m: str) -> bool:
567
- ml = (m or "").strip().lower()
568
- if not ml:
569
- return False
570
- if "gemma-3" in ml or "gemma-2" in ml:
571
- return True
572
- if "gemma" in ml and ("-it" in ml or "instruct" in ml):
573
- return True
574
- return False
575
-
576
- def _build_messages(m: str):
577
- parts = [p.strip() for p in (user_parts or []) if isinstance(p, str) and p.strip()]
578
- sys = (system_text or "").strip()
579
- if _user_only_prompt_for_model(m):
580
- combined = "\n\n".join([x for x in ([sys] + parts) if x])
581
- return [{"role": "user", "content": combined}]
582
- msgs = [{"role": "system", "content": system_text}]
583
- for p in parts:
584
- msgs.append({"role": "user", "content": p})
585
- return msgs
586
-
587
- messages = _build_messages(model)
588
  payload = {
589
  "model": model,
590
  "messages": messages,
@@ -616,7 +550,6 @@ def _openai_compat_generate_json(api_key: str, base_url: str, model: str, system
616
  fallback = _pick_hf_fallback_model(models)
617
  if fallback and fallback != model:
618
  payload["model"] = fallback
619
- payload["messages"] = _build_messages(fallback)
620
  used_model = fallback
621
  r2 = client.post(url, json=payload, headers=headers)
622
  try:
@@ -1566,20 +1499,15 @@ def ai_translate_original_text(original_text_full: str, target_lang: str):
1566
  if not api_key:
1567
  raise Exception("AI_API_KEY is required for AI translation")
1568
 
1569
- defaults = _remote_defaults()
1570
  lang = _normalize_lang(target_lang)
1571
- styles = ai_lang_style_map(defaults)
1572
- edit_map = ai_prompt_user_by_lang_map(defaults)
1573
- sys_base = ai_prompt_system_base(defaults)
1574
-
1575
  prompt_sig = _sha1(
1576
  json.dumps(
1577
  {
1578
- "sys": sys_base,
1579
- "edit": edit_map,
1580
  "contract": _active_ai_contract(),
1581
  "data": _active_ai_data_template(),
1582
- "style": styles.get(lang) or styles.get("default") or "",
1583
  },
1584
  ensure_ascii=False,
1585
  )
@@ -1591,7 +1519,8 @@ def ai_translate_original_text(original_text_full: str, target_lang: str):
1591
  cache = _load_ai_cache(AI_CACHE_PATH)
1592
  cache_key = _sha1(
1593
  json.dumps(
1594
- {"provider": provider, "m": model, "u": base_url, "l": lang, "p": prompt_sig, "t": original_text_full},
 
1595
  ensure_ascii=False,
1596
  )
1597
  )
@@ -1600,9 +1529,10 @@ def ai_translate_original_text(original_text_full: str, target_lang: str):
1600
  if lang == "th" and cached:
1601
  t = str(cached.get("aiTextFull") or "")
1602
  if t:
1603
- t2 = re.sub(r"(?:(?<=^)|(?<=[\s\"'“”‘’()\[\]{}<>]))นาย(?=(?:\s|$))", "", t)
1604
- t2 = re.sub(r"[ ]{2,}", " ", t2)
1605
- t2 = re.sub(r"^[ ]+", "", t2, flags=re.MULTILINE)
 
1606
  if t2 != t:
1607
  cached = dict(cached)
1608
  cached["aiTextFull"] = t2
@@ -1610,7 +1540,7 @@ def ai_translate_original_text(original_text_full: str, target_lang: str):
1610
  _save_ai_cache(AI_CACHE_PATH, cache)
1611
  return cached
1612
 
1613
- system_text, user_parts = _build_ai_prompt_packet(lang, original_text_full, defaults)
1614
 
1615
  started = time.time()
1616
  used_model = model
@@ -1619,14 +1549,17 @@ def ai_translate_original_text(original_text_full: str, target_lang: str):
1619
  elif provider == "anthropic":
1620
  raw = _anthropic_generate_json(api_key, model, system_text, user_parts)
1621
  else:
1622
- raw, used_model = _openai_compat_generate_json(api_key, base_url, model, system_text, user_parts)
 
1623
 
1624
- ai_text_full = _parse_ai_textfull_only(raw) if DO_AI_JSON else _parse_ai_textfull_text_only(raw)
 
1625
 
1626
  if lang == "th" and ai_text_full:
1627
- ai_text_full = re.sub(r"(?:(?<=^)|(?<=[\s\"'“”‘’()\[\]{}<>]))นาย(?=(?:\s|$))", "", ai_text_full)
1628
- ai_text_full = re.sub(r"[ ]{2,}", " ", ai_text_full)
1629
- ai_text_full = re.sub(r"^[ ]+", "", ai_text_full, flags=re.MULTILINE)
 
1630
 
1631
  result = {
1632
  "aiTextFull": ai_text_full,
@@ -1728,7 +1661,6 @@ def _get_float_field(msg_fields, field_num):
1728
  return b2f(v)
1729
  return None
1730
 
1731
-
1732
  def _get_points_from_geom(geom_bytes):
1733
  pts = []
1734
  height = None
@@ -1917,7 +1849,7 @@ def decode_tree(paragraphs_b64, full_text, side, img_w, img_h, want_raw=True):
1917
 
1918
  angle_deg_raw = math.degrees(math.atan2(dy, dx))
1919
  angle_deg = _normalize_angle_deg(angle_deg_raw)
1920
-
1921
  angle_deg_css = angle_deg
1922
 
1923
  height_px = height_norm * img_h
@@ -2545,7 +2477,7 @@ def ai_tree_to_tp_html(tree: dict, base_w: int, base_h: int) -> str:
2545
  item_idx = int(_tp_num(it.get("item_index")) or ii)
2546
 
2547
  fs_raw = _tp_num(it.get("font_size_px"))
2548
-
2549
  fs = int(round(fs_raw)) if fs_raw and fs_raw > 0 else max(
2550
  10, int(round(h0 * 0.85)))
2551
  fs = max(6, min(fs, max(6, int(math.floor(h0 * 0.95)))))
@@ -2903,7 +2835,7 @@ def _line_metrics_px(text: str, thai_path: str, latin_path: str, size: int):
2903
  def _item_avail_w_px(item: dict, W: int, H: int) -> float:
2904
  b = item.get("box") or {}
2905
  w_box = float(b.get("width") or 0.0) * float(W)
2906
-
2907
  L = 0.0
2908
  p1 = item.get("baseline_p1") or {}
2909
  p2 = item.get("baseline_p2") or {}
@@ -2957,7 +2889,7 @@ def _compute_fit_size_px_for_item(item: dict, thai_path: str, latin_path: str, W
2957
  if (tw2 <= avail_w * 0.999) and (th2 <= avail_h * 0.999):
2958
  break
2959
  size -= 1
2960
-
2961
  if size <= 12 and avail_h >= 24:
2962
  tw0, th0, _ = m
2963
  if tw0 > (avail_w * 1.2):
@@ -3006,7 +2938,7 @@ def fit_tree_font_sizes_for_tp_html(tree: dict, thai_path: str, latin_path: str,
3006
 
3007
  if not fits:
3008
  continue
3009
-
3010
  fits.sort()
3011
  p["para_font_size_px"] = int(fits[len(fits) // 2])
3012
 
 
187
  }
188
  }
189
 
190
+ AI_PROMPT_SYSTEM_BASE = (
191
+ "You are a professional manga translator and dialogue localizer.\n"
192
+ "Rewrite each paragraph as natural dialogue in the target language while preserving meaning, tone, intent, and character voice.\n"
193
+ "Keep lines concise for speech bubbles. Do not add new information. Do not omit meaning. Do not explain.\n"
194
+ "Preserve emphasis (… ! ?). Avoid excessive punctuation.\n"
195
+ "If the input is already in the target language, improve it (dialogue polish) without changing meaning."
196
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
 
198
+ AI_LANG_STYLE = {
199
+ "th": (
200
+ "Target language: Thai\n"
201
+ "Write Thai manga dialogue that reads like a high-quality Thai scanlation: natural, concise, and in-character.\n"
202
+ "Keep lines short for speech bubbles; avoid stiff, literal phrasing.\n"
203
+ "Default: omit pronouns and omit gendered polite sentence-final particles unless the source line clearly requires them.\n"
204
+ "Never use the word 'ฉัน'. Prefer omitting the subject.\n"
205
+ "Never use a male-coded second-person pronoun. When addressing someone by name, do not add a second-person pronoun after the name; prefer NAME + clause.\n"
206
+ "If a second-person reference is unavoidable, use a neutral/casual form appropriate to tone, but keep it gender-neutral and consistent with the line.\n"
207
+ "Use particles/interjections sparingly to match tone; do not overuse.\n"
208
+ "Keep names/terms consistent; transliterate when appropriate.\n"
209
+ "Output only the translated text."
210
+ ),
211
+ "en": (
212
+ "Target language: English\n"
213
+ "Write natural English manga dialogue: concise, conversational, with contractions where natural.\n"
214
+ "Localize tone and character voice; keep emotion and emphasis.\n"
215
+ "Keep proper nouns consistent; do not over-explain."
216
+ ),
217
+ "ja": (
218
+ "Target language: Japanese\n"
219
+ "Write natural Japanese manga dialogue: concise, spoken.\n"
220
+ "Choose 丁寧語/タメ口 to match context; keep emotion and emphasis.\n"
221
+ "Keep proper nouns consistent; keep SFX natural in Japanese."
222
+ ),
223
+ "default": (
224
+ "Write natural manga dialogue in the target language: concise, spoken, faithful to meaning and tone."
225
+ ),
226
+ }
227
 
228
+ AI_PROMPT_USER_BY_LANG = {
229
+ "th": """
230
+ เป้าหมายภาษา: ไทย
231
+ แปลข้อความ OCR ในมังงะเป็นภาษาไทยธรรมชาติแบบบทสนทนา
232
+ คงน้ำเสียง/อารมณ์ให้เหมาะกับบริบท
233
+ สั้น กระชับ อ่านลื่น ห้ามใส่คำอธิบายหรือบรรยายเพิ่ม
234
+ คงชื่อเฉพาะ/ศัพท์เฉพาะให้สม่ำเสมอ และส่งออกเป็นข้อความไทยเท่านั้น
235
+ """.strip(),
236
+ "en": """Style preferences:
237
+ - Keep English dialogue concise and conversational.
238
+ - Keep lines short for speech bubbles.
239
+ - Keep names and recurring terms consistent.
240
+ - Keep SFX short; avoid very long repeated characters.
241
+ """.strip(),
242
+ "ja": """Style preferences:
243
+ - Keep Japanese dialogue concise and natural for manga.
244
+ - Keep lines short for speech bubbles.
245
+ - Keep names and recurring terms consistent.
246
+ - Keep SFX short; avoid very long repeated characters.
247
+ """.strip(),
248
+ "default": """Style preferences:
249
+ - Keep dialogue concise, spoken, and faithful to tone.
250
+ - Keep lines short for speech bubbles.
251
+ - Keep names and recurring terms consistent.
252
+ - Keep SFX short; avoid very long repeated characters.
253
+ """.strip(),
254
+ }
255
 
256
  AI_PROMPT_RESPONSE_CONTRACT_JSON = (
257
  "Return ONLY valid JSON (no markdown, no extra text).\n"
 
289
  _TP_HTML_EPS_PX = 0.0
290
  ZWSP = "\u200b"
291
 
292
+ def ai_prompt_user_default(lang: str, model: str = "auto") -> str:
293
  l = _normalize_lang(lang)
294
+ return (AI_PROMPT_USER_BY_LANG.get(l) or AI_PROMPT_USER_BY_LANG.get("default") or "").strip()
 
295
 
296
  def _active_ai_contract() -> str:
297
  return AI_PROMPT_RESPONSE_CONTRACT_JSON if DO_AI_JSON else AI_PROMPT_RESPONSE_CONTRACT_TEXT
 
398
  json.dump(cache, f, ensure_ascii=False)
399
  os.replace(tmp, path)
400
 
401
+ def _build_ai_prompt_packet(target_lang: str, original_text_full: str):
402
  lang = _normalize_lang(target_lang)
403
+ input_json = json.dumps(
404
+ {"target_lang": lang, "originalTextFull": original_text_full}, ensure_ascii=False)
405
  output_schema = json.dumps({"aiTextFull": "..."}, ensure_ascii=False)
406
  data_template = _active_ai_data_template()
407
  if DO_AI_JSON:
408
+ data_text = data_template.format(
409
+ input_json=input_json, output_schema=output_schema)
410
  else:
411
  data_text = data_template.format(input_json=input_json)
412
 
413
+ style = AI_LANG_STYLE.get(lang) or AI_LANG_STYLE.get("default") or ""
414
+ editable = (ai_prompt_user_default(lang) or "").strip()
 
415
 
416
+ system_parts = [AI_PROMPT_SYSTEM_BASE]
417
  if style:
418
  system_parts.append(style)
419
  system_parts.append(_active_ai_contract())
 
425
  user_parts.append(data_text)
426
  return system_text, user_parts
427
 
 
428
  def _gemini_generate_json(api_key: str, model: str, system_text: str, user_parts: list[str]):
429
  url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}"
430
  parts = [{"text": p} for p in user_parts if (p or "").strip()]
 
456
  raise Exception("Gemini returned empty text")
457
  return txt
458
 
 
459
  def _read_first_env(*names: str) -> str:
460
  for n in names:
461
  v = (os.environ.get(n) or "").strip()
 
515
 
516
  def _openai_compat_generate_json(api_key: str, base_url: str, model: str, system_text: str, user_parts: list[str]):
517
  url = (base_url.rstrip("/") + "/chat/completions")
518
+ messages = [{"role": "system", "content": system_text}]
519
+ for p in user_parts:
520
+ if (p or "").strip():
521
+ messages.append({"role": "user", "content": p})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
522
  payload = {
523
  "model": model,
524
  "messages": messages,
 
550
  fallback = _pick_hf_fallback_model(models)
551
  if fallback and fallback != model:
552
  payload["model"] = fallback
 
553
  used_model = fallback
554
  r2 = client.post(url, json=payload, headers=headers)
555
  try:
 
1499
  if not api_key:
1500
  raise Exception("AI_API_KEY is required for AI translation")
1501
 
 
1502
  lang = _normalize_lang(target_lang)
 
 
 
 
1503
  prompt_sig = _sha1(
1504
  json.dumps(
1505
  {
1506
+ "sys": AI_PROMPT_SYSTEM_BASE,
1507
+ "edit": AI_PROMPT_USER_BY_LANG,
1508
  "contract": _active_ai_contract(),
1509
  "data": _active_ai_data_template(),
1510
+ "style": AI_LANG_STYLE.get(lang) or AI_LANG_STYLE.get("default") or "",
1511
  },
1512
  ensure_ascii=False,
1513
  )
 
1519
  cache = _load_ai_cache(AI_CACHE_PATH)
1520
  cache_key = _sha1(
1521
  json.dumps(
1522
+ {"provider": provider, "m": model, "u": base_url,
1523
+ "l": lang, "p": prompt_sig, "t": original_text_full},
1524
  ensure_ascii=False,
1525
  )
1526
  )
 
1529
  if lang == "th" and cached:
1530
  t = str(cached.get("aiTextFull") or "")
1531
  if t:
1532
+ t2 = re.sub(
1533
+ r"(?:(?<=^)|(?<=[\s\"'“”‘’()\[\]{}<>]))\u0e19\u0e32\u0e22(?=(?:\s|$))", "", t)
1534
+ t2 = re.sub(r"[ \t]{2,}", " ", t2)
1535
+ t2 = re.sub(r"^[ \t]+", "", t2, flags=re.MULTILINE)
1536
  if t2 != t:
1537
  cached = dict(cached)
1538
  cached["aiTextFull"] = t2
 
1540
  _save_ai_cache(AI_CACHE_PATH, cache)
1541
  return cached
1542
 
1543
+ system_text, user_parts = _build_ai_prompt_packet(lang, original_text_full)
1544
 
1545
  started = time.time()
1546
  used_model = model
 
1549
  elif provider == "anthropic":
1550
  raw = _anthropic_generate_json(api_key, model, system_text, user_parts)
1551
  else:
1552
+ raw, used_model = _openai_compat_generate_json(
1553
+ api_key, base_url, model, system_text, user_parts)
1554
 
1555
+ ai_text_full = _parse_ai_textfull_only(
1556
+ raw) if DO_AI_JSON else _parse_ai_textfull_text_only(raw)
1557
 
1558
  if lang == "th" and ai_text_full:
1559
+ ai_text_full = re.sub(
1560
+ r"(?:(?<=^)|(?<=[\s\"'“”‘’()\[\]{}<>]))\u0e19\u0e32\u0e22(?=(?:\s|$))", "", ai_text_full)
1561
+ ai_text_full = re.sub(r"[ \t]{2,}", " ", ai_text_full)
1562
+ ai_text_full = re.sub(r"^[ \t]+", "", ai_text_full, flags=re.MULTILINE)
1563
 
1564
  result = {
1565
  "aiTextFull": ai_text_full,
 
1661
  return b2f(v)
1662
  return None
1663
 
 
1664
  def _get_points_from_geom(geom_bytes):
1665
  pts = []
1666
  height = None
 
1849
 
1850
  angle_deg_raw = math.degrees(math.atan2(dy, dx))
1851
  angle_deg = _normalize_angle_deg(angle_deg_raw)
1852
+
1853
  angle_deg_css = angle_deg
1854
 
1855
  height_px = height_norm * img_h
 
2477
  item_idx = int(_tp_num(it.get("item_index")) or ii)
2478
 
2479
  fs_raw = _tp_num(it.get("font_size_px"))
2480
+
2481
  fs = int(round(fs_raw)) if fs_raw and fs_raw > 0 else max(
2482
  10, int(round(h0 * 0.85)))
2483
  fs = max(6, min(fs, max(6, int(math.floor(h0 * 0.95)))))
 
2835
  def _item_avail_w_px(item: dict, W: int, H: int) -> float:
2836
  b = item.get("box") or {}
2837
  w_box = float(b.get("width") or 0.0) * float(W)
2838
+
2839
  L = 0.0
2840
  p1 = item.get("baseline_p1") or {}
2841
  p2 = item.get("baseline_p2") or {}
 
2889
  if (tw2 <= avail_w * 0.999) and (th2 <= avail_h * 0.999):
2890
  break
2891
  size -= 1
2892
+
2893
  if size <= 12 and avail_h >= 24:
2894
  tw0, th0, _ = m
2895
  if tw0 > (avail_w * 1.2):
 
2938
 
2939
  if not fits:
2940
  continue
2941
+
2942
  fits.sort()
2943
  p["para_font_size_px"] = int(fits[len(fits) // 2])
2944