Spaces:
Running
Running
Update backend/server.py
Browse files- backend/server.py +373 -123
backend/server.py
CHANGED
|
@@ -1,21 +1,22 @@
|
|
| 1 |
-
import asyncio, base64, copy, hashlib, io, json, os, re, tempfile, time, uuid, httpx
|
| 2 |
|
| 3 |
from backend import lens_core as core
|
| 4 |
-
|
| 5 |
from collections import OrderedDict
|
| 6 |
-
from threading import Lock
|
| 7 |
-
|
| 8 |
from dataclasses import dataclass
|
| 9 |
from typing import Any, Dict, List, Optional
|
| 10 |
-
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
|
| 11 |
from fastapi.middleware.cors import CORSMiddleware
|
| 12 |
|
| 13 |
SERVER_MAX_WORKERS = int(os.environ.get('SERVER_MAX_WORKERS', '15'))
|
| 14 |
JOB_TTL_SEC = int(os.environ.get('JOB_TTL_SEC', '3600'))
|
| 15 |
-
HTTP_TIMEOUT_SEC = float(os.environ.get(
|
|
|
|
| 16 |
SUPPORTED_MODES = {"lens_images", "lens_text"}
|
| 17 |
BUILD_ID = os.environ.get('TP_BUILD_ID', 'v9-backendfix-20260129')
|
| 18 |
-
TP_DEBUG = str(os.environ.get('TP_DEBUG', '')).strip(
|
|
|
|
| 19 |
|
| 20 |
TP_PARA_MARKER_PREFIX = '<<TP_P'
|
| 21 |
TP_PARA_MARKER_SUFFIX = '>>'
|
|
@@ -31,6 +32,28 @@ _job_queue: asyncio.Queue = asyncio.Queue()
|
|
| 31 |
_result_cache_lock = Lock()
|
| 32 |
_ai_cache_lock = Lock()
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
def _dbg(tag: str, data=None) -> None:
|
| 35 |
if not TP_DEBUG:
|
| 36 |
return
|
|
@@ -123,7 +146,7 @@ def _needs_ai_retry(ai_text_full: str, expected_paras: int) -> bool:
|
|
| 123 |
idx = _extract_marker_indices(ai_text_full)
|
| 124 |
if len(idx) >= expected_paras:
|
| 125 |
return False
|
| 126 |
-
|
| 127 |
if (TP_PARA_MARKER_PREFIX in (ai_text_full or '')) and (TP_PARA_MARKER_SUFFIX not in (ai_text_full or '')):
|
| 128 |
return True
|
| 129 |
return True
|
|
@@ -160,7 +183,8 @@ def _ai_prompt_sig(s: str) -> str:
|
|
| 160 |
return hashlib.sha256(t.encode('utf-8')).hexdigest()[:12]
|
| 161 |
|
| 162 |
def _build_cache_key(img_hash: str, lang: str, mode: str, source: str, ai_cfg: Optional["AiConfig"]) -> str:
|
| 163 |
-
parts = [img_hash, _normalize_lang(
|
|
|
|
| 164 |
if ai_cfg and (source or '').strip().lower() == 'ai':
|
| 165 |
parts.extend([
|
| 166 |
(ai_cfg.provider or '').strip(),
|
|
@@ -170,7 +194,6 @@ def _build_cache_key(img_hash: str, lang: str, mode: str, source: str, ai_cfg: O
|
|
| 170 |
])
|
| 171 |
return '|'.join([p for p in parts if p is not None])
|
| 172 |
|
| 173 |
-
|
| 174 |
def _b64_to_bytes(b64: str) -> bytes:
|
| 175 |
pad = '=' * ((4 - (len(b64) % 4)) % 4)
|
| 176 |
return base64.b64decode(b64 + pad)
|
|
@@ -189,11 +212,18 @@ def _bytes_to_datauri(blob: bytes, mime: str) -> str:
|
|
| 189 |
b64 = base64.b64encode(blob).decode('ascii')
|
| 190 |
return f"data:{mime};base64,{b64}"
|
| 191 |
|
| 192 |
-
def _download_bytes(url: str) -> tuple[bytes, str]:
|
| 193 |
u = (url or '').strip()
|
| 194 |
if not u:
|
| 195 |
return b'', ''
|
| 196 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
r = client.get(u)
|
| 198 |
r.raise_for_status()
|
| 199 |
ct = (r.headers.get('content-type') or '').split(';')[0].strip()
|
|
@@ -208,6 +238,58 @@ def _resolve_provider_defaults(provider: str) -> dict:
|
|
| 208 |
def _resolve_model(provider: str, model: str) -> str:
|
| 209 |
return core._resolve_model(provider, model)
|
| 210 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
def _normalize_lang(lang: str) -> str:
|
| 212 |
return core._normalize_lang(lang)
|
| 213 |
|
|
@@ -226,7 +308,31 @@ def _sanitize_marked_text(marked_text: str) -> str:
|
|
| 226 |
t = str(marked_text or "")
|
| 227 |
if not t:
|
| 228 |
return ""
|
| 229 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
if not indices:
|
| 231 |
return _collapse_ws(t)
|
| 232 |
out_lines: List[str] = []
|
|
@@ -241,43 +347,73 @@ def _sanitize_marked_text(marked_text: str) -> str:
|
|
| 241 |
out_lines.append("")
|
| 242 |
return "\n".join(out_lines).strip("\n")
|
| 243 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
def _build_ai_prompt_packet_custom(target_lang: str, original_text_full: str, prompt_editable: str, is_retry: bool = False) -> tuple[str, List[str]]:
|
| 245 |
lang = _normalize_lang(target_lang)
|
| 246 |
style_prompt = (prompt_editable or "").strip()
|
| 247 |
if not style_prompt:
|
| 248 |
-
style_prompt = (
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
"
|
| 263 |
-
"
|
| 264 |
-
"Keep
|
|
|
|
| 265 |
]
|
| 266 |
if is_retry:
|
| 267 |
-
|
| 268 |
-
"Retry:
|
| 269 |
)
|
| 270 |
-
system_text = "\n".join([p for p in system_parts if p])
|
| 271 |
|
| 272 |
-
|
| 273 |
-
"
|
| 274 |
-
+ input_json
|
| 275 |
-
+ "\n```\n\nOUTPUT_JSON (json):\n```json\n{\"aiTextFull\":\"...\"}\n```"
|
| 276 |
-
)
|
| 277 |
|
| 278 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
|
| 280 |
def ai_translate_text(original_text_full: str, target_lang: str, ai: AiConfig, is_retry: bool = False) -> dict:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
api_key = (ai.api_key or '').strip()
|
| 282 |
if not api_key:
|
| 283 |
raise Exception('AI api_key is required')
|
|
@@ -300,7 +436,8 @@ def ai_translate_text(original_text_full: str, target_lang: str, ai: AiConfig, i
|
|
| 300 |
'base_url') or 'https://api.openai.com/v1'
|
| 301 |
|
| 302 |
system_text, user_parts = _build_ai_prompt_packet_custom(
|
| 303 |
-
target_lang, original_text_full, ai.prompt_editable, is_retry=is_retry
|
|
|
|
| 304 |
|
| 305 |
started = _now()
|
| 306 |
used_model = model
|
|
@@ -311,8 +448,12 @@ def ai_translate_text(original_text_full: str, target_lang: str, ai: AiConfig, i
|
|
| 311 |
raw = core._anthropic_generate_json(
|
| 312 |
api_key, model, system_text, user_parts)
|
| 313 |
else:
|
| 314 |
-
|
| 315 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
|
| 317 |
ai_text_full = core._parse_ai_textfull_only(
|
| 318 |
raw) if core.DO_AI_JSON else core._parse_ai_textfull_text_only(raw)
|
|
@@ -489,73 +630,129 @@ def process_image_path(image_path: str, lang: str, mode: str, ai_cfg: Optional[A
|
|
| 489 |
src_paras = _tree_to_paragraph_texts(original_tree or {})
|
| 490 |
src_text = _apply_para_markers(src_paras) if src_paras else str(
|
| 491 |
out.get('originalTextFull') or '')
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
'
|
| 496 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 497 |
})
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
core._apply_para_font_size(translated_tree or {}, shared_para_sizes)
|
| 540 |
-
core._apply_para_font_size(ai_tree or {}, shared_para_sizes)
|
| 541 |
-
core._rebuild_ai_spans_after_font_resize(
|
| 542 |
-
ai_tree or {}, W, H, thai_font or '', latin_font or '', lang=target_lang)
|
| 543 |
-
|
| 544 |
-
out['AiTextFull'] = str(ai.get('aiTextFull') or '')
|
| 545 |
-
out['Ai'] = {
|
| 546 |
-
'aiTextFull': str(ai.get('aiTextFull') or ''),
|
| 547 |
-
'aiTree': ai_tree,
|
| 548 |
-
'meta': ai.get('meta') or {},
|
| 549 |
-
}
|
| 550 |
-
if getattr(core, 'DO_AI_HTML', True):
|
| 551 |
-
core.fit_tree_font_sizes_for_tp_html(
|
| 552 |
-
ai_tree, thai_font or '', latin_font or '', W, H)
|
| 553 |
-
out['Ai']['aihtml'] = core.ai_tree_to_tp_html(ai_tree, W, H)
|
| 554 |
-
out['Ai']['aihtmlMeta'] = {
|
| 555 |
-
'baseW': int(W),
|
| 556 |
-
'baseH': int(H),
|
| 557 |
-
'format': 'tp',
|
| 558 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 559 |
|
| 560 |
if getattr(core, 'DO_ORIGINAL', True) and getattr(core, 'DO_ORIGINAL_HTML', True) and isinstance(original_tree, dict):
|
| 561 |
core.fit_tree_font_sizes_for_tp_html(
|
|
@@ -594,6 +791,24 @@ app.add_middleware(
|
|
| 594 |
allow_headers=['*'],
|
| 595 |
)
|
| 596 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 597 |
async def _cleanup_jobs_loop():
|
| 598 |
while True:
|
| 599 |
await asyncio.sleep(60)
|
|
@@ -620,6 +835,10 @@ def _process_payload(payload: dict) -> dict:
|
|
| 620 |
mode = (payload.get('mode') or 'lens_images')
|
| 621 |
lang = (payload.get('lang') or 'en')
|
| 622 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 623 |
src = (payload.get('src') or '').strip()
|
| 624 |
img_bytes = b''
|
| 625 |
mime = ''
|
|
@@ -629,7 +848,7 @@ def _process_payload(payload: dict) -> dict:
|
|
| 629 |
elif src.startswith('data:'):
|
| 630 |
img_bytes, mime = _datauri_to_bytes(src)
|
| 631 |
else:
|
| 632 |
-
img_bytes, mime = _download_bytes(src)
|
| 633 |
|
| 634 |
t_img = time.perf_counter()
|
| 635 |
|
|
@@ -655,7 +874,9 @@ def _process_payload(payload: dict) -> dict:
|
|
| 655 |
img_hash = _sha256_hex(img_bytes)
|
| 656 |
cache_key = ''
|
| 657 |
if mode == 'lens_text' and img_hash:
|
| 658 |
-
|
|
|
|
|
|
|
| 659 |
cached = None
|
| 660 |
if source == 'ai':
|
| 661 |
cached = _lru_get(_ai_result_cache, _ai_cache_lock, cache_key)
|
|
@@ -684,9 +905,11 @@ def _process_payload(payload: dict) -> dict:
|
|
| 684 |
}
|
| 685 |
if cache_key and isinstance(out, dict):
|
| 686 |
if source == 'ai':
|
| 687 |
-
_lru_set(_ai_result_cache, _ai_cache_lock,
|
|
|
|
| 688 |
else:
|
| 689 |
-
_lru_set(_result_cache, _result_cache_lock,
|
|
|
|
| 690 |
return out
|
| 691 |
finally:
|
| 692 |
try:
|
|
@@ -730,6 +953,14 @@ async def meta():
|
|
| 730 |
@app.post('/translate')
|
| 731 |
async def translate(payload: Dict[str, Any]):
|
| 732 |
jid = str(uuid.uuid4())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 733 |
_jobs[jid] = {'status': 'queued', 'ts': _now()}
|
| 734 |
await _job_queue.put((jid, payload))
|
| 735 |
return {'id': jid}
|
|
@@ -799,9 +1030,12 @@ async def ai_resolve(payload: Dict[str, Any]):
|
|
| 799 |
if provider == 'huggingface' and not models:
|
| 800 |
models = [
|
| 801 |
'google/gemma-3-27b-it:featherless-a',
|
|
|
|
|
|
|
|
|
|
| 802 |
]
|
| 803 |
|
| 804 |
-
if not models:
|
| 805 |
fallback_models: List[str] = []
|
| 806 |
preset_model = str(preset.get('model') or '').strip()
|
| 807 |
if preset_model:
|
|
@@ -839,13 +1073,11 @@ async def ai_resolve(payload: Dict[str, Any]):
|
|
| 839 |
key=str.lower,
|
| 840 |
)
|
| 841 |
|
| 842 |
-
if models and
|
| 843 |
resolved_model = models[0]
|
| 844 |
|
| 845 |
-
defaults = core._remote_defaults()
|
| 846 |
-
|
| 847 |
prompt_default = (getattr(core, 'ai_prompt_user_default',
|
| 848 |
-
lambda _l
|
| 849 |
|
| 850 |
return {
|
| 851 |
'ok': True,
|
|
@@ -860,16 +1092,26 @@ async def ai_resolve(payload: Dict[str, Any]):
|
|
| 860 |
@app.get('/ai/prompt/default')
|
| 861 |
async def ai_prompt_default(lang: str = 'en'):
|
| 862 |
l = _normalize_lang(lang)
|
| 863 |
-
|
| 864 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 865 |
return {
|
| 866 |
'ok': True,
|
| 867 |
'lang': l,
|
| 868 |
-
'prompt_editable_default': (getattr(core, 'ai_prompt_user_default', lambda _l
|
| 869 |
-
'lang_style':
|
| 870 |
-
'system_base':
|
| 871 |
-
'contract':
|
| 872 |
-
'
|
| 873 |
}
|
| 874 |
|
| 875 |
@app.websocket('/ws')
|
|
@@ -884,6 +1126,14 @@ async def ws_endpoint(ws: WebSocket):
|
|
| 884 |
continue
|
| 885 |
jid = str(data.get('id') or '')
|
| 886 |
payload = data.get('payload') or {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 887 |
try:
|
| 888 |
result = await asyncio.to_thread(_process_payload, payload)
|
| 889 |
try:
|
|
|
|
| 1 |
+
import asyncio, base64, copy, hashlib, io, json, os, re, tempfile, time, uuid, httpx, logging
|
| 2 |
|
| 3 |
from backend import lens_core as core
|
| 4 |
+
from http import HTTPStatus
|
| 5 |
from collections import OrderedDict
|
| 6 |
+
from threading import Lock, Semaphore
|
|
|
|
| 7 |
from dataclasses import dataclass
|
| 8 |
from typing import Any, Dict, List, Optional
|
| 9 |
+
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request
|
| 10 |
from fastapi.middleware.cors import CORSMiddleware
|
| 11 |
|
| 12 |
SERVER_MAX_WORKERS = int(os.environ.get('SERVER_MAX_WORKERS', '15'))
|
| 13 |
JOB_TTL_SEC = int(os.environ.get('JOB_TTL_SEC', '3600'))
|
| 14 |
+
HTTP_TIMEOUT_SEC = float(os.environ.get(
|
| 15 |
+
'HTTP_TIMEOUT_SEC', str(getattr(core, 'AI_TIMEOUT_SEC', 120))))
|
| 16 |
SUPPORTED_MODES = {"lens_images", "lens_text"}
|
| 17 |
BUILD_ID = os.environ.get('TP_BUILD_ID', 'v9-backendfix-20260129')
|
| 18 |
+
TP_DEBUG = str(os.environ.get('TP_DEBUG', '')).strip(
|
| 19 |
+
).lower() in ('1', 'true', 'yes', 'on')
|
| 20 |
|
| 21 |
TP_PARA_MARKER_PREFIX = '<<TP_P'
|
| 22 |
TP_PARA_MARKER_SUFFIX = '>>'
|
|
|
|
| 32 |
_result_cache_lock = Lock()
|
| 33 |
_ai_cache_lock = Lock()
|
| 34 |
|
| 35 |
+
# --- Hugging Face call throttling; every value can be overridden via env ---
# Number of permits handed to the semaphore below; clamped to at least 1.
HF_AI_MAX_CONCURRENCY = max(1, int(os.getenv('HF_AI_MAX_CONCURRENCY', '1')))
# Minimum spacing, in seconds, enforced between calls; clamped to >= 0.
HF_AI_MIN_INTERVAL_SEC = max(0.0, float(os.getenv('HF_AI_MIN_INTERVAL_SEC', '5')))
# Attempt budget used by the backoff loop; clamped to at least 1.
HF_AI_MAX_RETRIES = max(1, int(os.getenv('HF_AI_MAX_RETRIES', '6')))
# Base delay, in seconds, for exponential backoff; clamped to >= 0.2.
HF_AI_RETRY_BASE_SEC = max(0.2, float(os.getenv('HF_AI_RETRY_BASE_SEC', '2')))

# Shared throttle state.
_hf_ai_sem = Semaphore(HF_AI_MAX_CONCURRENCY)
_hf_ai_lock = Lock()
_hf_ai_last_ts = 0.0

# Matches a well-formed paragraph marker such as <<TP_P12>>.
_tp_marker_re = re.compile(r'<<TP_P\d+>>')

# Access-log mode, lower-cased; an unset or empty value falls back to 'custom'.
TP_ACCESS_LOG_MODE = (os.getenv('TP_ACCESS_LOG_MODE', 'custom') or 'custom').strip().lower()
if TP_ACCESS_LOG_MODE in ('custom', 'tp', 'plain'):
    # In these modes the 'uvicorn.access' logger is switched off entirely.
    try:
        _uv = logging.getLogger('uvicorn.access')
        _uv.disabled = True
        _uv.propagate = False
        _uv.setLevel(logging.CRITICAL)
    except Exception:
        pass
|
| 56 |
+
|
| 57 |
def _dbg(tag: str, data=None) -> None:
|
| 58 |
if not TP_DEBUG:
|
| 59 |
return
|
|
|
|
| 146 |
idx = _extract_marker_indices(ai_text_full)
|
| 147 |
if len(idx) >= expected_paras:
|
| 148 |
return False
|
| 149 |
+
|
| 150 |
if (TP_PARA_MARKER_PREFIX in (ai_text_full or '')) and (TP_PARA_MARKER_SUFFIX not in (ai_text_full or '')):
|
| 151 |
return True
|
| 152 |
return True
|
|
|
|
| 183 |
return hashlib.sha256(t.encode('utf-8')).hexdigest()[:12]
|
| 184 |
|
| 185 |
def _build_cache_key(img_hash: str, lang: str, mode: str, source: str, ai_cfg: Optional["AiConfig"]) -> str:
|
| 186 |
+
parts = [img_hash, _normalize_lang(
|
| 187 |
+
lang), (mode or '').strip(), (source or '').strip()]
|
| 188 |
if ai_cfg and (source or '').strip().lower() == 'ai':
|
| 189 |
parts.extend([
|
| 190 |
(ai_cfg.provider or '').strip(),
|
|
|
|
| 194 |
])
|
| 195 |
return '|'.join([p for p in parts if p is not None])
|
| 196 |
|
|
|
|
| 197 |
def _b64_to_bytes(b64: str) -> bytes:
    """Decode a base64 string, re-adding any '=' padding that was stripped.

    The input is padded up to the next multiple of four characters before
    being handed to ``base64.b64decode``.
    """
    missing = -len(b64) % 4
    return base64.b64decode(b64 + '=' * missing)
|
|
|
|
| 212 |
b64 = base64.b64encode(blob).decode('ascii')
|
| 213 |
return f"data:{mime};base64,{b64}"
|
| 214 |
|
| 215 |
+
def _download_bytes(url: str, referer: str = '') -> tuple[bytes, str]:
|
| 216 |
u = (url or '').strip()
|
| 217 |
if not u:
|
| 218 |
return b'', ''
|
| 219 |
+
headers = {
|
| 220 |
+
'user-agent': 'Mozilla/5.0 (TextPhantomOCR; +https://huggingface.co/spaces)',
|
| 221 |
+
}
|
| 222 |
+
ref = (referer or '').strip()
|
| 223 |
+
if ref:
|
| 224 |
+
headers['referer'] = ref
|
| 225 |
+
|
| 226 |
+
with httpx.Client(timeout=HTTP_TIMEOUT_SEC, follow_redirects=True, headers=headers) as client:
|
| 227 |
r = client.get(u)
|
| 228 |
r.raise_for_status()
|
| 229 |
ct = (r.headers.get('content-type') or '').split(';')[0].strip()
|
|
|
|
| 238 |
def _resolve_model(provider: str, model: str) -> str:
    """Delegate model resolution to ``core._resolve_model``."""
    resolved = core._resolve_model(provider, model)
    return resolved
|
| 240 |
|
| 241 |
+
def _has_meaningful_text(s: str) -> bool:
    """Report whether *s* has non-whitespace text besides paragraph markers.

    Every match of ``_tp_marker_re`` is removed first; what remains must
    contain at least one non-whitespace character. ``None`` and other falsy
    inputs are treated as the empty string.
    """
    without_markers = _tp_marker_re.sub('', str(s or ''))
    return without_markers.strip() != ''
|
| 244 |
+
|
| 245 |
+
def _is_hf_provider(provider: str, base_url: str) -> bool:
    """Return True when the request targets Hugging Face.

    That is the case when the provider name is ``huggingface`` (ignoring case
    and surrounding whitespace) or when the base URL contains
    ``router.huggingface.co``. Falsy arguments count as empty strings.
    """
    if (provider or '').strip().lower() == 'huggingface':
        return True
    return 'router.huggingface.co' in (base_url or '').strip().lower()
|
| 249 |
+
|
| 250 |
+
# Lower-case substrings that mark a transient, retry-worthy failure.
_HF_RETRYABLE_PHRASES = (
    'rate limit',
    'ratelimit',
    'too many requests',
    'overloaded',
    'temporarily',
)
# 429 / 503 as a standalone number. The lookarounds reject digits embedded in
# a longer token ("4290", "1.429", "x503") while still accepting "http 429",
# "status=429" and "(503)".
_HF_RETRYABLE_STATUS_RE = re.compile(r'(?<![\w.])(?:429|503)(?!\w)')


def _is_hf_rate_limited_error(msg: str) -> bool:
    """Decide whether an error message describes a throttling/overload failure.

    Args:
        msg: Error text; ``None`` or empty is treated as "not rate limited".

    Returns:
        True when the lower-cased text contains one of the known phrases
        (rate limit, too many requests, overloaded, temporarily) or a
        standalone 429 / 503 status code; otherwise False.
    """
    text = (msg or '').lower()
    if any(phrase in text for phrase in _HF_RETRYABLE_PHRASES):
        return True
    return _HF_RETRYABLE_STATUS_RE.search(text) is not None
|
| 259 |
+
|
| 260 |
+
def _hf_throttle_before_call() -> None:
    """Block until at least ``HF_AI_MIN_INTERVAL_SEC`` has passed since the last call.

    Does nothing when the interval is zero or negative. Otherwise the wait
    happens while ``_hf_ai_lock`` is held, so concurrent callers are spaced
    out one after another, and ``_hf_ai_last_ts`` is refreshed before the
    lock is released.
    """
    global _hf_ai_last_ts
    if HF_AI_MIN_INTERVAL_SEC > 0:
        with _hf_ai_lock:
            # _now() is defined elsewhere in this file; presumably seconds as
            # a float -- confirm against its definition.
            elapsed = _now() - float(_hf_ai_last_ts or 0.0)
            remaining = HF_AI_MIN_INTERVAL_SEC - elapsed
            if remaining > 0:
                time.sleep(remaining)
            _hf_ai_last_ts = _now()
|
| 271 |
+
|
| 272 |
+
def _openai_compat_generate_with_hf_backoff(api_key: str, base_url: str, model: str, system_text: str, user_parts: List[str]):
    """Call the OpenAI-compatible generator, retrying Hugging Face throttling errors.

    Each attempt acquires ``_hf_ai_sem`` and runs ``_hf_throttle_before_call()``
    before delegating to ``core._openai_compat_generate_json``.

    Errors that ``_is_hf_rate_limited_error`` does not recognise propagate
    immediately. Recognised errors are retried, up to ``HF_AI_MAX_RETRIES``
    attempts in total. The delay between attempts is
    ``HF_AI_RETRY_BASE_SEC * 2**attempt`` (exponent capped at 4), raised to at
    least ``HF_AI_MIN_INTERVAL_SEC`` and then capped at 15 seconds.

    Returns:
        Whatever ``core._openai_compat_generate_json`` returns.

    Raises:
        Exception: the error from the final attempt, or any non-retryable
            error, re-raised unchanged.
    """
    max_attempts = int(HF_AI_MAX_RETRIES)
    for attempt in range(max_attempts):
        try:
            with _hf_ai_sem:
                _hf_throttle_before_call()
                return core._openai_compat_generate_json(api_key, base_url, model, system_text, user_parts)
        except Exception as e:
            # Give up at once on the last attempt: sleeping here would only
            # delay the failure the caller is about to receive anyway.
            out_of_attempts = attempt + 1 >= max_attempts
            if out_of_attempts or not _is_hf_rate_limited_error(str(e)):
                raise
            delay = min(15.0, max(float(HF_AI_MIN_INTERVAL_SEC), float(
                HF_AI_RETRY_BASE_SEC) * (2 ** min(attempt, 4))))
            _dbg('ai.hf.backoff', {
                'attempt': attempt + 1, 'delay_sec': round(delay, 2), 'err': str(e)[:240]})
            time.sleep(delay)
    # Only reachable if the attempt budget is zero.
    raise Exception('hf_backoff_failed')
|
| 292 |
+
|
| 293 |
def _normalize_lang(lang: str) -> str:
    """Delegate language-code normalisation to ``core._normalize_lang``."""
    normalized = core._normalize_lang(lang)
    return normalized
|
| 295 |
|
|
|
|
| 308 |
t = str(marked_text or "")
|
| 309 |
if not t:
|
| 310 |
return ""
|
| 311 |
+
t = t.replace("\r\n", "\n").replace("\r", "\n")
|
| 312 |
+
t = re.sub(r"<<TP_P(?!\d+>>)[^\s>]*>?", "", t)
|
| 313 |
+
t = re.sub(r"(?m)^\s*(<<TP_P\d+>>)\s*(\S)", r"\1\n\2", t)
|
| 314 |
+
|
| 315 |
+
lines = t.split("\n")
|
| 316 |
+
out0: List[str] = []
|
| 317 |
+
for line in lines:
|
| 318 |
+
if "<<TP_P" not in line:
|
| 319 |
+
out0.append(line)
|
| 320 |
+
continue
|
| 321 |
+
m = re.match(r"^\s*(<<TP_P\d+>>)\s*$", line)
|
| 322 |
+
if m:
|
| 323 |
+
out0.append(m.group(1))
|
| 324 |
+
continue
|
| 325 |
+
m2 = re.match(r"^\s*(<<TP_P\d+>>)\s*(.*)$", line)
|
| 326 |
+
if m2:
|
| 327 |
+
out0.append(m2.group(1))
|
| 328 |
+
rest = (m2.group(2) or "").strip()
|
| 329 |
+
if rest:
|
| 330 |
+
out0.append(rest)
|
| 331 |
+
continue
|
| 332 |
+
out0.append(re.sub(r"<<TP_P\d+>>", "", line))
|
| 333 |
+
t = "\n".join(out0)
|
| 334 |
+
|
| 335 |
+
indices = sorted(_extract_marker_indices(t))
|
| 336 |
if not indices:
|
| 337 |
return _collapse_ws(t)
|
| 338 |
out_lines: List[str] = []
|
|
|
|
| 347 |
out_lines.append("")
|
| 348 |
return "\n".join(out_lines).strip("\n")
|
| 349 |
|
| 350 |
+
|
| 351 |
+
def _has_complete_marker_sequence(ai_text_full: str, expected_paras: int) -> bool:
    """Check that markers ``<<TP_P0>>`` .. ``<<TP_P{n-1}>>`` all appear, in order.

    Args:
        ai_text_full: Text to inspect; falsy values are treated as empty.
        expected_paras: Number of paragraphs (n) that should be marked.

    Returns:
        True when ``expected_paras`` is not positive, or when the sorted
        indices from ``_extract_marker_indices`` start with 0..n-1 and the
        first occurrence of each marker sits after the previous marker's.
    """
    if expected_paras <= 0:
        return True

    text = str(ai_text_full or "")
    wanted = list(range(int(expected_paras)))
    present = sorted(_extract_marker_indices(text))
    if len(present) < len(wanted) or present[:len(wanted)] != wanted:
        return False

    prev_pos = -1
    for n in wanted:
        pos = text.find(f"<<TP_P{n}>>")
        # find() yields -1 when absent, which can never exceed prev_pos, so
        # this single comparison covers both "missing" and "out of order".
        if pos <= prev_pos:
            return False
        prev_pos = pos
    return True
|
| 369 |
+
|
| 370 |
def _build_ai_prompt_packet_custom(target_lang: str, original_text_full: str, prompt_editable: str, is_retry: bool = False) -> tuple[str, List[str]]:
    """Assemble the system text and user message parts for an AI translation call.

    Args:
        target_lang: Target language; passed through ``_normalize_lang``.
        original_text_full: Source text, expected to carry paragraph markers.
        prompt_editable: Caller-supplied style prompt. When blank, the value
            of ``core.ai_prompt_user_default(lang)`` is used if that hook exists.
        is_retry: Adds an extra "output ALL markers" rule to the contract.

    Returns:
        ``(system_text, user_parts)``. ``system_text`` joins the non-empty
        sections (system base, language style, output contract) with blank
        lines. ``user_parts`` holds an optional ``StylePrompt:`` entry followed
        by the ``Input:`` entry.
    """
    lang = _normalize_lang(target_lang)

    style_prompt = (prompt_editable or "").strip()
    if not style_prompt:
        default_prompt_fn = getattr(
            core, "ai_prompt_user_default", lambda _l, _m=None: "")
        style_prompt = (default_prompt_fn(lang) or "").strip()

    base = (getattr(core, "AI_PROMPT_SYSTEM_BASE", "") or "").strip()
    lang_styles = getattr(core, "AI_LANG_STYLE", {}) or {}
    style = (lang_styles.get(lang) or lang_styles.get("default") or "").strip()

    contract_lines: List[str] = [
        "Follow the user's StylePrompt as hard constraints (unless it would break marker rules).",
        "Output ONLY the translated text (no JSON, no markdown, no extra commentary).",
        "Markers: Keep every paragraph marker like <<TP_P0>> unchanged and in order. Do not remove, rename, or add markers.",
        "For each marker, output the marker followed by that paragraph's translated text.",
    ]
    if is_retry:
        contract_lines.append(
            "Retry: You MUST output ALL markers from the first to the last marker in the input."
        )

    sections = (base, style, "\n".join(contract_lines))
    system_text = "\n\n".join(section for section in sections if section)

    input_part = "Input:\n" + str(original_text_full or "")
    if style_prompt:
        user_parts: List[str] = ["StylePrompt:\n" + style_prompt, input_part]
    else:
        user_parts = [input_part]
    return system_text, user_parts
|
| 406 |
|
| 407 |
def ai_translate_text(original_text_full: str, target_lang: str, ai: AiConfig, is_retry: bool = False) -> dict:
|
| 408 |
+
if not _has_meaningful_text(original_text_full):
|
| 409 |
+
return {
|
| 410 |
+
'aiTextFull': '',
|
| 411 |
+
'meta': {
|
| 412 |
+
'skipped': True,
|
| 413 |
+
'skipped_reason': 'no_text',
|
| 414 |
+
},
|
| 415 |
+
}
|
| 416 |
+
|
| 417 |
api_key = (ai.api_key or '').strip()
|
| 418 |
if not api_key:
|
| 419 |
raise Exception('AI api_key is required')
|
|
|
|
| 436 |
'base_url') or 'https://api.openai.com/v1'
|
| 437 |
|
| 438 |
system_text, user_parts = _build_ai_prompt_packet_custom(
|
| 439 |
+
target_lang, original_text_full, ai.prompt_editable, is_retry=is_retry
|
| 440 |
+
)
|
| 441 |
|
| 442 |
started = _now()
|
| 443 |
used_model = model
|
|
|
|
| 448 |
raw = core._anthropic_generate_json(
|
| 449 |
api_key, model, system_text, user_parts)
|
| 450 |
else:
|
| 451 |
+
if _is_hf_provider(provider, base_url):
|
| 452 |
+
raw, used_model = _openai_compat_generate_with_hf_backoff(
|
| 453 |
+
api_key, base_url, model, system_text, user_parts)
|
| 454 |
+
else:
|
| 455 |
+
raw, used_model = core._openai_compat_generate_json(
|
| 456 |
+
api_key, base_url, model, system_text, user_parts)
|
| 457 |
|
| 458 |
ai_text_full = core._parse_ai_textfull_only(
|
| 459 |
raw) if core.DO_AI_JSON else core._parse_ai_textfull_text_only(raw)
|
|
|
|
| 630 |
src_paras = _tree_to_paragraph_texts(original_tree or {})
|
| 631 |
src_text = _apply_para_markers(src_paras) if src_paras else str(
|
| 632 |
out.get('originalTextFull') or '')
|
| 633 |
+
if not _has_meaningful_text(src_text):
|
| 634 |
+
out['AiTextFull'] = ''
|
| 635 |
+
out['Ai'] = {
|
| 636 |
+
'meta': {
|
| 637 |
+
'skipped': True,
|
| 638 |
+
'skipped_reason': 'no_text',
|
| 639 |
+
}
|
| 640 |
+
}
|
| 641 |
+
else:
|
| 642 |
+
ai = ai_translate_text(src_text, target_lang, ai_cfg)
|
| 643 |
+
if src_paras and _needs_ai_retry(str(ai.get('aiTextFull') or ''), len(src_paras)):
|
| 644 |
+
_dbg('ai.retry', {
|
| 645 |
+
'expected_paras': len(src_paras),
|
| 646 |
+
'found_markers': len(_extract_marker_indices(str(ai.get('aiTextFull') or ''))),
|
| 647 |
+
})
|
| 648 |
+
retry_paras = [_clamp_runaway_repeats(p) for p in src_paras]
|
| 649 |
+
retry_text = _apply_para_markers(retry_paras) or src_text
|
| 650 |
+
ai = ai_translate_text(
|
| 651 |
+
retry_text, target_lang, ai_cfg, is_retry=True)
|
| 652 |
+
|
| 653 |
+
ai_text_full = str(ai.get('aiTextFull') or '')
|
| 654 |
+
meta0 = ai.get('meta') or {}
|
| 655 |
+
if src_paras:
|
| 656 |
+
expected = len(src_paras)
|
| 657 |
+
if not _has_complete_marker_sequence(ai_text_full, expected):
|
| 658 |
+
fallback_paras = _tree_to_paragraph_texts(translated_tree or {})
|
| 659 |
+
if len(fallback_paras) < expected:
|
| 660 |
+
fallback_paras = (fallback_paras + src_paras)[:expected]
|
| 661 |
+
else:
|
| 662 |
+
fallback_paras = fallback_paras[:expected]
|
| 663 |
+
|
| 664 |
+
found = sorted(_extract_marker_indices(ai_text_full))
|
| 665 |
+
seg_map: Dict[int, str] = {}
|
| 666 |
+
for idx in found:
|
| 667 |
+
if idx < 0 or idx >= expected:
|
| 668 |
+
continue
|
| 669 |
+
marker = f"<<TP_P{idx}>>"
|
| 670 |
+
m = re.search(rf"{re.escape(marker)}\s*([\s\S]*?)(?=<<TP_P\d+>>|\Z)", ai_text_full)
|
| 671 |
+
seg = _collapse_ws(m.group(1) if m else '')
|
| 672 |
+
if seg and idx not in seg_map:
|
| 673 |
+
seg_map[idx] = seg
|
| 674 |
+
|
| 675 |
+
missing = 0
|
| 676 |
+
out_lines: List[str] = []
|
| 677 |
+
for i in range(expected):
|
| 678 |
+
seg = seg_map.get(i) or _collapse_ws(fallback_paras[i] if i < len(fallback_paras) else '')
|
| 679 |
+
if not seg_map.get(i):
|
| 680 |
+
missing += 1
|
| 681 |
+
out_lines.append(f"<<TP_P{i}>>")
|
| 682 |
+
out_lines.append(seg)
|
| 683 |
+
out_lines.append('')
|
| 684 |
+
ai_text_full = "\n".join(out_lines).strip("\n")
|
| 685 |
+
_dbg('ai.marker.repaired', {
|
| 686 |
+
'expected_paras': expected,
|
| 687 |
+
'found_markers': len(seg_map),
|
| 688 |
+
'missing': missing,
|
| 689 |
+
})
|
| 690 |
+
|
| 691 |
+
meta0 = {
|
| 692 |
+
**meta0,
|
| 693 |
+
'marker_repaired': True,
|
| 694 |
+
'marker_expected': expected,
|
| 695 |
+
'marker_found': len(seg_map),
|
| 696 |
+
'marker_missing': missing,
|
| 697 |
+
}
|
| 698 |
+
|
| 699 |
+
template_tree = _pick_ai_template_tree()
|
| 700 |
+
_dbg('ai.template.pick', {
|
| 701 |
+
'score_original': _tree_score(original_tree),
|
| 702 |
+
'score_translated': _tree_score(translated_tree),
|
| 703 |
+
'picked': 'original' if template_tree is original_tree else ('translated' if template_tree is translated_tree else 'none'),
|
| 704 |
})
|
| 705 |
+
if not isinstance(template_tree, dict):
|
| 706 |
+
template_tree = original_tree if isinstance(original_tree, dict) else (
|
| 707 |
+
translated_tree if isinstance(translated_tree, dict) else {})
|
| 708 |
+
patched = core.patch(
|
| 709 |
+
{'Ai': {'aiTextFull': str(
|
| 710 |
+
ai_text_full or ''), 'aiTree': template_tree}},
|
| 711 |
+
W,
|
| 712 |
+
H,
|
| 713 |
+
thai_font or '',
|
| 714 |
+
latin_font or '',
|
| 715 |
+
lang=target_lang,
|
| 716 |
+
)
|
| 717 |
+
ai_tree = (patched.get('Ai') or {}).get('aiTree') or {}
|
| 718 |
+
_dbg('ai.patched', {
|
| 719 |
+
'ai_text_len': len(ai_text_full),
|
| 720 |
+
'stats_ai': _tree_stats(ai_tree),
|
| 721 |
+
'stats_original': _tree_stats(original_tree or {}),
|
| 722 |
+
'stats_translated': _tree_stats(translated_tree or {}),
|
| 723 |
+
'mode': mode_id,
|
| 724 |
+
'lang': target_lang,
|
| 725 |
+
})
|
| 726 |
+
|
| 727 |
+
shared_para_sizes = core._compute_shared_para_sizes(
|
| 728 |
+
[original_tree or {}, translated_tree or {}, ai_tree or {}],
|
| 729 |
+
thai_font or '',
|
| 730 |
+
latin_font or '',
|
| 731 |
+
W,
|
| 732 |
+
H,
|
| 733 |
+
)
|
| 734 |
+
core._apply_para_font_size(original_tree or {}, shared_para_sizes)
|
| 735 |
+
core._apply_para_font_size(
|
| 736 |
+
translated_tree or {}, shared_para_sizes)
|
| 737 |
+
core._apply_para_font_size(ai_tree or {}, shared_para_sizes)
|
| 738 |
+
core._rebuild_ai_spans_after_font_resize(
|
| 739 |
+
ai_tree or {}, W, H, thai_font or '', latin_font or '', lang=target_lang)
|
| 740 |
+
|
| 741 |
+
out['AiTextFull'] = ai_text_full
|
| 742 |
+
out['Ai'] = {
|
| 743 |
+
'aiTextFull': ai_text_full,
|
| 744 |
+
'aiTree': ai_tree,
|
| 745 |
+
'meta': meta0,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 746 |
}
|
| 747 |
+
if getattr(core, 'DO_AI_HTML', True):
|
| 748 |
+
core.fit_tree_font_sizes_for_tp_html(
|
| 749 |
+
ai_tree, thai_font or '', latin_font or '', W, H)
|
| 750 |
+
out['Ai']['aihtml'] = core.ai_tree_to_tp_html(ai_tree, W, H)
|
| 751 |
+
out['Ai']['aihtmlMeta'] = {
|
| 752 |
+
'baseW': int(W),
|
| 753 |
+
'baseH': int(H),
|
| 754 |
+
'format': 'tp',
|
| 755 |
+
}
|
| 756 |
|
| 757 |
if getattr(core, 'DO_ORIGINAL', True) and getattr(core, 'DO_ORIGINAL_HTML', True) and isinstance(original_tree, dict):
|
| 758 |
core.fit_tree_font_sizes_for_tp_html(
|
|
|
|
| 791 |
allow_headers=['*'],
|
| 792 |
)
|
| 793 |
|
| 794 |
+
@app.middleware("http")
async def _tp_access_log(request: Request, call_next):
    """Print a uvicorn-style access line for ``GET /translate/...`` requests.

    The response is always returned unchanged. Nothing is printed when
    ``TP_ACCESS_LOG_MODE`` is 'uvicorn', 'off' or 'none', and only GET
    requests whose path starts with ``/translate/`` produce a line.
    Logging is best-effort: a failure here must never break the request.
    """
    resp = await call_next(request)
    if TP_ACCESS_LOG_MODE in ('uvicorn', 'off', 'none'):
        return resp
    try:
        path = request.url.path
        if request.method == 'GET' and path.startswith("/translate/"):
            client = request.client
            host = client.host if client else "-"
            port = client.port if client else 0
            ver = request.scope.get("http_version") or "1.1"
            try:
                phrase = HTTPStatus(resp.status_code).phrase
            except ValueError:
                # Status codes not registered in HTTPStatus have no phrase;
                # log the bare code instead of dropping the line.
                phrase = ''
            status = f'{resp.status_code} {phrase}'.rstrip()
            print(f'{host}:{port} - "{request.method} {path} HTTP/{ver}" {status}', flush=True)
    except Exception:
        # Deliberately swallowed: access logging is diagnostic only.
        pass
    return resp
|
| 811 |
+
|
| 812 |
async def _cleanup_jobs_loop():
|
| 813 |
while True:
|
| 814 |
await asyncio.sleep(60)
|
|
|
|
| 835 |
mode = (payload.get('mode') or 'lens_images')
|
| 836 |
lang = (payload.get('lang') or 'en')
|
| 837 |
|
| 838 |
+
context = payload.get('context') if isinstance(
|
| 839 |
+
payload.get('context'), dict) else {}
|
| 840 |
+
page_url = str((context or {}).get('page_url') or '').strip()
|
| 841 |
+
|
| 842 |
src = (payload.get('src') or '').strip()
|
| 843 |
img_bytes = b''
|
| 844 |
mime = ''
|
|
|
|
| 848 |
elif src.startswith('data:'):
|
| 849 |
img_bytes, mime = _datauri_to_bytes(src)
|
| 850 |
else:
|
| 851 |
+
img_bytes, mime = _download_bytes(src, page_url)
|
| 852 |
|
| 853 |
t_img = time.perf_counter()
|
| 854 |
|
|
|
|
| 874 |
img_hash = _sha256_hex(img_bytes)
|
| 875 |
cache_key = ''
|
| 876 |
if mode == 'lens_text' and img_hash:
|
| 877 |
+
cache_source = 'ai' if source == 'ai' else 'text'
|
| 878 |
+
cache_key = _build_cache_key(
|
| 879 |
+
img_hash, lang, mode, cache_source, ai_cfg)
|
| 880 |
cached = None
|
| 881 |
if source == 'ai':
|
| 882 |
cached = _lru_get(_ai_result_cache, _ai_cache_lock, cache_key)
|
|
|
|
| 905 |
}
|
| 906 |
if cache_key and isinstance(out, dict):
|
| 907 |
if source == 'ai':
|
| 908 |
+
_lru_set(_ai_result_cache, _ai_cache_lock,
|
| 909 |
+
cache_key, out, TP_AI_RESULT_CACHE_MAX)
|
| 910 |
else:
|
| 911 |
+
_lru_set(_result_cache, _result_cache_lock,
|
| 912 |
+
cache_key, out, TP_RESULT_CACHE_MAX)
|
| 913 |
return out
|
| 914 |
finally:
|
| 915 |
try:
|
|
|
|
| 953 |
@app.post('/translate')
async def translate(payload: Dict[str, Any]):
    """Register a new job as queued, enqueue its payload, and return the job id.

    A fresh UUID4 string identifies the job. A summary of the payload (mode,
    lang, source, and whether an image data URI / src is present) is handed
    to ``_dbg`` before the job is stored in ``_jobs`` and put on ``_job_queue``.
    """
    jid = str(uuid.uuid4())

    enqueue_info: Dict[str, Any] = {'id': jid}
    for field in ('mode', 'lang', 'source'):
        enqueue_info[field] = str(payload.get(field) or '')
    enqueue_info['has_datauri'] = bool(payload.get('imageDataUri'))
    enqueue_info['has_src'] = bool(payload.get('src'))
    _dbg('rest.enqueue', enqueue_info)

    _jobs[jid] = {'status': 'queued', 'ts': _now()}
    await _job_queue.put((jid, payload))
    return {'id': jid}
|
|
|
|
| 1030 |
if provider == 'huggingface' and not models:
|
| 1031 |
models = [
|
| 1032 |
'google/gemma-3-27b-it:featherless-a',
|
| 1033 |
+
'google/gemma-3-27b-it',
|
| 1034 |
+
'google/gemma-2-2b-it',
|
| 1035 |
+
'google/gemma-2-9b-it',
|
| 1036 |
]
|
| 1037 |
|
| 1038 |
+
if provider != 'huggingface' and not models:
|
| 1039 |
fallback_models: List[str] = []
|
| 1040 |
preset_model = str(preset.get('model') or '').strip()
|
| 1041 |
if preset_model:
|
|
|
|
| 1073 |
key=str.lower,
|
| 1074 |
)
|
| 1075 |
|
| 1076 |
+
if models and resolved_model not in models:
|
| 1077 |
resolved_model = models[0]
|
| 1078 |
|
|
|
|
|
|
|
| 1079 |
prompt_default = (getattr(core, 'ai_prompt_user_default',
|
| 1080 |
+
lambda _l: '')(lang) or '').strip()
|
| 1081 |
|
| 1082 |
return {
|
| 1083 |
'ok': True,
|
|
|
|
| 1092 |
@app.get('/ai/prompt/default')
async def ai_prompt_default(lang: str = 'en'):
    """Return the default prompt pieces for a language.

    The response carries the normalised language, the default editable
    prompt from ``core.ai_prompt_user_default`` (empty when that hook is
    missing), the language style, the system base, the output contract, and
    ``system_text``: the non-empty sections joined with blank lines.
    """
    norm_lang = _normalize_lang(lang)

    base = (getattr(core, 'AI_PROMPT_SYSTEM_BASE', '') or '').strip()
    lang_styles = getattr(core, 'AI_LANG_STYLE', {}) or {}
    style = (lang_styles.get(norm_lang) or lang_styles.get('default') or '').strip()

    # NOTE(review): this contract asks for JSON output, while
    # _build_ai_prompt_packet_custom asks for plain text with no JSON --
    # confirm which of the two is intended.
    contract_lines = (
        'Return ONLY valid JSON (no markdown, no extra text).',
        'Output JSON MUST have exactly one key: "aiTextFull".',
        'Schema example: {"aiTextFull":"..."}',
        'Markers: Keep every paragraph marker like <<TP_P0>> unchanged and in order. Do not remove or add markers.',
        "aiTextFull must include all markers, each followed by that paragraph's translated text.",
    )
    contract = "\n".join(contract_lines)
    system_text = "\n\n".join(section for section in (base, style, contract) if section)

    default_prompt_fn = getattr(core, 'ai_prompt_user_default', lambda _l: '')
    return {
        'ok': True,
        'lang': norm_lang,
        'prompt_editable_default': (default_prompt_fn(norm_lang) or '').strip(),
        'lang_style': style,
        'system_base': base,
        'contract': contract,
        'system_text': system_text,
    }
|
| 1116 |
|
| 1117 |
@app.websocket('/ws')
|
|
|
|
| 1126 |
continue
|
| 1127 |
jid = str(data.get('id') or '')
|
| 1128 |
payload = data.get('payload') or {}
|
| 1129 |
+
_dbg('ws.job', {
|
| 1130 |
+
'id': jid,
|
| 1131 |
+
'mode': str(payload.get('mode') or ''),
|
| 1132 |
+
'lang': str(payload.get('lang') or ''),
|
| 1133 |
+
'source': str(payload.get('source') or ''),
|
| 1134 |
+
'has_datauri': bool(payload.get('imageDataUri')),
|
| 1135 |
+
'has_src': bool(payload.get('src')),
|
| 1136 |
+
})
|
| 1137 |
try:
|
| 1138 |
result = await asyncio.to_thread(_process_payload, payload)
|
| 1139 |
try:
|