plan291037 committed on
Commit
e645cd4
·
verified ·
1 Parent(s): ebe8bfa

Upload 7 files

Browse files
.dockerignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ fontend/
Dockerfile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ WORKDIR /app
4
+
5
+ RUN apt-get update \
6
+ && apt-get install -y --no-install-recommends libgl1 libglib2.0-0 \
7
+ && rm -rf /var/lib/apt/lists/*
8
+
9
+ COPY requirements.txt ./requirements.txt
10
+ RUN pip install --no-cache-dir -r requirements.txt
11
+
12
+ COPY backend ./backend
13
+
14
+ ENV PORT=7860
15
+ EXPOSE 7860
16
+
17
+ CMD ["bash", "-lc", "uvicorn backend.server:app --host 0.0.0.0 --port ${PORT}"]
README.md CHANGED
@@ -1,10 +1,31 @@
1
  ---
2
- title: TextPhantom OCR API2
3
- emoji: 🐠
4
- colorFrom: purple
5
- colorTo: indigo
6
  sdk: docker
 
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: TextPhantom OCR API
3
+ emoji: "🪄"
4
+ colorFrom: indigo
5
+ colorTo: pink
6
  sdk: docker
7
+ app_port: 7860
8
  pinned: false
9
  ---
10
 
11
+ FastAPI backend for TextPhantom (Chrome extension).
12
+
13
+ ## Endpoints
14
+
15
+ - `GET /health`
16
+ - `GET /version`
17
+ - `GET /warmup?lang=th`
18
+ - `POST /translate`
19
+ - `GET /translate/{job_id}`
20
+ - `POST /ai/resolve`
21
+ - `GET /ai/models`
22
+ - `POST /ws` (WebSocket)
23
+
24
+ ## Environment
25
+
26
+ - `AI_API_KEY` (optional)
27
+ - `SERVER_MAX_WORKERS` (default: 15)
28
+ - `JOB_TTL_SEC` (default: 3600)
29
+ - `TP_DEBUG` (set to `1` to enable debug logs)
30
+
31
+ For Spaces on small CPU, setting `SERVER_MAX_WORKERS=2` is usually enough.
backend/__init__.py ADDED
File without changes
backend/lens_core.py ADDED
The diff for this file is too large to render. See raw diff
 
backend/server.py ADDED
@@ -0,0 +1,911 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio, base64, copy, hashlib, io, json, os, re, tempfile, time, uuid, httpx
2
+
3
+ from backend import lens_core as core
4
+
5
+ from collections import OrderedDict
6
+ from threading import Lock
7
+
8
+ from dataclasses import dataclass
9
+ from typing import Any, Dict, List, Optional
10
+ from fastapi import FastAPI, WebSocket, WebSocketDisconnect
11
+ from fastapi.middleware.cors import CORSMiddleware
12
+
13
+ SERVER_MAX_WORKERS = int(os.environ.get('SERVER_MAX_WORKERS', '15'))
14
+ JOB_TTL_SEC = int(os.environ.get('JOB_TTL_SEC', '3600'))
15
+ HTTP_TIMEOUT_SEC = float(os.environ.get('HTTP_TIMEOUT_SEC', str(getattr(core, 'AI_TIMEOUT_SEC', 120))))
16
+ SUPPORTED_MODES = {"lens_images", "lens_text"}
17
+ BUILD_ID = os.environ.get('TP_BUILD_ID', 'v9-backendfix-20260129')
18
+ TP_DEBUG = str(os.environ.get('TP_DEBUG', '')).strip().lower() in ('1', 'true', 'yes', 'on')
19
+
20
+ TP_PARA_MARKER_PREFIX = '<<TP_P'
21
+ TP_PARA_MARKER_SUFFIX = '>>'
22
+
23
+ TP_RESULT_CACHE_MAX = int(os.environ.get('TP_RESULT_CACHE_MAX', '24'))
24
+ TP_AI_RESULT_CACHE_MAX = int(os.environ.get('TP_AI_RESULT_CACHE_MAX', '16'))
25
+ TP_WARMUP_LANG = (os.environ.get('TP_WARMUP_LANG', 'th') or 'th').strip()
26
+
27
+ _result_cache: OrderedDict[str, Dict[str, Any]] = OrderedDict()
28
+ _ai_result_cache: OrderedDict[str, Dict[str, Any]] = OrderedDict()
29
+ _jobs: Dict[str, Dict[str, Any]] = {}
30
+ _job_queue: asyncio.Queue = asyncio.Queue()
31
+ _result_cache_lock = Lock()
32
+ _ai_cache_lock = Lock()
33
+
34
def _dbg(tag: str, data=None) -> None:
    """Emit a debug line when TP_DEBUG is enabled; never raises.

    data (when given) is JSON-serialized and truncated to 2000 chars; if
    serialization fails, the raw repr is printed instead.
    """
    if not TP_DEBUG:
        return
    try:
        if data is None:
            print(f'[TextPhantom][dbg] {tag}')
            return
        serialized = json.dumps(data, ensure_ascii=False)
        if len(serialized) > 2000:
            serialized = serialized[:2000] + '…'
        print(f'[TextPhantom][dbg] {tag} {serialized}')
    except Exception:
        # Last-ditch fallback: print the object as-is, swallowing any error.
        try:
            print(f'[TextPhantom][dbg] {tag} {data}')
        except Exception:
            pass
50
+
51
+ def _tree_stats(tree) -> dict:
52
+ if not isinstance(tree, dict):
53
+ return {'paras': 0, 'items': 0, 'spans': 0}
54
+ paras = tree.get('paragraphs') or []
55
+ if not isinstance(paras, list):
56
+ return {'paras': 0, 'items': 0, 'spans': 0}
57
+ items = 0
58
+ spans = 0
59
+ for p in paras:
60
+ if not isinstance(p, dict):
61
+ continue
62
+ its = p.get('items') or []
63
+ if not isinstance(its, list):
64
+ continue
65
+ items += len(its)
66
+ for it in its:
67
+ if not isinstance(it, dict):
68
+ continue
69
+ sp = it.get('spans') or []
70
+ if isinstance(sp, list):
71
+ spans += len(sp)
72
+ return {'paras': len(paras), 'items': items, 'spans': spans}
73
+
74
+ def _tree_to_paragraph_texts(tree: Any) -> List[str]:
75
+ if not isinstance(tree, dict):
76
+ return []
77
+ paras = tree.get('paragraphs') or []
78
+ if not isinstance(paras, list) or not paras:
79
+ return []
80
+ out: List[str] = []
81
+ for p in paras:
82
+ if not isinstance(p, dict):
83
+ out.append('')
84
+ continue
85
+ t = str(p.get('text') or '').strip()
86
+ if not t:
87
+ items = p.get('items') or []
88
+ if isinstance(items, list) and items:
89
+ t = ' '.join(str(it.get('text') or '').strip() for it in items if isinstance(
90
+ it, dict) and str(it.get('text') or '').strip())
91
+ out.append(t)
92
+ return out
93
+
94
def _apply_para_markers(paras: List[str]) -> str:
    """Join paragraphs into one string, each prefixed with its <<TP_Pi>> marker."""
    if not paras:
        return ''
    marked = [
        f"{TP_PARA_MARKER_PREFIX}{index}{TP_PARA_MARKER_SUFFIX}\n{(text or '').strip()}"
        for index, text in enumerate(paras)
    ]
    return '\n\n'.join(marked)
102
+
103
+ def _clamp_runaway_repeats(s: str, max_repeat: int = 12) -> str:
104
+ if not s:
105
+ return ''
106
+ pat = re.compile(r"(.)\1{" + str(max_repeat) + r",}")
107
+ return pat.sub(lambda m: m.group(1) * max_repeat, s)
108
+
109
+ def _extract_marker_indices(s: str) -> set[int]:
110
+ if not s:
111
+ return set()
112
+ out: set[int] = set()
113
+ for m in re.finditer(r"<<TP_P(\d+)>>", s):
114
+ try:
115
+ out.add(int(m.group(1)))
116
+ except Exception:
117
+ continue
118
+ return out
119
+
120
def _needs_ai_retry(ai_text_full: str, expected_paras: int) -> bool:
    """Return True when AI output is missing paragraph markers and must be retried.

    The output is accepted only when it carries at least `expected_paras`
    distinct <<TP_Pn>> markers; any shortfall (including output truncated
    mid-marker) signals an incomplete translation.

    Note: the original version had a dead branch — an inner
    `if prefix-present-and-suffix-missing: return True` immediately followed
    by an unconditional `return True` — which reduced to this single check.
    """
    if expected_paras <= 0:
        return False
    return len(_extract_marker_indices(ai_text_full)) < expected_paras
130
+
131
+ def _now() -> float:
132
+ return time.time()
133
+
134
+ def _lru_get(cache: OrderedDict, lock: Lock, key: str) -> Optional[Dict[str, Any]]:
135
+ if not key:
136
+ return None
137
+ with lock:
138
+ v = cache.get(key)
139
+ if v is None:
140
+ return None
141
+ cache.move_to_end(key)
142
+ return copy.deepcopy(v)
143
+
144
+ def _lru_set(cache: OrderedDict, lock: Lock, key: str, value: Dict[str, Any], max_items: int) -> None:
145
+ if not key or not isinstance(value, dict) or max_items <= 0:
146
+ return
147
+ with lock:
148
+ cache[key] = copy.deepcopy(value)
149
+ cache.move_to_end(key)
150
+ while len(cache) > max_items:
151
+ cache.popitem(last=False)
152
+
153
+ def _sha256_hex(blob: bytes) -> str:
154
+ return hashlib.sha256(blob).hexdigest() if blob else ''
155
+
156
+ def _ai_prompt_sig(s: str) -> str:
157
+ t = (s or '').strip()
158
+ if not t:
159
+ return ''
160
+ return hashlib.sha256(t.encode('utf-8')).hexdigest()[:12]
161
+
162
def _build_cache_key(img_hash: str, lang: str, mode: str, source: str, ai_cfg: Optional["AiConfig"]) -> str:
    """Build a '|'-joined result-cache key.

    All requests key on (image hash, normalized lang, mode, source); AI
    requests additionally key on provider/model/base_url and a short
    signature of the editable prompt, so different AI configs never collide.
    """
    parts = [
        img_hash,
        _normalize_lang(lang),
        (mode or '').strip(),
        (source or '').strip(),
    ]
    if ai_cfg and (source or '').strip().lower() == 'ai':
        parts.append((ai_cfg.provider or '').strip())
        parts.append((ai_cfg.model or '').strip())
        parts.append((ai_cfg.base_url or '').strip())
        parts.append(_ai_prompt_sig(ai_cfg.prompt_editable))
    return '|'.join([p for p in parts if p is not None])
172
+
173
+
174
+ def _b64_to_bytes(b64: str) -> bytes:
175
+ pad = '=' * ((4 - (len(b64) % 4)) % 4)
176
+ return base64.b64decode(b64 + pad)
177
+
178
def _datauri_to_bytes(data_uri: str) -> tuple[bytes, str]:
    """Split a data: URI into (payload bytes, mime type).

    Returns (b'', '') for anything that is not a data URI.  The mime falls
    back to 'application/octet-stream' when the URI declares none.

    Fix: per RFC 2397 the mediatype is everything between 'data:' and the
    first ';' or the ','.  The previous version only extracted the mime when
    a ';' parameter (e.g. ';base64') was present, silently dropping it for
    URIs like 'data:image/png,...'.
    """
    s = (data_uri or '').strip()
    if not s.startswith('data:'):
        return b'', ''
    head, _, b64 = s.partition(',')
    mime = head[5:].split(';', 1)[0].strip()
    return _b64_to_bytes(b64), mime or 'application/octet-stream'
187
+
188
+ def _bytes_to_datauri(blob: bytes, mime: str) -> str:
189
+ b64 = base64.b64encode(blob).decode('ascii')
190
+ return f"data:{mime};base64,{b64}"
191
+
192
def _download_bytes(url: str) -> tuple[bytes, str]:
    """GET a URL (following redirects) and return (body, bare content-type).

    Empty/blank URLs return (b'', ''); HTTP errors raise via
    raise_for_status().
    """
    target = (url or '').strip()
    if not target:
        return b'', ''
    with httpx.Client(timeout=HTTP_TIMEOUT_SEC, follow_redirects=True) as client:
        response = client.get(target)
        response.raise_for_status()
        content_type = (response.headers.get('content-type') or '').split(';')[0].strip()
        return response.content, content_type
201
+
202
def _detect_provider_from_key(api_key: str) -> str:
    """Guess the AI provider from the API key format, canonicalized by core."""
    detected = core._detect_ai_provider_from_key(api_key)
    return core._canonical_provider(detected)
204
+
205
def _resolve_provider_defaults(provider: str) -> dict:
    """Default settings (base_url/model/...) for a provider; {} when unknown."""
    all_defaults = getattr(core, 'AI_PROVIDER_DEFAULTS', {}) or {}
    return all_defaults.get(provider, {})
207
+
208
def _resolve_model(provider: str, model: str) -> str:
    """Resolve 'auto'/alias model names to a concrete model via lens_core."""
    return core._resolve_model(provider, model)
210
+
211
def _normalize_lang(lang: str) -> str:
    """Normalize a language code via lens_core (delegation only)."""
    return core._normalize_lang(lang)
213
+
214
@dataclass
class AiConfig:
    """Per-request AI translation settings supplied by the client."""
    api_key: str
    model: str = 'auto'       # 'auto' -> resolved to the provider's default model
    provider: str = 'auto'    # 'auto' -> detected from the api_key format
    base_url: str = 'auto'    # 'auto' -> provider preset base URL
    prompt_editable: str = ''  # user style prompt; '' -> core's default prompt
221
+
222
+ def _collapse_ws(text: str) -> str:
223
+ return re.sub(r"\s+", " ", str(text or "")).strip()
224
+
225
def _sanitize_marked_text(marked_text: str) -> str:
    """Normalize AI output into alternating '<<TP_Pn>>' / collapsed-text lines.

    When no markers are present the whole text is whitespace-collapsed.
    Each marker's segment is the text between it and the next marker (or
    end of string), whitespace-collapsed.

    Fix: markers are now emitted in ascending index order.  The original
    iterated a set[int] directly, whose ordering is an implementation
    detail, while the prompt contract requires markers "unchanged and in
    order".
    """
    t = str(marked_text or "")
    if not t:
        return ""
    indices = _extract_marker_indices(t)
    if not indices:
        return _collapse_ws(t)
    out_lines: List[str] = []
    for idx in sorted(indices):
        marker = f"<<TP_P{idx}>>"
        m = re.search(
            rf"{re.escape(marker)}\s*([\s\S]*?)(?=<<TP_P\d+>>|\Z)", t)
        seg = _collapse_ws(m.group(1) if m else "")
        out_lines.append(marker)
        out_lines.append(seg)
        out_lines.append("")
    return "\n".join(out_lines).strip("\n")
243
+
244
def _build_ai_prompt_packet_custom(target_lang: str, original_text_full: str, prompt_editable: str, is_retry: bool = False) -> tuple[str, List[str]]:
    """Build the (system_text, [user_text]) pair for an AI translation call.

    A blank editable prompt falls back to lens_core's per-language default
    style prompt.  On retry, an extra system instruction demands that every
    paragraph marker be present in the output.
    """
    lang = _normalize_lang(target_lang)
    style_prompt = (prompt_editable or "").strip()
    if not style_prompt:
        # Fall back to core's default; the lambda guards a missing attribute.
        style_prompt = (getattr(core, "ai_prompt_user_default",
                                lambda _l: "")(lang) or "").strip()

    input_json = json.dumps(
        {"target_lang": lang, "stylePrompt": style_prompt,
         "originalTextFull": str(original_text_full or "")},
        ensure_ascii=False,
    )

    system_parts: List[str] = [
        "SYSTEM: You translate manga dialogue.",
        "Task: Translate originalTextFull into target_lang. Apply stylePrompt.",
        "Markers: Keep every paragraph marker like <<TP_P0>> unchanged and in order. Do not remove or add markers.",
        "Output: Return ONLY JSON (no markdown, no extra text).",
        "OUTPUT_JSON schema: {\"aiTextFull\":\"...\"}",
        "aiTextFull must include all the same markers, each followed by that paragraph's translated text.",
        "Keep text concise for speech bubbles. Avoid long repeated characters (max 12).",
    ]
    if is_retry:
        system_parts.append(
            "Retry: Your previous output may have been truncated. You MUST output ALL markers from the first to the last marker in the input."
        )
    system_text = "\n".join([p for p in system_parts if p])

    user_text = (
        "INPUT_JSON (json):\n```json\n"
        + input_json
        + "\n```\n\nOUTPUT_JSON (json):\n```json\n{\"aiTextFull\":\"...\"}\n```"
    )

    return system_text, [user_text]
279
+
280
def ai_translate_text(original_text_full: str, target_lang: str, ai: AiConfig, is_retry: bool = False) -> dict:
    """Translate marker-annotated text through the configured AI provider.

    Returns {'aiTextFull': sanitized text, 'meta': {model, provider,
    base_url, latency_sec}}.  Raises when no API key is available or the
    provider call fails.
    """
    api_key = (ai.api_key or '').strip()
    if not api_key:
        raise Exception('AI api_key is required')

    # Resolve provider: explicit value wins; otherwise sniff it from the key.
    provider = core._canonical_provider((ai.provider or 'auto'))
    if provider in ('', 'auto'):
        provider = _detect_provider_from_key(api_key)

    preset = _resolve_provider_defaults(provider) or {}

    model = _resolve_model(provider, (ai.model or 'auto'))

    base_url = (ai.base_url or 'auto').strip()
    if base_url in ('', 'auto'):
        base_url = (preset.get('base_url') or '').strip()

    # Gemini and Anthropic use dedicated endpoints below; everything else
    # goes through an OpenAI-compatible base URL (defaulting to OpenAI's).
    if provider not in ('gemini', 'anthropic'):
        if not base_url:
            base_url = (_resolve_provider_defaults('openai') or {}).get(
                'base_url') or 'https://api.openai.com/v1'

    system_text, user_parts = _build_ai_prompt_packet_custom(
        target_lang, original_text_full, ai.prompt_editable, is_retry=is_retry)

    started = _now()
    used_model = model
    if provider == 'gemini':
        raw = core._gemini_generate_json(
            api_key, model, system_text, user_parts)
    elif provider == 'anthropic':
        raw = core._anthropic_generate_json(
            api_key, model, system_text, user_parts)
    else:
        # OpenAI-compatible path may substitute the actually-used model.
        raw, used_model = core._openai_compat_generate_json(
            api_key, base_url, model, system_text, user_parts)

    # Parse as JSON-wrapped or plain text depending on core's current flag.
    ai_text_full = core._parse_ai_textfull_only(
        raw) if core.DO_AI_JSON else core._parse_ai_textfull_text_only(raw)

    ai_text_full = _sanitize_marked_text(ai_text_full)

    return {
        'aiTextFull': ai_text_full,
        'meta': {
            'model': used_model,
            'provider': provider,
            'base_url': base_url,
            'latency_sec': round(_now() - started, 3),
        },
    }
331
+
332
def process_image_path(image_path: str, lang: str, mode: str, ai_cfg: Optional[AiConfig]) -> dict:
    """Run the full Lens OCR/translate pipeline on one image file.

    Modes: 'lens_images' returns only the (translated) image as a data URI;
    'lens_text' additionally decodes original/translated text trees, runs
    the optional AI translation pass, and renders HTML overlays.
    """
    mode_id = (mode or '').strip()
    if mode_id not in SUPPORTED_MODES:
        mode_id = 'lens_images'

    target_lang = _normalize_lang(lang)

    data = core.get_lens_data_from_image(
        image_path, getattr(core, 'FIREBASE_URL', ''), target_lang)
    img = core.Image.open(image_path).convert('RGB')
    W, H = img.size

    # Pick fonts per target language; CJK targets swap the "latin" slot.
    thai_font = getattr(core, 'FONT_THAI_PATH', 'NotoSansThai-Regular.ttf')
    latin_font = getattr(core, 'FONT_LATIN_PATH', 'NotoSans-Regular.ttf')

    if target_lang == 'ja':
        latin_font = getattr(core, 'FONT_JA_PATH', latin_font)
    elif target_lang in ('zh', 'zh-hans', 'zh_cn', 'zh-cn', 'zh_hans'):
        latin_font = getattr(core, 'FONT_ZH_SC_PATH', latin_font)
    elif target_lang in ('zh-hant', 'zh_tw', 'zh-tw', 'zh_hant'):
        latin_font = getattr(core, 'FONT_ZH_TC_PATH', latin_font)

    # NOTE: 'FONT_DOWNLOD' (sic) is the attribute name core actually exposes.
    if getattr(core, 'FONT_DOWNLOD', True):
        thai_font = core.ensure_font(
            thai_font, getattr(core, 'FONT_THAI_URLS', []))
        if target_lang == 'ja':
            latin_font = core.ensure_font(
                latin_font, getattr(core, 'FONT_JA_URLS', []))
        elif target_lang in ('zh', 'zh-hans', 'zh_cn', 'zh-cn', 'zh_hans'):
            latin_font = core.ensure_font(
                latin_font, getattr(core, 'FONT_ZH_SC_URLS', []))
        elif target_lang in ('zh-hant', 'zh_tw', 'zh-tw', 'zh_hant'):
            latin_font = core.ensure_font(
                latin_font, getattr(core, 'FONT_ZH_TC_URLS', []))
        else:
            latin_font = core.ensure_font(
                latin_font, getattr(core, 'FONT_LATIN_URLS', []))

    image_url = data.get('imageUrl') if isinstance(data, dict) else None

    # Response skeleton; sections are filled in below depending on mode/flags.
    out: Dict[str, Any] = {
        'mode': mode_id,
        'imageUrl': image_url,
        'imageDataUri': '',
        'originalContentLanguage': data.get('originalContentLanguage') if isinstance(data, dict) else None,
        'originalTextFull': data.get('originalTextFull') if isinstance(data, dict) else None,
        'translatedTextFull': data.get('translatedTextFull') if isinstance(data, dict) else None,
        'AiTextFull': '',
        'originalParagraphs': (data.get('originalParagraphs') or []) if isinstance(data, dict) else [],
        'translatedParagraphs': (data.get('translatedParagraphs') or []) if isinstance(data, dict) else [],
        'original': {},
        'translated': {},
        'Ai': {},
    }

    if mode_id == 'lens_images':
        # Image-only mode: prefer the Lens-provided image, fall back to the
        # original file bytes.
        if image_url:
            decoded = core.decode_imageurl_to_datauri(str(image_url))
            if decoded:
                out['imageDataUri'] = decoded
            elif isinstance(image_url, str) and image_url.startswith(('http://', 'https://')):
                blob, mime2 = _download_bytes(image_url)
                out['imageDataUri'] = _bytes_to_datauri(
                    blob, mime2 or 'image/jpeg')

        if not out.get('imageDataUri'):
            with open(image_path, 'rb') as f:
                blob = f.read()
            out['imageDataUri'] = _bytes_to_datauri(blob, 'image/jpeg')
        return out

    original_span_tokens = None
    original_tree = None
    translated_tree = None

    def _base_img_for_overlay() -> core.Image.Image:
        # Optionally erase the source text using the original span boxes so
        # overlays render on a clean background.
        if not (getattr(core, 'ERASE_OLD_TEXT_WITH_ORIGINAL_BOXES', True) and original_span_tokens):
            return img
        return core.erase_text_with_boxes(
            img,
            original_span_tokens,
            pad_px=getattr(core, 'ERASE_PADDING_PX', 2),
            sample_margin_px=getattr(core, 'ERASE_SAMPLE_MARGIN_PX', 6),
        )

    if getattr(core, 'DO_ORIGINAL', True):
        tree, _ = core.decode_tree(
            out.get('originalParagraphs') or [],
            out.get('originalTextFull') or '',
            'original',
            W,
            H,
            want_raw=False,
        )
        original_tree = tree
        original_span_tokens = core.flatten_tree_spans(tree)
        _dbg('tree.original', _tree_stats(original_tree))
        out['original'] = {
            'originalTree': tree,
            'originalTextFull': out.get('originalTextFull') or '',
        }

    if getattr(core, 'DO_TRANSLATED', True):
        tree, _ = core.decode_tree(
            out.get('translatedParagraphs') or [],
            out.get('translatedTextFull') or '',
            'translated',
            W,
            H,
            want_raw=False,
        )
        translated_tree = tree
        # NOTE(review): translated_span_tokens is assigned but never read.
        translated_span_tokens = core.flatten_tree_spans(tree)
        _dbg('tree.translated', _tree_stats(translated_tree))
        out['translated'] = {
            'translatedTree': tree,
            'translatedTextFull': out.get('translatedTextFull') or '',
        }

    def _tree_score(tree: Any) -> int:
        # Rank trees by richness: items dominate, then paragraphs, then spans.
        if not isinstance(tree, dict):
            return -1
        paragraphs = tree.get('paragraphs') or []
        if not isinstance(paragraphs, list) or not paragraphs:
            return -1

        para_count = len(paragraphs)
        item_count = 0
        span_count = 0
        for p in paragraphs:
            if not isinstance(p, dict):
                continue
            items = p.get('items') or []
            if not isinstance(items, list):
                continue
            item_count += len(items)
            for it in items:
                if not isinstance(it, dict):
                    continue
                spans = it.get('spans') or []
                if isinstance(spans, list):
                    span_count += len(spans)

        return item_count * 10000 + para_count * 100 + span_count

    def _pick_ai_template_tree() -> Optional[Dict[str, Any]]:
        # Choose the richer tree as layout template for the AI overlay.
        tr_score = _tree_score(translated_tree)
        og_score = _tree_score(original_tree)

        if tr_score < 0 and og_score < 0:
            return None
        if og_score > tr_score:
            return original_tree
        return translated_tree or original_tree

    ai_tree = None
    if ai_cfg and (ai_cfg.api_key or '').strip() and getattr(core, 'DO_AI', True):
        # Prefer marker-annotated paragraph text so the AI output can be
        # mapped back per paragraph; fall back to the full raw text.
        src_paras = _tree_to_paragraph_texts(original_tree or {})
        src_text = _apply_para_markers(src_paras) if src_paras else str(
            out.get('originalTextFull') or '')
        ai = ai_translate_text(src_text, target_lang, ai_cfg)
        if src_paras and _needs_ai_retry(str(ai.get('aiTextFull') or ''), len(src_paras)):
            _dbg('ai.retry', {
                'expected_paras': len(src_paras),
                'found_markers': len(_extract_marker_indices(str(ai.get('aiTextFull') or ''))),
            })
            # Retry once with runaway character repeats clamped in the source.
            retry_paras = [_clamp_runaway_repeats(p) for p in src_paras]
            retry_text = _apply_para_markers(retry_paras) or src_text
            ai = ai_translate_text(
                retry_text, target_lang, ai_cfg, is_retry=True)

        template_tree = _pick_ai_template_tree()
        _dbg('ai.template.pick', {
            'score_original': _tree_score(original_tree),
            'score_translated': _tree_score(translated_tree),
            'picked': 'original' if template_tree is original_tree else ('translated' if template_tree is translated_tree else 'none'),
        })
        if not isinstance(template_tree, dict):
            template_tree = original_tree if isinstance(original_tree, dict) else (
                translated_tree if isinstance(translated_tree, dict) else {})
        patched = core.patch(
            {'Ai': {'aiTextFull': str(
                ai.get('aiTextFull') or ''), 'aiTree': template_tree}},
            W,
            H,
            thai_font or '',
            latin_font or '',
            lang=target_lang,
        )
        ai_tree = (patched.get('Ai') or {}).get('aiTree') or {}
        _dbg('ai.patched', {
            'ai_text_len': len(str(ai.get('aiTextFull') or '')),
            'stats_ai': _tree_stats(ai_tree),
            'stats_original': _tree_stats(original_tree or {}),
            'stats_translated': _tree_stats(translated_tree or {}),
            'mode': mode_id,
            'lang': target_lang,
        })

        # Harmonize font sizes across all three trees, then rebuild the AI
        # spans to match the resized fonts.
        shared_para_sizes = core._compute_shared_para_sizes(
            [original_tree or {}, translated_tree or {}, ai_tree or {}],
            thai_font or '',
            latin_font or '',
            W,
            H,
        )
        core._apply_para_font_size(original_tree or {}, shared_para_sizes)
        core._apply_para_font_size(translated_tree or {}, shared_para_sizes)
        core._apply_para_font_size(ai_tree or {}, shared_para_sizes)
        core._rebuild_ai_spans_after_font_resize(
            ai_tree or {}, W, H, thai_font or '', latin_font or '', lang=target_lang)

        out['AiTextFull'] = str(ai.get('aiTextFull') or '')
        out['Ai'] = {
            'aiTextFull': str(ai.get('aiTextFull') or ''),
            'aiTree': ai_tree,
            'meta': ai.get('meta') or {},
        }
        if getattr(core, 'DO_AI_HTML', True):
            core.fit_tree_font_sizes_for_tp_html(
                ai_tree, thai_font or '', latin_font or '', W, H)
            out['Ai']['aihtml'] = core.ai_tree_to_tp_html(ai_tree, W, H)
            out['Ai']['aihtmlMeta'] = {
                'baseW': int(W),
                'baseH': int(H),
                'format': 'tp',
            }

    if getattr(core, 'DO_ORIGINAL', True) and getattr(core, 'DO_ORIGINAL_HTML', True) and isinstance(original_tree, dict):
        core.fit_tree_font_sizes_for_tp_html(
            original_tree, thai_font or '', latin_font or '', W, H)
        if isinstance(out.get('original'), dict):
            out['original']['originalhtml'] = core.ai_tree_to_tp_html(
                original_tree or {}, W, H)

    if getattr(core, 'DO_TRANSLATED', True) and getattr(core, 'DO_TRANSLATED_HTML', True) and isinstance(translated_tree, dict):
        core.fit_tree_font_sizes_for_tp_html(
            translated_tree, thai_font or '', latin_font or '', W, H)
        if isinstance(out.get('translated'), dict):
            out['translated']['translatedhtml'] = core.ai_tree_to_tp_html(
                translated_tree or {}, W, H)

    if getattr(core, 'HTML_INCLUDE_CSS', True) and (getattr(core, 'DO_ORIGINAL_HTML', True) or getattr(core, 'DO_TRANSLATED_HTML', True) or getattr(core, 'DO_AI_HTML', True)):
        out['htmlCss'] = core.tp_overlay_css()
        out['htmlMeta'] = {
            'baseW': int(W),
            'baseH': int(H),
            'format': 'tp',
        }
    # Render the (possibly text-erased) base image as the response image.
    base_img = _base_img_for_overlay()
    buf = io.BytesIO()
    base_img.save(buf, format='PNG')
    out['imageDataUri'] = _bytes_to_datauri(buf.getvalue(), 'image/png')

    return out
587
+
588
# FastAPI application with permissive CORS so the Chrome extension can call
# the API from any origin.
# NOTE(review): wildcard origins combined with allow_credentials=True is
# rejected by browsers per the CORS spec — confirm credentials are needed.
app = FastAPI(title='TextPhantom OCR API', version='1.0')
app.add_middleware(
    CORSMiddleware,
    allow_origins=['*'],
    allow_credentials=True,
    allow_methods=['*'],
    allow_headers=['*'],
)
596
+
597
async def _cleanup_jobs_loop():
    """Background task: every 60s, drop job records older than JOB_TTL_SEC."""
    while True:
        await asyncio.sleep(60)
        cutoff = _now() - JOB_TTL_SEC
        expired = [job_id for job_id, job in _jobs.items()
                   if float(job.get('ts', 0)) < cutoff]
        for job_id in expired:
            _jobs.pop(job_id, None)
605
+
606
async def _worker_loop(worker_id: int):
    """Background task: pull (job_id, payload) pairs off the queue and run
    them in a worker thread, recording status transitions in _jobs."""
    while True:
        job_id, payload = await _job_queue.get()
        try:
            _jobs[job_id] = {'status': 'running', 'ts': _now()}
            result = await asyncio.to_thread(_process_payload, payload)
            _jobs[job_id] = {'status': 'done', 'result': result, 'ts': _now()}
        except Exception as exc:
            _jobs[job_id] = {'status': 'error', 'result': str(exc), 'ts': _now()}
        finally:
            _job_queue.task_done()
617
+
618
def _process_payload(payload: dict) -> dict:
    """Resolve the request's image bytes, consult the LRU caches, and run the
    OCR/translate pipeline; returns the result dict with 'perf' timings.

    Runs synchronously in a worker thread (see _worker_loop).
    """
    t_all = time.perf_counter()
    mode = (payload.get('mode') or 'lens_images')
    lang = (payload.get('lang') or 'en')

    src = (payload.get('src') or '').strip()
    img_bytes = b''
    mime = ''

    # Image source priority: explicit data URI field, data: URI in 'src',
    # then download from URL.
    if payload.get('imageDataUri'):
        img_bytes, mime = _datauri_to_bytes(payload.get('imageDataUri'))
    elif src.startswith('data:'):
        img_bytes, mime = _datauri_to_bytes(src)
    else:
        img_bytes, mime = _download_bytes(src)

    t_img = time.perf_counter()

    if not img_bytes:
        raise Exception('No image data')

    ai_cfg = None
    ai = payload.get('ai') or None
    source = str(payload.get('source') or '').strip().lower() or 'translated'
    # AI config is only honored for text-mode requests whose source is 'ai';
    # the per-request key falls back to the server-side AI_API_KEY env var.
    if mode == 'lens_text' and source == 'ai' and isinstance(ai, dict):
        api_key = str(ai.get('api_key') or '').strip() or (
            os.getenv('AI_API_KEY') or '').strip()
        ai_cfg = AiConfig(
            api_key=api_key,
            model=str(ai.get('model') or 'auto').strip() or 'auto',
            provider=str(ai.get('provider') or 'auto').strip() or 'auto',
            base_url=str(ai.get('base_url') or 'auto').strip() or 'auto',
            prompt_editable=str(ai.get('prompt') or '').strip(),
        )

    # NOTE(review): mutates a module-level core flag on every request —
    # confirm this is safe with SERVER_MAX_WORKERS concurrent jobs.
    core.DO_AI_JSON = False

    img_hash = _sha256_hex(img_bytes)
    cache_key = ''
    if mode == 'lens_text' and img_hash:
        cache_key = _build_cache_key(img_hash, lang, mode, source, ai_cfg)
        cached = None
        if source == 'ai':
            cached = _lru_get(_ai_result_cache, _ai_cache_lock, cache_key)
        else:
            cached = _lru_get(_result_cache, _result_cache_lock, cache_key)
        if cached:
            cached['perf'] = {
                'cache': 'hit',
                'total_ms': round((time.perf_counter() - t_all) * 1000, 1),
                'img_ms': round((t_img - t_all) * 1000, 1),
            }
            return cached

    # The pipeline works on a file path, so spill the bytes to a temp file.
    suffix = '.png' if (mime or '').endswith('png') else '.jpg'
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as f:
        f.write(img_bytes)
        tmp_path = f.name
    t_tmp = time.perf_counter()
    try:
        out = process_image_path(tmp_path, lang, mode, ai_cfg)
        out['perf'] = {
            'cache': 'miss' if cache_key else 'off',
            'total_ms': round((time.perf_counter() - t_all) * 1000, 1),
            'img_ms': round((t_img - t_all) * 1000, 1),
            'tmp_ms': round((t_tmp - t_img) * 1000, 1),
        }
        if cache_key and isinstance(out, dict):
            if source == 'ai':
                _lru_set(_ai_result_cache, _ai_cache_lock, cache_key, out, TP_AI_RESULT_CACHE_MAX)
            else:
                _lru_set(_result_cache, _result_cache_lock, cache_key, out, TP_RESULT_CACHE_MAX)
        return out
    finally:
        # Best-effort temp file removal.
        try:
            os.unlink(tmp_path)
        except Exception:
            pass
696
+
697
@app.on_event('startup')
async def _startup():
    """Spawn the worker pool and the job-TTL cleanup task at app startup."""
    print(
        f'[TextPhantom][api] starting build={BUILD_ID} workers={SERVER_MAX_WORKERS}')
    # NOTE(review): task references are not retained; asyncio keeps only weak
    # references to tasks, so consider storing these to guard against GC.
    for i in range(max(1, SERVER_MAX_WORKERS)):
        asyncio.create_task(_worker_loop(i))
    asyncio.create_task(_cleanup_jobs_loop())
704
+
705
@app.get('/health')
async def health():
    """Liveness probe: always ok, plus the build identifier."""
    return {'ok': True, 'build': BUILD_ID}
708
+
709
@app.get('/version')
async def version():
    """Report the build id and the backing core module name."""
    return {'ok': True, 'build': BUILD_ID, 'core': 'lens_core'}
712
+
713
@app.get('/warmup')
async def warmup(lang: str = TP_WARMUP_LANG):
    """Pre-warm lens_core for a language and report the elapsed time in ms."""
    t0 = time.perf_counter()
    r = core.warmup(lang)
    return {'ok': True, 'build': BUILD_ID, 'dt_ms': round((time.perf_counter() - t0) * 1000, 1), 'result': r}
718
+
719
@app.get('/meta')
async def meta():
    """Expose supported languages, render sources, and whether the server
    has an AI_API_KEY configured in its environment."""
    languages = getattr(core, 'UI_LANGUAGES', None) or []
    sources = [
        {'id': 'original', 'name': 'Original'},
        {'id': 'translated', 'name': 'Translated'},
        {'id': 'ai', 'name': 'Ai'},
    ]
    has_key = bool((os.getenv('AI_API_KEY') or '').strip())
    return {'ok': True, 'languages': languages, 'sources': sources, 'has_env_ai_key': has_key}
729
+
730
@app.post('/translate')
async def translate(payload: Dict[str, Any]):
    """Enqueue a translation job and return its id for status polling."""
    job_id = str(uuid.uuid4())
    _jobs[job_id] = {'status': 'queued', 'ts': _now()}
    await _job_queue.put((job_id, payload))
    return {'id': job_id}
736
+
737
@app.get('/translate/{job_id}')
async def translate_status(job_id: str):
    """Return the job record, or an error status for unknown/expired ids."""
    job = _jobs.get(job_id)
    if job:
        return job
    return {'status': 'error', 'result': 'job_not_found'}
743
+
744
@app.post('/ai/resolve')
async def ai_resolve(payload: Dict[str, Any]):
    """Resolve the effective AI provider, base URL, model, and model list.

    Precedence for each field: explicit value in *payload* -> provider
    preset defaults -> heuristic fallbacks.  The API key may come from the
    request body or, failing that, from the AI_API_KEY environment variable.
    Returns an 'ok': False stub (with the default editable prompt) when no
    key is available at all.
    """
    api_key = str(payload.get('api_key') or '').strip() or (
        os.getenv('AI_API_KEY') or '').strip()
    lang = _normalize_lang(str(payload.get('lang') or 'en'))
    if not api_key:
        # No key: still return the editable default prompt so the UI can render.
        return {
            'ok': False,
            'error': 'missing_api_key',
            'provider': '',
            'default_model': '',
            'models': [],
            'lang': lang,
            'prompt_editable_default': (getattr(core, 'ai_prompt_user_default', lambda _l: '')(lang) or '').strip(),
        }

    # 'auto' provider is inferred from the key's format.
    provider = core._canonical_provider(str(payload.get('provider') or 'auto'))
    if provider in ('', 'auto'):
        provider = _detect_provider_from_key(api_key)

    preset = _resolve_provider_defaults(provider) or {}
    requested_model = str(payload.get('model') or 'auto').strip() or 'auto'
    resolved_model = _resolve_model(provider, requested_model)

    models: List[str] = []
    base_url = (str(payload.get('base_url') or 'auto')).strip()
    if base_url in ('', 'auto'):
        base_url = (preset.get('base_url') or '').strip()

    # Per-provider model discovery. The getattr(..., lambda ...) guards let
    # this endpoint degrade gracefully if a helper is missing from core.
    if provider == 'huggingface':
        if base_url:
            models = core._hf_router_available_models(api_key, base_url)
        if requested_model.lower() in ('', 'auto'):
            fallback = core._pick_hf_fallback_model(models)
            if fallback:
                resolved_model = fallback

    elif provider == 'gemini':
        models = getattr(core, '_gemini_available_models',
                         lambda _k: [])(api_key)
        if not models:
            # Static fallback list when the Gemini listing call yields nothing.
            models = ['gemini-2.5-flash', 'gemini-2.5-flash-lite', 'gemini-2.5-pro',
                      'gemini-2.0-flash', 'gemini-3-flash-preview', 'gemini-3-pro-preview']

    elif provider == 'anthropic':
        models = getattr(core, '_anthropic_available_models',
                         lambda _k, _b=None: [])(api_key, base_url)

    else:
        # Any other provider is treated as OpenAI-compatible.
        if not base_url:
            base_url = (core.AI_PROVIDER_DEFAULTS.get('openai') or {}).get(
                'base_url') or 'https://api.openai.com/v1'
        models = getattr(core, '_openai_compat_available_models',
                         lambda _k, _b: [])(api_key, base_url)

    # Last-resort hard-coded model for HF when router discovery came back empty.
    if provider == 'huggingface' and not models:
        models = [
            'google/gemma-3-27b-it:featherless-a',
        ]

    if not models:
        # Fallback tier 1: preset + provider-default models (plus Gemini statics).
        fallback_models: List[str] = []
        preset_model = str(preset.get('model') or '').strip()
        if preset_model:
            fallback_models.append(preset_model)

        provider_defaults = (getattr(core, 'AI_PROVIDER_DEFAULTS', {}) or {}).get(
            provider, {}) or {}
        provider_model = str(provider_defaults.get('model') or '').strip()
        if provider_model:
            fallback_models.append(provider_model)

        if provider == 'gemini':
            fallback_models.extend([
                'gemini-2.5-flash',
                'gemini-2.5-flash-lite',
                'gemini-2.5-pro',
                'gemini-2.0-flash',
                'gemini-3-flash-preview',
                'gemini-3-pro-preview',
            ])

        models = sorted(set([m for m in fallback_models if m]), key=str.lower)

    if not models:
        # Fallback tier 2: every default model across all known providers.
        all_models: List[str] = []
        for _, v in (getattr(core, 'AI_PROVIDER_DEFAULTS', {}) or {}).items():
            m2 = str((v or {}).get('model') or '').strip()
            if m2:
                all_models.append(m2)
        models = sorted(set(all_models), key=str.lower)

    if models:
        # Normalize: strip, drop non-strings/empties, dedupe, case-insensitive sort.
        models = sorted(
            {m.strip() for m in models if isinstance(m, str) and m.strip()},
            key=str.lower,
        )

    # For 'auto' requests, pin the resolved model to something actually listed.
    if models and requested_model.lower() in ('', 'auto') and resolved_model not in models:
        resolved_model = models[0]

    prompt_default = (getattr(core, 'ai_prompt_user_default',
                              lambda _l: '')(lang) or '').strip()

    return {
        'ok': True,
        'provider': provider,
        'base_url': base_url,
        'default_model': (preset.get('model') or ''),
        'model': resolved_model,
        'models': models,
        'prompt_editable_default': prompt_default,
    }
@app.get('/ai/prompt/default')
async def ai_prompt_default(lang: str = 'en'):
    """Return the default AI prompt components for a (normalized) language."""
    norm_lang = _normalize_lang(lang)
    prompt_fn = getattr(core, 'ai_prompt_user_default', lambda _l: '')
    lang_styles = getattr(core, 'AI_LANG_STYLE', {}) or {}
    return {
        'ok': True,
        'lang': norm_lang,
        'prompt_editable_default': (prompt_fn(norm_lang) or '').strip(),
        'lang_style': lang_styles.get(norm_lang) or lang_styles.get('default') or '',
        'system_base': (getattr(core, 'AI_PROMPT_SYSTEM_BASE', '') or '').strip(),
        'contract': core._active_ai_contract(),
        'data_template': core._active_ai_data_template(),
    }
@app.websocket('/ws')
async def ws_endpoint(ws: WebSocket):
    """WebSocket job channel.

    Sends a single {'type': 'ack'} frame after connect, then loops: each
    incoming {'type': 'job', 'id', 'payload'} frame is processed off the
    event loop and answered with a {'type': 'result'} or {'type': 'error'}
    frame carrying the same id.  A client disconnect at any point ends the
    handler silently.
    """
    await ws.accept()
    await ws.send_text(json.dumps({'type': 'ack'}))
    try:
        while True:
            msg = await ws.receive_text()
            data = json.loads(msg)
            # Ignore any frame that is not an explicit job request.
            if data.get('type') != 'job':
                continue
            jid = str(data.get('id') or '')
            payload = data.get('payload') or {}
            try:
                # Heavy OCR/translate work runs in a worker thread so the
                # event loop stays responsive to other connections.
                result = await asyncio.to_thread(_process_payload, payload)
                try:
                    await ws.send_text(json.dumps({'type': 'result', 'id': jid, 'result': result}))
                except WebSocketDisconnect:
                    # Client went away before the result could be delivered.
                    return
            except Exception as e:
                # Report processing failures back on the socket; if the
                # socket itself is already closed/closing, just bail out.
                try:
                    await ws.send_text(json.dumps({'type': 'error', 'id': jid, 'error': str(e)}))
                except (WebSocketDisconnect, RuntimeError):
                    return
    except WebSocketDisconnect:
        return
def main():
    """CLI entry point: process one image via core settings and print JSON."""
    image_path = getattr(core, 'IMAGE_PATH', '')
    lang = getattr(core, 'LANG', 'en')
    mode = os.environ.get('MODE', 'lens_text')
    api_key = os.environ.get('AI_API_KEY', getattr(core, 'AI_API_KEY', ''))
    model = os.environ.get('AI_MODEL', getattr(core, 'AI_MODEL', 'auto'))
    prompt = os.environ.get('AI_PROMPT', '')

    # AI post-processing is only enabled for lens_text mode with a key present.
    ai_cfg = None
    if api_key and mode == 'lens_text':
        ai_cfg = AiConfig(api_key=api_key, model=model, prompt_editable=prompt)

    result = process_image_path(image_path, lang, mode, ai_cfg)
    print(json.dumps(result, ensure_ascii=False, indent=2))

if __name__ == '__main__':
    main()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi>=0.110
2
+ uvicorn[standard]>=0.23
3
+ httpx>=0.24
4
+ numpy
5
+ opencv-python-headless
6
+ Pillow
7
+ budoux