"""OCR and translation helpers: pytesseract-based OCR with GoogleTranslator."""
import io
from typing import Any, Dict, List, Optional
from PIL import Image
import pytesseract
from deep_translator import GoogleTranslator
from src.app.config import get_user_dir # keep this if you use it
# ❗ REMOVED invalid placeholder import:
# from .flashcards_tools import ...
def _simple_ocr(image_bytes: bytes) -> str:
    """Fallback OCR: decode *image_bytes* with Pillow and run pytesseract on it."""
    buffer = io.BytesIO(image_bytes)
    # convert("RGB") forces the lazy Image.open to fully decode the data.
    rgb_image = Image.open(buffer).convert("RGB")
    return pytesseract.image_to_string(rgb_image).strip()
def ocr_and_translate_batch(
    images: List[bytes],
    target_lang: str = "en",
    prefer_ocr_local: bool = True,
) -> List[Dict]:
    """
    Run OCR on a batch of images and translate the recognized text.

    Currently always uses the simple pytesseract-based OCR; the
    ``prefer_ocr_local`` flag is retained only for backward compatibility
    with earlier versions that used a local PaddleOCR pipeline.

    Args:
        images: Raw image bytes, one entry per image.
        target_lang: Language code to translate into (default ``"en"``).
        prefer_ocr_local: Unused; kept for interface compatibility.

    Returns:
        One dict per input image, in input order, with keys:
            - "text": OCR text ("" if nothing was recognized)
            - "translation": translation into target_lang ("" if OCR was
              empty or translation failed)
            - "target_lang": echo of target_lang
    """
    translator = GoogleTranslator(source="auto", target=target_lang)
    results: List[Dict] = []
    for img_bytes in images:
        text = _simple_ocr(img_bytes)
        translated = ""
        if text:
            try:
                translated = translator.translate(text)
            except Exception:
                # Best-effort: a translation failure degrades to an empty
                # string rather than aborting the whole batch.
                translated = ""
        # Single result-construction point (previously duplicated verbatim
        # in both the empty-text and non-empty-text branches).
        results.append(
            {
                "text": text,
                "translation": translated,
                "target_lang": target_lang,
            }
        )
    return results