# src/app/ocr_tools.py — OCR and translation helpers.
import io
from typing import Any, Dict, List, Optional
from PIL import Image
import pytesseract
from deep_translator import GoogleTranslator
from src.app.config import get_user_dir # keep this if you use it
# NOTE: a previous placeholder import from .flashcards_tools was removed.
def _simple_ocr(image_bytes: bytes) -> str:
    """
    Extract text from raw image bytes using pytesseract.

    The image is decoded from memory and normalized to RGB before OCR.
    Returns the recognized text with surrounding whitespace removed
    (an empty string if nothing was recognized).
    """
    buffer = io.BytesIO(image_bytes)
    rgb_image = Image.open(buffer).convert("RGB")
    extracted = pytesseract.image_to_string(rgb_image)
    return extracted.strip()
def ocr_and_translate_batch(
    images: List[bytes],
    target_lang: str = "en",
    prefer_ocr_local: bool = True,
) -> List[Dict]:
    """
    Run OCR on a batch of images and translate the recognized text.

    Currently always uses the simple pytesseract-based OCR.

    Args:
        images: Raw image bytes, one entry per image.
        target_lang: Language code to translate the OCR text into.
        prefer_ocr_local: Unused; kept for backward compatibility with
            earlier versions that offered a local PaddleOCR pipeline.

    Returns:
        One dict per input image, in input order, with keys:
            - "text": original OCR text ("" if nothing was recognized)
            - "translation": translation into target_lang ("" if OCR
              found nothing or the translation service failed)
            - "target_lang": the requested target language code
    """
    translator = GoogleTranslator(source="auto", target=target_lang)
    results: List[Dict] = []
    for img_bytes in images:
        text = _simple_ocr(img_bytes)
        translated = ""
        if text:
            try:
                translated = translator.translate(text)
            except Exception:
                # Best-effort: a failed translation yields an empty
                # string rather than aborting the whole batch.
                translated = ""
        # Single construction site for the result dict — both the
        # "OCR found text" and "OCR found nothing" cases flow here.
        results.append(
            {
                "text": text,
                "translation": translated,
                "target_lang": target_lang,
            }
        )
    return results