"""OCR and translation helpers: pytesseract-based OCR with GoogleTranslator."""
import io
from typing import Any, Dict, List, Optional
from PIL import Image
import pytesseract
from deep_translator import GoogleTranslator
from src.app.config import get_user_dir # keep this if you use it
# ❗ REMOVED invalid placeholder import:
# from .flashcards_tools import ...
def _simple_ocr(image_bytes: bytes) -> str:
    """Fallback OCR: decode *image_bytes* with Pillow and run pytesseract on it."""
    buffer = io.BytesIO(image_bytes)
    # convert("RGB") forces the lazy Image.open to fully decode the data.
    rgb_image = Image.open(buffer).convert("RGB")
    return pytesseract.image_to_string(rgb_image).strip()
def ocr_and_translate_batch(
    images: List[bytes],
    target_lang: str = "en",
    prefer_ocr_local: bool = True,
) -> List[Dict]:
    """
    Run OCR on a batch of images and translate the recognized text.

    Currently always uses the simple pytesseract-based OCR; the
    ``prefer_ocr_local`` flag is retained only for backward compatibility
    with earlier versions that used a local PaddleOCR pipeline.

    Args:
        images: Raw image bytes, one entry per image.
        target_lang: Language code to translate into (default ``"en"``).
        prefer_ocr_local: Unused; kept for interface compatibility.

    Returns:
        One dict per input image, in input order, with keys:
            - "text": OCR text ("" if nothing was recognized)
            - "translation": translation into target_lang ("" if OCR was
              empty or translation failed)
            - "target_lang": echo of target_lang
    """
    translator = GoogleTranslator(source="auto", target=target_lang)
    results: List[Dict] = []
    for img_bytes in images:
        text = _simple_ocr(img_bytes)
        translated = ""
        if text:
            try:
                translated = translator.translate(text)
            except Exception:
                # Best-effort: a translation failure degrades to an empty
                # string rather than aborting the whole batch.
                translated = ""
        # Single result-construction point (previously duplicated verbatim
        # in both the empty-text and non-empty-text branches).
        results.append(
            {
                "text": text,
                "translation": translated,
                "target_lang": target_lang,
            }
        )
    return results