# src/app/ocr_tools.py — OCR and translation helpers.
import io
from typing import Any, Dict, List, Optional
from PIL import Image
import pytesseract
from deep_translator import GoogleTranslator
from src.app.config import get_user_dir # keep this if you use it
# NOTE: a previous placeholder import from .flashcards_tools was removed.
def _simple_ocr(image_bytes: bytes) -> str:
    """
    Extract text from raw image bytes using pytesseract.

    The image is decoded from memory and normalized to RGB before OCR.
    Returns the recognized text with surrounding whitespace removed
    (an empty string if nothing was recognized).
    """
    buffer = io.BytesIO(image_bytes)
    rgb_image = Image.open(buffer).convert("RGB")
    extracted = pytesseract.image_to_string(rgb_image)
    return extracted.strip()
def ocr_and_translate_batch(
    images: List[bytes],
    target_lang: str = "en",
    prefer_ocr_local: bool = True,
) -> List[Dict]:
    """
    Run OCR on a batch of images and translate the recognized text.

    Currently always uses the simple pytesseract-based OCR.

    Args:
        images: Raw image bytes, one entry per image.
        target_lang: Language code to translate the OCR text into.
        prefer_ocr_local: Unused; kept for backward compatibility with
            earlier versions that offered a local PaddleOCR pipeline.

    Returns:
        One dict per input image, in input order, with keys:
            - "text": original OCR text ("" if nothing was recognized)
            - "translation": translation into target_lang ("" if OCR
              found nothing or the translation service failed)
            - "target_lang": the requested target language code
    """
    translator = GoogleTranslator(source="auto", target=target_lang)
    results: List[Dict] = []
    for img_bytes in images:
        text = _simple_ocr(img_bytes)
        translated = ""
        if text:
            try:
                translated = translator.translate(text)
            except Exception:
                # Best-effort: a failed translation yields an empty
                # string rather than aborting the whole batch.
                translated = ""
        # Single construction site for the result dict — both the
        # "OCR found text" and "OCR found nothing" cases flow here.
        results.append(
            {
                "text": text,
                "translation": translated,
                "target_lang": target_lang,
            }
        )
    return results