Spaces:

MinhTai
/

ai-agent-app

Running

deploy: ead37d0 wiki overhaul + animation v2 + Oracle fixes

c036214 about 8 hours ago

1.05 kB

	"""OCR text extraction using Tesseract (Vietnamese language model required)."""
	from __future__ import annotations
	import sys


	def ocr_image(path: str) -> str:
	    """Extract text from an image file with Tesseract.

	    The OCR dependencies are imported inside the function, so importing this
	    module does not require them. This is a best-effort helper: every
	    failure is reported on stderr and signalled with an empty string rather
	    than an exception.

	    Args:
	        path: Path of the image file to read.

	    Returns:
	        The text produced by ``pytesseract.image_to_string`` with the
	        ``"vie"`` language, or ``""`` if the dependencies are missing or
	        OCR fails for any reason.
	    """
	    try:
	        import pytesseract
	        from PIL import Image
	    except ImportError:
	        print("[WARN] pytesseract/Pillow not installed", file=sys.stderr)
	        return ""
	    try:
	        # The context manager closes the image's file handle even when OCR
	        # raises, instead of leaving that to garbage collection.
	        with Image.open(path) as img:
	            return pytesseract.image_to_string(img, lang="vie")
	    except Exception as e:  # deliberate best-effort boundary: warn, don't crash
	        print(f"[WARN] OCR failed for {path}: {e}", file=sys.stderr)
	        return ""


	def ocr_pdf(path: str) -> list[str]:
	    """Run Tesseract OCR over the images rendered from a PDF.

	    The OCR dependencies are imported inside the function. Any failure is
	    reported on stderr and results in an empty list; texts gathered before
	    a failure are discarded.

	    Args:
	        path: Path of the PDF file to read.

	    Returns:
	        One string per image returned by ``convert_from_path``, each
	        produced with the ``"vie"`` language, or ``[]`` if the dependencies
	        are missing or any step fails.
	    """
	    try:
	        import pytesseract
	        from pdf2image import convert_from_path
	    except ImportError:
	        print("[WARN] pytesseract/pdf2image not installed", file=sys.stderr)
	        return []

	    page_texts: list[str] = []
	    try:
	        rendered_pages = convert_from_path(path)
	        for page in rendered_pages:
	            page_texts.append(pytesseract.image_to_string(page, lang="vie"))
	    except Exception as exc:
	        print(f"[WARN] OCR PDF failed for {path}: {exc}", file=sys.stderr)
	        return []
	    return page_texts