"""OCR text extraction using Tesseract (Vietnamese language model required)."""
from __future__ import annotations
import sys


def ocr_image(path: str) -> str:
    """Run Tesseract OCR on a single image file and return the extracted text.

    This is a best-effort helper: it never raises for missing dependencies
    or OCR failures. Instead it writes a ``[WARN]`` line to stderr and
    returns an empty string.

    Args:
        path: Filesystem path of the image to read.

    Returns:
        The text produced by ``pytesseract.image_to_string`` using the
        ``"vie"`` language model, or ``""`` if pytesseract/Pillow cannot be
        imported or if opening/recognising the image fails.
    """
    # Imported lazily so the module can be imported without the optional
    # OCR dependencies installed.
    try:
        import pytesseract
        from PIL import Image
    except ImportError:
        print("[WARN] pytesseract/Pillow not installed", file=sys.stderr)
        return ""
    try:
        # Image.open() loads lazily and keeps the underlying file open; the
        # context manager guarantees the handle is released even when OCR
        # raises, instead of waiting for garbage collection.
        with Image.open(path) as img:
            return pytesseract.image_to_string(img, lang="vie")
    except Exception as e:
        # Deliberately broad: any failure degrades to "no text" plus a warning.
        print(f"[WARN] OCR failed for {path}: {e}", file=sys.stderr)
        return ""


def ocr_pdf(path: str) -> list[str]:
    """OCR every image that pdf2image produces for a PDF file.

    The PDF is converted with ``convert_from_path`` and each resulting image
    is passed to ``pytesseract.image_to_string`` with the ``"vie"`` language
    model. This is a best-effort helper: it never raises for missing
    dependencies or OCR failures, and instead logs a ``[WARN]`` line to stderr.

    Args:
        path: Filesystem path of the PDF to read.

    Returns:
        One text string per converted image, in the order returned by
        ``convert_from_path``. An empty list is returned when
        pytesseract/pdf2image cannot be imported or when conversion or OCR
        of any image fails (results are all-or-nothing).
    """
    # Optional dependencies are imported lazily so that importing this module
    # does not require them.
    try:
        import pytesseract
        from pdf2image import convert_from_path
    except ImportError:
        print("[WARN] pytesseract/pdf2image not installed", file=sys.stderr)
        return []

    page_texts = []
    try:
        rendered_pages = convert_from_path(path)
        for page in rendered_pages:
            text = pytesseract.image_to_string(page, lang="vie")
            page_texts.append(text)
    except Exception as err:
        # Any failure discards partial results, so callers get either every
        # page or nothing.
        print(f"[WARN] OCR PDF failed for {path}: {err}", file=sys.stderr)
        return []
    return page_texts