oneocr

File size: 7,605 Bytes

be4a6f1

"""OCR engine — unified wrapper that auto-selects the most accurate backend available.

Backend selection (automatic):
    1. Windows → native DLL via ctypes (fastest, 100% accuracy)
    2. Linux/macOS with Wine → DLL via Wine subprocess (100% accuracy)
    3. Fallback → pure Python/ONNX reimplementation (~53% match rate)

Usage:
    from ocr.engine_unified import OcrEngineUnified
    engine = OcrEngineUnified()
    result = engine.recognize_pil(pil_image)
    print(result.text)
    print(f"Backend: {engine.backend_name}")
"""

from __future__ import annotations

import json
import logging
import platform
import sys
from pathlib import Path
from typing import TYPE_CHECKING

from ocr.models import BoundingRect, OcrLine, OcrResult, OcrWord

if TYPE_CHECKING:
    from PIL import Image

logger = logging.getLogger(__name__)


class OcrEngineUnified:
    """Unified OCR engine — auto-selects the best available backend.

    Priority order:
        1. Native Windows DLL (100%, fastest)
        2. Wine bridge on non-Windows hosts (100%, ~2x slower due to subprocess)
        3. ONNX reimplementation (~53%, fully cross-platform)

    Args:
        ocr_data_dir: Path to directory with DLL/model files.
                      Defaults to PROJECT_ROOT/ocr_data/.
        force_backend: Force a specific backend: 'dll', 'wine', 'onnx', or None (auto).

    Raises:
        ValueError: If ``force_backend`` is not one of ``BACKENDS``.
        Exception: When a backend is forced, whatever its initializer raises
            propagates to the caller (auto-selection swallows such errors and
            moves on to the next backend instead).
    """

    # Backends in the order auto-selection tries them.
    BACKENDS = ("dll", "wine", "onnx")

    # Number of values in a bounding box: four (x, y) corner pairs.
    _BBOX_LEN = 8

    def __init__(
        self,
        ocr_data_dir: str | Path | None = None,
        force_backend: str | None = None,
    ) -> None:
        if ocr_data_dir is None:
            ocr_data_dir = Path(__file__).resolve().parent.parent / "ocr_data"
        self._ocr_data = Path(ocr_data_dir)
        self._backend_name: str = "none"
        self._engine = None

        if force_backend:
            if force_backend not in self.BACKENDS:
                raise ValueError(f"Unknown backend: {force_backend!r}. Choose from {self.BACKENDS}")
            self._init_backend(force_backend)
        else:
            self._auto_select()

    @property
    def backend_name(self) -> str:
        """Name of the active backend ('none' when no backend could be loaded)."""
        return self._backend_name

    def recognize_pil(self, image: "Image.Image") -> OcrResult:
        """Run OCR on a PIL Image.

        Returns:
            The backend's OcrResult. When no backend is active, an OcrResult
            carrying only an error message is returned instead of raising.
        """
        if self._backend_name == "wine":
            # The Wine bridge hands back JSON-like data that must be converted.
            return self._recognize_wine(image)
        if self._backend_name in ("dll", "onnx"):
            return self._engine.recognize_pil(image)
        return OcrResult(error="No OCR backend available")

    def recognize_bytes(self, image_bytes: bytes) -> OcrResult:
        """Run OCR on raw image bytes (PNG/JPEG/etc)."""
        # Imported lazily so that PIL is only required when this path is used.
        from io import BytesIO
        from PIL import Image as PILImage
        img = PILImage.open(BytesIO(image_bytes))
        return self.recognize_pil(img)

    # ── Backend initialization ──────────────────────────────────

    def _auto_select(self) -> None:
        """Try backends in priority order and keep the first that initializes."""
        for backend in self.BACKENDS:
            try:
                self._init_backend(backend)
            except Exception as e:
                # Any failure (missing platform, files, or imports) just means
                # this backend is unavailable; fall through to the next one.
                logger.debug("Backend %s unavailable: %s", backend, e)
            else:
                logger.info("OCR backend: %s", self._backend_name)
                return

        logger.warning("No OCR backend available!")
        self._backend_name = "none"

    def _init_backend(self, name: str) -> None:
        """Initialize a specific backend.

        Raises:
            ValueError: If ``name`` is not a known backend. Previously an
                unknown name was silently ignored, which let auto-selection
                report success without any engine being loaded.
        """
        initializers = {
            "dll": self._init_dll,
            "wine": self._init_wine,
            "onnx": self._init_onnx,
        }
        try:
            initialize = initializers[name]
        except KeyError:
            raise ValueError(f"Unknown backend: {name!r}. Choose from {self.BACKENDS}") from None
        initialize()

    def _init_dll(self) -> None:
        """Initialize native Windows DLL backend.

        Raises:
            RuntimeError: If the host is not Windows.
        """
        if platform.system() != "Windows":
            raise RuntimeError("DLL backend requires Windows")
        from ocr.engine import OcrEngine
        self._engine = OcrEngine(ocr_data_dir=self._ocr_data)
        self._backend_name = "dll"

    def _init_wine(self) -> None:
        """Initialize Wine bridge backend.

        Raises:
            RuntimeError: If running on Windows, or if Wine, the DLL, the model
                file, or (when the loader is not yet compiled) MinGW is missing.
        """
        if platform.system() == "Windows":
            raise RuntimeError("Wine backend is for Linux/macOS only")

        # wine_bridge lives in PROJECT_ROOT/tools, which is not a package, so
        # it has to be made importable. Only add the entry once so repeated
        # engine construction does not keep growing sys.path.
        tools_dir = str(Path(__file__).resolve().parent.parent / "tools")
        if tools_dir not in sys.path:
            sys.path.insert(0, tools_dir)
        from wine_bridge import WineBridge

        bridge = WineBridge(ocr_data_dir=self._ocr_data)
        checks = bridge.check_requirements()

        if not checks["wine_found"]:
            raise RuntimeError("Wine not installed")
        if not checks["dll_exists"]:
            raise RuntimeError(f"oneocr.dll not found in {self._ocr_data}")
        if not checks["model_exists"]:
            raise RuntimeError(f"oneocr.onemodel not found in {self._ocr_data}")

        # Compile loader if needed
        if not checks["loader_compiled"]:
            if not checks["mingw_found"]:
                raise RuntimeError(
                    "MinGW cross-compiler needed to build Wine loader. "
                    "Install: sudo apt install mingw-w64"
                )
            bridge.compile_loader()

        self._engine = bridge
        self._backend_name = "wine"

    def _init_onnx(self) -> None:
        """Initialize pure ONNX backend (fallback)."""
        from ocr.engine_onnx import OcrEngineOnnx
        self._engine = OcrEngineOnnx(ocr_data_dir=self._ocr_data)
        self._backend_name = "onnx"

    # ── Wine result conversion ─────────────────────────────────

    def _recognize_wine(self, image: "Image.Image") -> OcrResult:
        """Run OCR via Wine bridge and convert JSON → OcrResult.

        Bridge failures are reported through ``OcrResult.error`` rather than
        raised.
        """
        try:
            raw = self._engine.recognize_pil(image)
        except Exception as e:
            return OcrResult(error=f"Wine bridge error: {e}")

        return self._json_to_ocr_result(raw)

    @staticmethod
    def _pad_bbox(bbox) -> tuple:
        """Return ``bbox`` as exactly eight values, zero-padded or truncated.

        ``None`` (e.g. a JSON ``null``) is treated as an empty box. This keeps
        a short or missing bbox from raising while indexing the coordinates.
        """
        size = OcrEngineUnified._BBOX_LEN
        coords = tuple(bbox)[:size] if bbox is not None else ()
        return coords + (0,) * (size - len(coords))

    @staticmethod
    def _rect_from_bbox(bbox) -> BoundingRect:
        """Build a BoundingRect from a flat [x1, y1, ..., x4, y4] sequence."""
        x1, y1, x2, y2, x3, y3, x4, y4 = OcrEngineUnified._pad_bbox(bbox)
        return BoundingRect(
            x1=x1, y1=y1, x2=x2, y2=y2,
            x3=x3, y3=y3, x4=x4, y4=y4,
        )

    @staticmethod
    def _json_to_ocr_result(data: dict) -> OcrResult:
        """Convert Wine bridge JSON output to OcrResult dataclass.

        Reads the keys ``error``, ``lines`` (each with ``text``, ``bbox``,
        ``words``), per-word ``text``/``confidence``/``bbox``, and
        ``text_angle``. Missing keys fall back to empty/zero values; word and
        line bounding boxes are normalized identically via ``_pad_bbox``.
        """
        if "error" in data:
            return OcrResult(error=data["error"])

        make_rect = OcrEngineUnified._rect_from_bbox

        lines = []
        # ``or ()`` also covers an explicit JSON null for these lists.
        for line_data in data.get("lines") or ():
            words = [
                OcrWord(
                    text=word_data.get("text", ""),
                    confidence=word_data.get("confidence", 0.0),
                    bounding_rect=make_rect(word_data.get("bbox")),
                )
                for word_data in line_data.get("words") or ()
            ]
            lines.append(OcrLine(
                text=line_data.get("text", ""),
                words=words,
                bounding_rect=make_rect(line_data.get("bbox")),
            ))

        # Lines with empty text are kept in ``lines`` but skipped in the
        # joined full text.
        full_text = "\n".join(line.text for line in lines if line.text)
        text_angle = data.get("text_angle")

        return OcrResult(text=full_text, text_angle=text_angle, lines=lines)