"""OCR engine — unified wrapper that auto-selects the most accurate backend available on each platform.

Backend selection (automatic):
    1. Windows → native DLL via ctypes (fastest, 100% accuracy)
    2. Linux/macOS with Wine → DLL via Wine subprocess (100% accuracy)
    3. Fallback → pure Python/ONNX reimplementation (~53% match rate)

Usage:
    from ocr.engine_unified import OcrEngineUnified
    engine = OcrEngineUnified()
    result = engine.recognize_pil(pil_image)
    print(result.text)
    print(f"Backend: {engine.backend_name}")
"""
| |
|
| | from __future__ import annotations |
| |
|
| | import json |
| | import logging |
| | import platform |
| | import sys |
| | from pathlib import Path |
| | from typing import TYPE_CHECKING |
| |
|
| | from ocr.models import BoundingRect, OcrLine, OcrResult, OcrWord |
| |
|
| | if TYPE_CHECKING: |
| | from PIL import Image |
| |
|
| | logger = logging.getLogger(__name__) |
| |
|
| |
|
class OcrEngineUnified:
    """Unified OCR engine — auto-selects the best available backend.

    Priority order:
        1. Native Windows DLL (100%, fastest)
        2. Wine bridge on Linux (100%, ~2x slower due to subprocess)
        3. ONNX reimplementation (~53%, fully cross-platform)

    Args:
        ocr_data_dir: Path to directory with DLL/model files.
            Defaults to PROJECT_ROOT/ocr_data/.
        force_backend: Force a specific backend: 'dll', 'wine', 'onnx', or None (auto).

    Raises:
        ValueError: If ``force_backend`` is given but is not one of ``BACKENDS``.
        Exception: If ``force_backend`` is given and that backend fails to
            initialize, the backend's own error propagates (auto-selection,
            by contrast, swallows per-backend failures and falls through).
    """

    # Tried in this order by _auto_select().
    BACKENDS = ("dll", "wine", "onnx")

    def __init__(
        self,
        ocr_data_dir: str | Path | None = None,
        force_backend: str | None = None,
    ) -> None:
        if ocr_data_dir is None:
            ocr_data_dir = Path(__file__).resolve().parent.parent / "ocr_data"
        self._ocr_data = Path(ocr_data_dir)
        # "none" means no backend has been initialized (yet, or at all).
        self._backend_name: str = "none"
        self._engine = None

        if force_backend:
            if force_backend not in self.BACKENDS:
                raise ValueError(f"Unknown backend: {force_backend!r}. Choose from {self.BACKENDS}")
            self._init_backend(force_backend)
        else:
            self._auto_select()

    @property
    def backend_name(self) -> str:
        """Name of the active backend ('dll', 'wine', 'onnx' or 'none')."""
        return self._backend_name

    def recognize_pil(self, image: "Image.Image") -> OcrResult:
        """Run OCR on a PIL Image. Returns OcrResult with text, lines, words.

        When no backend is active, an OcrResult carrying only an error
        message is returned instead of raising.
        """
        if self._backend_name == "wine":
            # The Wine bridge returns a JSON-like dict that must be converted.
            return self._recognize_wine(image)
        if self._backend_name in ("dll", "onnx"):
            # These engines are called directly and their result returned as-is.
            return self._engine.recognize_pil(image)
        return OcrResult(error="No OCR backend available")

    def recognize_bytes(self, image_bytes: bytes) -> OcrResult:
        """Run OCR on raw image bytes (PNG/JPEG/etc)."""
        # Imported lazily so that PIL is only required when this path is used.
        from io import BytesIO
        from PIL import Image as PILImage

        img = PILImage.open(BytesIO(image_bytes))
        return self.recognize_pil(img)

    def _auto_select(self) -> None:
        """Try backends in priority order and keep the first that initializes."""
        for backend in self.BACKENDS:
            try:
                self._init_backend(backend)
            except Exception as e:
                # Deliberately broad: any failure (missing module, missing
                # file, wrong platform) just means "try the next backend".
                logger.debug("Backend %s unavailable: %s", backend, e)
            else:
                logger.info("OCR backend: %s", self._backend_name)
                return

        logger.warning("No OCR backend available!")
        self._backend_name = "none"

    def _init_backend(self, name: str) -> None:
        """Initialize a specific backend.

        Raises:
            ValueError: If ``name`` is not a known backend. Previously an
                unknown name was silently ignored, leaving the engine unset.
        """
        if name == "dll":
            self._init_dll()
        elif name == "wine":
            self._init_wine()
        elif name == "onnx":
            self._init_onnx()
        else:
            raise ValueError(f"Unknown backend: {name!r}. Choose from {self.BACKENDS}")

    def _init_dll(self) -> None:
        """Initialize native Windows DLL backend.

        Raises:
            RuntimeError: If not running on Windows.
        """
        if platform.system() != "Windows":
            raise RuntimeError("DLL backend requires Windows")
        from ocr.engine import OcrEngine

        self._engine = OcrEngine(ocr_data_dir=self._ocr_data)
        self._backend_name = "dll"

    def _init_wine(self) -> None:
        """Initialize Wine bridge backend.

        Raises:
            RuntimeError: On Windows, or when Wine, the DLL, the model file
                or (if the loader still has to be built) MinGW is missing.
        """
        if platform.system() == "Windows":
            raise RuntimeError("Wine backend is for Linux/macOS only")

        # wine_bridge is imported from the sibling "tools" directory, which
        # must be on sys.path. Insert it only once so repeated attempts do
        # not keep growing sys.path with duplicates.
        tools_dir = str(Path(__file__).resolve().parent.parent / "tools")
        if tools_dir not in sys.path:
            sys.path.insert(0, tools_dir)
        from wine_bridge import WineBridge

        bridge = WineBridge(ocr_data_dir=self._ocr_data)
        checks = bridge.check_requirements()

        if not checks["wine_found"]:
            raise RuntimeError("Wine not installed")
        if not checks["dll_exists"]:
            raise RuntimeError(f"oneocr.dll not found in {self._ocr_data}")
        if not checks["model_exists"]:
            raise RuntimeError(f"oneocr.onemodel not found in {self._ocr_data}")

        # Build the loader on demand; that requires the MinGW cross-compiler.
        if not checks["loader_compiled"]:
            if not checks["mingw_found"]:
                raise RuntimeError(
                    "MinGW cross-compiler needed to build Wine loader. "
                    "Install: sudo apt install mingw-w64"
                )
            bridge.compile_loader()

        # Only commit state once every requirement has been satisfied.
        self._engine = bridge
        self._backend_name = "wine"

    def _init_onnx(self) -> None:
        """Initialize pure ONNX backend (fallback)."""
        from ocr.engine_onnx import OcrEngineOnnx

        self._engine = OcrEngineOnnx(ocr_data_dir=self._ocr_data)
        self._backend_name = "onnx"

    def _recognize_wine(self, image: "Image.Image") -> OcrResult:
        """Run OCR via Wine bridge and convert JSON → OcrResult.

        Any exception raised by the bridge is reported through
        ``OcrResult.error`` rather than propagated.
        """
        try:
            raw = self._engine.recognize_pil(image)
        except Exception as e:
            return OcrResult(error=f"Wine bridge error: {e}")

        return self._json_to_ocr_result(raw)

    @staticmethod
    def _normalize_bbox(raw) -> list:
        """Return exactly eight coordinates from ``raw``.

        ``None`` becomes all zeros, short sequences are zero-padded and
        longer ones are truncated, so callers can index 0..7 safely.
        """
        coords = [] if raw is None else list(raw)[:8]
        return coords + [0] * (8 - len(coords))

    @staticmethod
    def _to_bounding_rect(raw) -> BoundingRect:
        """Build a BoundingRect from a (possibly short or missing) bbox list."""
        x1, y1, x2, y2, x3, y3, x4, y4 = OcrEngineUnified._normalize_bbox(raw)
        return BoundingRect(
            x1=x1, y1=y1, x2=x2, y2=y2,
            x3=x3, y3=y3, x4=x4, y4=y4,
        )

    @staticmethod
    def _json_to_ocr_result(data: dict) -> OcrResult:
        """Convert Wine bridge JSON output to OcrResult dataclass.

        Args:
            data: Dict with optional keys "error", "lines" and "text_angle".
                Each line may carry "text", "bbox" and "words"; each word may
                carry "text", "confidence" and "bbox".

        Returns:
            An OcrResult. If ``data`` is not a dict or contains "error", the
            result carries only an error message.
        """
        if not isinstance(data, dict):
            # Keep the Wine path's contract of reporting failures via
            # OcrResult.error instead of raising on malformed output.
            return OcrResult(
                error=f"Wine bridge error: unexpected output type {type(data).__name__}"
            )
        if "error" in data:
            return OcrResult(error=data["error"])

        lines = []
        for line_data in data.get("lines", []):
            words = [
                OcrWord(
                    text=word_data.get("text", ""),
                    confidence=word_data.get("confidence", 0.0),
                    bounding_rect=OcrEngineUnified._to_bounding_rect(word_data.get("bbox")),
                )
                for word_data in line_data.get("words", [])
            ]
            lines.append(OcrLine(
                text=line_data.get("text", ""),
                words=words,
                bounding_rect=OcrEngineUnified._to_bounding_rect(line_data.get("bbox")),
            ))

        # Lines with empty text are kept in `lines` but skipped in the joined text.
        full_text = "\n".join(line.text for line in lines if line.text)
        text_angle = data.get("text_angle")

        return OcrResult(text=full_text, text_angle=text_angle, lines=lines)
| |
|