|
|
"""OCR engine — unified wrapper providing 100% accuracy on any platform. |
|
|
|
|
|
Backend selection (automatic): |
|
|
1. Windows → native DLL via ctypes (fastest, 100% accuracy) |
|
|
2. Linux/macOS with Wine → DLL via Wine subprocess (100% accuracy) |
|
|
3. Fallback → pure Python/ONNX reimplementation (~53% match rate) |
|
|
|
|
|
Usage: |
|
|
from ocr.engine_unified import OcrEngineUnified |
|
|
engine = OcrEngineUnified() |
|
|
result = engine.recognize_pil(pil_image) |
|
|
print(result.text) |
|
|
print(f"Backend: {engine.backend_name}") |
|
|
""" |
|
|
|
|
|
from __future__ import annotations |
|
|
|
|
|
import json |
|
|
import logging |
|
|
import platform |
|
|
import sys |
|
|
from pathlib import Path |
|
|
from typing import TYPE_CHECKING |
|
|
|
|
|
from ocr.models import BoundingRect, OcrLine, OcrResult, OcrWord |
|
|
|
|
|
if TYPE_CHECKING: |
|
|
from PIL import Image |
|
|
|
|
|
logger = logging.getLogger(__name__) |
|
|
|
|
|
|
|
|
class OcrEngineUnified:
    """Unified OCR engine — auto-selects the best available backend.

    Priority order:
        1. Native Windows DLL (100%, fastest)
        2. Wine bridge on Linux (100%, ~2x slower due to subprocess)
        3. ONNX reimplementation (~53%, fully cross-platform)

    Args:
        ocr_data_dir: Path to directory with DLL/model files.
            Defaults to PROJECT_ROOT/ocr_data/.
        force_backend: Force a specific backend: 'dll', 'wine', 'onnx', or None (auto).

    Raises:
        ValueError: If ``force_backend`` is not one of ``BACKENDS``.
        Exception: When a backend is forced, whatever its initializer raises
            propagates to the caller (auto-selection instead logs and moves on).
    """

    # Backend names in auto-selection priority order.
    BACKENDS = ("dll", "wine", "onnx")

    def __init__(
        self,
        ocr_data_dir: str | Path | None = None,
        force_backend: str | None = None,
    ) -> None:
        if ocr_data_dir is None:
            # Default location: <two directories above this file>/ocr_data
            ocr_data_dir = Path(__file__).resolve().parent.parent / "ocr_data"
        self._ocr_data = Path(ocr_data_dir)
        self._backend_name: str = "none"
        self._engine = None

        # A falsy force_backend (None or "") means "auto-select".
        if force_backend:
            if force_backend not in self.BACKENDS:
                raise ValueError(f"Unknown backend: {force_backend!r}. Choose from {self.BACKENDS}")
            self._init_backend(force_backend)
        else:
            self._auto_select()

    @property
    def backend_name(self) -> str:
        """Name of the active backend ('dll', 'wine', 'onnx' or 'none')."""
        return self._backend_name

    def recognize_pil(self, image: "Image.Image") -> OcrResult:
        """Run OCR on a PIL Image.

        Args:
            image: The image to recognize.

        Returns:
            The backend's OcrResult. If no backend is active, an OcrResult
            whose ``error`` is "No OCR backend available".
        """
        if self._backend_name == "wine":
            # The Wine bridge returns JSON-like data that must be converted.
            return self._recognize_wine(image)
        if self._backend_name in ("dll", "onnx"):
            # Both engines expose recognize_pil() and are called the same way.
            return self._engine.recognize_pil(image)
        return OcrResult(error="No OCR backend available")

    def recognize_bytes(self, image_bytes: bytes) -> OcrResult:
        """Run OCR on raw image bytes (PNG/JPEG/etc).

        The bytes are opened with ``PIL.Image.open`` and handed to
        :meth:`recognize_pil`.
        """
        # Imported lazily so PIL is only required when this method is used.
        from io import BytesIO

        from PIL import Image as PILImage

        img = PILImage.open(BytesIO(image_bytes))
        return self.recognize_pil(img)

    def _auto_select(self) -> None:
        """Try backends in priority order and keep the first that initializes.

        Initialization failures are logged at DEBUG level and the next backend
        is tried. If none succeeds the engine stays in the "none" state.
        """
        for backend in self.BACKENDS:
            try:
                self._init_backend(backend)
            except Exception as e:  # any failure just means "not available"
                logger.debug("Backend %s unavailable: %s", backend, e)
            else:
                logger.info("OCR backend: %s", self._backend_name)
                return

        logger.warning("No OCR backend available!")
        self._engine = None
        self._backend_name = "none"

    def _init_backend(self, name: str) -> None:
        """Initialize a specific backend.

        Args:
            name: One of ``BACKENDS``.

        Raises:
            ValueError: If ``name`` is not a known backend (previously this was
                silently ignored, leaving the engine without a backend).
        """
        initializers = {
            "dll": self._init_dll,
            "wine": self._init_wine,
            "onnx": self._init_onnx,
        }
        try:
            initialize = initializers[name]
        except KeyError:
            raise ValueError(
                f"Unknown backend: {name!r}. Choose from {self.BACKENDS}"
            ) from None
        initialize()

    def _init_dll(self) -> None:
        """Initialize native Windows DLL backend.

        Raises:
            RuntimeError: If not running on Windows.
        """
        if platform.system() != "Windows":
            raise RuntimeError("DLL backend requires Windows")
        from ocr.engine import OcrEngine

        self._engine = OcrEngine(ocr_data_dir=self._ocr_data)
        self._backend_name = "dll"

    def _init_wine(self) -> None:
        """Initialize Wine bridge backend.

        Raises:
            RuntimeError: If running on Windows, or if Wine, the DLL or the
                model file is reported missing, or if the loader is not compiled
                and no MinGW cross-compiler is available to build it.
        """
        if platform.system() == "Windows":
            raise RuntimeError("Wine backend is for Linux/macOS only")

        # wine_bridge lives in <project>/tools, which is not a package; make it
        # importable, but avoid stacking duplicate sys.path entries on repeat calls.
        tools_dir = str(Path(__file__).resolve().parent.parent / "tools")
        if tools_dir not in sys.path:
            sys.path.insert(0, tools_dir)
        from wine_bridge import WineBridge

        bridge = WineBridge(ocr_data_dir=self._ocr_data)
        checks = bridge.check_requirements()

        if not checks["wine_found"]:
            raise RuntimeError("Wine not installed")
        if not checks["dll_exists"]:
            raise RuntimeError(f"oneocr.dll not found in {self._ocr_data}")
        if not checks["model_exists"]:
            raise RuntimeError(f"oneocr.onemodel not found in {self._ocr_data}")

        # Build the loader on demand; that requires the MinGW cross-compiler.
        if not checks["loader_compiled"]:
            if not checks["mingw_found"]:
                raise RuntimeError(
                    "MinGW cross-compiler needed to build Wine loader. "
                    "Install: sudo apt install mingw-w64"
                )
            bridge.compile_loader()

        self._engine = bridge
        self._backend_name = "wine"

    def _init_onnx(self) -> None:
        """Initialize pure ONNX backend (fallback)."""
        from ocr.engine_onnx import OcrEngineOnnx

        self._engine = OcrEngineOnnx(ocr_data_dir=self._ocr_data)
        self._backend_name = "onnx"

    def _recognize_wine(self, image: "Image.Image") -> OcrResult:
        """Run OCR via Wine bridge and convert JSON → OcrResult.

        Any exception raised by the bridge call is reported through
        ``OcrResult.error`` instead of propagating.
        """
        try:
            raw = self._engine.recognize_pil(image)
        except Exception as e:
            return OcrResult(error=f"Wine bridge error: {e}")

        return self._json_to_ocr_result(raw)

    @staticmethod
    def _pad_bbox(bbox) -> list:
        """Return exactly eight coordinates from *bbox*.

        Missing values are filled with 0 and extra values are dropped, so a
        short or absent box can never raise IndexError during conversion.
        """
        coords = list(bbox or ())[:8]
        return coords + [0] * (8 - len(coords))

    @staticmethod
    def _rect_from_bbox(bbox) -> BoundingRect:
        """Build a BoundingRect from a flat [x1, y1, ..., x4, y4] sequence."""
        x1, y1, x2, y2, x3, y3, x4, y4 = OcrEngineUnified._pad_bbox(bbox)
        return BoundingRect(
            x1=x1, y1=y1, x2=x2, y2=y2,
            x3=x3, y3=y3, x4=x4, y4=y4,
        )

    @staticmethod
    def _json_to_ocr_result(data: dict) -> OcrResult:
        """Convert Wine bridge JSON output to OcrResult dataclass.

        Args:
            data: Mapping with optional keys "error", "lines" and "text_angle".
                Each line may carry "text", "bbox" and "words"; each word may
                carry "text", "confidence" and "bbox".

        Returns:
            An error-only OcrResult when ``data`` has an "error" key; otherwise
            an OcrResult whose text joins the non-empty line texts with "\\n".
        """
        if "error" in data:
            return OcrResult(error=data["error"])

        to_rect = OcrEngineUnified._rect_from_bbox

        lines = []
        for line_data in data.get("lines", []):
            words = [
                OcrWord(
                    text=word_data.get("text", ""),
                    confidence=word_data.get("confidence", 0.0),
                    bounding_rect=to_rect(word_data.get("bbox")),
                )
                for word_data in line_data.get("words", [])
            ]
            lines.append(OcrLine(
                text=line_data.get("text", ""),
                words=words,
                bounding_rect=to_rect(line_data.get("bbox")),
            ))

        full_text = "\n".join(line.text for line in lines if line.text)
        text_angle = data.get("text_angle")

        return OcrResult(text=full_text, text_angle=text_angle, lines=lines)
|
|
|