oneocr / ocr /engine_unified.py
OneOCR Dev
feat: Wine bridge - run DLL on Linux via Wine (100% accuracy)
be4a6f1
"""OCR engine — unified wrapper providing 100% accuracy on any platform.

Backend selection (automatic):
    1. Windows → native DLL via ctypes (fastest, 100% accuracy)
    2. Linux/macOS with Wine → DLL via Wine subprocess (100% accuracy)
    3. Fallback → pure Python/ONNX reimplementation (~53% match rate)

Usage:
    from ocr.engine_unified import OcrEngineUnified

    engine = OcrEngineUnified()
    result = engine.recognize_pil(pil_image)
    print(result.text)
    print(f"Backend: {engine.backend_name}")
"""
from __future__ import annotations
import json
import logging
import platform
import sys
from pathlib import Path
from typing import TYPE_CHECKING
from ocr.models import BoundingRect, OcrLine, OcrResult, OcrWord
if TYPE_CHECKING:
from PIL import Image
logger = logging.getLogger(__name__)
class OcrEngineUnified:
    """Unified OCR engine — auto-selects the best available backend.

    Priority order:
        1. Native Windows DLL (100%, fastest)
        2. Wine bridge on Linux (100%, ~2x slower due to subprocess)
        3. ONNX reimplementation (~53%, fully cross-platform)

    Args:
        ocr_data_dir: Path to directory with DLL/model files.
            Defaults to PROJECT_ROOT/ocr_data/ (two levels above this file).
        force_backend: Force a specific backend: 'dll', 'wine', 'onnx',
            or None (auto). Any falsy value is treated as auto.

    Raises:
        ValueError: If ``force_backend`` is not one of ``BACKENDS``.
        Exception: When a backend is forced, whatever its initializer raises
            propagates to the caller (auto-selection swallows such errors).
    """

    # Backend identifiers, in auto-selection priority order.
    BACKENDS = ("dll", "wine", "onnx")

    # Number of coordinates in a bounding quadrilateral (x1, y1 … x4, y4).
    _BBOX_LEN = 8

    def __init__(
        self,
        ocr_data_dir: str | Path | None = None,
        force_backend: str | None = None,
    ) -> None:
        if ocr_data_dir is None:
            ocr_data_dir = Path(__file__).resolve().parent.parent / "ocr_data"
        self._ocr_data = Path(ocr_data_dir)
        self._backend_name: str = "none"
        self._engine = None

        if force_backend:
            if force_backend not in self.BACKENDS:
                raise ValueError(
                    f"Unknown backend: {force_backend!r}. Choose from {self.BACKENDS}"
                )
            self._init_backend(force_backend)
        else:
            self._auto_select()

    @property
    def backend_name(self) -> str:
        """Name of the active backend ('dll', 'wine', 'onnx' or 'none')."""
        return self._backend_name

    def recognize_pil(self, image: "Image.Image") -> OcrResult:
        """Run OCR on a PIL Image. Returns OcrResult with text, lines, words.

        When no backend could be initialized, an ``OcrResult`` carrying an
        error message is returned instead of raising.
        """
        if self._backend_name == "wine":
            # The Wine bridge yields JSON-like data that must be converted.
            return self._recognize_wine(image)
        if self._backend_name in ("dll", "onnx"):
            # Both engines are called the same way and their result is returned as-is.
            return self._engine.recognize_pil(image)
        return OcrResult(error="No OCR backend available")

    def recognize_bytes(self, image_bytes: bytes) -> OcrResult:
        """Run OCR on raw image bytes (PNG/JPEG/etc).

        The bytes are decoded with ``PIL.Image.open``; decoding errors raised
        by PIL propagate to the caller.
        """
        # Imported lazily so PIL is only required when this entry point is used.
        from io import BytesIO

        from PIL import Image as PILImage

        img = PILImage.open(BytesIO(image_bytes))
        return self.recognize_pil(img)

    # ── Backend initialization ──────────────────────────────────

    def _auto_select(self) -> None:
        """Try backends in priority order and keep the first that initializes."""
        for backend in self.BACKENDS:
            try:
                self._init_backend(backend)
            except Exception as e:
                # Deliberate best-effort: any failure (missing module, wrong
                # platform, missing files) just means "try the next backend".
                logger.debug("Backend %s unavailable: %s", backend, e)
            else:
                logger.info("OCR backend: %s", self._backend_name)
                return

        logger.warning("No OCR backend available!")
        self._engine = None
        self._backend_name = "none"

    def _init_backend(self, name: str) -> None:
        """Initialize a specific backend.

        Raises:
            ValueError: If ``name`` is not a known backend.
        """
        initializers = {
            "dll": self._init_dll,
            "wine": self._init_wine,
            "onnx": self._init_onnx,
        }
        try:
            initialize = initializers[name]
        except KeyError:
            # Fail loudly instead of silently leaving the engine unconfigured.
            raise ValueError(
                f"Unknown backend: {name!r}. Choose from {self.BACKENDS}"
            ) from None
        initialize()

    def _init_dll(self) -> None:
        """Initialize native Windows DLL backend.

        Raises:
            RuntimeError: If not running on Windows.
        """
        if platform.system() != "Windows":
            raise RuntimeError("DLL backend requires Windows")

        from ocr.engine import OcrEngine

        self._engine = OcrEngine(ocr_data_dir=self._ocr_data)
        self._backend_name = "dll"

    def _init_wine(self) -> None:
        """Initialize Wine bridge backend.

        Raises:
            RuntimeError: On Windows, or when Wine, the DLL, the model file or
                (if the loader is not yet compiled) MinGW is missing.
        """
        if platform.system() == "Windows":
            raise RuntimeError("Wine backend is for Linux/macOS only")

        # wine_bridge lives in PROJECT_ROOT/tools, which is not a package, so
        # make it importable. Guard against piling up duplicate entries when
        # initialization is attempted more than once in the same process.
        tools_dir = str(Path(__file__).resolve().parent.parent / "tools")
        if tools_dir not in sys.path:
            sys.path.insert(0, tools_dir)
        from wine_bridge import WineBridge

        bridge = WineBridge(ocr_data_dir=self._ocr_data)
        checks = bridge.check_requirements()

        if not checks["wine_found"]:
            raise RuntimeError("Wine not installed")
        if not checks["dll_exists"]:
            raise RuntimeError(f"oneocr.dll not found in {self._ocr_data}")
        if not checks["model_exists"]:
            raise RuntimeError(f"oneocr.onemodel not found in {self._ocr_data}")

        # Compile loader if needed
        if not checks["loader_compiled"]:
            if not checks["mingw_found"]:
                raise RuntimeError(
                    "MinGW cross-compiler needed to build Wine loader. "
                    "Install: sudo apt install mingw-w64"
                )
            bridge.compile_loader()

        self._engine = bridge
        self._backend_name = "wine"

    def _init_onnx(self) -> None:
        """Initialize pure ONNX backend (fallback)."""
        from ocr.engine_onnx import OcrEngineOnnx

        self._engine = OcrEngineOnnx(ocr_data_dir=self._ocr_data)
        self._backend_name = "onnx"

    # ── Wine result conversion ─────────────────────────────────

    def _recognize_wine(self, image: "Image.Image") -> OcrResult:
        """Run OCR via Wine bridge and convert JSON → OcrResult.

        Bridge failures are reported through ``OcrResult.error`` rather than
        raised.
        """
        try:
            raw = self._engine.recognize_pil(image)
        except Exception as e:
            return OcrResult(error=f"Wine bridge error: {e}")

        if not isinstance(raw, dict):
            # The converter expects a mapping; report anything else as an error
            # instead of crashing with a TypeError.
            return OcrResult(
                error=f"Wine bridge error: unexpected result type {type(raw).__name__}"
            )
        return self._json_to_ocr_result(raw)

    @staticmethod
    def _normalize_bbox(bbox: list | tuple | None) -> list:
        """Return ``bbox`` as a list of exactly 8 values.

        Missing coordinates are filled with 0 and extra ones are dropped, so a
        short, empty or ``None`` bbox can never cause an ``IndexError``.
        """
        size = OcrEngineUnified._BBOX_LEN
        values = list(bbox)[:size] if bbox else []
        values.extend([0] * (size - len(values)))
        return values

    @staticmethod
    def _bbox_to_rect(bbox: list | tuple | None) -> BoundingRect:
        """Build a BoundingRect from a flat ``[x1, y1, …, x4, y4]`` sequence."""
        x1, y1, x2, y2, x3, y3, x4, y4 = OcrEngineUnified._normalize_bbox(bbox)
        return BoundingRect(x1=x1, y1=y1, x2=x2, y2=y2, x3=x3, y3=y3, x4=x4, y4=y4)

    @staticmethod
    def _json_to_ocr_result(data: dict) -> OcrResult:
        """Convert Wine bridge JSON output to OcrResult dataclass.

        Reads the keys ``error``, ``lines`` (each with ``text``, ``bbox``,
        ``words``), per-word ``text``/``confidence``/``bbox`` and
        ``text_angle``. Missing keys fall back to empty/zero defaults; word
        and line bounding boxes are padded to 8 coordinates the same way.
        """
        if "error" in data:
            return OcrResult(error=data["error"])

        lines = []
        # ``or []`` also covers an explicit null for "lines"/"words".
        for line_data in data.get("lines") or []:
            words = [
                OcrWord(
                    text=word_data.get("text", ""),
                    confidence=word_data.get("confidence", 0.0),
                    bounding_rect=OcrEngineUnified._bbox_to_rect(word_data.get("bbox")),
                )
                for word_data in line_data.get("words") or []
            ]
            lines.append(
                OcrLine(
                    text=line_data.get("text", ""),
                    words=words,
                    bounding_rect=OcrEngineUnified._bbox_to_rect(line_data.get("bbox")),
                )
            )

        # Lines with empty text are kept in ``lines`` but skipped in the joined text.
        full_text = "\n".join(line.text for line in lines if line.text)
        text_angle = data.get("text_angle")
        return OcrResult(text=full_text, text_angle=text_angle, lines=lines)