File size: 7,605 Bytes
be4a6f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
"""OCR engine — unified wrapper that auto-selects the most accurate backend available.

Backend selection (automatic):
    1. Windows → native DLL via ctypes (fastest, 100% accuracy)
    2. Linux/macOS with Wine → DLL via Wine subprocess (100% accuracy)
    3. Fallback → pure Python/ONNX reimplementation (~53% match rate)

Usage:
    from ocr.engine_unified import OcrEngineUnified
    engine = OcrEngineUnified()
    result = engine.recognize_pil(pil_image)
    print(result.text)
    print(f"Backend: {engine.backend_name}")
"""

from __future__ import annotations

import json
import logging
import platform
import sys
from pathlib import Path
from typing import TYPE_CHECKING

from ocr.models import BoundingRect, OcrLine, OcrResult, OcrWord

if TYPE_CHECKING:
    from PIL import Image

logger = logging.getLogger(__name__)


class OcrEngineUnified:
    """Unified OCR engine — auto-selects the best available backend.

    Priority order:
        1. Native Windows DLL (100%, fastest)
        2. Wine bridge on Linux (100%, ~2x slower due to subprocess)
        3. ONNX reimplementation (~53%, fully cross-platform)

    Args:
        ocr_data_dir: Path to directory with DLL/model files.
                      Defaults to PROJECT_ROOT/ocr_data/.
        force_backend: Force a specific backend: 'dll', 'wine', 'onnx', or None (auto).

    Raises:
        ValueError: If ``force_backend`` is not one of ``BACKENDS``.
        Exception: When a backend is forced, whatever its initializer raises
            propagates to the caller. In auto mode initializer failures are
            logged at debug level and the next backend is tried.
    """

    BACKENDS = ("dll", "wine", "onnx")

    def __init__(
        self,
        ocr_data_dir: str | Path | None = None,
        force_backend: str | None = None,
    ) -> None:
        if ocr_data_dir is None:
            ocr_data_dir = Path(__file__).resolve().parent.parent / "ocr_data"
        self._ocr_data = Path(ocr_data_dir)
        self._backend_name: str = "none"
        self._engine = None

        if force_backend:
            if force_backend not in self.BACKENDS:
                raise ValueError(f"Unknown backend: {force_backend!r}. Choose from {self.BACKENDS}")
            self._init_backend(force_backend)
        else:
            self._auto_select()

    @property
    def backend_name(self) -> str:
        """Name of the active backend ('dll', 'wine', 'onnx', or 'none')."""
        return self._backend_name

    def recognize_pil(self, image: "Image.Image") -> OcrResult:
        """Run OCR on a PIL Image.

        Args:
            image: The image to recognize.

        Returns:
            The backend's OcrResult. When no backend is active, an OcrResult
            whose ``error`` is set. Wine bridge failures are also reported
            through ``error``; exceptions raised by the 'dll' and 'onnx'
            engines are not caught here.
        """
        if self._backend_name == "wine":
            return self._recognize_wine(image)
        if self._backend_name in ("dll", "onnx"):
            # Both engines are called through the same recognize_pil() method.
            return self._engine.recognize_pil(image)
        return OcrResult(error="No OCR backend available")

    def recognize_bytes(self, image_bytes: bytes) -> OcrResult:
        """Run OCR on raw image bytes (PNG/JPEG/etc).

        The bytes are opened with PIL and passed to :meth:`recognize_pil`.
        """
        from io import BytesIO
        from PIL import Image as PILImage
        img = PILImage.open(BytesIO(image_bytes))
        return self.recognize_pil(img)

    # ── Backend initialization ──────────────────────────────────

    def _auto_select(self) -> None:
        """Try backends in ``BACKENDS`` order and keep the first that initializes.

        If every backend fails, a warning is logged and the backend name is
        left as 'none'.
        """
        for backend in self.BACKENDS:
            try:
                self._init_backend(backend)
                logger.info("OCR backend: %s", self._backend_name)
                return
            except Exception as e:
                # Best-effort probing: an unavailable backend is expected here.
                logger.debug("Backend %s unavailable: %s", backend, e)

        logger.warning("No OCR backend available!")
        self._backend_name = "none"

    def _init_backend(self, name: str) -> None:
        """Initialize the backend called ``name``; unknown names are ignored."""
        if name == "dll":
            self._init_dll()
        elif name == "wine":
            self._init_wine()
        elif name == "onnx":
            self._init_onnx()

    def _init_dll(self) -> None:
        """Initialize native Windows DLL backend.

        Raises:
            RuntimeError: If not running on Windows.
        """
        if platform.system() != "Windows":
            raise RuntimeError("DLL backend requires Windows")
        from ocr.engine import OcrEngine
        self._engine = OcrEngine(ocr_data_dir=self._ocr_data)
        self._backend_name = "dll"

    def _init_wine(self) -> None:
        """Initialize Wine bridge backend.

        Raises:
            RuntimeError: On Windows, or when Wine, the DLL, the model file,
                or (if the loader is not yet compiled) MinGW is missing.
        """
        if platform.system() == "Windows":
            raise RuntimeError("Wine backend is for Linux/macOS only")

        # wine_bridge is imported from the sibling tools/ directory. Add that
        # directory to sys.path only once so repeated initialization does not
        # pile up duplicate entries.
        tools_dir = str(Path(__file__).resolve().parent.parent / "tools")
        if tools_dir not in sys.path:
            sys.path.insert(0, tools_dir)
        from wine_bridge import WineBridge

        bridge = WineBridge(ocr_data_dir=self._ocr_data)
        checks = bridge.check_requirements()

        if not checks["wine_found"]:
            raise RuntimeError("Wine not installed")
        if not checks["dll_exists"]:
            raise RuntimeError(f"oneocr.dll not found in {self._ocr_data}")
        if not checks["model_exists"]:
            raise RuntimeError(f"oneocr.onemodel not found in {self._ocr_data}")

        # Compile loader if needed
        if not checks["loader_compiled"]:
            if not checks["mingw_found"]:
                raise RuntimeError(
                    "MinGW cross-compiler needed to build Wine loader. "
                    "Install: sudo apt install mingw-w64"
                )
            bridge.compile_loader()

        # Only publish the engine once every requirement check has passed.
        self._engine = bridge
        self._backend_name = "wine"

    def _init_onnx(self) -> None:
        """Initialize pure ONNX backend (fallback)."""
        from ocr.engine_onnx import OcrEngineOnnx
        self._engine = OcrEngineOnnx(ocr_data_dir=self._ocr_data)
        self._backend_name = "onnx"

    # ── Wine result conversion ─────────────────────────────────

    def _recognize_wine(self, image: "Image.Image") -> OcrResult:
        """Run OCR via Wine bridge and convert JSON → OcrResult.

        Any exception raised by the bridge call is reported through the
        returned result's ``error`` field instead of propagating.
        """
        try:
            raw = self._engine.recognize_pil(image)
        except Exception as e:
            return OcrResult(error=f"Wine bridge error: {e}")

        return self._json_to_ocr_result(raw)

    @staticmethod
    def _pad_bbox(bbox, size: int = 8) -> list:
        """Return ``bbox`` as a list of exactly ``size`` coordinates.

        Missing trailing values are filled with 0, extra values are dropped,
        and a falsy ``bbox`` (``None`` or empty) yields all zeros.
        """
        coords = list(bbox or ())[:size]
        coords.extend([0] * (size - len(coords)))
        return coords

    @staticmethod
    def _bbox_to_rect(bbox) -> BoundingRect:
        """Build a BoundingRect from a flat [x1, y1, ..., x4, y4] sequence."""
        x1, y1, x2, y2, x3, y3, x4, y4 = OcrEngineUnified._pad_bbox(bbox)
        return BoundingRect(
            x1=x1, y1=y1, x2=x2, y2=y2,
            x3=x3, y3=y3, x4=x4, y4=y4,
        )

    @staticmethod
    def _json_to_ocr_result(data: dict) -> OcrResult:
        """Convert Wine bridge JSON output to OcrResult dataclass.

        Args:
            data: Decoded JSON payload. An ``"error"`` key short-circuits to
                an error result; otherwise ``"lines"`` (each with ``"text"``,
                ``"bbox"`` and ``"words"``) and ``"text_angle"`` are read.

        Returns:
            An OcrResult whose text is the non-empty line texts joined by
            newlines.
        """
        if "error" in data:
            return OcrResult(error=data["error"])

        to_rect = OcrEngineUnified._bbox_to_rect

        lines = []
        # `or []` tolerates an explicit JSON null as well as a missing key.
        for line_data in data.get("lines") or []:
            words = [
                OcrWord(
                    text=word_data.get("text", ""),
                    confidence=word_data.get("confidence", 0.0),
                    # Short or missing boxes are zero-padded rather than
                    # raising IndexError, matching the handling of line boxes.
                    bounding_rect=to_rect(word_data.get("bbox")),
                )
                for word_data in line_data.get("words") or []
            ]
            lines.append(OcrLine(
                text=line_data.get("text", ""),
                words=words,
                bounding_rect=to_rect(line_data.get("bbox")),
            ))

        full_text = "\n".join(line.text for line in lines if line.text)
        text_angle = data.get("text_angle")

        return OcrResult(text=full_text, text_angle=text_angle, lines=lines)