File size: 12,813 Bytes
78431ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
"""
HTR Engine Plugin System - Base Classes and Registry

This module defines the plugin architecture for HTR (Handwritten Text Recognition) engines.
All HTR engines (TrOCR, Qwen3, CRNN-CTC, Kraken, etc.) implement the HTREngine interface.

Design principles:
- Abstraction: Each engine is self-contained and interchangeable
- Scalability: New engines can be added without modifying existing code
- Consistency: All engines expose the same interface to the GUI
- Flexibility: Each engine can have custom configuration widgets
"""

from abc import ABC, abstractmethod
from typing import Dict, Any, Optional, List
from dataclasses import dataclass
import os
import numpy as np

try:
    from PyQt6.QtWidgets import QWidget, QVBoxLayout, QLabel
    PYQT_AVAILABLE = True
except ImportError:
    PYQT_AVAILABLE = False
    QWidget = object


@dataclass
class TranscriptionResult:
    """Result from HTR engine transcription.

    Attributes:
        text: The transcribed text.
        confidence: Confidence value attached to the transcription
            (defaults to 1.0).
        metadata: Free-form extra information. May be passed as None (the
            default); it is then replaced by a fresh empty dict, so the
            attribute is always a dict after construction.
    """
    text: str
    confidence: float = 1.0
    # None is only a placeholder default: a literal `{}` default would be
    # rejected by dataclass (mutable default), so the dict is created per
    # instance in __post_init__ instead.
    metadata: Optional[Dict[str, Any]] = None

    def __post_init__(self):
        """Replace a missing metadata value with a new, unshared dict."""
        if self.metadata is None:
            self.metadata = {}


class HTREngine(ABC):
    """Common interface every HTR engine plugin has to implement.

    The GUI and the batch-processing code talk to engines only through the
    methods declared here. Methods marked abstract must be provided by each
    concrete engine; the remaining methods come with defaults that an engine
    may override.
    """

    @abstractmethod
    def get_name(self) -> str:
        """Return the engine's display name.

        Returns:
            str: Human-readable name, e.g. "TrOCR" or "Qwen3 VLM".
        """

    @abstractmethod
    def get_description(self) -> str:
        """Return a short description of the engine.

        Returns:
            str: A one-line summary, e.g.
                "Transformer-based OCR for manuscripts".
        """

    @abstractmethod
    def is_available(self) -> bool:
        """Report whether the engine's dependencies are installed and usable.

        Returns:
            bool: True when the engine can be used, False otherwise.
        """

    @abstractmethod
    def get_unavailable_reason(self) -> str:
        """Explain why the engine cannot be used.

        Meaningful when is_available() returns False.

        Returns:
            str: Explanation together with installation instructions.
        """

    @abstractmethod
    def get_config_widget(self) -> QWidget:
        """Build the configuration widget for this engine.

        The widget holds all engine-specific controls (model selection,
        beam search, preprocessing options, ...). The GUI embeds it in its
        configuration panel.

        Returns:
            QWidget: Qt widget carrying the engine's configuration controls.
        """

    @abstractmethod
    def get_config(self) -> Dict[str, Any]:
        """Read the current settings out of the configuration widget.

        The returned dictionary is in the form accepted by
        transcribe_line().

        Returns:
            Dict[str, Any]: Configuration parameters.
        """

    @abstractmethod
    def set_config(self, config: Dict[str, Any]):
        """Push configuration values into the configuration widget.

        Used to restore saved settings when the user switches engines.

        Args:
            config: Configuration parameters to apply.
        """

    @abstractmethod
    def load_model(self, config: Dict[str, Any]) -> bool:
        """Load the HTR model described by the given configuration.

        Args:
            config: Configuration parameters, as produced by get_config().

        Returns:
            bool: True if loading succeeded, False otherwise.
        """

    @abstractmethod
    def unload_model(self):
        """Release the loaded model so its resources are freed.

        Called when another engine is selected or the application closes.
        """

    @abstractmethod
    def is_model_loaded(self) -> bool:
        """Report whether a model is currently loaded.

        Returns:
            bool: True when the engine is ready for inference.
        """

    @abstractmethod
    def transcribe_line(self, image: np.ndarray, config: Optional[Dict[str, Any]] = None) -> TranscriptionResult:
        """Transcribe one line image.

        Args:
            image: Line image as a numpy array (RGB, shape: H x W x 3).
            config: Optional configuration overrides.

        Returns:
            TranscriptionResult: The transcribed text and its metadata.
        """

    def requires_line_segmentation(self) -> bool:
        """Tell whether the engine needs pre-segmented lines.

        Returns:
            bool: True if lines must be segmented first (TrOCR, CRNN-CTC),
                  False if the engine handles full pages (Qwen3,
                  Commercial APIs).
        """
        # Base-class default: line segmentation is required unless an
        # engine overrides this method.
        return True

    def transcribe_lines(self, images: List[np.ndarray], config: Optional[Dict[str, Any]] = None) -> List[TranscriptionResult]:
        """Transcribe several line images.

        This default simply calls transcribe_line() once per image, in
        order. Engines with a real batched implementation can override it.

        Args:
            images: Line images to transcribe.
            config: Optional configuration overrides, forwarded unchanged
                to every transcribe_line() call.

        Returns:
            List[TranscriptionResult]: One result per input image.
        """
        results: List[TranscriptionResult] = []
        for line_image in images:
            results.append(self.transcribe_line(line_image, config))
        return results

    def supports_batch(self) -> bool:
        """Tell whether transcribe_lines() is an optimized batch path.

        Returns:
            bool: True if transcribe_lines() is optimized, False if it only
                loops over transcribe_line() (the base-class default).
        """
        return False

    def get_aliases(self) -> List[str]:
        """Return alternative names for this engine (e.g. short CLI aliases).

        Returns:
            List[str]: Extra names the registry accepts for this engine;
                empty by default.
        """
        return []

    def get_capabilities(self) -> Dict[str, bool]:
        """Describe what the engine can do as a set of boolean flags.

        Returns:
            Dict with capability flags:
            - batch_processing: Supports batch inference
              (taken from supports_batch())
            - confidence_scores: Returns confidence scores
            - beam_search: Supports beam search decoding
            - language_model: Uses language model for post-processing
            - preprocessing: Has built-in preprocessing
        """
        flags = {"batch_processing": self.supports_batch()}
        # Every other capability is off in the base class.
        for feature in ("confidence_scores", "beam_search", "language_model", "preprocessing"):
            flags[feature] = False
        return flags


class HTREngineRegistry:
    """Registry of available HTR engines.

    Manages discovery, registration, and lookup of HTR engines. Engines are
    kept in registration order in ``engines`` and indexed by display name
    and alias in ``_engine_cache``.
    """

    # Each entry is (label used in the warning message, module path,
    # class name). This set is tried when the POLYSCRIPTOR_DEMO_MODE
    # environment variable equals "hf_space".
    _DEMO_ENGINE_SPECS = (
        ("CRNN-CTC", "engines.pylaia_engine", "PyLaiaEngine"),
        ("Commercial APIs", "engines.commercial_api_engine", "CommercialAPIEngine"),
        ("OpenWebUI", "engines.openwebui_engine", "OpenWebUIEngine"),
    )

    # Engines tried in every other case, in registration order.
    _DEFAULT_ENGINE_SPECS = (
        ("TrOCR", "engines.trocr_engine", "TrOCREngine"),
        ("Qwen3", "engines.qwen3_engine", "Qwen3Engine"),
        ("Churro", "engines.churro_engine", "ChurroEngine"),
        ("CRNN-CTC", "engines.pylaia_engine", "PyLaiaEngine"),
        ("Kraken", "engines.kraken_engine", "KrakenEngine"),
        ("Commercial API", "engines.commercial_api_engine", "CommercialAPIEngine"),
        ("Party", "engines.party_engine", "PartyEngine"),
        ("OpenWebUI", "engines.openwebui_engine", "OpenWebUIEngine"),
        ("DeepSeek-OCR", "engines.deepseek_ocr_engine", "DeepSeekOCREngine"),
        ("LightOnOCR", "engines.lighton_ocr_engine", "LightOnOCREngine"),
        ("PaddleOCR", "engines.paddle_engine", "PaddleOCREngine"),
    )

    def __init__(self):
        """Create an empty registry."""
        self.engines: List[HTREngine] = []
        self._engine_cache: Dict[str, HTREngine] = {}

    def register(self, engine: HTREngine):
        """Register an HTR engine.

        The engine is appended to ``engines`` and becomes reachable through
        get_engine_by_name() under its display name and each of its aliases.
        A later registration under the same name or alias replaces the
        earlier lookup entry.

        Args:
            engine: HTREngine instance to register
        """
        self.engines.append(engine)
        self._engine_cache[engine.get_name()] = engine
        for alias in engine.get_aliases():
            self._engine_cache[alias] = engine

    def _try_register(self, label: str, module_name: str, class_name: str) -> bool:
        """Import one engine class, instantiate it and register it.

        An ImportError raised while importing the module or while
        constructing the engine is reported with a warning on stdout and
        otherwise ignored, so one missing engine does not stop discovery.
        Any other exception propagates to the caller.

        Args:
            label: Engine label used in the warning message.
            module_name: Dotted path of the module that defines the engine.
            class_name: Name of the engine class inside that module.

        Returns:
            bool: True if the engine was registered, False if it was skipped.
        """
        # Function-scope import keeps this class independent of the
        # file-level import block.
        import importlib

        try:
            module = importlib.import_module(module_name)
            try:
                engine_cls = getattr(module, class_name)
            except AttributeError as e:
                # Match `from module import Name`, which reports a missing
                # name as ImportError, so it is handled like any other
                # failed engine import.
                raise ImportError(
                    f"cannot import name {class_name!r} from {module_name!r}"
                ) from e
            self.register(engine_cls())
        except ImportError as e:
            print(f"Warning: Failed to load {label} engine: {e}")
            return False
        return True

    def discover_engines(self):
        """Automatically discover and register all available engines.

        Tries to import each known engine module and registers the engine if
        the import succeeds. When the POLYSCRIPTOR_DEMO_MODE environment
        variable is "hf_space", only the demo subset of engines is tried.
        """
        demo_mode = os.environ.get("POLYSCRIPTOR_DEMO_MODE") == "hf_space"
        specs = self._DEMO_ENGINE_SPECS if demo_mode else self._DEFAULT_ENGINE_SPECS
        for label, module_name, class_name in specs:
            self._try_register(label, module_name, class_name)

    def get_available_engines(self) -> List[HTREngine]:
        """Get list of engines with satisfied dependencies.

        Returns:
            List[HTREngine]: Registered engines whose is_available() is True
        """
        return [e for e in self.engines if e.is_available()]

    def get_all_engines(self) -> List[HTREngine]:
        """Get all registered engines (including unavailable ones).

        Returns:
            List[HTREngine]: All registered engines. This is the registry's
                own list, not a copy.
        """
        return self.engines

    def get_engine_by_name(self, name: str) -> Optional[HTREngine]:
        """Get engine by display name or alias.

        Args:
            name: Engine display name, or one of the aliases the engine
                reported when it was registered

        Returns:
            Optional[HTREngine]: Engine instance or None if not found
        """
        return self._engine_cache.get(name)

    def get_engine_names(self) -> List[str]:
        """Get list of available engine names.

        Returns:
            List[str]: Display names of the engines returned by
                get_available_engines()
        """
        return [e.get_name() for e in self.get_available_engines()]


# Module-wide registry shared by all callers; stays None until
# get_global_registry() is called for the first time.
_global_registry: Optional[HTREngineRegistry] = None


def get_global_registry() -> HTREngineRegistry:
    """Return the shared HTR engine registry, creating it on first use.

    The first call builds an HTREngineRegistry, stores it in the module-level
    variable and then runs discover_engines() on it. Later calls return the
    stored instance.

    Returns:
        HTREngineRegistry: Global registry instance
    """
    global _global_registry
    if _global_registry is not None:
        return _global_registry

    # The instance is stored before discovery runs, so it is already the
    # shared registry while engines are being discovered.
    _global_registry = HTREngineRegistry()
    _global_registry.discover_engines()
    return _global_registry


# Convenience function for GUI
def get_available_engine_names() -> List[str]:
    """Return the names of the available engines in the global registry.

    Shortcut for calling get_engine_names() on the registry returned by
    get_global_registry().

    Returns:
        List[str]: Engine display names
    """
    registry = get_global_registry()
    return registry.get_engine_names()