| """ |
| HTR Engine Plugin System - Base Classes and Registry |
| |
| This module defines the plugin architecture for HTR (Handwritten Text Recognition) engines. |
| All HTR engines (TrOCR, Qwen3, CRNN-CTC, Kraken, etc.) implement the HTREngine interface. |
| |
| Design principles: |
| - Abstraction: Each engine is self-contained and interchangeable |
| - Scalability: New engines can be added without modifying existing code |
| - Consistency: All engines expose the same interface to the GUI |
| - Flexibility: Each engine can have custom configuration widgets |
| """ |
|
|
| from abc import ABC, abstractmethod |
| from typing import Dict, Any, Optional, List |
| from dataclasses import dataclass |
| import os |
| import numpy as np |
|
|
| try: |
| from PyQt6.QtWidgets import QWidget, QVBoxLayout, QLabel |
| PYQT_AVAILABLE = True |
| except ImportError: |
| PYQT_AVAILABLE = False |
| QWidget = object |
|
|
|
|
| @dataclass |
| class TranscriptionResult: |
| """Result from HTR engine transcription.""" |
| text: str |
| confidence: float = 1.0 |
| metadata: Dict[str, Any] = None |
|
|
| def __post_init__(self): |
| if self.metadata is None: |
| self.metadata = {} |
|
|
|
|
| class HTREngine(ABC): |
| """Abstract base class for HTR engines. |
| |
| All HTR engines must implement this interface to be compatible |
| with the GUI and batch processing systems. |
| """ |
|
|
| @abstractmethod |
| def get_name(self) -> str: |
| """Get display name for the engine. |
| |
| Returns: |
| str: Human-readable engine name (e.g., "TrOCR", "Qwen3 VLM") |
| """ |
| pass |
|
|
| @abstractmethod |
| def get_description(self) -> str: |
| """Get brief description of the engine. |
| |
| Returns: |
| str: One-line description (e.g., "Transformer-based OCR for manuscripts") |
| """ |
| pass |
|
|
| @abstractmethod |
| def is_available(self) -> bool: |
| """Check if engine dependencies are installed and functional. |
| |
| Returns: |
| bool: True if engine can be used, False otherwise |
| """ |
| pass |
|
|
| @abstractmethod |
| def get_unavailable_reason(self) -> str: |
| """Get reason why engine is unavailable (if is_available() == False). |
| |
| Returns: |
| str: Explanation and installation instructions |
| """ |
| pass |
|
|
| @abstractmethod |
| def get_config_widget(self) -> QWidget: |
| """Create and return configuration widget for this engine. |
| |
| The widget should contain all engine-specific controls (model selection, |
| beam search, preprocessing options, etc.). The GUI will embed this widget |
| in the configuration panel. |
| |
| Returns: |
| QWidget: Qt widget with engine configuration controls |
| """ |
| pass |
|
|
| @abstractmethod |
| def get_config(self) -> Dict[str, Any]: |
| """Get current configuration from the config widget. |
| |
| This method extracts values from the widget controls and returns |
| them as a dictionary that can be passed to transcribe_line(). |
| |
| Returns: |
| Dict[str, Any]: Configuration parameters |
| """ |
| pass |
|
|
| @abstractmethod |
| def set_config(self, config: Dict[str, Any]): |
| """Set configuration values in the config widget. |
| |
| Used to restore saved settings when switching engines. |
| |
| Args: |
| config: Configuration parameters |
| """ |
| pass |
|
|
| @abstractmethod |
| def load_model(self, config: Dict[str, Any]) -> bool: |
| """Load the HTR model with given configuration. |
| |
| Args: |
| config: Configuration parameters (from get_config()) |
| |
| Returns: |
| bool: True if model loaded successfully, False otherwise |
| """ |
| pass |
|
|
| @abstractmethod |
| def unload_model(self): |
| """Unload model from memory to free resources. |
| |
| Called when switching to a different engine or closing the application. |
| """ |
| pass |
|
|
| @abstractmethod |
| def is_model_loaded(self) -> bool: |
| """Check if model is currently loaded. |
| |
| Returns: |
| bool: True if model is ready for inference |
| """ |
| pass |
|
|
| @abstractmethod |
| def transcribe_line(self, image: np.ndarray, config: Optional[Dict[str, Any]] = None) -> TranscriptionResult: |
| """Transcribe a single line image. |
| |
| Args: |
| image: Line image as numpy array (RGB, shape: H x W x 3) |
| config: Optional configuration overrides |
| |
| Returns: |
| TranscriptionResult: Transcription text and metadata |
| """ |
| pass |
|
|
| def requires_line_segmentation(self) -> bool: |
| """Check if engine requires pre-segmented lines or can process full pages. |
| |
| Returns: |
| bool: True if lines must be segmented first (TrOCR, CRNN-CTC), |
| False if engine handles full pages (Qwen3, Commercial APIs) |
| """ |
| return True |
|
|
| def transcribe_lines(self, images: List[np.ndarray], config: Optional[Dict[str, Any]] = None) -> List[TranscriptionResult]: |
| """Transcribe multiple line images (batch processing). |
| |
| Default implementation calls transcribe_line() for each image. |
| Engines can override this for optimized batch processing. |
| |
| Args: |
| images: List of line images |
| config: Optional configuration overrides |
| |
| Returns: |
| List[TranscriptionResult]: Transcriptions for each image |
| """ |
| return [self.transcribe_line(img, config) for img in images] |
|
|
| def supports_batch(self) -> bool: |
| """Check if engine supports optimized batch processing. |
| |
| Returns: |
| bool: True if transcribe_lines() is optimized, False if it just loops |
| """ |
| return False |
|
|
| def get_aliases(self) -> List[str]: |
| """Get alternative names for this engine (e.g., short CLI aliases). |
| |
| Returns: |
| List[str]: Alternative names accepted by the registry (default: none) |
| """ |
| return [] |
|
|
| def get_capabilities(self) -> Dict[str, bool]: |
| """Get engine capabilities. |
| |
| Returns: |
| Dict with capability flags: |
| - batch_processing: Supports batch inference |
| - confidence_scores: Returns confidence scores |
| - beam_search: Supports beam search decoding |
| - language_model: Uses language model for post-processing |
| - preprocessing: Has built-in preprocessing |
| """ |
| return { |
| "batch_processing": self.supports_batch(), |
| "confidence_scores": False, |
| "beam_search": False, |
| "language_model": False, |
| "preprocessing": False, |
| } |
|
|
|
|
| class HTREngineRegistry: |
| """Registry of available HTR engines. |
| |
| Manages discovery, registration, and instantiation of HTR engines. |
| """ |
|
|
| def __init__(self): |
| self.engines: List[HTREngine] = [] |
| self._engine_cache: Dict[str, HTREngine] = {} |
|
|
| def register(self, engine: HTREngine): |
| """Register an HTR engine. |
| |
| Args: |
| engine: HTREngine instance to register |
| """ |
| self.engines.append(engine) |
| self._engine_cache[engine.get_name()] = engine |
| for alias in engine.get_aliases(): |
| self._engine_cache[alias] = engine |
|
|
| def discover_engines(self): |
| """Automatically discover and register all available engines. |
| |
| Tries to import each engine module and registers it if available. |
| """ |
| if os.environ.get("POLYSCRIPTOR_DEMO_MODE") == "hf_space": |
| demo_engines = [ |
| ("CRNN-CTC", "engines.pylaia_engine", "PyLaiaEngine"), |
| ("Commercial APIs", "engines.commercial_api_engine", "CommercialAPIEngine"), |
| ("OpenWebUI", "engines.openwebui_engine", "OpenWebUIEngine"), |
| ] |
| for label, module_name, class_name in demo_engines: |
| try: |
| module = __import__(module_name, fromlist=[class_name]) |
| self.register(getattr(module, class_name)()) |
| except ImportError as e: |
| print(f"Warning: Failed to load {label} engine: {e}") |
| return |
|
|
| |
| try: |
| from engines.trocr_engine import TrOCREngine |
| self.register(TrOCREngine()) |
| except ImportError as e: |
| print(f"Warning: Failed to load TrOCR engine: {e}") |
|
|
| |
| try: |
| from engines.qwen3_engine import Qwen3Engine |
| self.register(Qwen3Engine()) |
| except ImportError as e: |
| print(f"Warning: Failed to load Qwen3 engine: {e}") |
|
|
| |
| try: |
| from engines.churro_engine import ChurroEngine |
| self.register(ChurroEngine()) |
| except ImportError as e: |
| print(f"Warning: Failed to load Churro engine: {e}") |
|
|
| |
| try: |
| from engines.pylaia_engine import PyLaiaEngine |
| self.register(PyLaiaEngine()) |
| except ImportError as e: |
| print(f"Warning: Failed to load CRNN-CTC engine: {e}") |
|
|
| |
| try: |
| from engines.kraken_engine import KrakenEngine |
| self.register(KrakenEngine()) |
| except ImportError as e: |
| print(f"Warning: Failed to load Kraken engine: {e}") |
|
|
| |
| try: |
| from engines.commercial_api_engine import CommercialAPIEngine |
| self.register(CommercialAPIEngine()) |
| except ImportError as e: |
| print(f"Warning: Failed to load Commercial API engine: {e}") |
|
|
| |
| try: |
| from engines.party_engine import PartyEngine |
| self.register(PartyEngine()) |
| except ImportError as e: |
| print(f"Warning: Failed to load Party engine: {e}") |
|
|
| |
| try: |
| from engines.openwebui_engine import OpenWebUIEngine |
| self.register(OpenWebUIEngine()) |
| except ImportError as e: |
| print(f"Warning: Failed to load OpenWebUI engine: {e}") |
|
|
| |
| try: |
| from engines.deepseek_ocr_engine import DeepSeekOCREngine |
| self.register(DeepSeekOCREngine()) |
| except ImportError as e: |
| print(f"Warning: Failed to load DeepSeek-OCR engine: {e}") |
|
|
| |
| try: |
| from engines.lighton_ocr_engine import LightOnOCREngine |
| self.register(LightOnOCREngine()) |
| except ImportError as e: |
| print(f"Warning: Failed to load LightOnOCR engine: {e}") |
|
|
| |
| try: |
| from engines.paddle_engine import PaddleOCREngine |
| self.register(PaddleOCREngine()) |
| except ImportError as e: |
| print(f"Warning: Failed to load PaddleOCR engine: {e}") |
|
|
| def get_available_engines(self) -> List[HTREngine]: |
| """Get list of engines with satisfied dependencies. |
| |
| Returns: |
| List[HTREngine]: Engines that can be used |
| """ |
| return [e for e in self.engines if e.is_available()] |
|
|
| def get_all_engines(self) -> List[HTREngine]: |
| """Get all registered engines (including unavailable ones). |
| |
| Returns: |
| List[HTREngine]: All registered engines |
| """ |
| return self.engines |
|
|
| def get_engine_by_name(self, name: str) -> Optional[HTREngine]: |
| """Get engine by display name. |
| |
| Args: |
| name: Engine display name |
| |
| Returns: |
| Optional[HTREngine]: Engine instance or None if not found |
| """ |
| return self._engine_cache.get(name) |
|
|
| def get_engine_names(self) -> List[str]: |
| """Get list of available engine names. |
| |
| Returns: |
| List[str]: Engine display names |
| """ |
| return [e.get_name() for e in self.get_available_engines()] |
|
|
|
|
| |
| _global_registry: Optional[HTREngineRegistry] = None |
|
|
|
|
| def get_global_registry() -> HTREngineRegistry: |
| """Get global HTR engine registry (singleton). |
| |
| Returns: |
| HTREngineRegistry: Global registry instance |
| """ |
| global _global_registry |
| if _global_registry is None: |
| _global_registry = HTREngineRegistry() |
| _global_registry.discover_engines() |
| return _global_registry |
|
|
|
|
| |
| def get_available_engine_names() -> List[str]: |
| """Get list of available engine names (convenience function). |
| |
| Returns: |
| List[str]: Engine display names |
| """ |
| return get_global_registry().get_engine_names() |
|
|