# Upstream commit eb133b8 (author: DocUA):
# "feat: update ggml kernels, webui components, model templates, and build configurations"
"""
Backend interface for LightOnOCR-1B inference.
Supports both PyTorch and GGUF backends.
"""
from abc import ABC, abstractmethod
from typing import List, Tuple
from PIL import Image
class OCRBackend(ABC):
    """Contract that every OCR backend implementation must satisfy.

    Concrete subclasses (e.g. a PyTorch backend or a GGUF/llama.cpp
    backend) provide model loading, per-image inference, and runtime
    introspection behind a uniform interface.
    """

    @abstractmethod
    def load_model(self):
        """Load the OCR model into memory; must be called before inference."""
        ...

    @abstractmethod
    def process_image(self, image: Image.Image, temperature: float = 0.1) -> str:
        """Run OCR on one image.

        Args:
            image: PIL Image to process
            temperature: Sampling temperature (0 = greedy)

        Returns:
            Extracted text as string
        """
        ...

    @abstractmethod
    def get_backend_info(self) -> dict:
        """Return backend information (name, device, memory usage, etc.)."""
        ...
def get_available_backends() -> List[str]:
    """Return the names of OCR backends usable in this environment.

    "pytorch" is always offered. "gguf" is added when either the bundled
    llama.cpp CLI binary has been built, or — as a fallback — the
    llama-cpp-python package can be imported.
    """
    from pathlib import Path

    available = ["pytorch"]

    # Preferred GGUF path: the compiled llama.cpp multimodal CLI binary.
    repo_root = Path(__file__).parent.parent
    mtmd_cli = repo_root / "llama.cpp" / "build" / "bin" / "llama-mtmd-cli"
    if mtmd_cli.exists():
        available.append("gguf")
        return available

    # CLI binary absent: fall back to the python bindings, if installed.
    try:
        import llama_cpp  # noqa: F401 -- probing availability only

        available.append("gguf")
    except ImportError:
        pass
    return available
def create_backend(backend_name: str) -> OCRBackend:
    """Instantiate the OCR backend identified by *backend_name*.

    Args:
        backend_name: "pytorch" or "gguf"

    Returns:
        OCRBackend instance

    Raises:
        ValueError: if *backend_name* matches no known backend.
    """
    # Imports stay function-local so a backend's heavy dependencies are
    # only touched when that backend is actually requested.
    if backend_name == "gguf":
        from .gguf_backend import GGUFBackend

        return GGUFBackend()
    if backend_name == "pytorch":
        from .pytorch_backend import PyTorchBackend

        return PyTorchBackend()
    raise ValueError(f"Unknown backend: {backend_name}. Available: {get_available_backends()}")