"""
FinEE Backends - Abstract interface for LLM backends.

All LLM backends must implement this interface.
"""

import logging
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)


class BaseBackend(ABC):
    """
    Abstract base class for LLM backends.

    Any backend (MLX, Transformers, llama.cpp) must implement these methods.
    """

    def __init__(self, model_id: str = "Ranjit0034/finance-entity-extractor"):
        """
        Initialize the backend.

        Args:
            model_id: Hugging Face model ID or local path.
        """
        self.model_id = model_id
        self._model = None
        self._tokenizer = None
        self._loaded = False

    @property
    def name(self) -> str:
        """Return the backend name."""
        return self.__class__.__name__

    @abstractmethod
    def is_available(self) -> bool:
        """
        Check whether this backend can be used on the current system.

        Returns:
            True if all dependencies are installed and the hardware is compatible.
        """
        raise NotImplementedError

    @abstractmethod
    def load_model(self, model_path: Optional[str] = None) -> bool:
        """
        Load the model into memory.

        Args:
            model_path: Optional local path (overrides model_id).

        Returns:
            True if the model loaded successfully.
        """
        raise NotImplementedError

    @abstractmethod
    def generate(self, prompt: str, max_tokens: int = 200,
                 temperature: float = 0.1, **kwargs) -> str:
        """
        Generate text from a prompt.

        Args:
            prompt: Input prompt.
            max_tokens: Maximum number of tokens to generate.
            temperature: Sampling temperature.
            **kwargs: Additional generation parameters.

        Returns:
            The generated text.
        """
        raise NotImplementedError

    def generate_batch(self, prompts: List[str], max_tokens: int = 200,
                       temperature: float = 0.1, **kwargs) -> List[str]:
        """
        Generate text for multiple prompts.

        The default implementation calls generate() in a loop; backends may
        override this for batched inference.

        Args:
            prompts: List of input prompts.
            max_tokens: Maximum number of tokens to generate per prompt.
            temperature: Sampling temperature.
            **kwargs: Additional generation parameters.

        Returns:
            List of generated texts, one per prompt.
        """
        return [self.generate(p, max_tokens, temperature, **kwargs) for p in prompts]

    def unload(self) -> None:
        """
        Free the model from memory.

        Call this when done with the model to release GPU/system memory.
        """
        self._model = None
        self._tokenizer = None
        self._loaded = False
        logger.info(f"{self.name}: Model unloaded")

    @property
    def is_loaded(self) -> bool:
        """Check whether the model is currently loaded."""
        return self._loaded

    def get_info(self) -> Dict[str, Any]:
        """Return backend metadata: name, model ID, availability, load state."""
        return {
            'name': self.name,
            'model_id': self.model_id,
            'available': self.is_available(),
            'loaded': self.is_loaded,
        }

    def __repr__(self) -> str:
        status = "loaded" if self.is_loaded else "not loaded"
        return f"{self.name}(model={self.model_id}, {status})"


class NoBackendError(Exception):
    """Raised when no LLM backend is available."""
    pass


class BackendLoadError(Exception):
    """Raised when a backend fails to load a model."""
    pass
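

# ---------------------------------------------------------------------------
# Illustrative sketch only (not part of FinEE): `EchoBackend` below is a
# hypothetical subclass showing the smallest surface a real backend
# (MLX, Transformers, llama.cpp) must implement. It fakes "generation" by
# truncating the prompt instead of running a model.
class EchoBackend(BaseBackend):
    """Toy backend that satisfies the BaseBackend interface, for testing."""

    def is_available(self) -> bool:
        # No third-party dependencies, so always usable.
        return True

    def load_model(self, model_path: Optional[str] = None) -> bool:
        # A real backend would load weights here; we store a placeholder.
        self._model = model_path or self.model_id
        self._loaded = True
        return True

    def generate(self, prompt: str, max_tokens: int = 200,
                 temperature: float = 0.1, **kwargs) -> str:
        if not self._loaded:
            raise BackendLoadError(f"{self.name}: model not loaded")
        # Stand-in for inference: echo back at most max_tokens characters.
        return prompt[:max_tokens]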
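

# Hypothetical usage sketch: pick the first available backend from a list of
# candidate classes, raising NoBackendError when none can run on this system.
# The candidate list and `_select_backend` helper are illustrative; real
# callers would register their actual backend classes.
def _select_backend(candidates: List[type]) -> BaseBackend:
    for cls in candidates:
        backend = cls()
        if backend.is_available():
            return backend
    raise NoBackendError("No LLM backend is available on this system")


if __name__ == "__main__":
    backend = _select_backend([EchoBackend])
    if backend.load_model():
        print(backend.generate("Extract entities: AAPL gained 3% on Tuesday."))
    backend.unload()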