| | """ |
| | FinEE llama.cpp Backend - CPU fallback using llama-cpp-python. |
| | |
| | Works on any platform (Linux, Windows, macOS) without GPU. |
| | """ |
| |
|
| | import logging |
| | from typing import Optional |
| | from pathlib import Path |
| |
|
| | from .base import BaseBackend, BackendLoadError |
| |
|
| | logger = logging.getLogger(__name__) |

try:
    from llama_cpp import Llama
    HAS_LLAMA_CPP = True
except ImportError:
    HAS_LLAMA_CPP = False


class LlamaCppBackend(BaseBackend):
    """
    llama.cpp backend for CPU inference.

    Works on any platform without GPU requirements.
    Uses GGUF format models.

    Requirements:
        - llama-cpp-python package
        - GGUF format model file
    """

    def __init__(self, model_id: str = "Ranjit0034/finance-entity-extractor",
                 n_ctx: int = 4096,
                 n_threads: Optional[int] = None):
        """
        Initialize llama.cpp backend.

        Args:
            model_id: Hugging Face model ID or local GGUF path
            n_ctx: Context length
            n_threads: Number of CPU threads (None = auto)
        """
        super().__init__(model_id)
        self.n_ctx = n_ctx
        self.n_threads = n_threads
        self._gguf_path: Optional[str] = None

    def is_available(self) -> bool:
        """Check if llama-cpp-python is available."""
        return HAS_LLAMA_CPP

    def _find_gguf_file(self, path: str) -> Optional[str]:
        """
        Find a GGUF file in a directory, or verify that a path points to one.

        Args:
            path: Directory or file path

        Returns:
            Path to a GGUF file, or None if none was found
        """
        path_obj = Path(path)

        # Direct path to a GGUF file.
        if path_obj.is_file() and path_obj.suffix == '.gguf':
            return str(path_obj)

        # Directory: prefer a q4_k_m quantization, then f16, then any GGUF.
        if path_obj.is_dir():
            gguf_files = list(path_obj.glob('*.gguf'))
            if gguf_files:
                for pattern in ['*q4_k_m*', '*f16*', '*']:
                    for f in gguf_files:
                        if pattern == '*' or pattern.replace('*', '') in f.name.lower():
                            return str(f)

        return None
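
    # Selection order on the preference patterns above (hypothetical filenames):
    #   ['model-f16.gguf', 'model-q4_k_m.gguf'] -> 'model-q4_k_m.gguf'
    #   ['model-f16.gguf']                      -> 'model-f16.gguf'
    #   ['model-q8_0.gguf']                     -> 'model-q8_0.gguf' (via '*')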

    def load_model(self, model_path: Optional[str] = None) -> bool:
        """
        Load a GGUF model with llama.cpp.

        Args:
            model_path: Path to a GGUF file, or a directory containing one

        Returns:
            True if loading succeeded

        Raises:
            BackendLoadError: If llama-cpp-python is missing or the model
                cannot be found or loaded
        """
        if not HAS_LLAMA_CPP:
            raise BackendLoadError(
                "llama-cpp-python not installed. Run: pip install llama-cpp-python"
            )

        path = model_path or self.model_id

        # Resolve a local GGUF file first (direct path or directory scan).
        gguf_path = self._find_gguf_file(path)

        if not gguf_path:
            # No local file: fall back to downloading the GGUF from the Hugging Face Hub.
            try:
                from huggingface_hub import hf_hub_download
                gguf_path = hf_hub_download(
                    repo_id=self.model_id,
                    filename="finance-extractor-v8-f16.gguf",
                )
            except Exception as e:
                raise BackendLoadError(f"Could not find GGUF file: {path}. Error: {e}")

        try:
            logger.info(f"Loading GGUF model: {gguf_path}")

            self._model = Llama(
                model_path=gguf_path,
                n_ctx=self.n_ctx,
                n_threads=self.n_threads,
                verbose=False,
            )

            self._gguf_path = gguf_path
            self._loaded = True
            logger.info("llama.cpp model loaded successfully")
            return True

        except Exception as e:
            logger.error(f"Failed to load llama.cpp model: {e}")
            raise BackendLoadError(f"llama.cpp model load failed: {e}")
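
    # Both resolution paths (a sketch; './models/' is a placeholder directory):
    #
    #     backend.load_model("./models/")   # picks a *.gguf inside the directory
    #     backend.load_model()              # no local match -> HF Hub download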

    def generate(self, prompt: str, max_tokens: int = 200,
                 temperature: float = 0.1, **kwargs) -> str:
        """
        Generate text using llama.cpp.

        Args:
            prompt: Input prompt
            max_tokens: Maximum number of tokens to generate
            temperature: Sampling temperature

        Returns:
            Generated text, or an empty string if generation fails
        """
        if not self._loaded:
            self.load_model()

        try:
            output = self._model(
                prompt,
                max_tokens=max_tokens,
                temperature=temperature,
                stop=["\n\n", "</s>", "<|end|>"],
                echo=False,
                **kwargs
            )

            return output["choices"][0]["text"]

        except Exception as e:
            logger.error(f"llama.cpp generation failed: {e}")
            return ""

    def get_info(self):
        """Get backend info including GGUF path."""
        info = super().get_info()
        info['gguf_path'] = self._gguf_path
        info['n_ctx'] = self.n_ctx
        info['n_threads'] = self.n_threads
        return info
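

# Minimal smoke test (a sketch, not part of the package API). Because of the
# relative import above, the module path is an assumption and it must be run
# as a module, e.g.:  python -m finee.backends.llama_cpp
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    backend = LlamaCppBackend()
    if backend.is_available():
        backend.load_model()
        print(backend.generate("Extract entities: Apple shares rose 3% on Tuesday."))
    else:
        print("llama-cpp-python is not installed")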