| | """ |
| | FinEE llama.cpp Backend - CPU fallback using llama-cpp-python. |
| | |
| | Works on any platform (Linux, Windows, macOS) without GPU. |
| | """ |
| |
|
| | import logging |
| | from typing import Optional |
| | from pathlib import Path |
| |
|
| | from .base import BaseBackend, BackendLoadError |
| |
|
| | logger = logging.getLogger(__name__) |

try:
    from llama_cpp import Llama
    HAS_LLAMA_CPP = True
except ImportError:
    HAS_LLAMA_CPP = False


class LlamaCppBackend(BaseBackend):
    """
    llama.cpp backend for CPU inference.

    Works on any platform without GPU requirements.
    Uses GGUF format models.

    Requirements:
        - llama-cpp-python package
        - GGUF format model file
    """

    def __init__(self, model_id: str = "Ranjit0034/finance-entity-extractor",
                 n_ctx: int = 4096,
                 n_threads: Optional[int] = None):
        """
        Initialize llama.cpp backend.

        Args:
            model_id: Hugging Face model ID or local GGUF path
            n_ctx: Context length
            n_threads: Number of CPU threads (None = auto)
        """
        super().__init__(model_id)
        self.n_ctx = n_ctx
        self.n_threads = n_threads
        self._gguf_path: Optional[str] = None

    def is_available(self) -> bool:
        """Check if llama-cpp-python is available."""
        return HAS_LLAMA_CPP

    def _find_gguf_file(self, path: str) -> Optional[str]:
        """
        Find a GGUF file in a directory, or verify that a path points to one.

        Args:
            path: Directory or file path

        Returns:
            Path to a GGUF file, or None if none was found
        """
        path_obj = Path(path)

        # Direct path to a GGUF file.
        if path_obj.is_file() and path_obj.suffix == '.gguf':
            return str(path_obj)

        # Directory: prefer a q4_k_m quantization, then f16, then any GGUF.
        if path_obj.is_dir():
            gguf_files = list(path_obj.glob('*.gguf'))
            if gguf_files:
                for pattern in ['*q4_k_m*', '*f16*', '*']:
                    for f in gguf_files:
                        if pattern == '*' or pattern.replace('*', '') in f.name.lower():
                            return str(f)

        return None
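
    # Selection order on the preference patterns above (hypothetical filenames):
    #   ['model-f16.gguf', 'model-q4_k_m.gguf'] -> 'model-q4_k_m.gguf'
    #   ['model-f16.gguf']                      -> 'model-f16.gguf'
    #   ['model-q8_0.gguf']                     -> 'model-q8_0.gguf' (via '*')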

    def load_model(self, model_path: Optional[str] = None) -> bool:
        """
        Load a GGUF model with llama.cpp.

        Args:
            model_path: Path to a GGUF file, or a directory containing one

        Returns:
            True if loading succeeded

        Raises:
            BackendLoadError: If llama-cpp-python is missing or the model
                cannot be found or loaded
        """
        if not HAS_LLAMA_CPP:
            raise BackendLoadError(
                "llama-cpp-python not installed. Run: pip install llama-cpp-python"
            )

        path = model_path or self.model_id

        # Resolve a local GGUF file first (direct path or directory scan).
        gguf_path = self._find_gguf_file(path)

        if not gguf_path:
            # No local file: fall back to downloading the GGUF from the Hugging Face Hub.
            try:
                from huggingface_hub import hf_hub_download
                gguf_path = hf_hub_download(
                    repo_id=self.model_id,
                    filename="finance-extractor-v8-f16.gguf",
                )
            except Exception as e:
                raise BackendLoadError(f"Could not find GGUF file: {path}. Error: {e}")

        try:
            logger.info(f"Loading GGUF model: {gguf_path}")

            self._model = Llama(
                model_path=gguf_path,
                n_ctx=self.n_ctx,
                n_threads=self.n_threads,
                verbose=False,
            )

            self._gguf_path = gguf_path
            self._loaded = True
            logger.info("llama.cpp model loaded successfully")
            return True

        except Exception as e:
            logger.error(f"Failed to load llama.cpp model: {e}")
            raise BackendLoadError(f"llama.cpp model load failed: {e}")
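
    # Both resolution paths (a sketch; './models/' is a placeholder directory):
    #
    #     backend.load_model("./models/")   # picks a *.gguf inside the directory
    #     backend.load_model()              # no local match -> HF Hub download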

    def generate(self, prompt: str, max_tokens: int = 200,
                 temperature: float = 0.1, **kwargs) -> str:
        """
        Generate text using llama.cpp.

        Args:
            prompt: Input prompt
            max_tokens: Maximum number of tokens to generate
            temperature: Sampling temperature

        Returns:
            Generated text, or an empty string if generation fails
        """
        if not self._loaded:
            self.load_model()

        try:
            output = self._model(
                prompt,
                max_tokens=max_tokens,
                temperature=temperature,
                stop=["\n\n", "</s>", "<|end|>"],
                echo=False,
                **kwargs
            )

            return output["choices"][0]["text"]

        except Exception as e:
            logger.error(f"llama.cpp generation failed: {e}")
            return ""

    def get_info(self):
        """Get backend info including GGUF path."""
        info = super().get_info()
        info['gguf_path'] = self._gguf_path
        info['n_ctx'] = self.n_ctx
        info['n_threads'] = self.n_threads
        return info
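

# Minimal smoke test (a sketch, not part of the package API). Because of the
# relative import above, the module path is an assumption and it must be run
# as a module, e.g.:  python -m finee.backends.llama_cpp
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    backend = LlamaCppBackend()
    if backend.is_available():
        backend.load_model()
        print(backend.generate("Extract entities: Apple shares rose 3% on Tuesday."))
    else:
        print("llama-cpp-python is not installed")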