""" FinEE llama.cpp Backend - CPU fallback using llama-cpp-python. Works on any platform (Linux, Windows, macOS) without GPU. """ import logging from typing import Optional from pathlib import Path from .base import BaseBackend, BackendLoadError logger = logging.getLogger(__name__) # Check for llama-cpp-python availability try: from llama_cpp import Llama HAS_LLAMA_CPP = True except ImportError: HAS_LLAMA_CPP = False class LlamaCppBackend(BaseBackend): """ llama.cpp backend for CPU inference. Works on any platform without GPU requirements. Uses GGUF format models. Requirements: - llama-cpp-python package - GGUF format model file """ def __init__(self, model_id: str = "Ranjit0034/finance-entity-extractor", n_ctx: int = 4096, n_threads: Optional[int] = None): """ Initialize llama.cpp backend. Args: model_id: Hugging Face model ID or local GGUF path n_ctx: Context length n_threads: Number of CPU threads (None = auto) """ super().__init__(model_id) self.n_ctx = n_ctx self.n_threads = n_threads self._gguf_path: Optional[str] = None def is_available(self) -> bool: """Check if llama-cpp-python is available.""" return HAS_LLAMA_CPP def _find_gguf_file(self, path: str) -> Optional[str]: """ Find GGUF file in a directory or verify path. Args: path: Directory or file path Returns: Path to GGUF file or None """ path_obj = Path(path) # If it's a file, check if it's GGUF if path_obj.is_file() and path_obj.suffix == '.gguf': return str(path_obj) # If it's a directory, look for GGUF files if path_obj.is_dir(): gguf_files = list(path_obj.glob('*.gguf')) if gguf_files: # Prefer q4_k_m, then f16, then any for pattern in ['*q4_k_m*', '*f16*', '*']: for f in gguf_files: if pattern == '*' or pattern.replace('*', '') in f.name.lower(): return str(f) return None def load_model(self, model_path: Optional[str] = None) -> bool: """ Load GGUF model with llama.cpp. Args: model_path: Path to GGUF file or directory containing GGUF Returns: True if successful """ if not HAS_LLAMA_CPP: raise BackendLoadError("llama-cpp-python not installed. Run: pip install llama-cpp-python") path = model_path or self.model_id # Find GGUF file gguf_path = self._find_gguf_file(path) if not gguf_path: # Try to download from HuggingFace try: from huggingface_hub import hf_hub_download gguf_path = hf_hub_download( repo_id=self.model_id, filename="finance-extractor-v8-f16.gguf" ) except Exception as e: raise BackendLoadError(f"Could not find GGUF file: {path}. Error: {e}") try: logger.info(f"Loading GGUF model: {gguf_path}") self._model = Llama( model_path=gguf_path, n_ctx=self.n_ctx, n_threads=self.n_threads, verbose=False, ) self._gguf_path = gguf_path self._loaded = True logger.info("llama.cpp model loaded successfully") return True except Exception as e: logger.error(f"Failed to load llama.cpp model: {e}") raise BackendLoadError(f"llama.cpp model load failed: {e}") def generate(self, prompt: str, max_tokens: int = 200, temperature: float = 0.1, **kwargs) -> str: """ Generate text using llama.cpp. 

        Args:
            prompt: Input prompt
            max_tokens: Maximum tokens to generate
            temperature: Sampling temperature

        Returns:
            Generated text
        """
        if not self._loaded:
            self.load_model()

        try:
            output = self._model(
                prompt,
                max_tokens=max_tokens,
                temperature=temperature,
                # Stop on blank lines or the end-of-turn marker
                stop=["\n\n", "<|end|>"],
                echo=False,
                **kwargs,
            )
            return output["choices"][0]["text"]
        except Exception as e:
            logger.error(f"llama.cpp generation failed: {e}")
            return ""

    def get_info(self):
        """Get backend info including GGUF path."""
        info = super().get_info()
        info['gguf_path'] = self._gguf_path
        info['n_ctx'] = self.n_ctx
        info['n_threads'] = self.n_threads
        return info
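

# Minimal usage sketch. This is illustrative only: the prompt text below is an
# assumption, not the project's actual extraction template, and the block must
# be run as a module within the package (the relative import of .base requires
# it), e.g. via `python -m <package>.llama_cpp`.
if __name__ == "__main__":
    backend = LlamaCppBackend()
    if backend.is_available():
        backend.load_model()
        result = backend.generate(
            "Extract the financial entities from: Apple reported Q3 revenue of $81.8B.",
            max_tokens=128,
        )
        print(backend.get_info())
        print(result)
    else:
        print("llama-cpp-python not installed. Run: pip install llama-cpp-python")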