import logging
import os

import llama_cpp._internals

# Work around a LlamaModel destructor bug that shows up when initialization
# fails: guarantee the instance has a `sampler` attribute before the original
# close() runs (presumably the upstream close() reads it -- confirm against
# the installed llama-cpp-python version).
try:
    _original_close = llama_cpp._internals.LlamaModel.close

    def _safe_close(self, *args, **kwargs):
        """Ensure `self.sampler` exists, then delegate to the original close()."""
        if not hasattr(self, 'sampler'):
            self.sampler = None
        return _original_close(self, *args, **kwargs)

    llama_cpp._internals.LlamaModel.close = _safe_close
except Exception:
    # The patch is best-effort and must never block module import, but leave a
    # trace so a skipped workaround can be diagnosed instead of vanishing.
    logging.getLogger(__name__).debug(
        "Could not patch LlamaModel.close; continuing without the workaround",
        exc_info=True,
    )

from llama_cpp import Llama
from config import MODEL_PATH

# Cached model instance; stays None until get_llm() has loaded it successfully.
_llm_instance = None
# Set to True when the model-loading step in get_llm() raises, so that later
# calls fail fast instead of retrying.
_llama_failed = False


def _resolve_thread_count() -> int:
    """Return the thread count to pass to Llama.

    Uses the LLAMA_THREADS environment variable when it is set; otherwise
    falls back to the CPU count capped to the range 1..4 (2 is assumed when
    the CPU count cannot be determined).

    Raises:
        ValueError: LLAMA_THREADS is set but is not a valid integer.
    """
    override = os.getenv("LLAMA_THREADS")
    if override is None:
        cpu_count = os.cpu_count()
        return max(1, min(4, cpu_count if cpu_count else 2))
    try:
        return int(override)
    except ValueError as err:
        raise ValueError(
            f"LLAMA_THREADS must be an integer, got {override!r}"
        ) from err


def get_llm() -> Llama:
    """Lazy-load the LLM so the server starts even without a model file.

    The model is constructed on the first successful call and cached in the
    module-level `_llm_instance`; later calls return the cached instance.

    Returns:
        The shared Llama instance.

    Raises:
        RuntimeError: An earlier call failed while constructing the model.
        FileNotFoundError: MODEL_PATH does not exist.
        ValueError: LLAMA_THREADS is set to a non-integer value.
    """
    global _llm_instance, _llama_failed

    if _llama_failed:
        raise RuntimeError("llama-cpp-python previously failed to initialize on this hardware.")

    if _llm_instance is not None:
        return _llm_instance

    if not os.path.exists(MODEL_PATH):
        raise FileNotFoundError(
            f"\n\n  Model file not found: {os.path.abspath(MODEL_PATH)}\n"
            f"  Download a GGUF model and place it at:  {MODEL_PATH}\n"
            f"  Recommended: Gemma 3 1B Instruct (Q4_K_M)\n"
            f"  URL: https://huggingface.co/bartowski/google_gemma-3-1b-it-GGUF\n"
        )

    # Resolve configuration before the guarded section: a malformed
    # LLAMA_THREADS is a fixable config error and must not latch the
    # permanent "failed on this hardware" flag.
    threads = _resolve_thread_count()

    try:
        _llm_instance = Llama(
            model_path=MODEL_PATH,
            n_ctx=2048,
            n_threads=threads,
            verbose=False,
        )
    except Exception:
        # Remember the failure so subsequent calls fail fast, then re-raise
        # with the original traceback intact.
        _llama_failed = True
        raise

    return _llm_instance


# Keep a module-level alias for backwards compatibility with existing imports
class _LazyLlm:
    """Proxy that loads the real model on first call or attribute access.

    Calls and lookups of ordinary attributes are forwarded to the object
    returned by get_llm(), which loads the model on demand. Lookups of
    double-underscore names that the proxy itself does not define raise
    AttributeError immediately, so introspection probes such as
    ``hasattr(llm, "__wrapped__")`` neither trigger a model load nor leak
    loader errors.
    """

    def __call__(self, *args, **kwargs):
        """Forward the call to the lazily loaded model and return its result."""
        return get_llm()(*args, **kwargs)

    def __getattr__(self, name):
        """Resolve attributes missing on the proxy against the loaded model.

        Raises:
            AttributeError: `name` is a dunder name, or the model lacks it.
        """
        # __getattr__ only runs after normal lookup fails, so a dunder name
        # arriving here is a protocol probe, not a request for model data.
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        return getattr(get_llm(), name)


llm = _LazyLlm()