try:
    from llama_cpp import Llama
except ImportError:
    # Optional dependency: leave a sentinel so the error surfaces at
    # construction time with an actionable message, not at import time.
    Llama = None


class OVGGUFManager:
    """Thin wrapper around a llama.cpp GGUF model for text generation.

    Loads the model once in ``__init__`` and exposes a single
    ``generate`` method that returns the stripped completion text.
    """

    def __init__(self, model_path: str, n_ctx: int = 2048, n_threads: int = 4):
        """Load a GGUF model from *model_path*.

        Args:
            model_path: Filesystem path to the ``.gguf`` model file.
            n_ctx: Context window size in tokens.
            n_threads: CPU threads for inference (was hard-coded; now
                configurable, default unchanged).

        Raises:
            ImportError: If ``llama-cpp-python`` is not installed.
        """
        if Llama is None:
            raise ImportError("Please install `llama-cpp-python` to use GGUF models.")
        print(f"Loading GGUF Model from {model_path}...")
        self.model = Llama(
            model_path=model_path,
            n_ctx=n_ctx,
            n_threads=n_threads,  # Adjust based on CPU
            verbose=False,
        )
        print("✅ GGUF Model Loaded.")

    def generate(self, prompt: str, max_new_tokens: int = 100) -> str:
        """Generate a completion for *prompt* and return the stripped text.

        Generation stops at ``"User:"`` or a blank line, mirroring a
        simple chat-turn boundary.
        """
        # Llama.cpp generate
        output = self.model(
            prompt,
            max_tokens=max_new_tokens,
            stop=["User:", "\n\n"],
            echo=False,
        )
        return output["choices"][0]["text"].strip()