try:
    from llama_cpp import Llama
except ImportError:
    # Optional dependency: leave a sentinel so the error surfaces at
    # construction time with an actionable message, not at import time.
    Llama = None


class OVGGUFManager:
    """Thin wrapper around a llama.cpp GGUF model for text generation.

    Loads the model once in ``__init__`` and exposes a single
    ``generate`` method that returns the stripped completion text.
    """

    def __init__(self, model_path: str, n_ctx: int = 2048, n_threads: int = 4):
        """Load a GGUF model from *model_path*.

        Args:
            model_path: Filesystem path to the ``.gguf`` model file.
            n_ctx: Context window size in tokens.
            n_threads: CPU threads for inference (was hard-coded; now
                configurable, default unchanged).

        Raises:
            ImportError: If ``llama-cpp-python`` is not installed.
        """
        if Llama is None:
            raise ImportError("Please install `llama-cpp-python` to use GGUF models.")
        print(f"Loading GGUF Model from {model_path}...")
        self.model = Llama(
            model_path=model_path,
            n_ctx=n_ctx,
            n_threads=n_threads,  # Adjust based on CPU
            verbose=False,
        )
        print("✅ GGUF Model Loaded.")

    def generate(self, prompt: str, max_new_tokens: int = 100) -> str:
        """Generate a completion for *prompt* and return the stripped text.

        Generation stops at ``"User:"`` or a blank line, mirroring a
        simple chat-turn boundary.
        """
        # Llama.cpp generate
        output = self.model(
            prompt,
            max_tokens=max_new_tokens,
            stop=["User:", "\n\n"],
            echo=False,
        )
        return output["choices"][0]["text"].strip()