# Optional dependency guard: `llama-cpp-python` is only needed when GGUF
# models are actually used. On missing install we set Llama to None and
# defer the hard failure to OVGGUFManager.__init__, which checks for it.
try:
    from llama_cpp import Llama
except ImportError:
    Llama = None
class OVGGUFManager:
    """Thin wrapper around llama-cpp-python for loading and querying GGUF models."""

    def __init__(self, model_path, n_ctx=2048, n_threads=4):
        """Load a GGUF model from disk.

        Args:
            model_path: Filesystem path to the ``.gguf`` model file.
            n_ctx: Context window size in tokens.
            n_threads: CPU threads used for inference. Previously hard-coded
                to 4 with a "adjust based on CPU" comment; now tunable while
                keeping 4 as the backward-compatible default.

        Raises:
            ImportError: If ``llama-cpp-python`` is not installed (the
                module-level import guard left ``Llama`` as ``None``).
        """
        if Llama is None:
            raise ImportError("Please install `llama-cpp-python` to use GGUF models.")
        print(f"Loading GGUF Model from {model_path}...")
        self.model = Llama(
            model_path=model_path,
            n_ctx=n_ctx,
            n_threads=n_threads,
            verbose=False,
        )
        print("✅ GGUF Model Loaded.")

    def generate(self, prompt, max_new_tokens=100, stop=None):
        """Generate a completion for *prompt* and return the stripped text.

        Args:
            prompt: Prompt string passed directly to the llama.cpp model.
            max_new_tokens: Maximum number of tokens to generate.
            stop: Optional list of stop sequences. Defaults to the original
                hard-coded ``["User:", "\\n\\n"]`` when not given, so existing
                callers see identical behavior.

        Returns:
            The generated text (first choice), with surrounding whitespace
            stripped.
        """
        output = self.model(
            prompt,
            max_tokens=max_new_tokens,
            # Preserve the original default stop sequences when none supplied.
            stop=["User:", "\n\n"] if stop is None else stop,
            echo=False,
        )
        return output["choices"][0]["text"].strip()