try:
from llama_cpp import Llama
except ImportError:
Llama = None
class OVGGUFManager:
    """Thin wrapper around a llama.cpp GGUF model for simple text generation."""

    def __init__(self, model_path, n_ctx=2048, n_threads=4, verbose=False):
        """Load a GGUF model from disk via llama-cpp-python.

        Args:
            model_path: Path to the ``.gguf`` model file.
            n_ctx: Context window size in tokens.
            n_threads: CPU threads used for inference; tune to the host CPU.
            verbose: Forwarded to llama.cpp's internal logging.

        Raises:
            ImportError: If ``llama-cpp-python`` is not installed.
        """
        if Llama is None:
            raise ImportError("Please install `llama-cpp-python` to use GGUF models.")
        print(f"Loading GGUF Model from {model_path}...")
        self.model = Llama(
            model_path=model_path,
            n_ctx=n_ctx,
            n_threads=n_threads,
            verbose=verbose,
        )
        print("✅ GGUF Model Loaded.")

    def generate(self, prompt, max_new_tokens=100, stop=None):
        """Generate a completion for *prompt* and return the stripped text.

        Args:
            prompt: Input text to complete.
            max_new_tokens: Maximum number of tokens to sample.
            stop: Optional list of stop strings; when omitted, generation
                halts at a "User:" turn marker or a blank line (the original
                hard-coded behavior).

        Returns:
            The generated text with surrounding whitespace removed.
        """
        # None sentinel (not a mutable default) keeps the call
        # backward-compatible while letting callers override stops.
        if stop is None:
            stop = ["User:", "\n\n"]
        output = self.model(
            prompt,
            max_tokens=max_new_tokens,
            stop=stop,
            echo=False,
        )
        return output["choices"][0]["text"].strip()