vaibhavlakshmi's picture
Upload folder using huggingface_hub
3930c05 verified
raw
history blame contribute delete
830 Bytes
try:
from llama_cpp import Llama
except ImportError:
Llama = None
class OVGGUFManager:
    """Thin wrapper around a llama.cpp GGUF model for local text generation.

    Requires the optional `llama-cpp-python` package; construction fails with
    ImportError when it is not installed (the module-level import guard sets
    `Llama` to None in that case).
    """

    def __init__(self, model_path, n_ctx=2048, n_threads=4):
        """Load a GGUF model from disk.

        Args:
            model_path: Filesystem path to the .gguf model file.
            n_ctx: Context window size in tokens (default 2048).
            n_threads: CPU threads for inference (default 4; tune per machine).

        Raises:
            ImportError: If `llama-cpp-python` is not installed.
        """
        if Llama is None:
            raise ImportError("Please install `llama-cpp-python` to use GGUF models.")
        print(f"Loading GGUF Model from {model_path}...")
        self.model = Llama(
            model_path=model_path,
            n_ctx=n_ctx,
            n_threads=n_threads,
            verbose=False
        )
        print("✅ GGUF Model Loaded.")

    def generate(self, prompt, max_new_tokens=100, stop=None):
        """Generate a completion for `prompt` and return the stripped text.

        Args:
            prompt: Input prompt string.
            max_new_tokens: Maximum number of tokens to generate (default 100).
            stop: Optional list of stop sequences; defaults to the original
                hard-coded ["User:", "\n\n"] when not supplied.

        Returns:
            The generated text of the first choice, with surrounding
            whitespace stripped.
        """
        # None sentinel avoids a shared mutable default argument.
        if stop is None:
            stop = ["User:", "\n\n"]
        output = self.model(
            prompt,
            max_tokens=max_new_tokens,
            stop=stop,
            echo=False
        )
        return output["choices"][0]["text"].strip()