Spaces:
Runtime error
Runtime error
File size: 1,005 Bytes
# src/pipeline/load_model.py
import logging
import os
logger = logging.getLogger(__name__)

# Relative path to the GGUF weights file; being relative, it is resolved
# against the process's current working directory. It is assembled with
# os.path.join so the host's own separator is used: a literal backslash path
# only resolves on Windows, because POSIX treats "\" as an ordinary filename
# character rather than a directory separator.
GGUF_MODEL_PATH = os.path.join(
    "MODELS", "gguf", "llama-3.2-1b-instruct.Q4_K_M.gguf"
)
def load_llm_model():
    """Load the GGUF model at ``GGUF_MODEL_PATH`` with llama-cpp-python.

    Returns:
        A ``(llm, None)`` tuple. ``llm`` is the constructed ``Llama``
        instance; the second slot is always ``None`` because no separate
        tokenizer object is produced.

    Raises:
        FileNotFoundError: If the model file does not exist.
        Exception: Any other failure (including ``llama_cpp`` not being
            importable) is printed with its traceback and re-raised
            unchanged.
    """
    try:
        # Imported inside the try so an import failure is reported by the
        # handler below like any other load error.
        from llama_cpp import Llama

        # Accept a path written with Windows separators on any host:
        # on POSIX a backslash is a plain filename character, so the path
        # would otherwise never be found. This is a no-op on Windows.
        model_path = GGUF_MODEL_PATH.replace("\\", os.sep)

        if not os.path.exists(model_path):
            raise FileNotFoundError(f"GGUF model not found at: {model_path}")

        logger.info("Loading GGUF model...")
        print(f"👉 Loading model from {model_path}")

        llm = Llama(
            model_path=model_path,
            n_ctx=2048,  # context window
            n_threads=4,  # CPU threads — adjust to your core count
            n_gpu_layers=0,  # 0 = CPU only; increase if you have GPU
            verbose=False,
        )

        print("✅ Model fully loaded!")
        return llm, None  # no separate tokenizer needed
    except Exception:
        import traceback

        print("❌ ERROR LOADING MODEL:")
        traceback.print_exc()
        # Bare raise re-raises the active exception with its original
        # traceback instead of re-raising through a bound name.
        raise