# AI-powered-SQL — src/pipeline/load_model.py
# Auto-deployed from GitHub Actions (commit 1914b78)
# src/pipeline/load_model.py
"""Load a local quantized GGUF LLM via llama-cpp-python."""
import logging
import os

logger = logging.getLogger(__name__)

# Default on-disk location of the quantized model (Windows-style relative path;
# resolved against the current working directory).
GGUF_MODEL_PATH = r"MODELS\gguf\llama-3.2-1b-instruct.Q4_K_M.gguf"


def load_llm_model(model_path: str = GGUF_MODEL_PATH):
    """Load a GGUF model with llama-cpp-python and return it.

    Args:
        model_path: Path to the ``.gguf`` file. Defaults to
            ``GGUF_MODEL_PATH`` for backward compatibility.

    Returns:
        tuple: ``(llm, None)`` — the loaded ``Llama`` instance and a
        placeholder for a tokenizer (llama.cpp embeds its own tokenizer,
        so no separate one is needed).

    Raises:
        FileNotFoundError: If the model file does not exist.
        ImportError: If llama-cpp-python is not installed.
    """
    # Fail fast on a missing file BEFORE importing llama_cpp, so callers get
    # a clear error even when the heavy dependency is absent or slow to load.
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"GGUF model not found at: {model_path}")
    try:
        # Imported lazily so modules that never load the model don't need
        # llama-cpp-python installed.
        from llama_cpp import Llama

        logger.info("Loading GGUF model...")
        print(f"👉 Loading model from {model_path}")
        llm = Llama(
            model_path=model_path,
            n_ctx=2048,       # context window
            n_threads=4,      # CPU threads — adjust to your core count
            n_gpu_layers=0,   # 0 = CPU only; increase if you have a GPU
            verbose=False,
        )
        print("✅ Model fully loaded!")
        return llm, None  # no separate tokenizer needed
    except Exception:
        # logger.exception records the full traceback; bare `raise` re-raises
        # the original exception with its traceback intact (unlike `raise e`).
        print("❌ ERROR LOADING MODEL:")
        logger.exception("Failed to load GGUF model from %s", model_path)
        raise