File size: 1,005 Bytes
1914b78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# src/pipeline/load_model.py

import logging
import os

logger = logging.getLogger(__name__)

# Default location of the quantized GGUF model, relative to the current
# working directory. Built with os.path.join so the separators match the host
# OS: on Windows this is the same string as before, while on POSIX systems the
# former backslash-separated literal would have been treated as one file name.
GGUF_MODEL_PATH = os.path.join("MODELS", "gguf", "llama-3.2-1b-instruct.Q4_K_M.gguf")

def load_llm_model(
    model_path=None,
    *,
    n_ctx: int = 2048,
    n_threads: int = 4,
    n_gpu_layers: int = 0,
    verbose: bool = False,
):
    """Load a GGUF model through llama-cpp-python.

    Called with no arguments this behaves as before: it loads the model at
    the module-level ``GGUF_MODEL_PATH`` with a 2048-token context, 4 CPU
    threads and no GPU offload.

    Args:
        model_path: Path to the ``.gguf`` file (``str`` or ``os.PathLike``).
            ``None`` (the default) means "use ``GGUF_MODEL_PATH``".
        n_ctx: Context window size handed to ``Llama``.
        n_threads: Number of CPU threads handed to ``Llama``; adjust to the
            host's core count.
        n_gpu_layers: Number of layers to offload to a GPU. ``0`` keeps the
            model on the CPU only.
        verbose: Forwarded to ``Llama``'s ``verbose`` flag.

    Returns:
        A ``(llm, None)`` tuple. The second element is always ``None``
        because no separate tokenizer object is needed.

    Raises:
        FileNotFoundError: If the resolved path is not an existing file.
        Exception: Any other error raised while importing ``llama_cpp`` or
            constructing ``Llama`` is re-raised unchanged after an error
            banner and the traceback have been printed.
    """
    try:
        # Resolve the default at call time (not in the signature) so a
        # GGUF_MODEL_PATH that was reassigned after import is still honoured.
        resolved_path = os.fspath(
            GGUF_MODEL_PATH if model_path is None else model_path
        )

        # Validate before importing llama_cpp: the check is cheap, and
        # isfile() (unlike exists()) also rejects a directory, which would
        # otherwise only fail later inside the loader.
        if not os.path.isfile(resolved_path):
            raise FileNotFoundError(f"GGUF model not found at: {resolved_path}")

        # Imported lazily so that merely importing this module does not
        # require llama-cpp-python to be installed.
        from llama_cpp import Llama

        logger.info("Loading GGUF model...")
        print(f"👉 Loading model from {resolved_path}")

        llm = Llama(
            model_path=resolved_path,
            n_ctx=n_ctx,
            n_threads=n_threads,
            n_gpu_layers=n_gpu_layers,
            verbose=verbose,
        )

        print("✅ Model fully loaded!")
        return llm, None       # no separate tokenizer needed

    except Exception:
        import traceback
        print("❌ ERROR LOADING MODEL:")
        traceback.print_exc()
        # Bare raise re-raises the active exception as-is.
        raise