Spaces:
Runtime error
Runtime error
# src/pipeline/load_model.py
"""Loader for the local GGUF LLM (llama-cpp-python backend)."""
import logging
import os

logger = logging.getLogger(__name__)

# Relative path to the quantized GGUF model file.
# Built with os.path.join so it works on both Windows and POSIX
# (the original hard-coded backslashes, which only resolve on Windows).
GGUF_MODEL_PATH = os.path.join("MODELS", "gguf", "llama-3.2-1b-instruct.Q4_K_M.gguf")
def load_llm_model():
    """Load the local GGUF model with llama-cpp-python.

    Returns:
        tuple: ``(llm, None)`` — the loaded ``Llama`` instance and a
        placeholder for a tokenizer (llama.cpp tokenizes internally, so
        no separate tokenizer object is needed).

    Raises:
        FileNotFoundError: if the model file is missing at GGUF_MODEL_PATH.
        Exception: any other loader error is logged with its traceback
            and re-raised unchanged.
    """
    try:
        # Imported lazily so this module can be imported even on machines
        # where llama_cpp is not installed.
        from llama_cpp import Llama

        if not os.path.exists(GGUF_MODEL_PATH):
            raise FileNotFoundError(f"GGUF model not found at: {GGUF_MODEL_PATH}")

        logger.info("Loading GGUF model...")
        print(f"Loading model from {GGUF_MODEL_PATH}")
        llm = Llama(
            model_path=GGUF_MODEL_PATH,
            n_ctx=2048,       # context window
            n_threads=4,      # CPU threads - adjust to your core count
            n_gpu_layers=0,   # 0 = CPU only; increase if you have a GPU
            verbose=False,
        )
        print("Model fully loaded!")
        return llm, None  # no separate tokenizer needed
    except Exception:
        # logger.exception records the message plus the full traceback;
        # keep the console print for interactive runs.
        logger.exception("Error loading GGUF model")
        print("ERROR LOADING MODEL:")
        import traceback
        traceback.print_exc()
        # Bare `raise` preserves the original traceback origin;
        # `raise e` (the original code) would restart it here.
        raise