ag1 / model_loader.py
nulltron's picture
Update model_loader.py
5ecd8d4 verified
Raw
History Blame Contribute Delete
1.07 kB
import os
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# Hub coordinates of the quantized GGUF checkpoint.
REPO_ID = "Qwen/Qwen2.5-0.5B-Instruct-GGUF"
MODEL_FILENAME = "qwen2.5-0.5b-instruct-q4_k_m.gguf"

# NOTE: this runs at import time. `model_path` holds whatever
# hf_hub_download returns (presumably the local path of the fetched
# file -- confirm against the huggingface_hub docs) and is read later by
# get_local_llm_instance().
print("[SYSTEM] Fetching quantized model files from HuggingFace Hub cluster...")
model_path = hf_hub_download(
    filename=MODEL_FILENAME,
    repo_id=REPO_ID,
)
print(f"[SYSTEM] Model secured safely at: {model_path}")
def get_local_llm_instance(
    *,
    n_ctx: int = 2048,
    n_threads: int = 4,
    n_batch: int = 512,
    verbose: bool = False,
) -> "Llama":
    """Build and return a new ``Llama`` instance for the downloaded model.

    The weights are read from the module-level ``model_path``. Every call
    constructs a fresh ``Llama`` object; nothing is cached here, so callers
    that want a single shared instance should keep the returned object.

    All arguments are keyword-only and default to the values this function
    previously hard-coded, so calling it with no arguments is unchanged.

    Args:
        n_ctx: Context size passed to ``Llama``. The default of 2048 was
            chosen by the original author to keep processing fast on a
            15GB RAM host.
        n_threads: Thread count passed to ``Llama``. The default of 4 was
            chosen by the original author for the HuggingFace free tier.
        n_batch: Batch size passed to ``Llama``.
        verbose: Forwarded to ``Llama`` as its ``verbose`` flag.

    Returns:
        The newly constructed ``Llama`` instance.
    """
    print("[SYSTEM] Loading weights inside internal RAM parameters...")
    llm = Llama(
        model_path=model_path,
        n_ctx=n_ctx,
        n_threads=n_threads,
        n_batch=n_batch,
        verbose=verbose,
    )
    print("[SYSTEM] Model weights successfully attached!")
    return llm