# UPIF-Demo / model_loader.py
# Author: yashsecdev
# Deploy: Limitless UPIF Stack (Docker/FastAPI/React) — commit b28041c
import os
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# Configuration
# Hugging Face repo hosting GGUF quantizations of Llama-3-8B-Instruct-v0.3.
REPO_ID = "MaziyarPanahi/Llama-3-8B-Instruct-v0.3-GGUF"
# The specific quantized file to fetch (Q4_K_M variant, per the filename).
FILENAME = "Llama-3-8B-Instruct-v0.3.Q4_K_M.gguf"
# Expected local location of the weights: <this file's dir>/models/<FILENAME>.
MODEL_PATH = os.path.join(os.path.dirname(__file__), "models", FILENAME)
def get_model(n_ctx: int = 2048, n_threads: int = 2) -> Llama:
    """Download the GGUF model if not present, then load it into memory.

    Args:
        n_ctx: Context window size passed to llama.cpp (default 2048).
        n_threads: CPU threads for inference (default 2, sized for the
            HF Spaces free tier: 2 vCPU, 16 GB RAM).

    Returns:
        A ready-to-use ``Llama`` instance.
    """
    model_path = MODEL_PATH
    if not os.path.exists(model_path):
        print(f"⬇️ Model not found. Downloading {FILENAME} from Hugging Face...")
        os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
        # Use the path returned by hf_hub_download rather than assuming the
        # file lands exactly at MODEL_PATH: `local_dir_use_symlinks` is
        # deprecated (and ignored) in recent huggingface_hub releases, so the
        # on-disk layout is not guaranteed.
        model_path = hf_hub_download(
            repo_id=REPO_ID,
            filename=FILENAME,
            local_dir=os.path.dirname(MODEL_PATH),
            local_dir_use_symlinks=False,  # deprecated no-op on newer hub versions
        )
        print("✅ Download complete.")
    else:
        print(f"✅ Model found at {MODEL_PATH}")
    print("🚀 Loading Llama-3 into memory (CPU Mode)...")
    # Initialize Llama (Free Tier: 2 vCPU, 16GB RAM)
    llm = Llama(
        model_path=model_path,
        n_ctx=n_ctx,
        n_threads=n_threads,  # Optimizing for HF Spaces Free Tier
        verbose=False,
    )
    return llm
# Global instance for re-use.
# Lazily populated by generate_response() on first call, so the expensive
# model load happens at most once per process.
_llm_instance = None
def _build_llama3_prompt(prompt: str, system_prompt: str) -> str:
    """Assemble a raw Llama-3 chat prompt (system + user turns, assistant open)."""
    return (
        f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
        f"{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n"
        f"{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
    )

def generate_response(prompt: str, system_prompt: str = "", max_tokens: int = 512) -> str:
    """Generate a completion for *prompt* using the shared Llama instance.

    Loads the model lazily on first call and caches it in the module-level
    ``_llm_instance`` for subsequent requests.

    Args:
        prompt: The user message.
        system_prompt: Optional system instruction prepended to the chat.
        max_tokens: Generation cap passed to llama.cpp (default 512).

    Returns:
        The assistant's generated text (raw, untrimmed).
    """
    global _llm_instance
    if _llm_instance is None:
        _llm_instance = get_model()
    full_prompt = _build_llama3_prompt(prompt, system_prompt)
    output = _llm_instance(
        full_prompt,
        max_tokens=max_tokens,
        stop=["<|eot_id|>"],  # stop at the end-of-turn token so we emit one reply
        echo=False,
    )
    return output['choices'][0]['text']