ag2

Paused

ag2 / model_loader.py

Update model_loader.py

4bffb86 verified 2 days ago

1.27 kB

	import os
	from huggingface_hub import hf_hub_download
	from llama_cpp import Llama

	# Hugging Face Hub repository and the quantized GGUF file fetched from it.
	REPO_ID = "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF"
	MODEL_FILENAME = "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf"

	# NOTE: the download runs at import time. The resulting `model_path` is a
	# module-level name that get_local_llm_instance() reads when it is called.
	print("[SYSTEM] Fetching verified Meta-Llama-3-8B-Instruct GGUF from Hub...")
	try:
	    # Keep the try body to the single call that can fail, so only genuine
	    # download failures are reported as download errors.
	    model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
	except Exception as download_err:
	    # Import-time boundary: report the failure, then re-raise the same
	    # exception unchanged so importing this module still fails loudly.
	    print(f"[CRITICAL DOWNLOAD ERROR] Failed to fetch target file: {download_err}")
	    raise
	else:
	    print(f"[SYSTEM] Model secured safely at: {model_path}")

	def get_local_llm_instance(
	    *,
	    n_ctx: int = 2048,
	    n_threads: int = 4,
	    n_batch: int = 512,
	    verbose: bool = False,
	) -> "Llama":
	    """Build and return a new ``Llama`` instance for the downloaded model.

	    Every call constructs a fresh ``Llama`` object from the module-level
	    ``model_path``; nothing is cached here. The defaults reproduce the
	    values that were previously hard-coded, so calling the function with
	    no arguments behaves as before.

	    NOTE(review): the original comments say these defaults target a
	    Hugging Face free-tier CPU host; that tuning is not verified here.

	    Args:
	        n_ctx: Value forwarded as ``Llama(n_ctx=...)`` (context size).
	        n_threads: Value forwarded as ``Llama(n_threads=...)``.
	        n_batch: Value forwarded as ``Llama(n_batch=...)``.
	        verbose: Value forwarded as ``Llama(verbose=...)``.

	    Returns:
	        The newly constructed ``Llama`` object.
	    """
	    print("[SYSTEM] Loading weights inside internal RAM parameters...")
	    llm = Llama(
	        model_path=model_path,
	        n_ctx=n_ctx,
	        n_threads=n_threads,
	        n_batch=n_batch,
	        verbose=verbose,
	    )
	    print("[SYSTEM] Model weights successfully attached!")
	    return llm