Spaces:

khubchand
/

ai-assistant-engine

Sleeping

App Files Files Community

ai-assistant-engine / llm /model_loader.py

khubchand

Update system model to Gemma 3 1B Instruct and humanize responses

9eed65c 16 days ago

raw

history blame contribute delete

2.12 kB

	import os
	import llama_cpp._internals

	# Work around a LlamaModel teardown failure: when model initialization fails
	# part-way, close() may run on an instance that never received a ``sampler``
	# attribute (presumably from the destructor -- confirm against the installed
	# llama-cpp-python version). The wrapper supplies the missing attribute first.
	try:
	    _original_close = llama_cpp._internals.LlamaModel.close

	    def _safe_close(self, *args, **kwargs):
	        """Ensure ``self.sampler`` exists, then delegate to the original close().

	        Any positional or keyword arguments are forwarded unchanged, so the
	        wrapper works for a bare ``close()`` call as well as for whatever
	        signature the wrapped method has.
	        """
	        if not hasattr(self, 'sampler'):
	            self.sampler = None
	        return _original_close(self, *args, **kwargs)

	    llama_cpp._internals.LlamaModel.close = _safe_close
	except (AttributeError, TypeError):
	    # Best-effort patch of a private API: if this llama-cpp-python build has
	    # no LlamaModel.close (or the class rejects attribute assignment), leave
	    # the library untouched rather than blocking module import.
	    pass

	from llama_cpp import Llama
	from config import MODEL_PATH

	# Cached model instance; stays None until get_llm() constructs it successfully.
	_llm_instance = None
	# Set to True by get_llm() when constructing the model raises; while set,
	# get_llm() raises RuntimeError immediately instead of retrying the load.
	_llama_failed = False


	def get_llm() -> Llama:
	    """Return the shared ``Llama`` instance, creating it on first use.

	    Loading is deferred until the first call so the server can start even
	    when no model file is present. The instance is cached in the module-level
	    ``_llm_instance``; later calls return the same object.

	    The thread count comes from the ``LLAMA_THREADS`` environment variable.
	    When it is unset or blank, the CPU count capped at 4 is used (2 when the
	    CPU count is unknown).

	    Raises:
	        RuntimeError: a previous call failed while constructing ``Llama``.
	        FileNotFoundError: ``MODEL_PATH`` does not exist.
	        ValueError: ``LLAMA_THREADS`` is set but is not an integer.
	        Exception: whatever ``Llama(...)`` raises is re-raised unchanged.
	    """
	    global _llm_instance, _llama_failed

	    if _llama_failed:
	        raise RuntimeError(
	            "llama-cpp-python previously failed to initialize on this hardware."
	        )

	    if _llm_instance is not None:
	        return _llm_instance

	    if not os.path.exists(MODEL_PATH):
	        raise FileNotFoundError(
	            f"\n\n Model file not found: {os.path.abspath(MODEL_PATH)}\n"
	            f" Download a GGUF model and place it at: {MODEL_PATH}\n"
	            " Recommended: Gemma 3 1B Instruct (Q4_K_M)\n"
	            " URL: https://huggingface.co/bartowski/google_gemma-3-1b-it-GGUF\n"
	        )

	    cpu_count = os.cpu_count()
	    default_threads = max(1, min(4, cpu_count if cpu_count else 2))

	    # Resolve the thread count BEFORE the guarded section below: a malformed
	    # LLAMA_THREADS is a configuration mistake, not a llama.cpp failure, so it
	    # must not set _llama_failed and lock out every later call.
	    raw_threads = os.getenv("LLAMA_THREADS")
	    if raw_threads is None or not raw_threads.strip():
	        threads = default_threads
	    else:
	        try:
	            threads = int(raw_threads)
	        except ValueError:
	            raise ValueError(
	                f"LLAMA_THREADS must be an integer, got {raw_threads!r}"
	            ) from None

	    try:
	        _llm_instance = Llama(
	            model_path=MODEL_PATH,
	            n_ctx=2048,
	            n_threads=threads,
	            verbose=False,
	        )
	    except Exception:
	        # Remember the failure so later calls fail fast instead of retrying
	        # the model load on every request.
	        _llama_failed = True
	        raise

	    return _llm_instance


	# Keep a module-level alias for backwards compatibility with existing imports
	class _LazyLlm:
	    """Stand-in for the model that defers loading until it is actually used.

	    Calling the proxy, or reading any attribute it does not define itself,
	    goes through ``get_llm()``; any exception ``get_llm()`` raises
	    propagates to the caller.
	    """

	    def __call__(self, *args, **kwargs):
	        """Call the real model, forwarding all positional and keyword arguments."""
	        return get_llm()(*args, **kwargs)

	    def __getattr__(self, name):
	        """Look up ``name`` on the real model.

	        Python invokes this only when normal lookup on the proxy fails.
	        """
	        return getattr(get_llm(), name)


	llm = _LazyLlm()