from pathlib import Path
from typing import List, Optional
import yaml
import numpy as np
# Optional heavy dependencies: fall back to None so the app can still start
# (and print warnings) when they are not installed.
try:
    from llama_cpp import Llama
except Exception:
    Llama = None

try:
    from sentence_transformers import SentenceTransformer
except Exception:
    SentenceTransformer = None

# Load config
CFG_PATH = Path(__file__).resolve().parent.parent.parent / "configs" / "default.yaml"
if CFG_PATH.exists():
    with open(CFG_PATH, "r") as f:
        _CFG = yaml.safe_load(f)
else:
    # Fallback defaults used when configs/default.yaml is absent.
    _CFG = {
        "model_path": "models/qwen2.5-0.5b-instruct-q4_0.gguf",
        "embed_model": "sentence-transformers/all-MiniLM-L6-v2",
        "faiss_dir": "faiss_index",
        "chunk_size": 1000,
        "chunk_overlap": 200,
    }
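
# For reference, configs/default.yaml is expected to mirror the fallback dict
# above. Illustrative sketch only (keys and values taken from the defaults in
# this file, not from the actual config shipped with the Space):
#
#   model_path: models/qwen2.5-0.5b-instruct-q4_0.gguf
#   embed_model: sentence-transformers/all-MiniLM-L6-v2
#   faiss_dir: faiss_index
#   chunk_size: 1000
#   chunk_overlap: 200
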
class ModelLoader:
    def __init__(
        self,
        model_path: Optional[str] = None,
        embed_model_name: Optional[str] = None,
        faiss_dir: Optional[str] = None,
        n_ctx: int = 2048,  # 0.5B models cannot handle a 4k context well
    ):
        self.model_path = Path(model_path or _CFG.get("model_path"))
        self.embed_model_name = embed_model_name or _CFG.get("embed_model")
        self.faiss_dir = Path(faiss_dir or _CFG.get("faiss_dir"))
        self.n_ctx = n_ctx

        # Handles populated by _load_all(); each stays None when the
        # corresponding dependency or model file is missing.
        self.llm = None
        self.embedder = None
        self.index = None
        self.documents: List[str] = []

        self._load_all()

    def _load_llm(self):
        if not self.model_path.exists():
            print(f"[WARN] LLM model not found: {self.model_path}")
            return None
        if Llama is None:
            print("[WARN] llama-cpp-python missing.")
            return None
        print(f"[INFO] Loading local LLM: {self.model_path}")
        return Llama(
            model_path=str(self.model_path),
            n_ctx=self.n_ctx,
            n_threads=4,
            n_gpu_layers=0,  # CPU-only inference
        )

    def _load_embedder(self):
        if SentenceTransformer is None:
            print("[WARN] sentence-transformers missing.")
            return None
        print(f"[INFO] Loading embedder: {self.embed_model_name}")
        return SentenceTransformer(self.embed_model_name)

    def _load_all(self):
        self.llm = self._load_llm()
        self.embedder = self._load_embedder()
        # The FAISS index is not built or loaded here; it stays None until
        # one is attached.
        self.index = None

    def embed(self, texts: List[str]):
        if self.embedder is None:
            raise RuntimeError("Embedder is missing.")
        return self.embedder.encode(texts, show_progress_bar=False)

    def chat(self, prompt: str, max_tokens: int = 256) -> str:
        if not self.llm:
            return "[Local LLM missing - place a .gguf model inside models/]"
        out = self.llm(
            prompt,
            max_tokens=max_tokens,
            temperature=0.7,
            top_p=0.9,
            echo=False,
        )
        # llama-cpp-python returns a completion dict; fall back to its raw
        # string form if the expected shape is not there.
        try:
            return out["choices"][0]["text"].strip()
        except Exception:
            return str(out)

    def answer_from_rag(self, query: str, max_tokens: int = 256) -> str:
        # Thin wrapper: retrieval is expected to happen upstream, so this just
        # forwards the (already augmented) prompt to chat().
        return self.chat(query, max_tokens=max_tokens)
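

# Illustrative usage sketch (not part of the original module): a minimal smoke
# test showing how ModelLoader is meant to be driven. The query text below is
# an assumption for demonstration only; the real app wires this class into its
# own RAG pipeline.
if __name__ == "__main__":
    # Falls back to configs/default.yaml or the built-in defaults above.
    loader = ModelLoader()

    # Embedding only works when sentence-transformers is installed.
    if loader.embedder is not None:
        vectors = loader.embed(["hello world", "local RAG demo"])
        print(f"[INFO] Embedded {len(vectors)} texts, dim={len(vectors[0])}")

    # chat() degrades to a placeholder string when no .gguf model is present.
    print(loader.chat("Say hello in one short sentence.", max_tokens=32))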