File size: 3,149 Bytes
5366fc0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
from pathlib import Path
from typing import List, Optional
import yaml
import numpy as np

# Optional heavy dependencies. Each import is guarded so this module still
# imports on machines where the package (or its native libraries) is absent;
# downstream code checks these names for None and degrades with a warning.
try:
    from llama_cpp import Llama
except Exception:
    # NOTE(review): broad catch — presumably intended to also swallow
    # non-ImportError failures (e.g. shared-library load errors); confirm.
    Llama = None

try:
    from sentence_transformers import SentenceTransformer
except Exception:
    # NOTE(review): same rationale as above — transitive imports (torch)
    # may raise more than ImportError; confirm before narrowing.
    SentenceTransformer = None


# Load config.
# configs/default.yaml is resolved relative to this file (three levels up
# from this module to the repo root).
CFG_PATH = Path(__file__).resolve().parent.parent.parent / "configs" / "default.yaml"

# Built-in fallback, used when the config file is missing or empty.
_DEFAULT_CFG = {
    "model_path": "models/qwen2.5-0.5b-instruct-q4_0.gguf",
    "embed_model": "sentence-transformers/all-MiniLM-L6-v2",
    "faiss_dir": "faiss_index",
    "chunk_size": 1000,
    "chunk_overlap": 200,
}

if CFG_PATH.exists():
    with open(CFG_PATH, "r", encoding="utf-8") as f:
        # safe_load returns None for an empty file; fall back to the
        # defaults then, so _CFG is always a dict and .get() is safe.
        _CFG = yaml.safe_load(f) or _DEFAULT_CFG
else:
    _CFG = _DEFAULT_CFG


class ModelLoader:
    """Owns the local LLM, the sentence embedder, and the retrieval index.

    All resources are loaded eagerly on construction. Missing model files or
    missing optional libraries do NOT raise: the corresponding attribute is
    left as ``None`` (with a warning printed), so callers can run in a
    degraded, LLM-less mode.
    """

    def __init__(
        self,
        model_path: Optional[str] = None,
        embed_model_name: Optional[str] = None,
        faiss_dir: Optional[str] = None,
        n_ctx: int = 2048,  # 0.5B models cannot handle 4k context well
    ):
        """Resolve paths/names (falling back to the module config) and load.

        Args:
            model_path: Path to a .gguf model; defaults to _CFG["model_path"].
            embed_model_name: sentence-transformers model name; defaults to
                _CFG["embed_model"].
            faiss_dir: Directory for the FAISS index; defaults to
                _CFG["faiss_dir"].
            n_ctx: Context window passed to llama-cpp.
        """
        self.model_path = Path(model_path or _CFG.get("model_path"))
        self.embed_model_name = embed_model_name or _CFG.get("embed_model")
        self.faiss_dir = Path(faiss_dir or _CFG.get("faiss_dir"))
        self.n_ctx = n_ctx

        # Filled in by _load_all(); any of these may stay None on failure.
        self.llm = None
        self.embedder = None
        self.index = None
        self.documents: List[str] = []

        self._load_all()

    def _load_llm(self):
        """Load the llama-cpp model, or return None (with a warning) if the
        model file or the llama-cpp-python package is unavailable."""
        if not self.model_path.exists():
            print(f"[WARN] LLM model not found: {self.model_path}")
            return None

        if Llama is None:
            print("[WARN] llama-cpp-python missing.")
            return None

        print(f"[INFO] Loading local LLM: {self.model_path}")

        return Llama(
            model_path=str(self.model_path),
            n_ctx=self.n_ctx,
            n_threads=4,      # conservative CPU default
            n_gpu_layers=0    # CPU-only inference
        )

    def _load_embedder(self):
        """Load the sentence-transformers embedder, or None if unavailable."""
        if SentenceTransformer is None:
            print("[WARN] sentence-transformers missing.")
            return None

        print(f"[INFO] Loading embedder: {self.embed_model_name}")
        return SentenceTransformer(self.embed_model_name)

    def _load_all(self):
        """Load every resource; the FAISS index is not built here (stays None)."""
        self.llm = self._load_llm()
        self.embedder = self._load_embedder()
        self.index = None

    def embed(self, texts: List[str]):
        """Embed *texts* with the sentence embedder.

        Raises:
            RuntimeError: If the embedder failed to load.
        """
        if self.embedder is None:
            raise RuntimeError("Embedder is missing.")
        return self.embedder.encode(texts, show_progress_bar=False)

    def chat(self, prompt: str, max_tokens: int = 256) -> str:
        """Run a single completion against the local LLM.

        Returns a placeholder string (not an exception) when no LLM is
        loaded, so the caller's UI keeps working.
        """
        if not self.llm:
            return "[Local LLM missing — place a .gguf model inside models/]"

        out = self.llm(
            prompt,
            max_tokens=max_tokens,
            temperature=0.7,
            top_p=0.9,
            echo=False
        )

        try:
            return out["choices"][0]["text"].strip()
        except (KeyError, IndexError, TypeError, AttributeError):
            # Unexpected response shape from llama-cpp — surface it raw
            # rather than crash (narrowed from a blanket Exception catch).
            return str(out)

    def answer_from_rag(self, query: str, max_tokens: int = 256) -> str:
        """Answer *query*. Currently a plain passthrough to chat() —
        no retrieval is performed yet (self.index is never built)."""
        return self.chat(query, max_tokens=max_tokens)