LeonardoMdSA committed on
Commit
5366fc0
·
1 Parent(s): 92e00f5

push to Spaces

Browse files
multi_doc_chat/model_loader.py CHANGED
@@ -1,113 +1,106 @@
1
- """
2
- multi_doc_chat/model_loader.py
3
- LLM + embedder loader (local only)
4
- """
5
-
6
- from pathlib import Path
7
- from typing import List, Optional
8
- import yaml
9
- import numpy as np
10
-
11
- try:
12
- from llama_cpp import Llama
13
- except Exception:
14
- Llama = None
15
-
16
- try:
17
- from sentence_transformers import SentenceTransformer
18
- except Exception:
19
- SentenceTransformer = None
20
-
21
-
22
- # load default config
23
- CFG_PATH = Path(__file__).resolve().parent.parent.parent / "configs" / "default.yaml"
24
- if CFG_PATH.exists():
25
- with open(CFG_PATH, "r") as f:
26
- _CFG = yaml.safe_load(f)
27
- else:
28
- _CFG = {
29
- "model_path": "models/qwen2.5-1.5b-instruct-q4_0.gguf",
30
- "embed_model": "sentence-transformers/all-MiniLM-L6-v2",
31
- "faiss_dir": "faiss_index",
32
- "chunk_size": 1000,
33
- "chunk_overlap": 200
34
- }
35
-
36
-
37
- class ModelLoader:
38
- def __init__(
39
- self,
40
- model_path: Optional[str] = None,
41
- embed_model_name: Optional[str] = None,
42
- faiss_dir: Optional[str] = None,
43
- n_ctx: int = 4096,
44
- ):
45
- self.model_path = Path(model_path or _CFG.get("model_path"))
46
- self.embed_model_name = embed_model_name or _CFG.get("embed_model")
47
- self.faiss_dir = Path(faiss_dir or _CFG.get("faiss_dir"))
48
- self.n_ctx = n_ctx
49
-
50
- self.llm = None
51
- self.embedder = None
52
- self.index = None
53
- self.documents: List[str] = []
54
-
55
- self._load_all()
56
-
57
- def _load_llm(self):
58
- if not self.model_path.exists():
59
- print(f"[WARN] LLM model not found: {self.model_path}")
60
- return None
61
-
62
- if Llama is None:
63
- print("[WARN] llama-cpp-python missing.")
64
- return None
65
-
66
- print(f"[INFO] Loading local LLM: {self.model_path}")
67
-
68
- return Llama(
69
- model_path=str(self.model_path),
70
- n_ctx=self.n_ctx,
71
- n_threads=4,
72
- n_gpu_layers=0
73
- )
74
-
75
- def _load_embedder(self):
76
- if SentenceTransformer is None:
77
- print("[WARN] sentence-transformers missing.")
78
- return None
79
-
80
- print(f"[INFO] Loading embedder: {self.embed_model_name}")
81
- return SentenceTransformer(self.embed_model_name)
82
-
83
- def _load_all(self):
84
- self.llm = self._load_llm()
85
- self.embedder = self._load_embedder()
86
- self.index = None
87
-
88
- def embed(self, texts: List[str]):
89
- if self.embedder is None:
90
- raise RuntimeError("Embedder is missing.")
91
- return self.embedder.encode(texts, show_progress_bar=False)
92
-
93
- def chat(self, prompt: str, max_tokens: int = 256) -> str:
94
- if not self.llm:
95
- return "[Local LLM missing — place a .gguf model inside models/]"
96
-
97
- # CORRECT llama-cpp-python call
98
- out = self.llm(
99
- prompt,
100
- max_tokens=max_tokens,
101
- temperature=0.7,
102
- top_p=0.9,
103
- echo=False
104
- )
105
-
106
- try:
107
- return out["choices"][0]["text"].strip()
108
- except Exception:
109
- return str(out)
110
-
111
- def answer_from_rag(self, query: str, max_tokens: int = 256) -> str:
112
- # Currently just fallback; your RAGService inserts context
113
- return self.chat(query, max_tokens=max_tokens)
 
1
+ from pathlib import Path
2
+ from typing import List, Optional
3
+ import yaml
4
+ import numpy as np
5
+
6
+ try:
7
+ from llama_cpp import Llama
8
+ except Exception:
9
+ Llama = None
10
+
11
+ try:
12
+ from sentence_transformers import SentenceTransformer
13
+ except Exception:
14
+ SentenceTransformer = None
15
+
16
+
17
# Load config from configs/default.yaml, falling back to built-in defaults
# for any missing keys (and for the case where the file is absent entirely).
CFG_PATH = Path(__file__).resolve().parent.parent.parent / "configs" / "default.yaml"

# Built-in fallback values; individual keys missing from the YAML file are
# filled in from here so downstream Path(...) calls never receive None.
_DEFAULT_CFG = {
    "model_path": "models/qwen2.5-0.5b-instruct-q4_0.gguf",
    "embed_model": "sentence-transformers/all-MiniLM-L6-v2",
    "faiss_dir": "faiss_index",
    "chunk_size": 1000,
    "chunk_overlap": 200
}

_CFG = {}
if CFG_PATH.exists():
    with open(CFG_PATH, "r") as f:
        # safe_load returns None for an empty file — guard so _CFG stays a dict
        _CFG = yaml.safe_load(f) or {}
for _key, _value in _DEFAULT_CFG.items():
    _CFG.setdefault(_key, _value)
30
+
31
+
32
class ModelLoader:
    """Best-effort loader for the local GGUF LLM (llama-cpp) and the
    sentence-transformers embedder.

    Missing model files or missing optional dependencies are reported with a
    [WARN] message and leave the corresponding attribute as None instead of
    raising, so the app can still start in a degraded mode.
    """

    def __init__(
        self,
        model_path: Optional[str] = None,
        embed_model_name: Optional[str] = None,
        faiss_dir: Optional[str] = None,
        n_ctx: int = 2048,  # 0.5B models cannot handle 4k context well
    ):
        """Resolve paths/names from the arguments (falling back to _CFG)
        and eagerly load the LLM and the embedder.

        :param model_path: path to a local .gguf model file.
        :param embed_model_name: sentence-transformers model identifier.
        :param faiss_dir: directory that holds the FAISS index files.
        :param n_ctx: llama.cpp context window size in tokens.
        """
        self.model_path = Path(model_path or _CFG.get("model_path"))
        self.embed_model_name = embed_model_name or _CFG.get("embed_model")
        self.faiss_dir = Path(faiss_dir or _CFG.get("faiss_dir"))
        self.n_ctx = n_ctx

        self.llm = None       # llama_cpp.Llama instance, or None if unavailable
        self.embedder = None  # SentenceTransformer instance, or None if unavailable
        self.index = None     # FAISS index; attached later by the RAG layer
        self.documents: List[str] = []

        self._load_all()

    def _load_llm(self):
        """Load the GGUF model via llama-cpp; return None when unavailable."""
        if not self.model_path.exists():
            print(f"[WARN] LLM model not found: {self.model_path}")
            return None

        if Llama is None:
            print("[WARN] llama-cpp-python missing.")
            return None

        print(f"[INFO] Loading local LLM: {self.model_path}")

        return Llama(
            model_path=str(self.model_path),
            n_ctx=self.n_ctx,
            n_threads=4,     # CPU-only execution
            n_gpu_layers=0   # no GPU offload
        )

    def _load_embedder(self):
        """Load the sentence-transformers embedder; return None when unavailable."""
        if SentenceTransformer is None:
            print("[WARN] sentence-transformers missing.")
            return None

        print(f"[INFO] Loading embedder: {self.embed_model_name}")
        return SentenceTransformer(self.embed_model_name)

    def _load_all(self):
        """Populate llm/embedder; the index stays None until built externally."""
        self.llm = self._load_llm()
        self.embedder = self._load_embedder()
        self.index = None

    def embed(self, texts: List[str]):
        """Encode *texts* into embedding vectors.

        :raises RuntimeError: if the embedder failed to load.
        """
        if self.embedder is None:
            raise RuntimeError("Embedder is missing.")
        return self.embedder.encode(texts, show_progress_bar=False)

    def chat(self, prompt: str, max_tokens: int = 256) -> str:
        """Run a completion against the local LLM and return the stripped text.

        Returns a placeholder message when no LLM is loaded.
        """
        if not self.llm:
            return "[Local LLM missing — place a .gguf model inside models/]"

        out = self.llm(
            prompt,
            max_tokens=max_tokens,
            temperature=0.7,
            top_p=0.9,
            echo=False
        )

        # Only fall back to the raw repr when the completion payload does not
        # have the expected {"choices": [{"text": ...}]} shape; a bare
        # `except Exception` here would also mask unrelated bugs.
        try:
            return out["choices"][0]["text"].strip()
        except (KeyError, IndexError, TypeError):
            return str(out)

    def answer_from_rag(self, query: str, max_tokens: int = 256) -> str:
        """Fallback RAG answer; the RAGService inserts context into *query* upstream."""
        return self.chat(query, max_tokens=max_tokens)
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,12 +1,12 @@
1
  fastapi
2
  uvicorn[standard]
3
- sentence-transformers==2.2.2
4
  numpy
5
  tqdm
6
  requests
7
  PyPDF2
8
  PyYAML
9
  faiss-cpu
10
- llama-cpp-python==0.1.62
11
  pytest
12
  python-multipart
 
1
  fastapi
2
  uvicorn[standard]
3
+ sentence-transformers
4
  numpy
5
  tqdm
6
  requests
7
  PyPDF2
8
  PyYAML
9
  faiss-cpu
10
+ llama-cpp-python==0.2.74
11
  pytest
12
  python-multipart
scripts/download_models.py CHANGED
@@ -1,39 +1,39 @@
1
- from pathlib import Path
2
- import requests
3
- from tqdm import tqdm
4
-
5
- MODELS_DIR = Path("models")
6
- MODELS_DIR.mkdir(exist_ok=True)
7
-
8
- MODEL_LIST = [
9
- {
10
- "name": "qwen2.5-1.5b-instruct-q4_0",
11
- "filename": "qwen2.5-1.5b-instruct-q4_0.gguf",
12
- "url": "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q4_0.gguf"
13
- }
14
- ]
15
-
16
- def download_file(url: str, dest: Path):
17
- if dest.exists():
18
- return
19
- resp = requests.get(url, stream=True)
20
- content_type = resp.headers.get("content-type", "")
21
- if "text/html" in content_type:
22
- raise ValueError(f"URL returned HTML, not a model file: {url}")
23
- total = int(resp.headers.get("content-length", 0))
24
- with open(dest, "wb") as f, tqdm(total=total, unit="B", unit_scale=True, desc=dest.name) as bar:
25
- for chunk in resp.iter_content(chunk_size=1024*1024):
26
- if chunk:
27
- f.write(chunk)
28
- bar.update(len(chunk))
29
-
30
- def main():
31
- for m in MODEL_LIST:
32
- dest = MODELS_DIR / m["filename"]
33
- try:
34
- download_file(m["url"], dest)
35
- except Exception as e:
36
- print(f"Failed to download {m['name']}: {e}")
37
-
38
- if __name__ == "__main__":
39
- main()
 
1
+ from pathlib import Path
2
+ import requests
3
+ from tqdm import tqdm
4
+
5
# Directory where downloaded model files land; created on import if absent.
MODELS_DIR = Path("models")
MODELS_DIR.mkdir(exist_ok=True)

# Models to fetch: each entry names the artifact, its on-disk filename,
# and the Hugging Face URL it is downloaded from.
MODEL_LIST = [
    {
        "name": "qwen2.5-0.5b-instruct-q4_0",
        "filename": "qwen2.5-0.5b-instruct-q4_0.gguf",
        "url": "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5-0.5b-instruct-q4_0.gguf",
    },
]
15
+
16
def download_file(url: str, dest: Path):
    """Stream *url* into *dest* with a tqdm progress bar.

    Skips the download when *dest* already exists. Raises for HTTP error
    statuses and for URLs that serve an HTML page instead of a model file;
    a partially written file is removed on failure so a rerun retries it.
    """
    if dest.exists():
        return
    # timeout: never hang forever on a stalled connection; the context
    # manager guarantees the streamed connection is released.
    with requests.get(url, stream=True, timeout=60) as resp:
        # Fail fast on 404/403 etc. instead of saving the error body to disk.
        resp.raise_for_status()
        content_type = resp.headers.get("content-type", "")
        if "text/html" in content_type:
            raise ValueError(f"URL returned HTML, not a model file: {url}")
        total = int(resp.headers.get("content-length", 0))
        try:
            with open(dest, "wb") as f, tqdm(total=total, unit="B", unit_scale=True, desc=dest.name) as bar:
                for chunk in resp.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        f.write(chunk)
                        bar.update(len(chunk))
        except BaseException:
            # Remove the partial file: otherwise the dest.exists() guard
            # above would skip it forever on the next run.
            dest.unlink(missing_ok=True)
            raise
29
+
30
def main():
    """Download every entry in MODEL_LIST, reporting failures per model."""
    for entry in MODEL_LIST:
        target = MODELS_DIR / entry["filename"]
        try:
            download_file(entry["url"], target)
        except Exception as e:
            # Keep going: one failed model should not abort the others.
            print(f"Failed to download {entry['name']}: {e}")

if __name__ == "__main__":
    main()