LeonardoMdSA committed on
Commit
5366fc0
·
1 Parent(s): 92e00f5

push to Spaces

Browse files
multi_doc_chat/model_loader.py CHANGED
@@ -1,113 +1,106 @@
1
- """
2
- multi_doc_chat/model_loader.py
3
- LLM + embedder loader (local only)
4
- """
5
-
6
- from pathlib import Path
7
- from typing import List, Optional
8
- import yaml
9
- import numpy as np
10
-
11
- try:
12
- from llama_cpp import Llama
13
- except Exception:
14
- Llama = None
15
-
16
- try:
17
- from sentence_transformers import SentenceTransformer
18
- except Exception:
19
- SentenceTransformer = None
20
-
21
-
22
- # load default config
23
- CFG_PATH = Path(__file__).resolve().parent.parent.parent / "configs" / "default.yaml"
24
- if CFG_PATH.exists():
25
- with open(CFG_PATH, "r") as f:
26
- _CFG = yaml.safe_load(f)
27
- else:
28
- _CFG = {
29
- "model_path": "models/qwen2.5-1.5b-instruct-q4_0.gguf",
30
- "embed_model": "sentence-transformers/all-MiniLM-L6-v2",
31
- "faiss_dir": "faiss_index",
32
- "chunk_size": 1000,
33
- "chunk_overlap": 200
34
- }
35
-
36
-
37
- class ModelLoader:
38
- def __init__(
39
- self,
40
- model_path: Optional[str] = None,
41
- embed_model_name: Optional[str] = None,
42
- faiss_dir: Optional[str] = None,
43
- n_ctx: int = 4096,
44
- ):
45
- self.model_path = Path(model_path or _CFG.get("model_path"))
46
- self.embed_model_name = embed_model_name or _CFG.get("embed_model")
47
- self.faiss_dir = Path(faiss_dir or _CFG.get("faiss_dir"))
48
- self.n_ctx = n_ctx
49
-
50
- self.llm = None
51
- self.embedder = None
52
- self.index = None
53
- self.documents: List[str] = []
54
-
55
- self._load_all()
56
-
57
- def _load_llm(self):
58
- if not self.model_path.exists():
59
- print(f"[WARN] LLM model not found: {self.model_path}")
60
- return None
61
-
62
- if Llama is None:
63
- print("[WARN] llama-cpp-python missing.")
64
- return None
65
-
66
- print(f"[INFO] Loading local LLM: {self.model_path}")
67
-
68
- return Llama(
69
- model_path=str(self.model_path),
70
- n_ctx=self.n_ctx,
71
- n_threads=4,
72
- n_gpu_layers=0
73
- )
74
-
75
- def _load_embedder(self):
76
- if SentenceTransformer is None:
77
- print("[WARN] sentence-transformers missing.")
78
- return None
79
-
80
- print(f"[INFO] Loading embedder: {self.embed_model_name}")
81
- return SentenceTransformer(self.embed_model_name)
82
-
83
- def _load_all(self):
84
- self.llm = self._load_llm()
85
- self.embedder = self._load_embedder()
86
- self.index = None
87
-
88
- def embed(self, texts: List[str]):
89
- if self.embedder is None:
90
- raise RuntimeError("Embedder is missing.")
91
- return self.embedder.encode(texts, show_progress_bar=False)
92
-
93
- def chat(self, prompt: str, max_tokens: int = 256) -> str:
94
- if not self.llm:
95
- return "[Local LLM missing — place a .gguf model inside models/]"
96
-
97
- # CORRECT llama-cpp-python call
98
- out = self.llm(
99
- prompt,
100
- max_tokens=max_tokens,
101
- temperature=0.7,
102
- top_p=0.9,
103
- echo=False
104
- )
105
-
106
- try:
107
- return out["choices"][0]["text"].strip()
108
- except Exception:
109
- return str(out)
110
-
111
- def answer_from_rag(self, query: str, max_tokens: int = 256) -> str:
112
- # Currently just fallback; your RAGService inserts context
113
- return self.chat(query, max_tokens=max_tokens)
 
1
+ from pathlib import Path
2
+ from typing import List, Optional
3
+ import yaml
4
+ import numpy as np
5
+
6
+ try:
7
+ from llama_cpp import Llama
8
+ except Exception:
9
+ Llama = None
10
+
11
+ try:
12
+ from sentence_transformers import SentenceTransformer
13
+ except Exception:
14
+ SentenceTransformer = None
15
+
16
+
17
# Load config from configs/default.yaml, falling back to built-in defaults
# for any missing keys (and for the case where the file is absent entirely).
CFG_PATH = Path(__file__).resolve().parent.parent.parent / "configs" / "default.yaml"

# Built-in fallback values; individual keys missing from the YAML file are
# filled in from here so downstream Path(...) calls never receive None.
_DEFAULT_CFG = {
    "model_path": "models/qwen2.5-0.5b-instruct-q4_0.gguf",
    "embed_model": "sentence-transformers/all-MiniLM-L6-v2",
    "faiss_dir": "faiss_index",
    "chunk_size": 1000,
    "chunk_overlap": 200
}

_CFG = {}
if CFG_PATH.exists():
    with open(CFG_PATH, "r") as f:
        # safe_load returns None for an empty file — guard so _CFG stays a dict
        _CFG = yaml.safe_load(f) or {}
for _key, _value in _DEFAULT_CFG.items():
    _CFG.setdefault(_key, _value)
30
+
31
+
32
class ModelLoader:
    """Best-effort loader for the local GGUF LLM (llama-cpp) and the
    sentence-transformers embedder.

    Missing model files or missing optional dependencies are reported with a
    [WARN] message and leave the corresponding attribute as None instead of
    raising, so the app can still start in a degraded mode.
    """

    def __init__(
        self,
        model_path: Optional[str] = None,
        embed_model_name: Optional[str] = None,
        faiss_dir: Optional[str] = None,
        n_ctx: int = 2048,  # 0.5B models cannot handle 4k context well
    ):
        """Resolve paths/names from the arguments (falling back to _CFG)
        and eagerly load the LLM and the embedder.

        :param model_path: path to a local .gguf model file.
        :param embed_model_name: sentence-transformers model identifier.
        :param faiss_dir: directory that holds the FAISS index files.
        :param n_ctx: llama.cpp context window size in tokens.
        """
        self.model_path = Path(model_path or _CFG.get("model_path"))
        self.embed_model_name = embed_model_name or _CFG.get("embed_model")
        self.faiss_dir = Path(faiss_dir or _CFG.get("faiss_dir"))
        self.n_ctx = n_ctx

        self.llm = None       # llama_cpp.Llama instance, or None if unavailable
        self.embedder = None  # SentenceTransformer instance, or None if unavailable
        self.index = None     # FAISS index; attached later by the RAG layer
        self.documents: List[str] = []

        self._load_all()

    def _load_llm(self):
        """Load the GGUF model via llama-cpp; return None when unavailable."""
        if not self.model_path.exists():
            print(f"[WARN] LLM model not found: {self.model_path}")
            return None

        if Llama is None:
            print("[WARN] llama-cpp-python missing.")
            return None

        print(f"[INFO] Loading local LLM: {self.model_path}")

        return Llama(
            model_path=str(self.model_path),
            n_ctx=self.n_ctx,
            n_threads=4,     # CPU-only execution
            n_gpu_layers=0   # no GPU offload
        )

    def _load_embedder(self):
        """Load the sentence-transformers embedder; return None when unavailable."""
        if SentenceTransformer is None:
            print("[WARN] sentence-transformers missing.")
            return None

        print(f"[INFO] Loading embedder: {self.embed_model_name}")
        return SentenceTransformer(self.embed_model_name)

    def _load_all(self):
        """Populate llm/embedder; the index stays None until built externally."""
        self.llm = self._load_llm()
        self.embedder = self._load_embedder()
        self.index = None

    def embed(self, texts: List[str]):
        """Encode *texts* into embedding vectors.

        :raises RuntimeError: if the embedder failed to load.
        """
        if self.embedder is None:
            raise RuntimeError("Embedder is missing.")
        return self.embedder.encode(texts, show_progress_bar=False)

    def chat(self, prompt: str, max_tokens: int = 256) -> str:
        """Run a completion against the local LLM and return the stripped text.

        Returns a placeholder message when no LLM is loaded.
        """
        if not self.llm:
            return "[Local LLM missing — place a .gguf model inside models/]"

        out = self.llm(
            prompt,
            max_tokens=max_tokens,
            temperature=0.7,
            top_p=0.9,
            echo=False
        )

        # Only fall back to the raw repr when the completion payload does not
        # have the expected {"choices": [{"text": ...}]} shape; a bare
        # `except Exception` here would also mask unrelated bugs.
        try:
            return out["choices"][0]["text"].strip()
        except (KeyError, IndexError, TypeError):
            return str(out)

    def answer_from_rag(self, query: str, max_tokens: int = 256) -> str:
        """Fallback RAG answer; the RAGService inserts context into *query* upstream."""
        return self.chat(query, max_tokens=max_tokens)
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,12 +1,12 @@
1
  fastapi
2
  uvicorn[standard]
3
- sentence-transformers==2.2.2
4
  numpy
5
  tqdm
6
  requests
7
  PyPDF2
8
  PyYAML
9
  faiss-cpu
10
- llama-cpp-python==0.1.62
11
  pytest
12
  python-multipart
 
1
  fastapi
2
  uvicorn[standard]
3
+ sentence-transformers
4
  numpy
5
  tqdm
6
  requests
7
  PyPDF2
8
  PyYAML
9
  faiss-cpu
10
+ llama-cpp-python==0.2.74
11
  pytest
12
  python-multipart
scripts/download_models.py CHANGED
@@ -1,39 +1,39 @@
1
- from pathlib import Path
2
- import requests
3
- from tqdm import tqdm
4
-
5
- MODELS_DIR = Path("models")
6
- MODELS_DIR.mkdir(exist_ok=True)
7
-
8
- MODEL_LIST = [
9
- {
10
- "name": "qwen2.5-1.5b-instruct-q4_0",
11
- "filename": "qwen2.5-1.5b-instruct-q4_0.gguf",
12
- "url": "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q4_0.gguf"
13
- }
14
- ]
15
-
16
- def download_file(url: str, dest: Path):
17
- if dest.exists():
18
- return
19
- resp = requests.get(url, stream=True)
20
- content_type = resp.headers.get("content-type", "")
21
- if "text/html" in content_type:
22
- raise ValueError(f"URL returned HTML, not a model file: {url}")
23
- total = int(resp.headers.get("content-length", 0))
24
- with open(dest, "wb") as f, tqdm(total=total, unit="B", unit_scale=True, desc=dest.name) as bar:
25
- for chunk in resp.iter_content(chunk_size=1024*1024):
26
- if chunk:
27
- f.write(chunk)
28
- bar.update(len(chunk))
29
-
30
- def main():
31
- for m in MODEL_LIST:
32
- dest = MODELS_DIR / m["filename"]
33
- try:
34
- download_file(m["url"], dest)
35
- except Exception as e:
36
- print(f"Failed to download {m['name']}: {e}")
37
-
38
- if __name__ == "__main__":
39
- main()
 
1
+ from pathlib import Path
2
+ import requests
3
+ from tqdm import tqdm
4
+
5
# Directory where downloaded model files land; created on import if absent.
MODELS_DIR = Path("models")
MODELS_DIR.mkdir(exist_ok=True)

# Models to fetch: each entry names the artifact, its on-disk filename,
# and the Hugging Face URL it is downloaded from.
MODEL_LIST = [
    {
        "name": "qwen2.5-0.5b-instruct-q4_0",
        "filename": "qwen2.5-0.5b-instruct-q4_0.gguf",
        "url": "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5-0.5b-instruct-q4_0.gguf",
    },
]
15
+
16
def download_file(url: str, dest: Path):
    """Stream *url* into *dest* with a tqdm progress bar.

    Skips the download when *dest* already exists. Raises for HTTP error
    statuses and for URLs that serve an HTML page instead of a model file;
    a partially written file is removed on failure so a rerun retries it.
    """
    if dest.exists():
        return
    # timeout: never hang forever on a stalled connection; the context
    # manager guarantees the streamed connection is released.
    with requests.get(url, stream=True, timeout=60) as resp:
        # Fail fast on 404/403 etc. instead of saving the error body to disk.
        resp.raise_for_status()
        content_type = resp.headers.get("content-type", "")
        if "text/html" in content_type:
            raise ValueError(f"URL returned HTML, not a model file: {url}")
        total = int(resp.headers.get("content-length", 0))
        try:
            with open(dest, "wb") as f, tqdm(total=total, unit="B", unit_scale=True, desc=dest.name) as bar:
                for chunk in resp.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        f.write(chunk)
                        bar.update(len(chunk))
        except BaseException:
            # Remove the partial file: otherwise the dest.exists() guard
            # above would skip it forever on the next run.
            dest.unlink(missing_ok=True)
            raise
29
+
30
def main():
    """Download every entry in MODEL_LIST, reporting failures per model."""
    for entry in MODEL_LIST:
        target = MODELS_DIR / entry["filename"]
        try:
            download_file(entry["url"], target)
        except Exception as e:
            # Keep going: one failed model should not abort the others.
            print(f"Failed to download {entry['name']}: {e}")

if __name__ == "__main__":
    main()