Ryanfafa committed on
Commit
4b89f17
·
verified ·
1 Parent(s): 746cdfd

Update rag_engine.py

Browse files
Files changed (1) hide show
  1. rag_engine.py +45 -40
rag_engine.py CHANGED
@@ -2,13 +2,14 @@
2
  RAG Engine
3
  Embeddings : sentence-transformers/all-MiniLM-L6-v2
4
  Vector DB : ChromaDB (local)
5
- LLM : HuggingFace InferenceClient (official library, auto-routing)
6
- Chunking : Recursive character splitter with overlap
7
  """
8
 
9
  import os
10
  import re
 
11
  import tempfile
 
12
  from typing import Tuple, List
13
 
14
  from chromadb.config import Settings
@@ -25,18 +26,18 @@ TOP_K = 4
25
  COLLECTION_NAME = "docmind_collection"
26
  CHROMA_DIR = "./chroma_db"
27
 
28
- # Models to try in order (all free with HF token)
 
 
 
29
  CANDIDATE_MODELS = [
30
  "mistralai/Mistral-7B-Instruct-v0.3",
31
  "microsoft/Phi-3.5-mini-instruct",
32
  "HuggingFaceH4/zephyr-7b-beta",
33
- "google/gemma-2-2b-it",
34
  ]
35
 
36
 
37
  class RAGEngine:
38
- """Full RAG pipeline: ingest, embed, store, retrieve, generate."""
39
-
40
  def __init__(self):
41
  self._embeddings = None
42
  self._vectorstore = None
@@ -67,12 +68,9 @@ class RAGEngine:
67
  suffix = get_suffix(name or path)
68
  loader = PyPDFLoader(path) if suffix == ".pdf" else TextLoader(path, encoding="utf-8")
69
  raw_docs = loader.load()
70
-
71
  for doc in raw_docs:
72
  doc.metadata["source"] = name or os.path.basename(path)
73
-
74
  chunks = self._splitter.split_documents(raw_docs)
75
-
76
  self._vectorstore = Chroma.from_documents(
77
  documents=chunks,
78
  embedding=self.embeddings,
@@ -85,7 +83,6 @@ class RAGEngine:
85
  def query(self, question: str) -> Tuple[str, List[str]]:
86
  if self._vectorstore is None:
87
  return "Please upload a document first.", []
88
-
89
  retriever = self._vectorstore.as_retriever(
90
  search_type="mmr",
91
  search_kwargs={"k": TOP_K, "fetch_k": TOP_K * 3},
@@ -100,20 +97,13 @@ class RAGEngine:
100
 
101
  def _generate(self, question: str, context: str) -> str:
102
  hf_token = os.environ.get("HF_TOKEN", "")
103
-
104
  if not hf_token:
105
- excerpt = extract_best(question, context)
106
  return (
107
- "HF_TOKEN not set. Add it as a Secret in your Space Settings.\n\n"
108
- "Best matching excerpt:\n\n" + excerpt
 
109
  )
110
 
111
- # Use the official huggingface_hub InferenceClient
112
- try:
113
- from huggingface_hub import InferenceClient
114
- except ImportError:
115
- return "huggingface_hub not installed. Check requirements.txt."
116
-
117
  system_prompt = (
118
  "You are DocMind, an expert document analyst. "
119
  "Answer using ONLY the provided document context. "
@@ -122,35 +112,50 @@ class RAGEngine:
122
  )
123
  user_message = "Document context:\n" + context + "\n\nQuestion: " + question
124
 
 
 
 
 
 
125
  last_error = ""
126
  for model_id in CANDIDATE_MODELS:
 
 
 
 
 
 
 
 
 
 
 
 
127
  try:
128
- client = InferenceClient(
129
- model=model_id,
130
- token=hf_token,
 
131
  timeout=60,
 
132
  )
133
- result = client.chat_completion(
134
- messages=[
135
- {"role": "system", "content": system_prompt},
136
- {"role": "user", "content": user_message},
137
- ],
138
- max_tokens=512,
139
- temperature=0.2,
140
- )
141
- answer = result.choices[0].message.content.strip()
142
- if answer:
143
- return answer
144
  except Exception as e:
145
  last_error = str(e)
146
  continue
147
 
148
- # All models failed — use extractive fallback
149
- excerpt = extract_best(question, context)
150
  return (
151
- "AI answer unavailable. Here is the most relevant part of your document:\n\n"
152
- + excerpt
153
- + "\n\n(Error: " + last_error + ")"
154
  )
155
 
156
 
@@ -170,4 +175,4 @@ def extract_best(question: str, context: str) -> str:
170
 
171
 
172
  def get_suffix(name: str) -> str:
173
- return os.path.splitext(name)[-1].lower() or ".txt"
 
2
  RAG Engine
3
  Embeddings : sentence-transformers/all-MiniLM-L6-v2
4
  Vector DB : ChromaDB (local)
5
+ LLM : HuggingFace Router API (direct requests, correct URL)
 
6
  """
7
 
8
  import os
9
  import re
10
+ import json
11
  import tempfile
12
+ import requests
13
  from typing import Tuple, List
14
 
15
  from chromadb.config import Settings
 
26
  COLLECTION_NAME = "docmind_collection"
27
  CHROMA_DIR = "./chroma_db"
28
 
29
+ # Correct router base (NOT api-inference.huggingface.co)
30
+ HF_ROUTER_BASE = "https://router.huggingface.co/hf-inference/models"
31
+
32
+ # Models to try in order
33
  CANDIDATE_MODELS = [
34
  "mistralai/Mistral-7B-Instruct-v0.3",
35
  "microsoft/Phi-3.5-mini-instruct",
36
  "HuggingFaceH4/zephyr-7b-beta",
 
37
  ]
38
 
39
 
40
  class RAGEngine:
 
 
41
  def __init__(self):
42
  self._embeddings = None
43
  self._vectorstore = None
 
68
  suffix = get_suffix(name or path)
69
  loader = PyPDFLoader(path) if suffix == ".pdf" else TextLoader(path, encoding="utf-8")
70
  raw_docs = loader.load()
 
71
  for doc in raw_docs:
72
  doc.metadata["source"] = name or os.path.basename(path)
 
73
  chunks = self._splitter.split_documents(raw_docs)
 
74
  self._vectorstore = Chroma.from_documents(
75
  documents=chunks,
76
  embedding=self.embeddings,
 
83
  def query(self, question: str) -> Tuple[str, List[str]]:
84
  if self._vectorstore is None:
85
  return "Please upload a document first.", []
 
86
  retriever = self._vectorstore.as_retriever(
87
  search_type="mmr",
88
  search_kwargs={"k": TOP_K, "fetch_k": TOP_K * 3},
 
97
 
98
  def _generate(self, question: str, context: str) -> str:
99
  hf_token = os.environ.get("HF_TOKEN", "")
 
100
  if not hf_token:
 
101
  return (
102
+ "HF_TOKEN not set.\n"
103
+ "Go to Space Settings -> Secrets -> add HF_TOKEN with your token from huggingface.co/settings/tokens\n\n"
104
+ "Best matching excerpt:\n\n" + extract_best(question, context)
105
  )
106
 
 
 
 
 
 
 
107
  system_prompt = (
108
  "You are DocMind, an expert document analyst. "
109
  "Answer using ONLY the provided document context. "
 
112
  )
113
  user_message = "Document context:\n" + context + "\n\nQuestion: " + question
114
 
115
+ headers = {
116
+ "Authorization": "Bearer " + hf_token,
117
+ "Content-Type": "application/json",
118
+ }
119
+
120
  last_error = ""
121
  for model_id in CANDIDATE_MODELS:
122
+ # Build URL directly - no library, no redirects
123
+ url = "{}/{}/v1/chat/completions".format(HF_ROUTER_BASE, model_id)
124
+ payload = {
125
+ "model": model_id,
126
+ "messages": [
127
+ {"role": "system", "content": system_prompt},
128
+ {"role": "user", "content": user_message},
129
+ ],
130
+ "max_tokens": 512,
131
+ "temperature": 0.2,
132
+ "stream": False,
133
+ }
134
  try:
135
+ resp = requests.post(
136
+ url,
137
+ headers=headers,
138
+ data=json.dumps(payload),
139
  timeout=60,
140
+ allow_redirects=False, # prevent redirect to old endpoint
141
  )
142
+ if resp.status_code == 200:
143
+ data = resp.json()
144
+ answer = data["choices"][0]["message"]["content"].strip()
145
+ if answer:
146
+ return answer
147
+ else:
148
+ last_error = "Model {} returned {}: {}".format(
149
+ model_id, resp.status_code, resp.text[:300]
150
+ )
 
 
151
  except Exception as e:
152
  last_error = str(e)
153
  continue
154
 
 
 
155
  return (
156
+ "AI answer unavailable. Most relevant excerpt:\n\n"
157
+ + extract_best(question, context)
158
+ + "\n\n(Last error: " + last_error + ")"
159
  )
160
 
161
 
 
175
 
176
 
177
  def get_suffix(name: str) -> str:
178
+ return os.path.splitext(name)[-1].lower() or ".txt"