Shubham170793 committed (verified) · Commit e727c6a · Parent(s): 7b7e367

Update src/qa.py

Files changed (1)
  1. src/qa.py +26 -37
src/qa.py CHANGED
@@ -1,5 +1,5 @@
 """
-qa.py — GPT-4o (SAP Gen AI Hub) + ReRank Retrieval + PRF Query Expansion
+qa.py — GPT-4o (SAP Gen AI Hub) + ReRank Retrieval (Stable Strict)
 --------------------------------------------------
 ✅ Semantic retrieval (FAISS + cosine re-rank + neighbor fill)
 ✅ Bullet-aware similarity boost for procedural chunks
@@ -7,7 +7,7 @@ qa.py — GPT-4o (SAP Gen AI Hub) + ReRank Retrieval + PRF Query Expansion
 ✅ Smart factual mode (fast)
 ✅ Deep reasoning mode (ChatGPT-like)
 ✅ genai_generate() helper for suggestions
-✅ NEW: Lightweight PRF query expansion to fix synonym-based retrieval misses
+✅ Slightly softened Strict Prompt for better partial-context answers
 """
 
 import os
@@ -16,13 +16,12 @@ import json
 import pickle
 import hashlib
 import numpy as np
-from collections import Counter
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
 from gen_ai_hub.proxy.core.proxy_clients import get_proxy_client
 from gen_ai_hub.proxy.langchain.openai import ChatOpenAI
 
-print("✅ qa.py (GPT-4o via Gen AI Hub + Bullet-Aware Retrieval + PRF) loaded from:", __file__)
+print("✅ qa.py (GPT-4o via Gen AI Hub + Bullet-Aware Retrieval + Cache) loaded from:", __file__)
 
 # ==========================================================
 # 🧱 Permanent Embeddings Cache Directory
@@ -58,14 +57,17 @@ os.environ.update({
 # 2️⃣ Embedding Model (E5-small-v2)
 # ==========================================================
 try:
-    _query_model = SentenceTransformer("intfloat/e5-small-v2", cache_folder=CACHE_DIR)
+    _query_model = SentenceTransformer(
+        "intfloat/e5-small-v2",
+        cache_folder=CACHE_DIR
+    )
     print("✅ Loaded embedding model: intfloat/e5-small-v2 (fast mode)")
 except Exception as e:
     print(f"⚠️ Embedding load failed ({e}), using MiniLM fallback")
     _query_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=CACHE_DIR)
 
 # ==========================================================
-# 3️⃣ GPT-4o via SAP Gen AI Hub — Lazy / On-demand initialization
+# 3️⃣ GPT-4o via SAP Gen AI Hub — Lazy Initialization
 # ==========================================================
 CRED_PATH = os.path.join(os.path.dirname(__file__), "GEN AI HUB PROXY.json")
 _chat_llm = None
@@ -101,8 +103,9 @@ def get_chat_llm(model_name: str = "gpt-4o", temperature: float = 0.3, max_token
         _chat_llm = None
         raise
 
+
 # ==========================================================
-# 4️⃣ Embedding Generator (batch-optimized)
+# 4️⃣ Embedding Generator (Batch-Optimized)
 # ==========================================================
 def embed_chunks(chunks, batch_size: int = 32):
     if not chunks:
@@ -162,8 +165,9 @@ def cache_embeddings(file_name: str, chunks, embed_func, chunk_size: int = None,
     _clean_old_caches(base_name, keep_latest=5)
     return embeddings
 
+
 # ==========================================================
-# 6️⃣ Prompt Templates
+# 6️⃣ Prompt Templates (Improved Strict)
 # ==========================================================
 STRICT_PROMPT = (
     "You are an enterprise documentation assistant.\n"
@@ -172,8 +176,8 @@ STRICT_PROMPT = (
     "When the answer focuses on a single concept, definition, or explanation, write it as a clear and compact paragraph.\n"
     "Keep the tone professional and concise. Do not invent facts outside the provided content.\n"
     "Do not mention or refer to internal elements such as 'chunks', 'chunk numbers', 'passages', or 'sections of the document'.\n"
-    "If the answer cannot be found directly but there are partial clues, summarize those clues briefly starting with 'Based on the available information,'.\n"
-    "If nothing at all in the CONTEXT relates to the question, reply exactly:\n"
+    "If only partial or indirect clues are available (e.g., related words, hints, or contextual evidence), infer cautiously and start the response with 'Based on the available information,'.\n"
+    "If absolutely nothing in the CONTEXT relates to the question, reply exactly:\n"
     "'I don't know based on the provided document.'\n\n"
     "Context:\n{context}\n\nQuestion: {query}\nAnswer:"
 )
@@ -183,36 +187,12 @@ REASONING_PROMPT = (
     "Think step by step and synthesize information even if scattered across chunks.\n"
     "Base your answer primarily on the CONTEXT, but if multiple partial clues exist, combine them logically.\n"
     "You may fill reasonable gaps with general knowledge to form a complete answer.\n"
-    "Do not mention or refer to internal elements such as 'chunks', 'chunk numbers', 'passages', or 'sections of the document'.\n"
+    "Do not mention or refer to internal elements such as 'chunks', 'chunk numbers', or 'sections of the document'.\n"
     "If absolutely nothing in the document relates, say exactly:\n"
    "'I don't know based on the provided document.'\n\n"
     "Context:\n{context}\n\nQuestion: {query}\nLet's reason step-by-step:\nAnswer:"
 )
 
-# ==========================================================
-# 🔹 NEW: Lightweight PRF Query Expansion
-# ==========================================================
-def expand_query_embedding(query, model, index, chunks, topN=40, alpha=0.75):
-    """
-    Expands the query embedding slightly using top candidate chunks (PRF-style).
-    Helps when query wording differs from document phrasing.
-    """
-    try:
-        q_emb = model.encode([f"query: {query}"], convert_to_numpy=True, normalize_embeddings=True)[0]
-        D, I = index.search(np.array([q_emb]).astype("float32"), topN)
-        texts = " ".join(chunks[i] for i in I[0] if i >= 0)
-        words = re.findall(r"[A-Za-z]{4,}", texts)
-        common = [w for w, _ in Counter(words).most_common(6) if w.lower() not in query.lower()]
-        if not common:
-            return q_emb
-        e_emb = model.encode([f"passage: {' '.join(common)}"], convert_to_numpy=True, normalize_embeddings=True)[0]
-        combined = alpha * q_emb + (1 - alpha) * e_emb
-        combined /= np.linalg.norm(combined)
-        print(f"🔍 Query expanded with: {common}")
-        return combined
-    except Exception as e:
-        print(f"⚠️ Query expansion skipped due to error: {e}")
-        return q_emb
 
 # ==========================================================
 # 7️⃣ Retrieval — FAISS + Bullet-Aware Re-rank + Neighbor Fill
@@ -227,8 +207,11 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 7,
         return []
 
     try:
-        # --- PRF-enhanced query embedding
-        q_emb = expand_query_embedding(query, _query_model, index, chunks)
+        q_emb = _query_model.encode(
+            [f"query: {query.strip()}"],
+            convert_to_numpy=True,
+            normalize_embeddings=True
+        )[0]
 
         if hasattr(index, "d") and q_emb.shape[0] != index.d:
             print(f"⚠️ FAISS dimension mismatch: index={index.d}, query={q_emb.shape[0]}")
@@ -248,6 +231,7 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 7,
             normalize_embeddings=True,
         )
         sims = cosine_similarity([q_emb], doc_embs)[0]
+
         boosted_sims = []
         for idx, sim in zip(candidate_indices, sims):
             text = chunks[idx].strip()
@@ -276,6 +260,7 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 7,
         print(f"⚠️ Retrieval error: {repr(e)}")
         return []
 
+
 # ==========================================================
 # 8️⃣ Answer Generation
 # ==========================================================
@@ -301,6 +286,7 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = F
         "'I don't know based on the provided document.'"},
         {"role": "user", "content": prompt},
     ]
+
    try:
         response = chat_llm_local.invoke(messages)
         return response.content.strip()
@@ -308,6 +294,7 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = F
         print(f"⚠️ GPT-4o generation failed: {e}")
         return "⚠️ Error: Could not generate an answer."
 
+
 # ==========================================================
 # 9️⃣ Generic Text Generation Helper
 # ==========================================================
@@ -329,6 +316,7 @@ def genai_generate(prompt: str) -> str:
         print(f"⚠️ genai_generate() failed: {e}")
         return "⚠️ Unable to generate response."
 
+
 # ==========================================================
 # 🔟 Local Test
 # ==========================================================
@@ -344,6 +332,7 @@ if __name__ == "__main__":
 
     embeddings = embed_chunks(dummy_chunks)
     index = build_faiss_index(embeddings)
+
    query = "What are the prerequisites for commerce automation?"
     retrieved = retrieve_chunks(query, index, dummy_chunks)
     print("🔍 Retrieved:", retrieved)