Shubham170793 committed on
Commit
f27542b
·
verified ·
1 Parent(s): 51344d2

Update src/qa.py

Browse files
Files changed (1) hide show
  1. src/qa.py +24 -75
src/qa.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- qa.py — GPT-4o (SAP Gen AI Hub) + ReRank Retrieval (Stable Strict)
3
  --------------------------------------------------
4
  ✅ Semantic retrieval (FAISS + cosine re-rank + neighbor fill)
5
  ✅ Bullet-aware similarity boost for procedural chunks
@@ -7,7 +7,7 @@ qa.py — GPT-4o (SAP Gen AI Hub) + ReRank Retrieval (Stable Strict)
7
  ✅ Smart factual mode (fast)
8
  ✅ Deep reasoning mode (ChatGPT-like)
9
  ✅ genai_generate() helper for suggestions
10
- Original Strict Prompt (safe + predictable)
11
  """
12
 
13
  import os
@@ -54,23 +54,18 @@ os.environ.update({
54
  })
55
 
56
  # ==========================================================
57
- # 2️⃣ Embedding Model (Multilingual E5 — supports Hindi + English)
58
  # ==========================================================
59
  try:
60
- # 🆕 Switched to multilingual model (same 384-dim dimension, so FAISS stays compatible)
61
- _query_model = SentenceTransformer(
62
- "intfloat/multilingual-e5-small",
63
- cache_folder=CACHE_DIR
64
- )
65
- print("✅ Loaded embedding model: intfloat/multilingual-e5-small (multilingual mode)")
66
  except Exception as e:
67
- print(f"⚠️ Embedding load failed ({e}), attempting English-only fallback...")
68
  try:
69
- _query_model = SentenceTransformer("intfloat/e5-small-v2", cache_folder=CACHE_DIR)
70
- print("🔁 Fallback: intfloat/e5-small-v2 loaded successfully.")
71
- except Exception:
72
  _query_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=CACHE_DIR)
73
- print("🔁 Final fallback: all-MiniLM-L6-v2 loaded.")
 
 
74
 
75
  # ==========================================================
76
  # 3️⃣ GPT-4o via SAP Gen AI Hub — Lazy Initialization
@@ -109,7 +104,6 @@ def get_chat_llm(model_name: str = "gpt-4o", temperature: float = 0.3, max_token
109
  _chat_llm = None
110
  raise
111
 
112
-
113
  # ==========================================================
114
  # 4️⃣ Embedding Generator (Batch-Optimized)
115
  # ==========================================================
@@ -171,7 +165,6 @@ def cache_embeddings(file_name: str, chunks, embed_func, chunk_size: int = None,
171
  _clean_old_caches(base_name, keep_latest=5)
172
  return embeddings
173
 
174
-
175
  # ==========================================================
176
  # 6️⃣ Prompt Templates (Original Strict)
177
  # ==========================================================
@@ -181,9 +174,7 @@ STRICT_PROMPT = (
181
  "When multiple causes, steps, or key points are discussed, present them as short, well-structured bullet points.\n"
182
  "When the answer focuses on a single concept, definition, or explanation, write it as a clear and compact paragraph.\n"
183
  "Keep the tone professional and concise. Do not invent facts outside the provided content.\n"
184
- "Do not mention or refer to internal elements such as 'chunks', 'chunk numbers', 'passages', or 'sections of the document'.\n"
185
- "If the answer cannot be found directly but there are partial clues, summarize those clues briefly starting with 'Based on the available information,'.\n"
186
- "If nothing at all in the CONTEXT relates to the question, reply exactly:\n"
187
  "'I don't know based on the provided document.'\n\n"
188
  "Context:\n{context}\n\nQuestion: {query}\nAnswer:"
189
  )
@@ -192,14 +183,11 @@ REASONING_PROMPT = (
192
  "You are an expert enterprise assistant capable of reasoning.\n"
193
  "Think step by step and synthesize information even if scattered across chunks.\n"
194
  "Base your answer primarily on the CONTEXT, but if multiple partial clues exist, combine them logically.\n"
195
- "You may fill reasonable gaps with general knowledge to form a complete answer.\n"
196
- "Do not mention or refer to internal elements such as 'chunks', 'chunk numbers', or 'sections of the document'.\n"
197
  "If absolutely nothing in the document relates, say exactly:\n"
198
  "'I don't know based on the provided document.'\n\n"
199
  "Context:\n{context}\n\nQuestion: {query}\nLet's reason step-by-step:\nAnswer:"
200
  )
201
 
202
-
203
  # ==========================================================
204
  # 7️⃣ Retrieval — FAISS + Bullet-Aware Re-rank + Neighbor Fill
205
  # ==========================================================
@@ -266,14 +254,12 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 7,
266
  print(f"⚠️ Retrieval error: {repr(e)}")
267
  return []
268
 
269
-
270
  # ==========================================================
271
- # 8️⃣ Answer Generation (Lazy GPT-4o Initialization + Language-Aware + Token-Safe)
272
  # ==========================================================
273
  def truncate_context(context_text: str, max_tokens: int = 100000, model: str = "gpt-4o") -> str:
274
  """
275
  Truncate context to stay safely within model limits (~128k tokens).
276
- Keeps only the earliest tokens up to max_tokens.
277
  """
278
  try:
279
  import tiktoken
@@ -283,7 +269,6 @@ def truncate_context(context_text: str, max_tokens: int = 100000, model: str = "
283
  import tiktoken
284
  enc = tiktoken.get_encoding("cl100k_base")
285
  except Exception:
286
- # crude fallback — approximate truncation
287
  return context_text[: max_tokens * 4]
288
 
289
  tokens = enc.encode(context_text)
@@ -293,72 +278,38 @@ def truncate_context(context_text: str, max_tokens: int = 100000, model: str = "
293
  return truncated
294
  return context_text
295
 
296
-
297
- def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False, doc_lang: str = "en"):
298
  """
299
- Generates an answer using GPT-4o (SAP Gen AI Hub proxy).
300
- Now supports Hindi or English response formatting automatically,
301
- with safe context truncation to prevent token overflow.
302
  """
303
  if not retrieved_chunks:
304
  return "Sorry, I couldn’t find relevant information in the document."
305
 
306
- # Try lazy initialization
307
  try:
308
  chat_llm_local = get_chat_llm()
309
  except Exception:
310
  return "⚠️ GPT-4o not initialized. Check credentials or rebuild the Space."
311
 
312
- # ----------------------------------------------------------
313
- # 🧩 Build and clean context (deduplicate + truncate safely)
314
- # ----------------------------------------------------------
315
  context = "\n".join(f"[Chunk {i+1}] {chunk.strip()}" for i, chunk in enumerate(retrieved_chunks))
316
-
317
- # Remove duplicate lines to save tokens
318
  context = "\n".join(dict.fromkeys(context.splitlines()))
319
-
320
- # Truncate to stay within GPT-4o 128k context limit
321
  context = truncate_context(context, 100000)
322
 
323
- # ----------------------------------------------------------
324
- # 🌐 Language-specific prompt logic
325
- # ----------------------------------------------------------
326
- if doc_lang == "hi":
327
- # Hindi-language response
328
- prompt = (
329
- f"आप एक दस्तावेज़ सहायक हैं जो दिए गए अंशों के आधार पर सटीक उत्तर देता है। "
330
- f"कृपया नीचे दिए गए संदर्भ का उपयोग करते हुए प्रश्न का उत्तर हिंदी में दें। "
331
- f"यदि उत्तर स्पष्ट रूप से दस्तावेज़ में नहीं है, तो कहें — "
332
- f"'मुझे इस दस्तावेज़ के आधार पर उत्तर ज्ञात नहीं है।'\n\n"
333
- f"संदर्भ:\n{context}\n\nप्रश्न: {query}\nउत्तर:"
334
- )
335
- else:
336
- # Default English prompts
337
- prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(
338
- context=context, query=query
339
- )
340
 
341
- # ----------------------------------------------------------
342
- # 💬 System + user messages
343
- # ----------------------------------------------------------
344
  messages = [
345
- {
346
- "role": "system",
347
- "content": (
348
- "You are an expert enterprise documentation assistant. "
349
- "When reasoning_mode is off, stay strictly factual and concise. "
350
- "When reasoning_mode is on, combine insights across chunks logically "
351
- "and explain briefly. "
352
- "If the answer is not in the document, reply exactly: "
353
- "'I don't know based on the provided document.'"
354
- ),
355
- },
356
  {"role": "user", "content": prompt},
357
  ]
358
 
359
- # ----------------------------------------------------------
360
- # 🧠 Generate answer safely
361
- # ----------------------------------------------------------
362
  try:
363
  response = chat_llm_local.invoke(messages)
364
  return response.content.strip()
@@ -366,7 +317,6 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = F
366
  print(f"⚠️ GPT-4o generation failed: {e}")
367
  return "⚠️ Error: Could not generate an answer."
368
 
369
-
370
  # ==========================================================
371
  # 9️⃣ Generic Text Generation Helper
372
  # ==========================================================
@@ -388,7 +338,6 @@ def genai_generate(prompt: str) -> str:
388
  print(f"⚠️ genai_generate() failed: {e}")
389
  return "⚠️ Unable to generate response."
390
 
391
-
392
  # ==========================================================
393
  # 🔟 Local Test
394
  # ==========================================================
 
1
  """
2
+ qa.py — GPT-4o (SAP Gen AI Hub) + ReRank Retrieval (Stable Strict, English Only)
3
  --------------------------------------------------
4
  ✅ Semantic retrieval (FAISS + cosine re-rank + neighbor fill)
5
  ✅ Bullet-aware similarity boost for procedural chunks
 
7
  ✅ Smart factual mode (fast)
8
  ✅ Deep reasoning mode (ChatGPT-like)
9
  ✅ genai_generate() helper for suggestions
10
+ Token-safe truncation (prevents 128k overflow)
11
  """
12
 
13
  import os
 
54
  })
55
 
56
  # ==========================================================
57
+ # 2️⃣ Embedding Model (English Only)
58
  # ==========================================================
59
  try:
60
+ _query_model = SentenceTransformer("intfloat/e5-small-v2", cache_folder=CACHE_DIR)
61
+ print("✅ Loaded embedding model: intfloat/e5-small-v2 (English mode)")
 
 
 
 
62
  except Exception as e:
63
+ print(f"⚠️ Embedding load failed ({e}), attempting fallback...")
64
  try:
 
 
 
65
  _query_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=CACHE_DIR)
66
+ print("🔁 Fallback: all-MiniLM-L6-v2 loaded successfully.")
67
+ except Exception as e2:
68
+ raise RuntimeError(f"❌ Could not load any embedding model: {e2}")
69
 
70
  # ==========================================================
71
  # 3️⃣ GPT-4o via SAP Gen AI Hub — Lazy Initialization
 
104
  _chat_llm = None
105
  raise
106
 
 
107
  # ==========================================================
108
  # 4️⃣ Embedding Generator (Batch-Optimized)
109
  # ==========================================================
 
165
  _clean_old_caches(base_name, keep_latest=5)
166
  return embeddings
167
 
 
168
  # ==========================================================
169
  # 6️⃣ Prompt Templates (Original Strict)
170
  # ==========================================================
 
174
  "When multiple causes, steps, or key points are discussed, present them as short, well-structured bullet points.\n"
175
  "When the answer focuses on a single concept, definition, or explanation, write it as a clear and compact paragraph.\n"
176
  "Keep the tone professional and concise. Do not invent facts outside the provided content.\n"
177
+ "If nothing in the CONTEXT relates to the question, reply exactly:\n"
 
 
178
  "'I don't know based on the provided document.'\n\n"
179
  "Context:\n{context}\n\nQuestion: {query}\nAnswer:"
180
  )
 
183
  "You are an expert enterprise assistant capable of reasoning.\n"
184
  "Think step by step and synthesize information even if scattered across chunks.\n"
185
  "Base your answer primarily on the CONTEXT, but if multiple partial clues exist, combine them logically.\n"
 
 
186
  "If absolutely nothing in the document relates, say exactly:\n"
187
  "'I don't know based on the provided document.'\n\n"
188
  "Context:\n{context}\n\nQuestion: {query}\nLet's reason step-by-step:\nAnswer:"
189
  )
190
 
 
191
  # ==========================================================
192
  # 7️⃣ Retrieval — FAISS + Bullet-Aware Re-rank + Neighbor Fill
193
  # ==========================================================
 
254
  print(f"⚠️ Retrieval error: {repr(e)}")
255
  return []
256
 
 
257
  # ==========================================================
258
+ # 8️⃣ Answer Generation (English Only + Token-Safe)
259
  # ==========================================================
260
  def truncate_context(context_text: str, max_tokens: int = 100000, model: str = "gpt-4o") -> str:
261
  """
262
  Truncate context to stay safely within model limits (~128k tokens).
 
263
  """
264
  try:
265
  import tiktoken
 
269
  import tiktoken
270
  enc = tiktoken.get_encoding("cl100k_base")
271
  except Exception:
 
272
  return context_text[: max_tokens * 4]
273
 
274
  tokens = enc.encode(context_text)
 
278
  return truncated
279
  return context_text
280
 
281
+ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False):
 
282
  """
283
+ Generates an English answer using GPT-4o (SAP Gen AI Hub proxy).
 
 
284
  """
285
  if not retrieved_chunks:
286
  return "Sorry, I couldn’t find relevant information in the document."
287
 
 
288
  try:
289
  chat_llm_local = get_chat_llm()
290
  except Exception:
291
  return "⚠️ GPT-4o not initialized. Check credentials or rebuild the Space."
292
 
293
+ # Build and clean context
 
 
294
  context = "\n".join(f"[Chunk {i+1}] {chunk.strip()}" for i, chunk in enumerate(retrieved_chunks))
 
 
295
  context = "\n".join(dict.fromkeys(context.splitlines()))
 
 
296
  context = truncate_context(context, 100000)
297
 
298
+ prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(
299
+ context=context, query=query
300
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
 
 
 
 
302
  messages = [
303
+ {"role": "system", "content": (
304
+ "You are an expert enterprise documentation assistant. "
305
+ "When reasoning_mode is off, stay strictly factual and concise. "
306
+ "When reasoning_mode is on, combine insights across chunks logically. "
307
+ "If the answer is not in the document, reply exactly: "
308
+ "'I don't know based on the provided document.'"
309
+ )},
 
 
 
 
310
  {"role": "user", "content": prompt},
311
  ]
312
 
 
 
 
313
  try:
314
  response = chat_llm_local.invoke(messages)
315
  return response.content.strip()
 
317
  print(f"⚠️ GPT-4o generation failed: {e}")
318
  return "⚠️ Error: Could not generate an answer."
319
 
 
320
  # ==========================================================
321
  # 9️⃣ Generic Text Generation Helper
322
  # ==========================================================
 
338
  print(f"⚠️ genai_generate() failed: {e}")
339
  return "⚠️ Unable to generate response."
340
 
 
341
  # ==========================================================
342
  # 🔟 Local Test
343
  # ==========================================================