Shubham170793 committed
Commit 65116ce · verified · 1 Parent(s): 1ffa2bc

Update src/qa.py

Files changed (1):
  1. src/qa.py +58 -17
src/qa.py CHANGED
@@ -268,15 +268,37 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 7,
 
 
 # ==========================================================
-# 8️⃣ Answer Generation
-# ==========================================================
-# ==========================================================
-# 8️⃣ Answer Generation (Lazy GPT-4o Initialization + Language-Aware)
+# 8️⃣ Answer Generation (Lazy GPT-4o Initialization + Language-Aware + Token-Safe)
 # ==========================================================
+def truncate_context(context_text: str, max_tokens: int = 100000, model: str = "gpt-4o") -> str:
+    """
+    Truncate context to stay safely within model limits (~128k tokens).
+    Keeps only the earliest tokens up to max_tokens.
+    """
+    try:
+        import tiktoken
+        enc = tiktoken.encoding_for_model(model)
+    except Exception:
+        try:
+            import tiktoken
+            enc = tiktoken.get_encoding("cl100k_base")
+        except Exception:
+            # crude fallback — approximate truncation
+            return context_text[: max_tokens * 4]
+
+    tokens = enc.encode(context_text)
+    if len(tokens) > max_tokens:
+        truncated = enc.decode(tokens[:max_tokens])
+        print(f"⚠️ Context truncated from {len(tokens):,} → {max_tokens:,} tokens.")
+        return truncated
+    return context_text
+
+
 def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False, doc_lang: str = "en"):
     """
     Generates an answer using GPT-4o (SAP Gen AI Hub proxy).
-    Now supports Hindi or English response formatting automatically.
+    Now supports Hindi or English response formatting automatically,
+    with safe context truncation to prevent token overflow.
     """
     if not retrieved_chunks:
         return "Sorry, I couldn’t find relevant information in the document."
@@ -287,10 +309,20 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False, doc_lang: str = "en"):
     except Exception:
         return "⚠️ GPT-4o not initialized. Check credentials or rebuild the Space."
 
-    # Build context
+    # ----------------------------------------------------------
+    # 🧩 Build and clean context (deduplicate + truncate safely)
+    # ----------------------------------------------------------
     context = "\n".join(f"[Chunk {i+1}] {chunk.strip()}" for i, chunk in enumerate(retrieved_chunks))
 
+    # Remove duplicate lines to save tokens
+    context = "\n".join(dict.fromkeys(context.splitlines()))
+
+    # Truncate to stay within GPT-4o 128k context limit
+    context = truncate_context(context, 100000)
+
+    # ----------------------------------------------------------
     # 🌐 Language-specific prompt logic
+    # ----------------------------------------------------------
     if doc_lang == "hi":
         # Hindi-language response
         prompt = (
@@ -302,21 +334,31 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False, doc_lang: str = "en"):
         )
     else:
         # Default English prompts
-        prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(context=context, query=query)
+        prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(
+            context=context, query=query
+        )
 
-    # System role
+    # ----------------------------------------------------------
+    # 💬 System + user messages
+    # ----------------------------------------------------------
     messages = [
-        {"role": "system", "content":
-            "You are an expert enterprise documentation assistant. "
-            "When reasoning_mode is off, stay strictly factual and concise. "
-            "When reasoning_mode is on, combine insights across chunks logically "
-            "and explain briefly. "
-            "If the answer is not in the document, reply exactly: "
-            "'I don't know based on the provided document.'"},
+        {
+            "role": "system",
+            "content": (
+                "You are an expert enterprise documentation assistant. "
+                "When reasoning_mode is off, stay strictly factual and concise. "
+                "When reasoning_mode is on, combine insights across chunks logically "
+                "and explain briefly. "
+                "If the answer is not in the document, reply exactly: "
+                "'I don't know based on the provided document.'"
+            ),
+        },
         {"role": "user", "content": prompt},
     ]
 
-    # Generate answer
+    # ----------------------------------------------------------
+    # 🧠 Generate answer safely
+    # ----------------------------------------------------------
     try:
        response = chat_llm_local.invoke(messages)
        return response.content.strip()
@@ -325,7 +367,6 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False, doc_lang: str = "en"):
         return "⚠️ Error: Could not generate an answer."
 
 
-
 # ==========================================================
 # 9️⃣ Generic Text Generation Helper
 # ==========================================================
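A quick way to sanity-check the new helper in hunk one, as a minimal sketch: it assumes tiktoken is installed (the commit's four-characters-per-token fallback only applies when it is not) and that src/ is importable; both are assumptions about the Space's environment, not part of this commit.

# Minimal sketch; assumes tiktoken is installed and src/ is on sys.path.
from qa import truncate_context  # hypothetical import path for src/qa.py

long_context = "word " * 300_000  # comfortably past the 100_000-token budget
clipped = truncate_context(long_context, max_tokens=100_000)

# The helper prints the before/after token counts and keeps only the earliest
# tokens, so the clipped text is a prefix of the original.
assert clipped == long_context[: len(clipped)]
assert len(clipped) < len(long_context)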
 
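The deduplication one-liner in the second hunk relies on dict.fromkeys preserving first-insertion order (guaranteed since Python 3.7): repeated lines are dropped and each first occurrence stays in place. Note that it also collapses any legitimately repeated line across chunks, a trade-off for the token savings.

context = "alpha\nbeta\nalpha\ngamma\nbeta"
deduped = "\n".join(dict.fromkeys(context.splitlines()))
print(deduped)  # alpha, beta, gamma: first occurrences only, original order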
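The English branch in the third hunk formats one of two module-level templates, REASONING_PROMPT or STRICT_PROMPT. Their wording is defined earlier in src/qa.py and is not part of this diff; the call only fixes their contract, plain str.format with {context} and {query} slots. A hypothetical template of the same shape, for illustration only:

# EXAMPLE_PROMPT is hypothetical; the real STRICT_PROMPT / REASONING_PROMPT
# live elsewhere in src/qa.py and are not shown in this commit.
EXAMPLE_PROMPT = (
    "Answer using only the context below. If the answer is not there, say so.\n\n"
    "Context:\n{context}\n\n"
    "Question: {query}"
)

print(EXAMPLE_PROMPT.format(context="[Chunk 1] ...", query="What changed?"))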