Shubham170793 committed
Commit 65116ce · verified · 1 Parent(s): 1ffa2bc

Update src/qa.py

Files changed (1):
  1. src/qa.py +58 -17
src/qa.py CHANGED
@@ -268,15 +268,37 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 7,
 
 
 # ==========================================================
-# 8️⃣ Answer Generation
-# ==========================================================
-# ==========================================================
-# 8️⃣ Answer Generation (Lazy GPT-4o Initialization + Language-Aware)
+# 8️⃣ Answer Generation (Lazy GPT-4o Initialization + Language-Aware + Token-Safe)
 # ==========================================================
+def truncate_context(context_text: str, max_tokens: int = 100000, model: str = "gpt-4o") -> str:
+    """
+    Truncate context to stay safely within model limits (~128k tokens).
+    Keeps only the earliest tokens up to max_tokens.
+    """
+    try:
+        import tiktoken
+        enc = tiktoken.encoding_for_model(model)
+    except Exception:
+        try:
+            import tiktoken
+            enc = tiktoken.get_encoding("cl100k_base")
+        except Exception:
+            # crude fallback — approximate truncation
+            return context_text[: max_tokens * 4]
+
+    tokens = enc.encode(context_text)
+    if len(tokens) > max_tokens:
+        truncated = enc.decode(tokens[:max_tokens])
+        print(f"⚠️ Context truncated from {len(tokens):,} → {max_tokens:,} tokens.")
+        return truncated
+    return context_text
+
+
 def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False, doc_lang: str = "en"):
     """
     Generates an answer using GPT-4o (SAP Gen AI Hub proxy).
-    Now supports Hindi or English response formatting automatically.
+    Now supports Hindi or English response formatting automatically,
+    with safe context truncation to prevent token overflow.
     """
     if not retrieved_chunks:
         return "Sorry, I couldn’t find relevant information in the document."
@@ -287,10 +309,20 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False, doc_lang: str = "en"):
     except Exception:
         return "⚠️ GPT-4o not initialized. Check credentials or rebuild the Space."
 
-    # Build context
+    # ----------------------------------------------------------
+    # 🧩 Build and clean context (deduplicate + truncate safely)
+    # ----------------------------------------------------------
     context = "\n".join(f"[Chunk {i+1}] {chunk.strip()}" for i, chunk in enumerate(retrieved_chunks))
 
+    # Remove duplicate lines to save tokens
+    context = "\n".join(dict.fromkeys(context.splitlines()))
+
+    # Truncate to stay within GPT-4o 128k context limit
+    context = truncate_context(context, 100000)
+
+    # ----------------------------------------------------------
     # 🌐 Language-specific prompt logic
+    # ----------------------------------------------------------
     if doc_lang == "hi":
         # Hindi-language response
         prompt = (
@@ -302,21 +334,31 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False, doc_lang: str = "en"):
         )
     else:
         # Default English prompts
-        prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(context=context, query=query)
+        prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(
+            context=context, query=query
+        )
 
-    # System role
+    # ----------------------------------------------------------
+    # 💬 System + user messages
+    # ----------------------------------------------------------
     messages = [
-        {"role": "system", "content":
-            "You are an expert enterprise documentation assistant. "
-            "When reasoning_mode is off, stay strictly factual and concise. "
-            "When reasoning_mode is on, combine insights across chunks logically "
-            "and explain briefly. "
-            "If the answer is not in the document, reply exactly: "
-            "'I don't know based on the provided document.'"},
+        {
+            "role": "system",
+            "content": (
+                "You are an expert enterprise documentation assistant. "
+                "When reasoning_mode is off, stay strictly factual and concise. "
+                "When reasoning_mode is on, combine insights across chunks logically "
+                "and explain briefly. "
+                "If the answer is not in the document, reply exactly: "
+                "'I don't know based on the provided document.'"
+            ),
+        },
         {"role": "user", "content": prompt},
     ]
 
-    # Generate answer
+    # ----------------------------------------------------------
+    # 🧠 Generate answer safely
+    # ----------------------------------------------------------
     try:
        response = chat_llm_local.invoke(messages)
        return response.content.strip()
@@ -325,7 +367,6 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False, doc_lang: str = "en"):
         return "⚠️ Error: Could not generate an answer."
 
 
-
 # ==========================================================
 # 9️⃣ Generic Text Generation Helper
 # ==========================================================
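A quick way to sanity-check the new helper in hunk one, as a minimal sketch: it assumes tiktoken is installed (the commit's four-characters-per-token fallback only applies when it is not) and that src/ is importable; both are assumptions about the Space's environment, not part of this commit.

# Minimal sketch; assumes tiktoken is installed and src/ is on sys.path.
from qa import truncate_context  # hypothetical import path for src/qa.py

long_context = "word " * 300_000  # comfortably past the 100_000-token budget
clipped = truncate_context(long_context, max_tokens=100_000)

# The helper prints the before/after token counts and keeps only the earliest
# tokens, so the clipped text is a prefix of the original.
assert clipped == long_context[: len(clipped)]
assert len(clipped) < len(long_context)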
 
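The deduplication one-liner in the second hunk relies on dict.fromkeys preserving first-insertion order (guaranteed since Python 3.7): repeated lines are dropped and each first occurrence stays in place. Note that it also collapses any legitimately repeated line across chunks, a trade-off for the token savings.

context = "alpha\nbeta\nalpha\ngamma\nbeta"
deduped = "\n".join(dict.fromkeys(context.splitlines()))
print(deduped)  # alpha, beta, gamma: first occurrences only, original order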
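The English branch in the third hunk formats one of two module-level templates, REASONING_PROMPT or STRICT_PROMPT. Their wording is defined earlier in src/qa.py and is not part of this diff; the call only fixes their contract, plain str.format with {context} and {query} slots. A hypothetical template of the same shape, for illustration only:

# EXAMPLE_PROMPT is hypothetical; the real STRICT_PROMPT / REASONING_PROMPT
# live elsewhere in src/qa.py and are not shown in this commit.
EXAMPLE_PROMPT = (
    "Answer using only the context below. If the answer is not there, say so.\n\n"
    "Context:\n{context}\n\n"
    "Question: {query}"
)

print(EXAMPLE_PROMPT.format(context="[Chunk 1] ...", query="What changed?"))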