Update src/qa.py
Browse files
src/qa.py
CHANGED
|
@@ -104,23 +104,39 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5):
|
|
| 104 |
def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = True):
|
| 105 |
"""
|
| 106 |
Generates answers using Phi-2.
|
| 107 |
-
reasoning_mode=True
|
| 108 |
reasoning_mode=False → strict chunk-only factual mode
|
| 109 |
"""
|
| 110 |
if not retrieved_chunks:
|
| 111 |
return "Sorry, I couldn’t find relevant information in the document."
|
| 112 |
|
|
|
|
| 113 |
context = "\n".join([chunk.strip() for chunk in retrieved_chunks])
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
try:
|
|
|
|
| 117 |
result = _answer_model(
|
| 118 |
prompt,
|
| 119 |
-
max_new_tokens=
|
| 120 |
-
temperature=0.
|
| 121 |
-
do_sample=False,
|
|
|
|
| 122 |
)
|
| 123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
except Exception as e:
|
| 125 |
print(f"⚠️ Generation failed: {e}")
|
| 126 |
return "⚠️ Error: Could not generate an answer."
|
|
|
|
| 104 |
def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = True):
    """
    Generate an answer for *query* from retrieved document chunks using Phi-2.

    Parameters
    ----------
    query : str
        The user's question.
    retrieved_chunks : list
        Text chunks retrieved from the document index; they are stripped and
        joined into the prompt context. If empty, a fixed apology string is
        returned without calling the model.
    reasoning_mode : bool, default True
        True  → reasoning + external knowledge (longer output budget).
        False → strict chunk-only factual mode (shorter, stricter).

    Returns
    -------
    str
        The text after the final "ANSWER:" marker in the model output
        (truncated to 150 words), or a fixed apology/error message.
    """
    if not retrieved_chunks:
        return "Sorry, I couldn’t find relevant information in the document."

    # Merge retrieved context into a single newline-separated block.
    context = "\n".join(chunk.strip() for chunk in retrieved_chunks)

    # Select prompt template based on mode.
    template = REASONING_PROMPT if reasoning_mode else STRICT_PROMPT
    prompt = template.format(context=context, query=query)

    try:
        # ⚡ Speed-optimized generation.
        # NOTE(review): with do_sample=False (greedy decoding) the temperature
        # argument has no effect on the output; it is kept only to document the
        # intended sampling settings should sampling ever be enabled.
        result = _answer_model(
            prompt,
            max_new_tokens=140 if reasoning_mode else 100,  # ⏱ shorter output = faster
            temperature=0.3 if reasoning_mode else 0.1,     # ignored under greedy decoding
            do_sample=False,          # ✅ greedy decoding = fastest
            repetition_penalty=1.1,   # avoids repetitive phrasing
        )

        # Keep only the text after the final "ANSWER:" marker; if the marker is
        # absent, split()[-1] leaves the full generated text intact.
        answer = result[0]["generated_text"].split("ANSWER:")[-1].strip()

        # Safety: truncate overly long rambles. Split once and reuse the list
        # (the original split the same string twice).
        words = answer.split()
        if len(words) > 150:
            answer = " ".join(words[:150]) + "..."

        return answer

    except Exception as e:
        # Broad catch is deliberate: any generation failure degrades to a fixed
        # error message rather than crashing the QA flow.
        print(f"⚠️ Generation failed: {e}")
        return "⚠️ Error: Could not generate an answer."
|