Update src/qa.py
--- a/src/qa.py
+++ b/src/qa.py
@@ -1,10 +1,11 @@
 """
-qa.py —
-
+qa.py — Phi-2 Hybrid (Fast + Reasoning) with Rerank & Similarity Filtering
+--------------------------------------------------------------------------
 ✅ Optimized for Hugging Face Spaces & Streamlit
-✅
-✅
-✅
+✅ intfloat/e5-small-v2 for embeddings
+✅ microsoft/phi-2 for generation (fast CPU-optimized)
+✅ Re-ranking + minimum similarity threshold for clean retrieval
+✅ reasoning_mode toggle for deeper answers
 """
 
 import os
@@ -14,7 +15,7 @@ from sklearn.metrics.pairwise import cosine_similarity
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
 
-print("✅ qa.py (Phi-2
+print("✅ qa.py (Phi-2 Hybrid + Rerank + Similarity Filter) loaded from:", __file__)
 
 # ==========================================================
 # 1️⃣ Hugging Face Cache Setup
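A note on the `query: ` and `passage: ` prefixes that appear in the encode calls further down: intfloat/e5-small-v2 is trained asymmetrically, and unprefixed inputs score noticeably worse. A minimal sketch of the convention, assuming the module loads the embedder through sentence-transformers (the loading code sits outside these hunks):

```python
# Sketch only, not the file's own loading code; assumes sentence-transformers.
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer("intfloat/e5-small-v2")

# e5 models expect "query: " on the question side and "passage: " on the
# document side; normalize_embeddings=True yields unit-length vectors.
q_emb = embedder.encode(["query: How to export a report?"],
                        convert_to_numpy=True, normalize_embeddings=True)
p_emb = embedder.encode(["passage: Click 'Export' to download a CSV summary."],
                        convert_to_numpy=True, normalize_embeddings=True)
print(q_emb.shape)  # (1, 384): e5-small-v2 produces 384-dim embeddings
```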
@@ -66,9 +67,9 @@ except Exception as e:
     _answer_model = None
 
 # ==========================================================
-# 4️⃣ Prompt
+# 4️⃣ Prompt Templates
 # ==========================================================
-PROMPT_TEMPLATE = (
+STRICT_PROMPT = (
     "You are an assistant for enterprise documentation.\n"
     "Answer the question based ONLY on the context below.\n"
     "If the answer is not in the context, reply exactly:\n"
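Both templates are plain `str.format` strings with `{context}` and `{query}` slots; `generate_answer` below selects between them via `reasoning_mode`. A tiny demo of the fill step (the copy of STRICT_PROMPT here is abridged for illustration; the exact fallback string lives on a line this diff elides):

```python
# Abridged, illustrative copy of STRICT_PROMPT; the real template also
# contains the exact fallback line, which this diff does not show.
STRICT_PROMPT = (
    "You are an assistant for enterprise documentation.\n"
    "Answer the question based ONLY on the context below.\n"
    "Context:\n{context}\n\nQuestion: {query}\nAnswer:"
)

print(STRICT_PROMPT.format(
    context="Step 2: Click 'Export' to download a CSV summary.",
    query="How to export a report?",
))
```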
@@ -76,22 +77,30 @@ PROMPT_TEMPLATE = (
     "Context:\n{context}\n\nQuestion: {query}\nAnswer:"
 )
 
+REASONING_PROMPT = (
+    "You are an expert enterprise assistant.\n"
+    "Carefully reason about the following context and provide a detailed, step-by-step answer.\n"
+    "If the context does not provide enough information, you may make cautious inferences based on logical reasoning.\n"
+    "However, always note when you are inferring beyond the text.\n\n"
+    "Context:\n{context}\n\nQuestion: {query}\n\nReasoning and Answer:"
+)
+
 # ==========================================================
-# 5️⃣ Retrieve Chunks — FAISS +
+# 5️⃣ Retrieve Chunks — FAISS + Re-rank + Similarity Filter
 # ==========================================================
 def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5, min_similarity: float = 0.6):
     """
     Retrieves top-K relevant chunks with re-ranking and similarity threshold filtering.
     Steps:
     1️⃣ Use FAISS to get approximate top candidates.
-    2️⃣ Re-rank
-    3️⃣ Filter out low-similarity chunks
+    2️⃣ Re-rank them by cosine similarity with the query.
+    3️⃣ Filter out low-similarity chunks below min_similarity.
     """
     if not index or not chunks:
         return []
 
     try:
-        # --- Encode
+        # --- Encode query ---
         q_emb = _query_model.encode(
             [f"query: {query.strip()}"],
             convert_to_numpy=True,
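The FAISS search and the actual re-rank/filter body fall between this hunk and the next, so they are not visible in the diff. A hedged sketch of what steps 2️⃣ and 3️⃣ describe, reusing the `cosine_similarity` import shown in the context above; `rerank_and_filter` and its parameters are illustrative names, not taken from the file:

```python
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def rerank_and_filter(q_emb, cand_embs, cand_chunks, top_k=5, min_similarity=0.6):
    """Illustrative re-rank + threshold step: exact cosine scores over the
    FAISS candidates, best first, dropping anything under the threshold."""
    sims = cosine_similarity(q_emb, cand_embs)[0]  # (n,) exact scores
    order = np.argsort(sims)[::-1]                 # best candidates first
    return [cand_chunks[i] for i in order if sims[i] >= min_similarity][:top_k]

# Usage with a (1, d) query embedding and (n, d) candidate embeddings:
# top_chunks = rerank_and_filter(q_emb, cand_embs, cand_chunks)
```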
@@ -127,25 +136,26 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5, min_similar
         return []
 
 # ==========================================================
-# 6️⃣ Answer Generation (Fast
+# 6️⃣ Answer Generation (Fast / Reasoning Hybrid)
 # ==========================================================
-def generate_answer(query: str, retrieved_chunks: list):
+def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False):
     """
-    Generates
-
+    Generates concise or reasoning-rich answers using Phi-2.
+    reasoning_mode=True → longer, more explanatory (slower)
+    reasoning_mode=False → short factual (fast)
     """
     if not retrieved_chunks:
         return "Sorry, I couldn’t find relevant information in the document."
 
     context = "\n".join(chunk.strip() for chunk in retrieved_chunks)
-    prompt =
+    prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(context=context, query=query)
 
     try:
         result = _answer_model(
             prompt,
-            max_new_tokens=
-            temperature=0.2,
-            do_sample=
+            max_new_tokens=200 if reasoning_mode else 120,
+            temperature=0.6 if reasoning_mode else 0.2,
+            do_sample=reasoning_mode,
             pad_token_id=_tokenizer.eos_token_id,
         )
         answer = result[0]["generated_text"].strip()
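Two caveats in this hunk. First, with `do_sample=False` (fast mode) transformers decodes greedily and ignores `temperature`, so the `0.2` is effectively inert there. Second, a text-generation pipeline's `generated_text` contains the prompt plus the completion by default, so `.strip()` alone would keep the whole prompt; the file's post-processing continues past this hunk, but the usual fix looks like this (`extract_answer` is an illustrative helper, not from the file):

```python
def extract_answer(generated_text: str, prompt: str) -> str:
    """Drop the echoed prompt that transformers' text-generation pipeline
    includes in generated_text unless return_full_text=False is passed."""
    if generated_text.startswith(prompt):
        generated_text = generated_text[len(prompt):]
    return generated_text.strip()

# answer = extract_answer(result[0]["generated_text"], prompt)
```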
@@ -164,20 +174,26 @@ def generate_answer(query: str, retrieved_chunks: list):
 # ==========================================================
 if __name__ == "__main__":
     from vectorstore import build_faiss_index
+    import faiss
+
     dummy_chunks = [
         "Step 1: Open the dashboard and navigate to reports.",
         "Step 2: Click 'Export' to download a CSV summary.",
         "Step 3: Review the generated report in your downloads folder."
     ]
+
     embeddings = [
         _query_model.encode([f"passage: {chunk}"], convert_to_numpy=True, normalize_embeddings=True)[0]
         for chunk in dummy_chunks
     ]
-
-
+
+    dim = embeddings[0].shape[0]
+    index = faiss.IndexFlatL2(dim)
     index.add(np.array(embeddings).astype("float32"))
 
-    query = "How
+    query = "How to export a report?"
     retrieved = retrieve_chunks(query, index, dummy_chunks, top_k=3, min_similarity=0.6)
-
-    print("
+
+    print("\n🔍 Retrieved chunks:", retrieved)
+    print("\n💬 FAST Answer:", generate_answer(query, retrieved, reasoning_mode=False))
+    print("\n🧠 REASONING Answer:", generate_answer(query, retrieved, reasoning_mode=True))
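A design note on the smoke test: because the embeddings are encoded with `normalize_embeddings=True`, `IndexFlatL2` and `IndexFlatIP` rank candidates identically (for unit vectors, ||a - b||^2 = 2 - 2(a·b)). `IndexFlatIP` returns cosine scores directly, which pairs more naturally with the `min_similarity` threshold. A sketch of the equivalent inner-product setup:

```python
import faiss
import numpy as np

# Illustrative data: three unit-normalized 384-dim vectors (e5-small-v2's size).
embs = np.random.rand(3, 384).astype("float32")
faiss.normalize_L2(embs)                  # in-place L2 normalization

index = faiss.IndexFlatIP(embs.shape[1])  # inner product == cosine for unit vectors
index.add(embs)

scores, ids = index.search(embs[:1], k=2)
print(scores, ids)  # best hit is the query vector itself, score ≈ 1.0
```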