Spaces:

Shubham170793
/

enterprise-knowledge-assistant

Running

App Files Community

Shubham170793 committed on Oct 17

Commit

d73a9dd

verified ·

1 Parent(s): d511cfa

Update src/qa.py

Browse files

Files changed (1) hide show

src/qa.py +42 -21

src/qa.py CHANGED Viewed

@@ -2,20 +2,21 @@
 qa.py — GPT-4o (SAP Gen AI Hub) + ReRank Retrieval
 --------------------------------------------------
 ✅ Semantic retrieval (FAISS + cosine re-rank + neighbor fill)
 ✅ Smart factual mode (fast)
 ✅ Deep reasoning mode (ChatGPT-like)
 """
 import os
 import json
 import numpy as np
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
 from gen_ai_hub.proxy.core.proxy_clients import get_proxy_client
 from gen_ai_hub.proxy.langchain.openai import ChatOpenAI
-from vectorstore import build_faiss_index
-print("✅ qa.py (GPT-4o via Gen AI Hub + ReRank) loaded from:", __file__)
 # ==========================================================
 # 1️⃣ Hugging Face Cache
@@ -34,7 +35,7 @@ os.environ.update({
 # ==========================================================
 try:
     _query_model = SentenceTransformer(
-        "intfloat/e5-small-v2",
         cache_folder=CACHE_DIR
     )
     print("✅ Loaded embedding model: intfloat/e5-small-v2 (fast mode)")
@@ -83,7 +84,6 @@ STRICT_PROMPT = (
     "If the answer cannot be found even after considering all chunks, say exactly:\n"
     "'I don't know based on the provided document.'\n\n"
     "Context:\n{context}\n\nQuestion: {query}\nAnswer:"
 )
 REASONING_PROMPT = (
@@ -97,13 +97,16 @@ REASONING_PROMPT = (
 )
 # ==========================================================
-# 5️⃣ Retrieval — FAISS + Re-rank + Neighbor Fill
 # ==========================================================
 def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5,
                     min_similarity: float = 0.6, candidate_multiplier: int = 3,
                     embeddings: list = None):
     """
     Re-rank and optionally fill with neighbors for context continuity.
     Auto-detects and rebuilds FAISS index if dimension mismatch occurs.
     """
@@ -119,37 +122,53 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5,
             normalize_embeddings=True
         )[0]
-        # ✅ Check dimension match
         if hasattr(index, "d") and q_emb.shape[0] != index.d:
             print(f"⚠️ FAISS index dimension mismatch: index={index.d}, query={q_emb.shape[0]}")
             if embeddings:
                 print("🔄 Rebuilding FAISS index to match embedding dimensions...")
                 index = build_faiss_index(embeddings)
-                q_emb = _query_model.encode([f"query: {query.strip()}"], convert_to_numpy=True, normalize_embeddings=True)[0]
             else:
                 return []
         # Step 1️⃣ — Initial FAISS retrieval
         num_candidates = max(top_k * candidate_multiplier, top_k + 2)
         distances, indices = index.search(np.array([q_emb]).astype("float32"), num_candidates)
         candidate_indices = [int(i) for i in indices[0] if i >= 0]
-        candidate_indices = list(dict.fromkeys(candidate_indices))
-        # Step 2️⃣ — Re-rank by cosine similarity
         doc_embs = _query_model.encode(
             [f"passage: {chunks[i]}" for i in candidate_indices],
             convert_to_numpy=True,
             normalize_embeddings=True,
         )
         sims = cosine_similarity([q_emb], doc_embs)[0]
-        ranked = sorted(zip(candidate_indices, sims), key=lambda x: x[1], reverse=True)
-        # Step 3️⃣ — Filter by similarity
         filtered = [idx for idx, sim in ranked if sim >= min_similarity]
         if len(filtered) > top_k:
             filtered = filtered[:top_k]
-        # Step 4️⃣ — Include ±1 neighbors for continuity
         neighbors = set()
         for idx in filtered:
             for n in [idx - 1, idx + 1]:
@@ -159,7 +178,7 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5,
         # Step 5️⃣ — Build final chunk list
         final_chunks = [chunks[i] for i in filtered]
-        print(f"✅ Retrieved {len(final_chunks)} chunks (semantic + neighbor fill).")
         return final_chunks
     except Exception as e:
@@ -179,6 +198,7 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = F
     if chat_llm is None:
         return "⚠️ GPT-4o not initialized. Check credentials or rebuild the Space."
     context = "\n".join(f"[Chunk {i+1}] {chunk.strip()}" for i, chunk in enumerate(retrieved_chunks))
     prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(context=context, query=query)
@@ -189,8 +209,8 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = F
                 "You are an expert enterprise documentation assistant. "
                 "When reasoning_mode is off, stay strictly factual and concise. "
                 "When reasoning_mode is on, combine insights across chunks logically "
-                "and explain the reasoning briefly."
-                "If answer not in document, say exactly: "
                 "'I don't know based on the provided document.'"
             ),
         },
@@ -204,16 +224,17 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = F
         print(f"⚠️ GPT-4o generation failed: {e}")
         return "⚠️ Error: Could not generate an answer."
 # ==========================================================
 # 7️⃣ Local Test
 # ==========================================================
 if __name__ == "__main__":
     dummy_chunks = [
-        "Step 1: Open the dashboard and navigate to reports.",
-        "Step 2: Click 'Export' to download a CSV summary.",
-        "Step 3: Review the generated report in your downloads folder.",
-        "Appendix: Communication user creation steps are explained later in this guide."
     ]
     embeddings = [
         _query_model.encode([f"passage: {c}"], convert_to_numpy=True, normalize_embeddings=True)[0]
@@ -221,7 +242,7 @@ if __name__ == "__main__":
     ]
     index = build_faiss_index(embeddings)
-    query = "How do I create a communication user?"
     retrieved = retrieve_chunks(query, index, dummy_chunks)
     print("🔍 Retrieved:", retrieved)
     print("💬 Answer:", generate_answer(query, retrieved, reasoning_mode=False))

 qa.py — GPT-4o (SAP Gen AI Hub) + ReRank Retrieval
 --------------------------------------------------
 ✅ Semantic retrieval (FAISS + cosine re-rank + neighbor fill)
+✅ Bullet-aware similarity boost for procedural chunks
 ✅ Smart factual mode (fast)
 ✅ Deep reasoning mode (ChatGPT-like)
 """
 import os
+import re
 import json
 import numpy as np
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
 from gen_ai_hub.proxy.core.proxy_clients import get_proxy_client
 from gen_ai_hub.proxy.langchain.openai import ChatOpenAI
+print("✅ qa.py (GPT-4o via Gen AI Hub + Bullet-Aware Retrieval) loaded from:", __file__)
 # ==========================================================
 # 1️⃣ Hugging Face Cache
 # ==========================================================
 try:
     _query_model = SentenceTransformer(
+        "intfloat/e5-small-v2",   # ⚡ Faster, 384-dim embeddings
         cache_folder=CACHE_DIR
     )
     print("✅ Loaded embedding model: intfloat/e5-small-v2 (fast mode)")
     "If the answer cannot be found even after considering all chunks, say exactly:\n"
     "'I don't know based on the provided document.'\n\n"
     "Context:\n{context}\n\nQuestion: {query}\nAnswer:"
 )
 REASONING_PROMPT = (
 )
 # ==========================================================
+# 5️⃣ Retrieval — FAISS + Bullet-Aware Re-rank + Neighbor Fill
 # ==========================================================
+from vectorstore import build_faiss_index
 def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5,
                     min_similarity: float = 0.6, candidate_multiplier: int = 3,
                     embeddings: list = None):
     """
     Re-rank and optionally fill with neighbors for context continuity.
+    Adds small similarity boost for bullet-style or step-based chunks.
     Auto-detects and rebuilds FAISS index if dimension mismatch occurs.
     """
             normalize_embeddings=True
         )[0]
+        # ✅ Sanity check: dimension match between query and FAISS index
         if hasattr(index, "d") and q_emb.shape[0] != index.d:
             print(f"⚠️ FAISS index dimension mismatch: index={index.d}, query={q_emb.shape[0]}")
             if embeddings:
                 print("🔄 Rebuilding FAISS index to match embedding dimensions...")
                 index = build_faiss_index(embeddings)
+                print("✅ FAISS index successfully rebuilt.")
+                q_emb = _query_model.encode(
+                    [f"query: {query.strip()}"],
+                    convert_to_numpy=True,
+                    normalize_embeddings=True
+                )[0]
             else:
+                print("❌ No embeddings available to rebuild FAISS index.")
                 return []
         # Step 1️⃣ — Initial FAISS retrieval
         num_candidates = max(top_k * candidate_multiplier, top_k + 2)
         distances, indices = index.search(np.array([q_emb]).astype("float32"), num_candidates)
         candidate_indices = [int(i) for i in indices[0] if i >= 0]
+        candidate_indices = list(dict.fromkeys(candidate_indices))  # de-dupe
+        # Step 2️⃣ — Compute similarities
         doc_embs = _query_model.encode(
             [f"passage: {chunks[i]}" for i in candidate_indices],
             convert_to_numpy=True,
             normalize_embeddings=True,
         )
         sims = cosine_similarity([q_emb], doc_embs)[0]
+        # 🔹 NEW: Boost similarity for bullet-style or step-based chunks
+        boosted_sims = []
+        for idx, sim in zip(candidate_indices, sims):
+            chunk_text = chunks[idx].strip()
+            if re.match(r"^[-•\d]+[\.\s]", chunk_text):  # bullet or numbered
+                sim += 0.05  # small procedural context boost
+            boosted_sims.append((idx, sim))
+        ranked = sorted(boosted_sims, key=lambda x: x[1], reverse=True)
+        # Step 3️⃣ — Filter by similarity threshold
         filtered = [idx for idx, sim in ranked if sim >= min_similarity]
         if len(filtered) > top_k:
             filtered = filtered[:top_k]
+        # Step 4️⃣ — Neighbor fill (context continuity)
         neighbors = set()
         for idx in filtered:
             for n in [idx - 1, idx + 1]:
         # Step 5️⃣ — Build final chunk list
         final_chunks = [chunks[i] for i in filtered]
+        print(f"✅ Retrieved {len(final_chunks)} chunks (bullet-aware + continuity).")
         return final_chunks
     except Exception as e:
     if chat_llm is None:
         return "⚠️ GPT-4o not initialized. Check credentials or rebuild the Space."
+    # Combine chunks with markers
     context = "\n".join(f"[Chunk {i+1}] {chunk.strip()}" for i, chunk in enumerate(retrieved_chunks))
     prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(context=context, query=query)
                 "You are an expert enterprise documentation assistant. "
                 "When reasoning_mode is off, stay strictly factual and concise. "
                 "When reasoning_mode is on, combine insights across chunks logically "
+                "and explain the reasoning briefly. "
+                "If the answer is not in the document, reply exactly: "
                 "'I don't know based on the provided document.'"
             ),
         },
         print(f"⚠️ GPT-4o generation failed: {e}")
         return "⚠️ Error: Could not generate an answer."
 # ==========================================================
 # 7️⃣ Local Test
 # ==========================================================
 if __name__ == "__main__":
+    from vectorstore import build_faiss_index
     dummy_chunks = [
+        "- Step 1: Enable order confirmation capability.",
+        "- Step 2: Configure supplier email.",
+        "Setup instructions and configuration details.",
+        "Prerequisites for automation are described here."
     ]
     embeddings = [
         _query_model.encode([f"passage: {c}"], convert_to_numpy=True, normalize_embeddings=True)[0]
     ]
     index = build_faiss_index(embeddings)
+    query = "What are the prerequisites for commerce automation?"
     retrieved = retrieve_chunks(query, index, dummy_chunks)
     print("🔍 Retrieved:", retrieved)
     print("💬 Answer:", generate_answer(query, retrieved, reasoning_mode=False))