Update src/qa.py
src/qa.py CHANGED
@@ -13,6 +13,7 @@ from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
 from gen_ai_hub.proxy.core.proxy_clients import get_proxy_client
 from gen_ai_hub.proxy.langchain.openai import ChatOpenAI
+from vectorstore import build_faiss_index
 
 print("✅ qa.py (GPT-4o via Gen AI Hub + ReRank) loaded from:", __file__)
 
@@ -33,7 +34,7 @@ os.environ.update({
 # ==========================================================
 try:
     _query_model = SentenceTransformer(
-        "intfloat/e5-small-v2",
+        "intfloat/e5-small-v2",
         cache_folder=CACHE_DIR
     )
     print("✅ Loaded embedding model: intfloat/e5-small-v2 (fast mode)")
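Note: the e5 model family is trained with asymmetric instruction prefixes, which is why every encode call in this file prepends "query: " or "passage: ". A minimal sketch of the convention (model name taken from the diff; the sample strings are illustrative only):

    from sentence_transformers import SentenceTransformer
    from sklearn.metrics.pairwise import cosine_similarity

    model = SentenceTransformer("intfloat/e5-small-v2")
    # e5 expects "query: " for questions and "passage: " for document chunks.
    q = model.encode(["query: how do I export a report?"], normalize_embeddings=True)
    p = model.encode(["passage: Click 'Export' to download a CSV summary."], normalize_embeddings=True)
    print(cosine_similarity(q, p)[0][0])  # cosine score; higher means more relevant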
@@ -76,8 +77,10 @@ except Exception as e:
 # ==========================================================
 STRICT_PROMPT = (
     "You are an enterprise documentation assistant.\n"
-    "…
-    "If…
+    "Use all relevant information from the CONTEXT below.\n"
+    "If multiple related points appear across chunks, combine them logically into one clear answer.\n"
+    "Do not invent facts outside the provided content.\n"
+    "If the answer cannot be found even after considering all chunks, say exactly:\n"
     "'I don't know based on the provided document.'\n\n"
     "Context:\n{context}\n\nQuestion: {query}\nAnswer:"
 )
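Both prompt templates are plain str.format strings with exactly two placeholders, {context} and {query}. A quick illustration of how generate_answer fills them (sample values invented):

    context = "[Chunk 1] Step 2: Click 'Export' to download a CSV summary."
    query = "How do I export a report?"
    print(STRICT_PROMPT.format(context=context, query=query))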
@@ -92,32 +95,15 @@ REASONING_PROMPT = (
     "Context:\n{context}\n\nQuestion: {query}\nLet's reason step-by-step:\nAnswer:"
 )
 
-
 # ==========================================================
-# …
+# 5️⃣ Retrieval – FAISS + Re-rank + Neighbor Fill
 # ==========================================================
-from vectorstore import build_faiss_index
-
-def _split_query(query: str):
-    """
-    Breaks long or compound questions into smaller sub-queries for richer retrieval coverage.
-    """
-    separators = [".", "?", "and", "then", "also", ",", ";"]
-    for sep in separators:
-        query = query.replace(sep, "|")
-    parts = [q.strip() for q in query.split("|") if len(q.strip()) > 3]
-    return parts[:3] if parts else [query.strip()]
-
-
 def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5,
                     min_similarity: float = 0.6, candidate_multiplier: int = 3,
-                    embeddings: list = None…
+                    embeddings: list = None):
     """
-
-
-    ✅ Dynamically adjusts similarity threshold
-    ✅ Expands context until token budget is reached
-    ✅ Keeps neighbor fill for continuity
+    Re-rank and optionally fill with neighbors for context continuity.
+    Auto-detects and rebuilds FAISS index if dimension mismatch occurs.
     """
 
     if not index or not chunks:
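The auto-rebuild path calls build_faiss_index from vectorstore.py, which is outside this diff. For orientation, a minimal sketch of what such a helper could look like, assuming embeddings are L2-normalized so inner product equals cosine similarity; the real vectorstore.py may differ:

    import numpy as np
    import faiss

    def build_faiss_index(embeddings):
        # Hypothetical sketch: exact inner-product search over normalized vectors.
        embs = np.asarray(embeddings, dtype="float32")
        index = faiss.IndexFlatIP(embs.shape[1])  # dimension inferred from the data
        index.add(embs)
        return index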
@@ -125,96 +111,54 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5,
         return []
 
     try:
-        # …
-
-        dynamic_min_sim = max(0.45, min(0.6, 0.6 - 0.02 * len(sub_queries)))
-        print(f"🧩 Sub-queries: {sub_queries} | Dynamic min_similarity={dynamic_min_sim:.2f}")
-
-        # 🔹 Step 1 – Embed all sub-queries and gather candidate indices
-        all_candidates = set()
-        for sub_q in sub_queries:
-            q_emb = _query_model.encode(
-                [f"query: {sub_q.strip()}"],
-                convert_to_numpy=True,
-                normalize_embeddings=True
-            )[0]
-
-            # ✅ Auto-heal FAISS index dimension mismatch
-            if hasattr(index, "d") and q_emb.shape[0] != index.d:
-                print(f"⚠️ FAISS index dimension mismatch: index={index.d}, query={q_emb.shape[0]}")
-                if embeddings:
-                    print("🔄 Rebuilding FAISS index to match embedding dimensions...")
-                    index = build_faiss_index(embeddings)
-                    print("✅ FAISS index successfully rebuilt.")
-                    q_emb = _query_model.encode(
-                        [f"query: {sub_q.strip()}"],
-                        convert_to_numpy=True,
-                        normalize_embeddings=True
-                    )[0]
-                else:
-                    print("❌ No embeddings available to rebuild FAISS index.")
-                    continue
-
-            # Initial retrieval for each sub-query
-            num_candidates = max(top_k * candidate_multiplier, top_k + 2)
-            distances, indices = index.search(np.array([q_emb]).astype("float32"), num_candidates)
-            all_candidates.update([int(i) for i in indices[0] if i >= 0])
-
-        if not all_candidates:
-            print("⚠️ No retrieval candidates found.")
-            return []
-
-        candidate_indices = list(all_candidates)
-
-        # 🔹 Step 2 – Re-rank by cosine similarity
-        q_emb_global = _query_model.encode(
+        # Encode query embedding
+        q_emb = _query_model.encode(
             [f"query: {query.strip()}"],
             convert_to_numpy=True,
             normalize_embeddings=True
         )[0]
+
+        # ✅ Check dimension match
+        if hasattr(index, "d") and q_emb.shape[0] != index.d:
+            print(f"⚠️ FAISS index dimension mismatch: index={index.d}, query={q_emb.shape[0]}")
+            if embeddings:
+                print("🔄 Rebuilding FAISS index to match embedding dimensions...")
+                index = build_faiss_index(embeddings)
+                q_emb = _query_model.encode([f"query: {query.strip()}"], convert_to_numpy=True, normalize_embeddings=True)[0]
+            else:
+                return []
+
+        # Step 1️⃣ – Initial FAISS retrieval
+        num_candidates = max(top_k * candidate_multiplier, top_k + 2)
+        distances, indices = index.search(np.array([q_emb]).astype("float32"), num_candidates)
+        candidate_indices = [int(i) for i in indices[0] if i >= 0]
+        candidate_indices = list(dict.fromkeys(candidate_indices))
+
+        # Step 2️⃣ – Re-rank by cosine similarity
         doc_embs = _query_model.encode(
             [f"passage: {chunks[i]}" for i in candidate_indices],
             convert_to_numpy=True,
             normalize_embeddings=True,
         )
-        sims = cosine_similarity([…
+        sims = cosine_similarity([q_emb], doc_embs)[0]
         ranked = sorted(zip(candidate_indices, sims), key=lambda x: x[1], reverse=True)
 
-        # …
-        filtered = [idx for idx, sim in ranked if sim >= …
-        if …
-            filtered = […
+        # Step 3️⃣ – Filter by similarity
+        filtered = [idx for idx, sim in ranked if sim >= min_similarity]
+        if len(filtered) > top_k:
+            filtered = filtered[:top_k]
 
-        # …
-
-
-        for …
-
-
-
-            if len(expanded) >= top_k:
-                break
-        if len(expanded) >= top_k:
-            break
-        filtered = sorted(expanded)
+        # Step 4️⃣ – Include ±1 neighbors for continuity
+        neighbors = set()
+        for idx in filtered:
+            for n in [idx - 1, idx + 1]:
+                if 0 <= n < len(chunks):
+                    neighbors.add(n)
+        filtered = sorted(set(filtered) | neighbors)
 
-        # …
-        context_limit = token_budget  # approx. by word count
-        context_accum, current_len = [], 0
-        for idx, sim in ranked:
-            if idx not in filtered:
-                filtered.append(idx)
-            chunk_len = len(chunks[idx].split())
-            if current_len + chunk_len > context_limit:
-                break
-            context_accum.append(idx)
-            current_len += chunk_len
-
-        filtered = sorted(set(context_accum or filtered))[: max(top_k, len(filtered))]
-
-        # 🔹 Step 6 – Final context prep
+        # Step 5️⃣ – Build final chunk list
         final_chunks = [chunks[i] for i in filtered]
-        print(f"✅ Retrieved {len(final_chunks)} chunks (…
+        print(f"✅ Retrieved {len(final_chunks)} chunks (semantic + neighbor fill).")
         return final_chunks
 
     except Exception as e:
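The filter-then-neighbor-fill logic in Steps 3 and 4 is easy to sanity-check in isolation. A toy run with hand-picked similarity scores (no model or FAISS involved; every value is invented for illustration):

    # (chunk index, cosine similarity) pairs, already sorted descending as in `ranked`
    ranked = [(7, 0.82), (2, 0.74), (9, 0.55), (4, 0.48)]
    min_similarity, top_k, n_chunks = 0.6, 5, 12

    filtered = [idx for idx, sim in ranked if sim >= min_similarity][:top_k]  # -> [7, 2]
    neighbors = {n for idx in filtered for n in (idx - 1, idx + 1) if 0 <= n < n_chunks}
    print(sorted(set(filtered) | neighbors))  # -> [1, 2, 3, 6, 7, 8]

Because neighbors are merged in and the result is re-sorted by position, the function can return more than top_k chunks, ordered by document position rather than by score.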
@@ -234,7 +178,6 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = F
     if chat_llm is None:
         return "⚠️ GPT-4o not initialized. Check credentials or rebuild the Space."
 
-    # Combine chunks with markers
     context = "\n".join(f"[Chunk {i+1}] {chunk.strip()}" for i, chunk in enumerate(retrieved_chunks))
     prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(context=context, query=query)
 
@@ -243,7 +186,8 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = F
                 "role": "system",
                 "content": (
                     "You are an expert enterprise documentation assistant. "
-                    "…
+                    "When reasoning_mode is off, stay strictly factual and concise. "
+                    "When on, combine insights across chunks logically. "
                     "If answer not in document, say exactly: "
                     "'I don't know based on the provided document.'"
                 ),
@@ -258,12 +202,11 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = F
         print(f"⚠️ GPT-4o generation failed: {e}")
         return "⚠️ Error: Could not generate an answer."
 
+
 # ==========================================================
 # 7️⃣ Local Test
 # ==========================================================
 if __name__ == "__main__":
-    from vectorstore import build_faiss_index
-
     dummy_chunks = [
         "Step 1: Open the dashboard and navigate to reports.",
         "Step 2: Click 'Export' to download a CSV summary.",
@@ -279,4 +222,4 @@ if __name__ == "__main__":
     query = "How do I create a communication user?"
     retrieved = retrieve_chunks(query, index, dummy_chunks)
     print("🔍 Retrieved:", retrieved)
-    print("💬 Answer:", generate_answer(query, retrieved, reasoning_mode=…
+    print("💬 Answer:", generate_answer(query, retrieved, reasoning_mode=False))