Update src/qa.py
src/qa.py
CHANGED
@@ -1,5 +1,5 @@
 """
-qa.py – GPT-4o (SAP Gen AI Hub) + ReRank Retrieval + PRF Query Expansion
+qa.py – GPT-4o (SAP Gen AI Hub) + ReRank Retrieval (Stable Strict)
 --------------------------------------------------
 ✅ Semantic retrieval (FAISS + cosine re-rank + neighbor fill)
 ✅ Bullet-aware similarity boost for procedural chunks
@@ -7,7 +7,7 @@ qa.py – GPT-4o (SAP Gen AI Hub) + ReRank Retrieval + PRF Query Expansion
 ✅ Smart factual mode (fast)
 ✅ Deep reasoning mode (ChatGPT-like)
 ✅ genai_generate() helper for suggestions
-✅ Lightweight PRF query expansion
+✅ Slightly softened Strict Prompt for better partial-context answers
 """
 
 import os
@@ -16,13 +16,12 @@ import json
 import pickle
 import hashlib
 import numpy as np
-from collections import Counter
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
 from gen_ai_hub.proxy.core.proxy_clients import get_proxy_client
 from gen_ai_hub.proxy.langchain.openai import ChatOpenAI
 
-print("✅ qa.py (GPT-4o via Gen AI Hub + Bullet-Aware Retrieval + PRF) loaded from:", __file__)
+print("✅ qa.py (GPT-4o via Gen AI Hub + Bullet-Aware Retrieval + Cache) loaded from:", __file__)
 
 # ==========================================================
 # 🧱 Permanent Embeddings Cache Directory
@@ -58,14 +57,17 @@ os.environ.update({
 # 2️⃣ Embedding Model (E5-small-v2)
 # ==========================================================
 try:
-    _query_model = SentenceTransformer("intfloat/e5-small-v2", cache_folder=CACHE_DIR)
+    _query_model = SentenceTransformer(
+        "intfloat/e5-small-v2",
+        cache_folder=CACHE_DIR
+    )
     print("✅ Loaded embedding model: intfloat/e5-small-v2 (fast mode)")
 except Exception as e:
     print(f"⚠️ Embedding load failed ({e}), using MiniLM fallback")
     _query_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=CACHE_DIR)
 
 # ==========================================================
-# 3️⃣ GPT-4o via SAP Gen AI Hub – Lazy
+# 3️⃣ GPT-4o via SAP Gen AI Hub – Lazy Initialization
 # ==========================================================
 CRED_PATH = os.path.join(os.path.dirname(__file__), "GEN AI HUB PROXY.json")
 _chat_llm = None
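Note: e5-small-v2 only retrieves well when inputs carry E5's "query: " / "passage: " prefix convention, which this file applies consistently. A self-contained sanity check (the two sentences are made up):

from sentence_transformers import SentenceTransformer

model = SentenceTransformer("intfloat/e5-small-v2")
q = model.encode(["query: how do I enable commerce automation?"], normalize_embeddings=True)
p = model.encode(["passage: Commerce automation requires an activated storefront."], normalize_embeddings=True)
print((q @ p.T).item())  # cosine similarity, since both vectors are unit-normalized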
@@ -101,8 +103,9 @@ def get_chat_llm(model_name: str = "gpt-4o", temperature: float = 0.3, max_token
         _chat_llm = None
         raise
 
+
 # ==========================================================
-# 4️⃣ Embedding Generator (
+# 4️⃣ Embedding Generator (Batch-Optimized)
 # ==========================================================
 def embed_chunks(chunks, batch_size: int = 32):
     if not chunks:
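The body of embed_chunks() falls outside this hunk; a minimal sketch of what a batch-optimized version typically looks like for an E5-family model (an assumption, not the file's actual code):

def embed_chunks_sketch(model, chunks, batch_size: int = 32):
    # E5-family models expect a "passage: " prefix on document text
    texts = [f"passage: {c.strip()}" for c in chunks]
    return model.encode(
        texts,
        batch_size=batch_size,
        convert_to_numpy=True,
        normalize_embeddings=True,
    )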
@@ -162,8 +165,9 @@ def cache_embeddings(file_name: str, chunks, embed_func, chunk_size: int = None,
     _clean_old_caches(base_name, keep_latest=5)
     return embeddings
 
+
 # ==========================================================
-# 6️⃣ Prompt Templates
+# 6️⃣ Prompt Templates (Improved Strict)
 # ==========================================================
 STRICT_PROMPT = (
     "You are an enterprise documentation assistant.\n"
@@ -172,8 +176,8 @@ STRICT_PROMPT = (
     "When the answer focuses on a single concept, definition, or explanation, write it as a clear and compact paragraph.\n"
     "Keep the tone professional and concise. Do not invent facts outside the provided content.\n"
     "Do not mention or refer to internal elements such as 'chunks', 'chunk numbers', 'passages', or 'sections of the document'.\n"
-    "If
-    "If nothing in the CONTEXT relates to the question, reply exactly:\n"
+    "If only partial or indirect clues are available (e.g., related words, hints, or contextual evidence), infer cautiously and start the response with 'Based on the available information,'.\n"
+    "If absolutely nothing in the CONTEXT relates to the question, reply exactly:\n"
     "'I don't know based on the provided document.'\n\n"
     "Context:\n{context}\n\nQuestion: {query}\nAnswer:"
 )
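Both templates are plain str.format() strings with {context} and {query} slots; filling one looks like this (sample values invented):

context = "\n\n".join(["Chunk one text...", "Chunk two text..."])  # stand-in chunks
prompt = STRICT_PROMPT.format(context=context, query="What is commerce automation?")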
@@ -183,36 +187,12 @@ REASONING_PROMPT = (
     "Think step by step and synthesize information even if scattered across chunks.\n"
     "Base your answer primarily on the CONTEXT, but if multiple partial clues exist, combine them logically.\n"
     "You may fill reasonable gaps with general knowledge to form a complete answer.\n"
-    "Do not mention or refer to internal elements such as 'chunks', 'chunk numbers', 'passages', or 'sections of the document'.\n"
+    "Do not mention or refer to internal elements such as 'chunks', 'chunk numbers', or 'sections of the document'.\n"
     "If absolutely nothing in the document relates, say exactly:\n"
     "'I don't know based on the provided document.'\n\n"
     "Context:\n{context}\n\nQuestion: {query}\nLet's reason step-by-step:\nAnswer:"
 )
 
-# ==========================================================
-# 🔹 NEW: Lightweight PRF Query Expansion
-# ==========================================================
-def expand_query_embedding(query, model, index, chunks, topN=40, alpha=0.75):
-    """
-    Expands the query embedding slightly using top candidate chunks (PRF-style).
-    Helps when query wording differs from document phrasing.
-    """
-    try:
-        q_emb = model.encode([f"query: {query}"], convert_to_numpy=True, normalize_embeddings=True)[0]
-        D, I = index.search(np.array([q_emb]).astype("float32"), topN)
-        texts = " ".join(chunks[i] for i in I[0] if i >= 0)
-        words = re.findall(r"[A-Za-z]{4,}", texts)
-        common = [w for w, _ in Counter(words).most_common(6) if w.lower() not in query.lower()]
-        if not common:
-            return q_emb
-        e_emb = model.encode([f"passage: {' '.join(common)}"], convert_to_numpy=True, normalize_embeddings=True)[0]
-        combined = alpha * q_emb + (1 - alpha) * e_emb
-        combined /= np.linalg.norm(combined)
-        print(f"🔍 Query expanded with: {common}")
-        return combined
-    except Exception as e:
-        print(f"⚠️ Query expansion skipped due to error: {e}")
-        return q_emb
 
 # ==========================================================
 # 7️⃣ Retrieval – FAISS + Bullet-Aware Re-rank + Neighbor Fill
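For reference, the deleted helper was textbook pseudo-relevance feedback: embed the query, mine frequent terms from the top-N hits, and nudge the query vector toward them. Its core update is the normalized convex blend below, lifted out as a standalone function:

import numpy as np

def prf_blend(q_emb: np.ndarray, e_emb: np.ndarray, alpha: float = 0.75) -> np.ndarray:
    # convex blend of the query vector and the expansion-term vector,
    # renormalized to unit length so cosine / inner-product scoring still holds
    combined = alpha * q_emb + (1 - alpha) * e_emb
    return combined / np.linalg.norm(combined)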
@@ -227,8 +207,11 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 7,
         return []
 
     try:
-        q_emb = expand_query_embedding(query, _query_model, index, chunks)
-
+        q_emb = _query_model.encode(
+            [f"query: {query.strip()}"],
+            convert_to_numpy=True,
+            normalize_embeddings=True
+        )[0]
 
         if hasattr(index, "d") and q_emb.shape[0] != index.d:
             print(f"⚠️ FAISS dimension mismatch: index={index.d}, query={q_emb.shape[0]}")
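The dimension guard protects against a FAISS index that was built with a different embedding model than the one currently loaded. The search step that produces candidate_indices sits outside this hunk; a sketch assuming an inner-product FAISS index (faiss_candidates is a hypothetical name, not the file's):

import numpy as np

def faiss_candidates(index, q_emb: np.ndarray, top_n: int = 40) -> list:
    # index.search returns (scores, ids); FAISS pads ids with -1 when it
    # holds fewer than top_n vectors
    _, ids = index.search(np.asarray([q_emb], dtype="float32"), top_n)
    return [int(i) for i in ids[0] if i >= 0]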
@@ -248,6 +231,7 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 7,
             normalize_embeddings=True,
         )
         sims = cosine_similarity([q_emb], doc_embs)[0]
+
         boosted_sims = []
         for idx, sim in zip(candidate_indices, sims):
             text = chunks[idx].strip()
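The bullet-aware boost applied inside this loop is also outside the hunk; a hedged sketch of the usual pattern (the regex and the 1.05 multiplier are illustrative, not the file's values):

import re

def bullet_boost(text: str, sim: float, boost: float = 1.05) -> float:
    # reward procedural-looking chunks: bulleted or numbered lines
    if re.match(r"^\s*([-*•]|\d+[.)])\s", text):
        return sim * boost
    return sim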
@@ -276,6 +260,7 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 7,
         print(f"⚠️ Retrieval error: {repr(e)}")
         return []
 
+
 # ==========================================================
 # 8️⃣ Answer Generation
 # ==========================================================
@@ -301,6 +286,7 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = F
         "'I don't know based on the provided document.'"},
         {"role": "user", "content": prompt},
     ]
+
     try:
         response = chat_llm_local.invoke(messages)
         return response.content.strip()
@@ -308,6 +294,7 @@
         print(f"⚠️ GPT-4o generation failed: {e}")
         return "⚠️ Error: Could not generate an answer."
 
+
 # ==========================================================
 # 9️⃣ Generic Text Generation Helper
 # ==========================================================
@@ -329,6 +316,7 @@ def genai_generate(prompt: str) -> str:
         print(f"⚠️ genai_generate() failed: {e}")
         return "⚠️ Unable to generate response."
 
+
 # ==========================================================
 # 🔍 Local Test
 # ==========================================================
@@ -344,6 +332,7 @@ if __name__ == "__main__":
 
     embeddings = embed_chunks(dummy_chunks)
     index = build_faiss_index(embeddings)
+
     query = "What are the prerequisites for commerce automation?"
     retrieved = retrieve_chunks(query, index, dummy_chunks)
     print("🔍 Retrieved:", retrieved)
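build_faiss_index() is exercised here but never shown in the diff; with unit-normalized embeddings the standard construction is an inner-product flat index (a sketch under that assumption):

import faiss
import numpy as np

def build_faiss_index_sketch(embeddings: np.ndarray):
    # inner product over unit-normalized vectors equals cosine similarity
    index = faiss.IndexFlatIP(embeddings.shape[1])
    index.add(np.asarray(embeddings, dtype="float32"))
    return index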