Update src/qa.py
src/qa.py
CHANGED

@@ -1,5 +1,5 @@
 """
-qa.py — GPT-4o (SAP Gen AI Hub) + ReRank Retrieval (Stable Strict)
+qa.py — GPT-4o (SAP Gen AI Hub) + ReRank Retrieval (Stable Strict, English Only)
 --------------------------------------------------
 ✅ Semantic retrieval (FAISS + cosine re-rank + neighbor fill)
 ✅ Bullet-aware similarity boost for procedural chunks
@@ -7,7 +7,7 @@ qa.py — GPT-4o (SAP Gen AI Hub) + ReRank Retrieval (Stable Strict)
 ✅ Smart factual mode (fast)
 ✅ Deep reasoning mode (ChatGPT-like)
 ✅ genai_generate() helper for suggestions
-✅ …
+✅ Token-safe truncation (prevents 128k overflow)
 """
 
 import os
@@ -54,23 +54,18 @@ os.environ.update({
 })
 
 # ==========================================================
-# 2️⃣ Embedding Model (…
+# 2️⃣ Embedding Model (English Only)
 # ==========================================================
 try:
-    _query_model = SentenceTransformer(
-        "intfloat/multilingual-e5-small",
-        cache_folder=CACHE_DIR
-    )
-    print("✅ Loaded embedding model: intfloat/multilingual-e5-small (multilingual mode)")
+    _query_model = SentenceTransformer("intfloat/e5-small-v2", cache_folder=CACHE_DIR)
+    print("✅ Loaded embedding model: intfloat/e5-small-v2 (English mode)")
 except Exception as e:
-    print(f"⚠️ Embedding load failed ({e}), attempting …
+    print(f"⚠️ Embedding load failed ({e}), attempting fallback...")
     try:
-        _query_model = SentenceTransformer("intfloat/e5-small-v2", cache_folder=CACHE_DIR)
-        print("🔁 Fallback: intfloat/e5-small-v2 loaded successfully.")
-    except Exception:
         _query_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=CACHE_DIR)
-        print("🔁 …
+        print("🔁 Fallback: all-MiniLM-L6-v2 loaded successfully.")
+    except Exception as e2:
+        raise RuntimeError(f"❌ Could not load any embedding model: {e2}")
 
 # ==========================================================
 # 3️⃣ GPT-4o via SAP Gen AI Hub — Lazy Initialization
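
The committed code keeps the fallback as nested try/except blocks. The same chain can also be written as a loop, which scales if more candidates are ever added. A minimal sketch under stated assumptions: the two model names come from the new version of the diff, CACHE_DIR is defined earlier in qa.py (the value below is a placeholder), and load_query_model / _CANDIDATES are illustrative names, not part of the commit:

from sentence_transformers import SentenceTransformer

CACHE_DIR = "/tmp/hf_cache"  # assumption: the real CACHE_DIR is set earlier in qa.py

_CANDIDATES = [
    "intfloat/e5-small-v2",                    # primary, English-only
    "sentence-transformers/all-MiniLM-L6-v2",  # last-resort fallback
]

def load_query_model() -> SentenceTransformer:
    errors = []
    for name in _CANDIDATES:
        try:
            model = SentenceTransformer(name, cache_folder=CACHE_DIR)
            print(f"✅ Loaded embedding model: {name}")
            return model
        except Exception as exc:  # keep trying the next candidate
            errors.append(f"{name}: {exc}")
    raise RuntimeError(f"❌ Could not load any embedding model: {errors}")
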
@@ -109,7 +104,6 @@ def get_chat_llm(model_name: str = "gpt-4o", temperature: float = 0.3, max_token…
         _chat_llm = None
         raise
 
-
 # ==========================================================
 # 4️⃣ Embedding Generator (Batch-Optimized)
 # ==========================================================
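
The `_chat_llm = None` / `raise` lines above are the tail of the lazy-initialization pattern named in section 3️⃣: the client is built on first call, cached in a module global, and the cache is cleared before re-raising so a later call can retry. A minimal sketch of that shape — ChatClient is a hypothetical stand-in for the real SAP Gen AI Hub GPT-4o proxy client, which this diff does not show:

class ChatClient:
    """Hypothetical stand-in for the SAP Gen AI Hub GPT-4o proxy client."""
    def __init__(self, model_name: str, temperature: float):
        self.model_name, self.temperature = model_name, temperature

_chat_llm = None  # module-level cache

def get_chat_llm(model_name: str = "gpt-4o", temperature: float = 0.3):
    global _chat_llm
    if _chat_llm is not None:
        return _chat_llm  # reuse the client built on a previous call
    try:
        _chat_llm = ChatClient(model_name=model_name, temperature=temperature)
        return _chat_llm
    except Exception:
        _chat_llm = None  # as in the diff: clear the cache, then re-raise
        raise
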
@@ -171,7 +165,6 @@ def cache_embeddings(file_name: str, chunks, embed_func, chunk_size: int = None,
     _clean_old_caches(base_name, keep_latest=5)
     return embeddings
 
-
 # ==========================================================
 # 6️⃣ Prompt Templates (Original Strict)
 # ==========================================================
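
`_clean_old_caches(base_name, keep_latest=5)` is called here but its body is not shown in this diff. A typical keep-latest-N cleaner sorts matching cache files by modification time and deletes the rest; the glob pattern, file extension, and cache directory below are assumptions for illustration only:

import glob
import os

def _clean_old_caches(base_name: str, keep_latest: int = 5, cache_dir: str = ".") -> None:
    # Assumed naming scheme: one cache file per (file, chunking) combination.
    pattern = os.path.join(cache_dir, f"{base_name}*.npy")
    files = sorted(glob.glob(pattern), key=os.path.getmtime, reverse=True)
    for stale in files[keep_latest:]:  # everything beyond the newest N
        try:
            os.remove(stale)
        except OSError:
            pass  # best-effort cleanup; never break the embedding path
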
@@ -181,9 +174,7 @@ STRICT_PROMPT = (
     "When multiple causes, steps, or key points are discussed, present them as short, well-structured bullet points.\n"
     "When the answer focuses on a single concept, definition, or explanation, write it as a clear and compact paragraph.\n"
     "Keep the tone professional and concise. Do not invent facts outside the provided content.\n"
-    "…
-    "If the answer cannot be found directly but there are partial clues, summarize those clues briefly starting with 'Based on the available information,'.\n"
-    "If nothing at all in the CONTEXT relates to the question, reply exactly:\n"
+    "If nothing in the CONTEXT relates to the question, reply exactly:\n"
     "'I don't know based on the provided document.'\n\n"
     "Context:\n{context}\n\nQuestion: {query}\nAnswer:"
 )
@@ -192,14 +183,11 @@ REASONING_PROMPT = (
     "You are an expert enterprise assistant capable of reasoning.\n"
     "Think step by step and synthesize information even if scattered across chunks.\n"
     "Base your answer primarily on the CONTEXT, but if multiple partial clues exist, combine them logically.\n"
-    "You may fill reasonable gaps with general knowledge to form a complete answer.\n"
-    "Do not mention or refer to internal elements such as 'chunks', 'chunk numbers', or 'sections of the document'.\n"
     "If absolutely nothing in the document relates, say exactly:\n"
     "'I don't know based on the provided document.'\n\n"
     "Context:\n{context}\n\nQuestion: {query}\nLet's reason step-by-step:\nAnswer:"
 )
 
-
 # ==========================================================
 # 7️⃣ Retrieval — FAISS + Bullet-Aware Re-rank + Neighbor Fill
 # ==========================================================
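
Both templates expose the same `{context}` and `{query}` placeholders, so choosing a mode reduces to choosing a string before a single `.format()` call — exactly what `generate_answer` does further down. A self-contained sketch with abbreviated stand-ins for the two templates and made-up sample data:

# Abbreviated stand-ins for the full templates defined above.
STRICT_PROMPT = "…\nContext:\n{context}\n\nQuestion: {query}\nAnswer:"
REASONING_PROMPT = "…\nContext:\n{context}\n\nQuestion: {query}\nLet's reason step-by-step:\nAnswer:"

chunks = ["Restart the sync agent.", "Check the OData connection."]  # made-up data
context = "\n".join(f"[Chunk {i+1}] {c}" for i, c in enumerate(chunks))

reasoning_mode = False
prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(
    context=context, query="How do I fix the sync error?"
)
print(prompt.splitlines()[-1])  # -> Answer:
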
@@ -266,14 +254,12 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 7,
         print(f"⚠️ Retrieval error: {repr(e)}")
         return []
 
-
 # ==========================================================
-# 8️⃣ Answer Generation (…
+# 8️⃣ Answer Generation (English Only + Token-Safe)
 # ==========================================================
 def truncate_context(context_text: str, max_tokens: int = 100000, model: str = "gpt-4o") -> str:
     """
     Truncate context to stay safely within model limits (~128k tokens).
-    Keeps only the earliest tokens up to max_tokens.
     """
     try:
         import tiktoken
@@ -283,7 +269,6 @@ def truncate_context(context_text: str, max_tokens: int = 100000, model: str = "gpt-4o") -> str:
         import tiktoken
         enc = tiktoken.get_encoding("cl100k_base")
     except Exception:
-        # crude fallback — approximate truncation
         return context_text[: max_tokens * 4]
 
     tokens = enc.encode(context_text)
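
The two paths of `truncate_context` are worth seeing side by side: the exact path encodes with tiktoken, slices the token list, and decodes; the crude path budgets roughly 4 characters per token, so 100,000 tokens ≈ 400,000 characters. A compact sketch mirroring the function above (`truncate_text` is an illustrative name, not the committed one):

def truncate_text(text: str, max_tokens: int = 100_000) -> str:
    try:
        import tiktoken
        enc = tiktoken.get_encoding("cl100k_base")  # same encoding the module requests
    except Exception:
        return text[: max_tokens * 4]  # ~4 chars/token: 100k tokens ≈ 400k chars
    tokens = enc.encode(text)
    if len(tokens) <= max_tokens:
        return text  # short input passes through untouched
    return enc.decode(tokens[:max_tokens])

assert truncate_text("hello world") == "hello world"
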
@@ -293,72 +278,38 @@ def truncate_context(context_text: str, max_tokens: int = 100000, model: str = "gpt-4o") -> str:
         return truncated
     return context_text
 
-
-def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False, doc_lang: str = "en"):
+def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False):
     """
-    Generates an answer using GPT-4o (SAP Gen AI Hub proxy).
-    Now supports Hindi or English response formatting automatically,
-    with safe context truncation to prevent token overflow.
+    Generates an English answer using GPT-4o (SAP Gen AI Hub proxy).
     """
     if not retrieved_chunks:
         return "Sorry, I couldn’t find relevant information in the document."
 
-    # Try lazy initialization
     try:
         chat_llm_local = get_chat_llm()
     except Exception:
         return "⚠️ GPT-4o not initialized. Check credentials or rebuild the Space."
 
-    # …
-    # 🧩 Build and clean context (deduplicate + truncate safely)
-    # ----------------------------------------------------------
+    # Build and clean context
     context = "\n".join(f"[Chunk {i+1}] {chunk.strip()}" for i, chunk in enumerate(retrieved_chunks))
-
-    # Remove duplicate lines to save tokens
     context = "\n".join(dict.fromkeys(context.splitlines()))
-
-    # Truncate to stay within GPT-4o 128k context limit
     context = truncate_context(context, 100000)
 
-
-
-
-    if doc_lang == "hi":
-        # Hindi-language response
-        prompt = (
-            f"आप एक दस्तावेज़ सहायक हैं जो दिए गए अंशों के आधार पर सटीक उत्तर देता है। "
-            f"कृपया नीचे दिए गए संदर्भ का उपयोग करते हुए प्रश्न का उत्तर हिंदी में दें। "
-            f"यदि उत्तर स्पष्ट रूप से दस्तावेज़ में नहीं है, तो कहें — "
-            f"'मुझे इस दस्तावेज़ के आधार पर उत्तर ज्ञात नहीं है।'\n\n"
-            f"संदर्भ:\n{context}\n\nप्रश्न: {query}\nउत्तर:"
-        )
-    else:
-        # Default English prompts
-        prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(
-            context=context, query=query
-        )
+    prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(
+        context=context, query=query
+    )
 
-    # ----------------------------------------------------------
-    # 💬 System + user messages
-    # ----------------------------------------------------------
     messages = [
-        {
-            "role": "system",
-            "content": (
-                …
-                "If the answer is not in the document, reply exactly: "
-                "'I don't know based on the provided document.'"
-            ),
-        },
+        {"role": "system", "content": (
+            "You are an expert enterprise documentation assistant. "
+            "When reasoning_mode is off, stay strictly factual and concise. "
+            "When reasoning_mode is on, combine insights across chunks logically. "
+            "If the answer is not in the document, reply exactly: "
+            "'I don't know based on the provided document.'"
+        )},
         {"role": "user", "content": prompt},
     ]
 
-    # ----------------------------------------------------------
-    # 🧠 Generate answer safely
-    # ----------------------------------------------------------
     try:
        response = chat_llm_local.invoke(messages)
        return response.content.strip()
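
For reference, the deleted Hindi prompt told the model, in English: "You are a document assistant that answers precisely from the given excerpts. Answer the question in Hindi using the context below; if the answer is clearly not in the document, say 'I don't know the answer based on this document.'" The surviving dedup line relies on dicts preserving insertion order (guaranteed since Python 3.7): `dict.fromkeys` drops exact repeated lines while keeping first occurrences in place. A tiny demonstration with made-up chunk lines:

lines = [
    "[Chunk 1] Restart the sync agent.",
    "[Chunk 2] Restart the sync agent.",  # different prefix -> kept
    "[Chunk 1] Restart the sync agent.",  # exact repeat -> dropped
]
print("\n".join(dict.fromkeys(lines)))
# [Chunk 1] Restart the sync agent.
# [Chunk 2] Restart the sync agent.
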
@@ -366,7 +317,6 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = F…
         print(f"⚠️ GPT-4o generation failed: {e}")
         return "⚠️ Error: Could not generate an answer."
 
-
 # ==========================================================
 # 9️⃣ Generic Text Generation Helper
 # ==========================================================
@@ -388,7 +338,6 @@ def genai_generate(prompt: str) -> str:
         print(f"⚠️ genai_generate() failed: {e}")
         return "⚠️ Unable to generate response."
 
-
 # ==========================================================
 # 🔟 Local Test
 # ==========================================================
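
After this commit, callers no longer pass `doc_lang`. An end-to-end usage sketch, assuming qa.py is importable and that the FAISS index and chunk list come from the ingestion step elsewhere in the repo (the `...` placeholders stand in for those objects):

from qa import retrieve_chunks, generate_answer  # assumes src/ is on sys.path

index, chunks = ..., ...  # placeholders: FAISS index + chunk list from ingestion

hits = retrieve_chunks("How do I reset the connector?", index, chunks, top_k=7)
answer = generate_answer("How do I reset the connector?", hits, reasoning_mode=False)
print(answer)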