Spaces:

Shubham170793
/

enterprise-knowledge-assistant

Running

App Files Community

Shubham170793 committed on Oct 16

Commit

d5f56bf

verified ·

1 Parent(s): 8db2f50

Update src/qa.py

Browse files

Files changed (1) hide show

src/qa.py +55 -85

src/qa.py CHANGED Viewed

@@ -1,29 +1,23 @@
 """
-qa.py — Phi-2 FAST + ReRank (with FULL Reasoning Mode)
--------------------------------------------------------
-✅ Semantic retrieval (FAISS + cosine re-rank + neighbor-fill)
-✅ Smart factual mode
 ✅ Deep reasoning mode (ChatGPT-like)
 """
 import os
 import numpy as np
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-import torch
-print("✅ qa.py (Phi-2 FAST + ReRank + Full Reasoning) loaded from:", __file__)
-api_key = os.getenv("OPENAI_API_KEY")
-if not api_key:
-    print("❌ OPENAI_API_KEY not found in environment!")
-else:
-    print("✅ OPENAI_API_KEY loaded successfully (length:", len(api_key), ")")
 # ==========================================================
-# 1️⃣ Cache Setup
 # ==========================================================
 CACHE_DIR = "/tmp/hf_cache"
 os.makedirs(CACHE_DIR, exist_ok=True)
@@ -35,7 +29,7 @@ os.environ.update({
 })
 # ==========================================================
-# 2️⃣ Embedding Model
 # ==========================================================
 try:
     _query_model = SentenceTransformer("intfloat/e5-small-v2", cache_folder=CACHE_DIR)
@@ -45,15 +39,9 @@ except Exception as e:
     _query_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=CACHE_DIR)
 # ==========================================================
-# 3️⃣ GPT-4o Model Setup (SAP Gen AI Hub)
 # ==========================================================
-import json, os
-from gen_ai_hub.proxy.core.proxy_clients import get_proxy_client
-from gen_ai_hub.proxy.langchain.openai import ChatOpenAI
 print("✅ Loading GPT-4o via SAP Gen AI Hub...")
-# Load JSON credentials
 CRED_PATH = os.path.join(os.path.dirname(__file__), "irpa-r1208-hands-on-exercises-sk.json")
 try:
@@ -75,40 +63,37 @@ try:
         temperature=0.3,
         max_tokens=800
     )
     print("✅ GPT-4o (via Gen AI Hub) ready for generation.")
 except Exception as e:
     print(f"⚠️ Gen AI Hub setup failed: {e}")
     chat_llm = None
 # ==========================================================
-# 4️⃣ Prompts
 # ==========================================================
 STRICT_PROMPT = (
     "You are an enterprise documentation assistant.\n"
-    "Use ONLY the CONTEXT below to answer the QUESTION clearly and factually.\n"
-    "If the answer isn’t in the document, reply exactly:\n"
     "'I don't know based on the provided document.'\n\n"
     "Context:\n{context}\n\nQuestion: {query}\nAnswer:"
 )
 REASONING_PROMPT = (
-    "You are an expert enterprise assistant capable of deep reasoning.\n"
-    "Think step by step before answering. Use the CONTEXT below first, but also apply your world knowledge logically.\n"
-    "Explain your reasoning concisely if it helps clarity.\n"
-    "Avoid hallucination — if the document does not include the answer, say:\n"
     "'I don't know based on the provided document.'\n\n"
-    "Context:\n{context}\n\nQuestion: {query}\nLet's reason this out carefully:\nAnswer:"
 )
 # ==========================================================
-# 5️⃣ Retrieval — FAISS + Re-rank + Neighbor Fill
 # ==========================================================
 def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5,
                     min_similarity: float = 0.6, candidate_multiplier: int = 3):
-    """Re-rank and optionally fill with neighbors for context continuity."""
     if not index or not chunks:
         return []
@@ -117,11 +102,11 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5,
             [f"query: {query.strip()}"], convert_to_numpy=True, normalize_embeddings=True
         )[0]
-        # Initial FAISS search
         distances, indices = index.search(np.array([q_emb]).astype("float32"), top_k * candidate_multiplier)
-        candidate_indices = list(dict.fromkeys(indices[0]))  # dedup
-        # Re-rank by cosine similarity
         doc_embs = _query_model.encode(
             [f"passage: {chunks[i]}" for i in candidate_indices],
             convert_to_numpy=True,
@@ -130,82 +115,67 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5,
         sims = cosine_similarity([q_emb], doc_embs)[0]
         ranked = sorted(zip(candidate_indices, sims), key=lambda x: x[1], reverse=True)
-        # Filter by min_similarity
-        filtered = [idx for idx, sim in ranked if sim >= min_similarity]
-        if len(filtered) > top_k:
-            filtered = filtered[:top_k]
-        # Neighbor fill if needed
         if len(filtered) < top_k:
             expanded = set(filtered)
             for idx in filtered:
-                for neighbor in [idx - 1, idx + 1]:
-                    if 0 <= neighbor < len(chunks):
-                        expanded.add(neighbor)
                         if len(expanded) >= top_k:
                             break
                 if len(expanded) >= top_k:
                     break
             filtered = sorted(expanded)[:top_k]
-        return [chunks[i] for i in filtered]
     except Exception as e:
         print(f"⚠️ Retrieval error: {e}")
         return []
 # ==========================================================
-# 6️⃣ Answer Generation (GPT-4o with Full Reasoning)
 # ==========================================================
-from openai import OpenAI
-client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-MODEL_NAME = "gpt-4o"
 def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False):
     """
-    Generates answers using GPT-4o.
-    - reasoning_mode=False → strict factual mode (fast)
-    - reasoning_mode=True  → reasoning-rich mode (longer, more explanatory)
     """
     if not retrieved_chunks:
         return "Sorry, I couldn’t find relevant information in the document."
-    # Format context with chunk tags
     context = "\n".join(f"[Chunk {i+1}] {chunk.strip()}" for i, chunk in enumerate(retrieved_chunks))
-    prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(
-        context=context, query=query
-    )
     try:
-        response = client.chat.completions.create(
-            model=MODEL_NAME,
-            messages=[
-                {
-                    "role": "system",
-                    "content": (
-                        "You are an expert enterprise documentation assistant. "
-                        "Answer questions precisely using the provided context. "
-                        "If reasoning_mode is enabled, provide deeper explanations and step-by-step logic. "
-                        "If the document lacks information, respond exactly: "
-                        "'I don't know based on the provided document.'"
-                    ),
-                },
-                {"role": "user", "content": prompt},
-            ],
-            temperature=0.6 if reasoning_mode else 0.2,
-            max_tokens=600 if reasoning_mode else 350,
-            top_p=0.95,
-        )
-        text = response.choices[0].message.content.strip()
-        return text
     except Exception as e:
         print(f"⚠️ GPT-4o generation failed: {e}")
         return "⚠️ Error: Could not generate an answer."
 # ==========================================================
 # 7️⃣ Local Test
 # ==========================================================

 """
+qa.py — GPT-4o (SAP Gen AI Hub) + ReRank Retrieval
+--------------------------------------------------
+✅ Semantic retrieval (FAISS + cosine re-rank + neighbor fill)
+✅ Smart factual mode (fast)
 ✅ Deep reasoning mode (ChatGPT-like)
 """
 import os
+import json
 import numpy as np
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
+from gen_ai_hub.proxy.core.proxy_clients import get_proxy_client
+from gen_ai_hub.proxy.langchain.openai import ChatOpenAI
+print("✅ qa.py (GPT-4o via Gen AI Hub + ReRank) loaded from:", __file__)
 # ==========================================================
+# 1️⃣ Hugging Face Cache
 # ==========================================================
 CACHE_DIR = "/tmp/hf_cache"
 os.makedirs(CACHE_DIR, exist_ok=True)
 })
 # ==========================================================
+# 2️⃣ Embedding Model (E5-small-v2)
 # ==========================================================
 try:
     _query_model = SentenceTransformer("intfloat/e5-small-v2", cache_folder=CACHE_DIR)
     _query_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=CACHE_DIR)
 # ==========================================================
+# 3️⃣ GPT-4o via SAP Gen AI Hub
 # ==========================================================
 print("✅ Loading GPT-4o via SAP Gen AI Hub...")
 CRED_PATH = os.path.join(os.path.dirname(__file__), "irpa-r1208-hands-on-exercises-sk.json")
 try:
         temperature=0.3,
         max_tokens=800
     )
     print("✅ GPT-4o (via Gen AI Hub) ready for generation.")
 except Exception as e:
     print(f"⚠️ Gen AI Hub setup failed: {e}")
     chat_llm = None
 # ==========================================================
+# 4️⃣ Prompt Templates
 # ==========================================================
 STRICT_PROMPT = (
     "You are an enterprise documentation assistant.\n"
+    "Answer clearly and factually using ONLY the CONTEXT below.\n"
+    "If the answer is not in the document, reply exactly:\n"
     "'I don't know based on the provided document.'\n\n"
     "Context:\n{context}\n\nQuestion: {query}\nAnswer:"
 )
 REASONING_PROMPT = (
+    "You are an expert enterprise assistant capable of reasoning.\n"
+    "Think step by step. Base your answer primarily on the CONTEXT, "
+    "but apply logical inference only when necessary.\n"
+    "If the document lacks the answer, say exactly:\n"
     "'I don't know based on the provided document.'\n\n"
+    "Context:\n{context}\n\nQuestion: {query}\nLet's reason step-by-step:\nAnswer:"
 )
 # ==========================================================
+# 5️⃣ Retrieval — FAISS + Cosine Re-Rank + Neighbor Fill
 # ==========================================================
 def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5,
                     min_similarity: float = 0.6, candidate_multiplier: int = 3):
+    """Select top chunks via FAISS, rerank by cosine similarity, fill gaps with neighbors."""
     if not index or not chunks:
         return []
             [f"query: {query.strip()}"], convert_to_numpy=True, normalize_embeddings=True
         )[0]
+        # 1️⃣ Initial FAISS search
         distances, indices = index.search(np.array([q_emb]).astype("float32"), top_k * candidate_multiplier)
+        candidate_indices = list(dict.fromkeys(indices[0]))  # dedup, preserve order
+        # 2️⃣ Compute true cosine similarity for rerank
         doc_embs = _query_model.encode(
             [f"passage: {chunks[i]}" for i in candidate_indices],
             convert_to_numpy=True,
         sims = cosine_similarity([q_emb], doc_embs)[0]
         ranked = sorted(zip(candidate_indices, sims), key=lambda x: x[1], reverse=True)
+        # 3️⃣ Keep only chunks meeting threshold
+        filtered = [idx for idx, sim in ranked if sim >= min_similarity][:top_k]
+        # 4️⃣ Neighbor fill if not enough
         if len(filtered) < top_k:
             expanded = set(filtered)
             for idx in filtered:
+                for nb in [idx - 1, idx + 1]:
+                    if 0 <= nb < len(chunks):
+                        expanded.add(nb)
                         if len(expanded) >= top_k:
                             break
                 if len(expanded) >= top_k:
                     break
             filtered = sorted(expanded)[:top_k]
+        final_chunks = [chunks[i] for i in filtered]
+        print(f"✅ Retrieved {len(final_chunks)} chunks (semantic + neighbor fill)")
+        return final_chunks
     except Exception as e:
         print(f"⚠️ Retrieval error: {e}")
         return []
 # ==========================================================
+# 6️⃣ Answer Generation — GPT-4o via Gen AI Hub
 # ==========================================================
 def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False):
     """
+    reasoning_mode=False → strict factual mode (fast)
+    reasoning_mode=True  → deep reasoning mode (ChatGPT-like)
     """
     if not retrieved_chunks:
         return "Sorry, I couldn’t find relevant information in the document."
+    if chat_llm is None:
+        return "⚠️ GPT-4o not initialized. Check credentials or rebuild the Space."
+    # Combine chunks with markers
     context = "\n".join(f"[Chunk {i+1}] {chunk.strip()}" for i, chunk in enumerate(retrieved_chunks))
+    prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(context=context, query=query)
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                "You are an expert enterprise documentation assistant. "
+                "Answer only using provided context; if reasoning_mode is on, explain briefly. "
+                "If answer not in document, say exactly: "
+                "'I don't know based on the provided document.'"
+            ),
+        },
+        {"role": "user", "content": prompt},
+    ]
     try:
+        response = chat_llm.invoke(messages)
+        return response.content.strip()
     except Exception as e:
         print(f"⚠️ GPT-4o generation failed: {e}")
         return "⚠️ Error: Could not generate an answer."
 # ==========================================================
 # 7️⃣ Local Test
 # ==========================================================