Update src/qa.py
src/qa.py
CHANGED
@@ -1,9 +1,9 @@
 """
+qa.py — Phi-2 FAST + ReRank (with FULL Reasoning Mode)
+-------------------------------------------------------
+✅ Semantic retrieval (FAISS + cosine re-rank + neighbor-fill)
+✅ Smart factual mode
+✅ Deep reasoning mode (ChatGPT-like)
 """
 
 import os
@@ -13,10 +13,10 @@ from sklearn.metrics.pairwise import cosine_similarity
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
 
+print("✅ qa.py (Phi-2 FAST + ReRank + Full Reasoning) loaded from:", __file__)
 
 # ==========================================================
+# 1️⃣ Cache Setup
 # ==========================================================
 CACHE_DIR = "/tmp/hf_cache"
 os.makedirs(CACHE_DIR, exist_ok=True)
@@ -38,7 +38,7 @@ except Exception as e:
 _query_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=CACHE_DIR)
 
 # ==========================================================
+# 3️⃣ Phi-2 Model Setup
 # ==========================================================
 MODEL_NAME = "microsoft/phi-2"
 print(f"✅ Loading LLM: {MODEL_NAME}")
@@ -61,63 +61,58 @@ _answer_model = pipeline(
 print("✅ Phi-2 text-generation pipeline ready (optimized).")
 
 # ==========================================================
+# 4️⃣ Prompts
 # ==========================================================
 STRICT_PROMPT = (
     "You are an enterprise documentation assistant.\n"
+    "Use ONLY the CONTEXT below to answer the QUESTION clearly and factually.\n"
+    "If the answer isn’t in the document, reply exactly:\n"
     "'I don't know based on the provided document.'\n\n"
     "Context:\n{context}\n\nQuestion: {query}\nAnswer:"
 )
 
 REASONING_PROMPT = (
+    "You are an expert enterprise assistant capable of deep reasoning.\n"
+    "Think step by step before answering. Use the CONTEXT below first, but also apply your world knowledge logically.\n"
+    "Explain your reasoning concisely if it helps clarity.\n"
+    "Avoid hallucination — if the document does not include the answer, say:\n"
     "'I don't know based on the provided document.'\n\n"
+    "Context:\n{context}\n\nQuestion: {query}\nLet's reason this out carefully:\nAnswer:"
 )
 
 # ==========================================================
+# 5️⃣ Retrieval — FAISS + Re-rank + Neighbor Fill
 # ==========================================================
 def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5,
                     min_similarity: float = 0.6, candidate_multiplier: int = 3):
+    """Re-rank and optionally fill with neighbors for context continuity."""
     if not index or not chunks:
         return []
 
     try:
         q_emb = _query_model.encode(
+            [f"query: {query.strip()}"], convert_to_numpy=True, normalize_embeddings=True
         )[0]
 
+        # Initial FAISS search
+        distances, indices = index.search(np.array([q_emb]).astype("float32"), top_k * candidate_multiplier)
+        candidate_indices = list(dict.fromkeys(indices[0]))  # dedup
 
+        # Re-rank by cosine similarity
        doc_embs = _query_model.encode(
+            [f"passage: {chunks[i]}" for i in candidate_indices],
             convert_to_numpy=True,
+            normalize_embeddings=True,
         )
         sims = cosine_similarity([q_emb], doc_embs)[0]
         ranked = sorted(zip(candidate_indices, sims), key=lambda x: x[1], reverse=True)
 
+        # Filter by min_similarity
         filtered = [idx for idx, sim in ranked if sim >= min_similarity]
         if len(filtered) > top_k:
             filtered = filtered[:top_k]
 
+        # Neighbor fill if needed
         if len(filtered) < top_k:
             expanded = set(filtered)
             for idx in filtered:
@@ -130,7 +125,6 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5,
                     break
             filtered = sorted(expanded)[:top_k]
 
-        print(f"✅ Retrieved {len(filtered)} chunks (top_k={top_k}, min_sim={min_similarity})")
         return [chunks[i] for i in filtered]
 
     except Exception as e:
@@ -138,25 +132,22 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5,
         return []
 
 # ==========================================================
+# 6️⃣ Answer Generation (Restored Full Reasoning)
 # ==========================================================
 def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False):
+    """Generate detailed, human-like reasoning when enabled."""
     if not retrieved_chunks:
         return "Sorry, I couldn’t find relevant information in the document."
 
     context = "\n".join(f"[Chunk {i+1}] {chunk.strip()}" for i, chunk in enumerate(retrieved_chunks))
+    prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(context=context, query=query)
 
     try:
         result = _answer_model(
             prompt,
+            max_new_tokens=260 if reasoning_mode else 140,
+            temperature=0.7 if reasoning_mode else 0.2,
+            top_p=0.95 if reasoning_mode else 1.0,
             do_sample=reasoning_mode,
             pad_token_id=_tokenizer.eos_token_id,
             early_stopping=True,
@@ -166,12 +157,13 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False):
         if "Answer:" in text:
             text = text.split("Answer:")[-1].strip()
 
+        return text
 
     except Exception as e:
         print(f"⚠️ Generation failed: {e}")
         return "⚠️ Error: Could not generate an answer."
 
+
 # ==========================================================
 # 7️⃣ Local Test
 # ==========================================================
@@ -193,4 +185,4 @@ if __name__ == "__main__":
     query = "How do I create a communication user?"
     retrieved = retrieve_chunks(query, index, dummy_chunks)
     print("🔍 Retrieved:", retrieved)
-    print("💬 Answer:", generate_answer(query, retrieved))
+    print("💬 Answer:", generate_answer(query, retrieved, reasoning_mode=True))