Update src/qa.py
src/qa.py
CHANGED
@@ -3,8 +3,8 @@ qa.py — Retrieval + Generation Layer
 -------------------------------------
 Handles:
 • Query embedding (SentenceTransformer / E5-compatible)
-• Chunk retrieval (FAISS + …
-• Answer generation (OpenAI GPT-4o-mini …
+• Chunk retrieval (FAISS with neighborhood merging + re-ranking)
+• Answer generation (OpenAI GPT-4o-mini → FLAN-T5 fallback)
 Optimized for Hugging Face Spaces & Streamlit.
 """

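Note on the new docstring bullet: "neighborhood merging" refers to the window logic inside retrieve_chunks further down this diff. A minimal sketch of just that window, with invented chunk values:

    # Each FAISS hit at position idx is joined with its immediate neighbors
    # (idx - 1 and idx + 1), clamped to the list bounds. Chunks are invented here.
    chunks = ["intro", "setup steps", "troubleshooting", "faq"]
    idx = 0  # a hit on the first chunk has no left neighbor
    window = [chunks[i] for i in range(max(0, idx - 1), min(len(chunks), idx + 2))]
    assert " ".join(window) == "intro setup steps"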
@@ -14,23 +14,10 @@ from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
 from vectorstore import search_faiss

-
-# 1️⃣ Load OpenAI if key available
-# ==========================================================
-USE_OPENAI = bool(os.getenv("OPENAI_API_KEY"))
-
-if USE_OPENAI:
-    from openai import OpenAI
-    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-    print("✅ Using OpenAI GPT-4o-mini for answer generation")
-else:
-    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
-    print("⚙️ No OpenAI key found — using fallback FLAN-T5 model")
-
-print("✅ qa.py loaded successfully")
+print("✅ qa.py loaded from:", __file__)

 # ==========================================================
-# …
+# 1️⃣ Hugging Face Cache Setup
 # ==========================================================
 CACHE_DIR = "/tmp/hf_cache"
 os.makedirs(CACHE_DIR, exist_ok=True)
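Note on the embedding model loaded in the next hunk: intfloat/e5-small-v2 is trained with asymmetric prefixes, which is why this file prepends "query: " to search queries and "passage: " to document chunks. A minimal sketch of that convention, assuming sentence-transformers is installed; the example strings are invented:

    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer("intfloat/e5-small-v2")
    q = model.encode(["query: how long do refunds take?"], normalize_embeddings=True)
    p = model.encode(["passage: Refunds are processed within 5-7 business days."],
                     normalize_embeddings=True)
    print((q @ p.T)[0, 0])  # dot product equals cosine similarity after normalization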
@@ -42,32 +29,48 @@ os.environ.update({
 })

 # ==========================================================
-# …
+# 2️⃣ Query Embedding Model
 # ==========================================================
 try:
     _query_model = SentenceTransformer("intfloat/e5-small-v2", cache_folder=CACHE_DIR)
-    print("✅ Loaded …
+    print("✅ Loaded query model: intfloat/e5-small-v2")
 except Exception as e:
-    print(f"⚠️ …
+    print(f"⚠️ Query model load failed ({e}), falling back to MiniLM.")
     _query_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=CACHE_DIR)
-    print("✅ Loaded fallback: all-MiniLM-L6-v2")

 # ==========================================================
-# …
+# 3️⃣ LLM Setup: OpenAI (primary) + FLAN (fallback)
 # ==========================================================
-
+USE_OPENAI = bool(os.getenv("OPENAI_API_KEY"))
+_answer_model = None  # ensures it's always defined
+
+if USE_OPENAI:
+    try:
+        from openai import OpenAI
+        client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+        print("✅ Using OpenAI GPT-4o-mini for answer generation")
+    except Exception as e:
+        print(f"⚠️ Failed to initialize OpenAI client: {e}")
+        USE_OPENAI = False
+
+# Always prepare fallback safely
+try:
+    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
     MODEL_NAME = "google/flan-t5-base"
     _tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
     _model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
     _answer_model = pipeline("text2text-generation", model=_model, tokenizer=_tokenizer, device=-1)
+    print("💡 Fallback FLAN-T5 ready.")
+except Exception as e:
+    print(f"⚠️ Could not initialize FLAN fallback: {e}")

 # ==========================================================
-# …
+# 4️⃣ Prompt Template
 # ==========================================================
 PROMPT_TEMPLATE = """
 You are an enterprise knowledge assistant.
-Use ONLY the …
-If the context doesn’t contain the answer, …
+Use ONLY the CONTEXT below to answer the QUESTION clearly, completely, and factually.
+If the context doesn’t contain the answer, reply exactly:
 "I don't know based on the provided document."

 ---
@@ -81,28 +84,31 @@ Answer:
 """

 # ==========================================================
-# …
+# 5️⃣ Chunk Retrieval Function
 # ==========================================================
 def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5):
-    """Retrieve top-K relevant chunks and re-rank by semantic similarity."""
+    """Retrieve top-K relevant chunks, merge nearby ones, and re-rank by semantic similarity."""
     if not index or not chunks:
         return []

     try:
+        # Step 1: Encode the query
         query_emb = _query_model.encode(
             [f"query: {query.strip()}"],
             convert_to_numpy=True,
             normalize_embeddings=True
         )[0]

-        # …
+        # Step 2: Initial FAISS retrieval
         distances, indices = index.search(np.array([query_emb]).astype("float32"), top_k * 2)
+
+        # Step 3: Merge neighboring chunks
         merged_chunks = []
         for idx in indices[0]:
             neighbors = [chunks[i] for i in range(max(0, idx - 1), min(len(chunks), idx + 2))]
             merged_chunks.append(" ".join(neighbors))

-        # Re-rank
+        # Step 4: Re-rank using cosine similarity
         chunk_vecs = np.array([
             _query_model.encode([c], convert_to_numpy=True, normalize_embeddings=True)[0]
             for c in merged_chunks
@@ -110,32 +116,36 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5):
         scores = cosine_similarity(np.array([query_emb]), chunk_vecs)[0]
         sorted_indices = np.argsort(scores)[::-1]

+        # Step 5: Return top-ranked merged chunks
         return [merged_chunks[i] for i in sorted_indices[:top_k]]

     except Exception as e:
         print(f"⚠️ Retrieval error: {e}")
         return []

+
 # ==========================================================
-# …
+# 6️⃣ Answer Generation Function
 # ==========================================================
 def generate_answer(query: str, retrieved_chunks: list):
-    """Generate factual …
+    """Generate factual, context-grounded answers using OpenAI or fallback FLAN-T5."""
     if not retrieved_chunks:
         return "Sorry, I couldn’t find relevant information in the document."

-    # …
-    context = "\n\n".join(
-        …
-        …
+    # Build full context
+    context = "\n\n".join([
+        f"[Chunk {i+1}]: {chunk.strip()}"
+        for i, chunk in enumerate(retrieved_chunks)
+    ])
     prompt = PROMPT_TEMPLATE.format(context=context, query=query)

-    …
-    …
+    # --- Try OpenAI first ---
+    if USE_OPENAI:
+        try:
             response = client.chat.completions.create(
                 model="gpt-4o-mini",
                 messages=[
-                    {"role": "system", "content": "You are a precise enterprise assistant …
+                    {"role": "system", "content": "You are a precise enterprise document assistant."},
                     {"role": "user", "content": prompt},
                 ],
                 temperature=0.4,
@@ -143,16 +153,28 @@ def generate_answer(query: str, retrieved_chunks: list):
             )
             return response.choices[0].message.content.strip()

-    …
-    …
-    return result[0]["generated_text"].strip()
+        except Exception as e:
+            print(f"⚠️ OpenAI generation failed: {e}. Switching to fallback...")

+    # --- Fallback to FLAN-T5 ---
+    try:
+        if _answer_model:
+            result = _answer_model(
+                prompt,
+                max_new_tokens=600,
+                do_sample=False,
+                temperature=0.3
+            )
+            return result[0]["generated_text"].strip()
+        else:
+            return "⚠️ Error: Fallback model not available."
     except Exception as e:
-        print(f"⚠️ …
-        return "⚠️ Error: …
+        print(f"⚠️ Fallback model failed: {e}")
+        return "⚠️ Error: Both OpenAI and fallback generation failed."
+

 # ==========================================================
-# …
+# 7️⃣ Local Test
 # ==========================================================
 if __name__ == "__main__":
     dummy_chunks = [
@@ -163,7 +185,11 @@ if __name__ == "__main__":
     from vectorstore import build_faiss_index

     index = build_faiss_index([
-        _query_model.encode( …
+        _query_model.encode(
+            [f"passage: {chunk}"],
+            convert_to_numpy=True,
+            normalize_embeddings=True
+        )[0]
         for chunk in dummy_chunks
     ])

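Taken together, the changed file exposes a two-step flow: retrieve_chunks narrows the document to a few merged, re-ranked chunks, and generate_answer grounds the LLM on them. A minimal end-to-end sketch mirroring the __main__ block; it assumes vectorstore.build_faiss_index accepts a list of passage embeddings and returns a FAISS index, as this diff suggests, and the chunks and question are invented:

    from qa import _query_model, retrieve_chunks, generate_answer
    from vectorstore import build_faiss_index

    chunks = [
        "Our support team replies within 24 hours.",
        "Refunds are processed within 5-7 business days.",
    ]
    index = build_faiss_index([
        _query_model.encode([f"passage: {c}"], convert_to_numpy=True,
                            normalize_embeddings=True)[0]
        for c in chunks
    ])
    question = "How long do refunds take?"
    print(generate_answer(question, retrieve_chunks(question, index, chunks, top_k=2)))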