Update src/qa.py
src/qa.py CHANGED
@@ -2,7 +2,7 @@
 qa.py — Retrieval + Generation Layer
 -------------------------------------
 Handles:
-• Query embedding (SentenceTransformer)
+• Query embedding (SentenceTransformer / E5-compatible)
 • Chunk retrieval (FAISS)
 • Answer generation (Flan-T5)
 Optimized for Hugging Face Spaces & Streamlit.
@@ -16,7 +16,7 @@ from vectorstore import search_faiss
 print("✅ qa.py loaded from:", __file__)
 
 # ==========================================================
-# 1️⃣ Cache
+# 1️⃣ Hugging Face Cache Setup (Safe for Spaces)
 # ==========================================================
 CACHE_DIR = "/tmp/hf_cache"
 os.makedirs(CACHE_DIR, exist_ok=True)
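The cache section above works together with the unchanged `os.environ.update({...})` block (visible as the context in the next hunk's header) to point every Hugging Face cache at the writable /tmp directory on Spaces. That block is not shown in the diff; a typical version looks like the sketch below, where the exact keys are an assumption rather than this repo's actual code:

```python
# Hypothetical sketch of the elided os.environ.update({...}) block;
# the repo's real keys may differ.
import os

CACHE_DIR = "/tmp/hf_cache"
os.makedirs(CACHE_DIR, exist_ok=True)

os.environ.update({
    "HF_HOME": CACHE_DIR,                     # root for Hugging Face caches
    "TRANSFORMERS_CACHE": CACHE_DIR,          # transformers model weights
    "SENTENCE_TRANSFORMERS_HOME": CACHE_DIR,  # sentence-transformers models
})
```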
@@ -29,37 +29,46 @@ os.environ.update({
 })
 
 # ==========================================================
-# 2️⃣ Embedding Model
+# 2️⃣ Query Embedding Model
 # ==========================================================
-
-
-
-
-
+# Use E5-small-v2 for retrieval consistency with embeddings.py
+try:
+    _query_model = SentenceTransformer(
+        "intfloat/e5-small-v2",
+        cache_folder=CACHE_DIR
+    )
+    print("✅ Loaded query model: intfloat/e5-small-v2")
+except Exception as e:
+    print(f"⚠️ Query model load failed ({e}), falling back to MiniLM.")
+    _query_model = SentenceTransformer(
+        "sentence-transformers/all-MiniLM-L6-v2",
+        cache_folder=CACHE_DIR
+    )
+    print("✅ Loaded fallback model: all-MiniLM-L6-v2")
 
 # ==========================================================
-# 3️⃣ LLM for
+# 3️⃣ LLM for Answer Generation (FLAN-T5)
 # ==========================================================
-MODEL_NAME = "google/flan-t5-base" #
+MODEL_NAME = "google/flan-t5-base" # switch to 'large' if RAM allows
 print(f"✅ Loading LLM: {MODEL_NAME}")
 
 _tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
 _model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
 
-# Efficient text2text generation pipeline
 _answer_model = pipeline(
     "text2text-generation",
     model=_model,
     tokenizer=_tokenizer,
-    device=-1 #
+    device=-1 # CPU-safe for Spaces
 )
 
 # ==========================================================
-# 4️⃣ Prompt Template
+# 4️⃣ Prompt Template (concise and factual)
 # ==========================================================
-PROMPT_TEMPLATE = """
-
-
+PROMPT_TEMPLATE = """
+You are an expert enterprise assistant.
+Using ONLY the CONTEXT below, answer the QUESTION clearly and factually.
+If the context doesn’t contain the answer, reply exactly:
 "I don't know based on the provided document."
 
 ---
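The switch to intfloat/e5-small-v2 matters because E5 models are trained with asymmetric prefixes: queries must be embedded as "query: …" and indexed text as "passage: …", which is exactly what the retrieval function and the __main__ test below do. A minimal sketch of the convention (model name as in the diff; the example strings are illustrative):

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("intfloat/e5-small-v2")

# E5 is trained with asymmetric prefixes: "query: " for questions,
# "passage: " for the text being indexed. Mixing them up hurts retrieval.
q = model.encode(["query: What is SAP Ariba used for?"], normalize_embeddings=True)
p = model.encode(["passage: SAP Ariba is a cloud procurement platform."], normalize_embeddings=True)

# With normalized embeddings, the inner product equals cosine similarity.
print(float((q @ p.T)[0, 0]))
```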
@@ -69,22 +78,31 @@ Context:
 Question:
 {query}
 ---
-Answer:
+Answer:
+"""
 
 # ==========================================================
-# 5️⃣ Retrieval Function
+# 5️⃣ Chunk Retrieval Function
 # ==========================================================
 def retrieve_chunks(query: str, index, chunks: list, top_k: int = 3):
     """
-    Encodes the user query and retrieves top-k
+    Encodes the user query and retrieves top-k relevant chunks via FAISS.
+    Uses 'query:' prefix (E5 training style) for semantic alignment.
     """
     if not index or not chunks:
         return []
 
     try:
-
-
+        # E5 expects 'query:' prefix for better retrieval accuracy
+        query_emb = _query_model.encode(
+            [f"query: {query.strip()}"],
+            convert_to_numpy=True,
+            normalize_embeddings=True
+        )[0]
+
+        results = search_faiss(query_emb, index, chunks, top_k)
         return results
+
     except Exception as e:
         print(f"⚠️ Retrieval error: {e}")
         return []
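retrieve_chunks delegates the actual lookup to search_faiss, imported from vectorstore at the top of the file but not part of this diff. Judging from the call site, it runs a top-k nearest-neighbor search and maps row ids back to chunk text; a plausible sketch under those assumptions (not the repo's actual implementation):

```python
import numpy as np

def search_faiss(query_emb: np.ndarray, index, chunks: list, top_k: int = 3):
    # FAISS expects a 2-D float32 batch of query vectors.
    q = np.asarray([query_emb], dtype="float32")
    _scores, ids = index.search(q, top_k)
    # Map row ids back to chunk text; FAISS pads with -1 when fewer
    # than top_k neighbors exist.
    return [chunks[i] for i in ids[0] if i != -1]
```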
@@ -100,17 +118,18 @@ def generate_answer(query: str, retrieved_chunks: list):
     if not retrieved_chunks:
         return "Sorry, I couldn’t find relevant information in the document."
 
-    # Merge
+    # Merge retrieved chunks for context
     context = "\n\n".join([f"[Chunk {i+1}]: {chunk}" for i, chunk in enumerate(retrieved_chunks)])
 
+    # Build structured prompt
     prompt = PROMPT_TEMPLATE.format(context=context, query=query)
 
     try:
         result = _answer_model(
             prompt,
-            max_new_tokens=
+            max_new_tokens=300,
             do_sample=False,
-            temperature=0.
+            temperature=0.2
         )
         return result[0]["generated_text"].strip()
     except Exception as e:
@@ -119,7 +138,7 @@ def generate_answer(query: str, retrieved_chunks: list):
 
 
 # ==========================================================
-# 7️⃣ Optional
+# 7️⃣ Optional Local Test
 # ==========================================================
 if __name__ == "__main__":
     dummy_chunks = [
@@ -128,10 +147,13 @@ if __name__ == "__main__":
         "Integration with SAP ERP allows for seamless data synchronization."
     ]
     from vectorstore import build_faiss_index
+    import numpy as np
+
     index = build_faiss_index([
-        _query_model.encode([chunk], convert_to_numpy=True)[0]
+        _query_model.encode([f"passage: {chunk}"], convert_to_numpy=True, normalize_embeddings=True)[0]
         for chunk in dummy_chunks
     ])
+
     query = "What is SAP Ariba used for?"
     retrieved = retrieve_chunks(query, index, dummy_chunks)
     print("🔍 Retrieved:", retrieved)
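End to end, the Streamlit layer would drive this module roughly as below. This is a hedged sketch with illustrative dummy chunks: build_faiss_index's signature is inferred from the __main__ test above, and the chunk strings are invented example data.

```python
from vectorstore import build_faiss_index
from qa import _query_model, retrieve_chunks, generate_answer

chunks = [
    "SAP Ariba is a cloud-based procurement platform.",
    "Integration with SAP ERP allows for seamless data synchronization.",
]

# Index passages with the E5 "passage:" prefix, mirroring the "query:"
# prefix used inside retrieve_chunks.
index = build_faiss_index([
    _query_model.encode([f"passage: {c}"], convert_to_numpy=True,
                        normalize_embeddings=True)[0]
    for c in chunks
])

query = "What is SAP Ariba used for?"
retrieved = retrieve_chunks(query, index, chunks, top_k=2)

# With do_sample=False the pipeline decodes greedily, so the
# temperature=0.2 argument does not influence the output.
print(generate_answer(query, retrieved))
```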