Update src/qa.py
src/qa.py CHANGED
@@ -1,3 +1,13 @@
+"""
+qa.py — Retrieval + Generation Layer
+-------------------------------------
+Handles:
+• Query embedding (SentenceTransformer)
+• Chunk retrieval (FAISS)
+• Answer generation (Flan-T5)
+Optimized for Hugging Face Spaces & Streamlit.
+"""
+
 import os
 from sentence_transformers import SentenceTransformer
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
@@ -5,93 +15,124 @@ from vectorstore import search_faiss
 
 print("✅ qa.py loaded from:", __file__)
 
-#
-# Hugging Face
-#
+# ==========================================================
+# 1️⃣ Cache Configuration (Hugging Face safe /tmp folder)
+# ==========================================================
 CACHE_DIR = "/tmp/hf_cache"
 os.makedirs(CACHE_DIR, exist_ok=True)
 
-os.environ…
-…
-…
-…
+os.environ.update({
+    "HF_HOME": CACHE_DIR,
+    "TRANSFORMERS_CACHE": CACHE_DIR,
+    "HF_DATASETS_CACHE": CACHE_DIR,
+    "HF_MODULES_CACHE": CACHE_DIR
+})
 
-#
-# Query
-#
+# ==========================================================
+# 2️⃣ Embedding Model (for Query Encoding)
+# ==========================================================
 _query_model = SentenceTransformer(
     "sentence-transformers/all-MiniLM-L6-v2",
     cache_folder=CACHE_DIR
 )
+print("✅ Loaded embedding model: all-MiniLM-L6-v2")
 
-# ----------------------------
-# LLM for answers (FLAN)
-# ----------------------------
-MODEL_NAME = "google/flan-t5-large"  # you can switch to flan-t5-base if Codespace is low on RAM
+# ==========================================================
+# 3️⃣ LLM for Answers (Google FLAN-T5)
+# ==========================================================
+MODEL_NAME = "google/flan-t5-base"  # lighter & faster; can switch to 'large' for higher accuracy
+print(f"✅ Loading LLM: {MODEL_NAME}")
 _tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
 _model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
 
+# Efficient text2text generation pipeline
 _answer_model = pipeline(
     "text2text-generation",
    model=_model,
-    tokenizer=_tokenizer
+    tokenizer=_tokenizer,
+    device=-1  # ensures CPU-safe execution (avoid GPU dependency)
 )
 
-#
-# Prompt Template
-#
-…
-…
-If the …
-…
-Instructions:
-• Start with a one-sentence answer.
-• Then give up to 3 short numbered supporting points (each ≤ 25 words).
-• After that, list the sources referenced as [Chunk N].
+# ==========================================================
+# 4️⃣ Prompt Template
+# ==========================================================
+PROMPT_TEMPLATE = """You are an expert enterprise assistant.
+Using ONLY the context provided below, answer the question clearly and factually.
+If the context doesn’t contain the answer, reply exactly:
+"I don't know based on the provided document."
 
+---
 Context:
 {context}
-
+---
 Question:
 {query}
+---
+Answer:"""
 
-…
-…
-…
-…
-# Functions
-# ----------------------------
-def retrieve_chunks(query, index, chunks, top_k=3):
+# ==========================================================
+# 5️⃣ Retrieval Function
+# ==========================================================
+def retrieve_chunks(query: str, index, chunks: list, top_k: int = 3):
     """
-    …
+    Encodes the user query and retrieves top-k most relevant chunks from FAISS.
     """
-    …
-    …
+    if not index or not chunks:
+        return []
+
+    try:
+        q_emb = _query_model.encode([query], convert_to_numpy=True)[0]
+        results = search_faiss(q_emb, index, chunks, top_k)
+        return results
+    except Exception as e:
+        print(f"⚠️ Retrieval error: {e}")
+        return []
 
 
-…
+# ==========================================================
+# 6️⃣ Answer Generation Function
+# ==========================================================
+def generate_answer(query: str, retrieved_chunks: list):
     """
-    …
+    Generates an answer using FLAN-T5 and retrieved chunks as context.
     """
     if not retrieved_chunks:
         return "Sorry, I couldn’t find relevant information in the document."
 
-    #
+    # Merge top chunks into one context block
     context = "\n\n".join([f"[Chunk {i+1}]: {chunk}" for i, chunk in enumerate(retrieved_chunks)])
 
-    …
-    prompt = PROMPT_CONCISE.format(context=context, query=query)
+    prompt = PROMPT_TEMPLATE.format(context=context, query=query)
 
     try:
         result = _answer_model(
             prompt,
-            max_new_tokens=…
+            max_new_tokens=250,
             do_sample=False,
-            temperature=0.…
+            temperature=0.3
         )
-        …
+        return result[0]["generated_text"].strip()
     except Exception as e:
-        print("⚠️ …
-        …
-        …
-        …
+        print(f"⚠️ Generation failed: {e}")
+        return "⚠️ Error: Could not generate an answer at the moment."
+
+
+# ==========================================================
+# 7️⃣ Optional: Test Run
+# ==========================================================
+if __name__ == "__main__":
+    dummy_chunks = [
+        "SAP Ariba is a cloud-based procurement solution.",
+        "It helps companies manage suppliers and sourcing processes efficiently.",
+        "Integration with SAP ERP allows for seamless data synchronization."
+    ]
+    from vectorstore import build_faiss_index
+    index = build_faiss_index([
+        _query_model.encode([chunk], convert_to_numpy=True)[0]
+        for chunk in dummy_chunks
+    ])
+    query = "What is SAP Ariba used for?"
+    retrieved = retrieve_chunks(query, index, dummy_chunks)
+    print("🔍 Retrieved:", retrieved)
+    print("💬 Answer:", generate_answer(query, retrieved))
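
One detail worth flagging in generate_answer: with do_sample=False the pipeline decodes greedily, so the temperature=0.3 argument has no effect (recent transformers releases log a warning about exactly this combination). A sketch of the two consistent alternatives, depending on whether deterministic output is the goal:

# Deterministic (greedy) decoding: drop the inert temperature argument.
result = _answer_model(prompt, max_new_tokens=250, do_sample=False)

# Sampling, if the 0.3 temperature is actually intended.
result = _answer_model(prompt, max_new_tokens=250, do_sample=True, temperature=0.3)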