Update src/qa.py
src/qa.py (CHANGED)
@@ -25,9 +25,9 @@ _query_model = SentenceTransformer(
 )

 # ----------------------------
-# LLM for answers (
+# LLM for answers (FLAN)
 # ----------------------------
-MODEL_NAME = "google/flan-t5-large"
+MODEL_NAME = "google/flan-t5-large"  # you can switch to flan-t5-base if Codespace is low on RAM
 _tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
 _model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)

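Note: the next hunk's header shows only the tail of the `_answer_model = pipeline(` call. A minimal sketch of what the full construction presumably looks like; the "text2text-generation" task string is an assumption inferred from the AutoModelForSeq2SeqLM class loaded above, not something visible in this diff:

    from transformers import pipeline

    # Assumed shape of the call whose tail appears in the hunk header below;
    # the task string is inferred from the seq2seq model class, not shown in the diff.
    _answer_model = pipeline(
        "text2text-generation",
        model=_model,
        tokenizer=_tokenizer
    )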
@@ -37,31 +37,61 @@ _answer_model = pipeline(
     tokenizer=_tokenizer
 )

+# ----------------------------
+# Prompt Template
+# ----------------------------
+PROMPT_CONCISE = """
+You are an expert analyst. Using ONLY the CONTEXT below, answer the QUESTION clearly and concisely.
+If the answer cannot be found in the context, reply exactly: "I don't know based on the provided document."
+
+Instructions:
+• Start with a one-sentence answer.
+• Then give up to 3 short numbered supporting points (each ≤ 25 words).
+• After that, list the sources referenced as [Chunk N].
+
+Context:
+{context}
+
+Question:
+{query}
+
+Answer:
+"""
+
 # ----------------------------
 # Functions
 # ----------------------------
 def retrieve_chunks(query, index, chunks, top_k=3):
-    """
+    """
+    Embed the query and retrieve top-k chunks from FAISS.
+    """
     q_emb = _query_model.encode([query], convert_to_numpy=True)[0]
     return search_faiss(q_emb, index, chunks, top_k)

+
 def generate_answer(query, retrieved_chunks):
-    """
+    """
+    Generate an answer using FLAN and the retrieved chunks as context.
+    """
     if not retrieved_chunks:
-        return "Sorry, I
-[… old lines 52–67, also removed; their text was not captured in this view]
+        return "Sorry, I couldn’t find relevant information in the document."
+
+    # Format chunks for context clarity
+    context = "\n\n".join([f"[Chunk {i+1}]: {chunk}" for i, chunk in enumerate(retrieved_chunks)])
+
+    # Build prompt using the concise structured template
+    prompt = PROMPT_CONCISE.format(context=context, query=query)
+
+    try:
+        result = _answer_model(
+            prompt,
+            max_new_tokens=300,
+            do_sample=False,
+            temperature=0.2
+        )
+        answer = result[0]["generated_text"].strip()
+    except Exception as e:
+        print("⚠️ FLAN generation failed:", e)
+        answer = "Sorry, I couldn’t generate an answer at the moment."
+
+    return answer
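`retrieve_chunks` delegates to `search_faiss`, which is imported from elsewhere in the repo and is not part of this diff. A minimal sketch of what such a helper typically looks like, assuming a standard FAISS index built over the chunk embeddings (an illustration, not the project's actual implementation):

    import numpy as np

    def search_faiss(q_emb, index, chunks, top_k=3):
        # FAISS expects a 2-D float32 array of shape (n_queries, dim)
        q = np.asarray([q_emb], dtype="float32")
        distances, ids = index.search(q, top_k)
        # Map returned row ids back to the original text chunks; -1 marks padding
        return [chunks[i] for i in ids[0] if i != -1]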
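For reference, a hypothetical end-to-end call sequence. `build_faiss_index` is an assumed helper name standing in for however the repo builds its index and chunk list; it does not appear in this diff:

    from src.qa import retrieve_chunks, generate_answer

    index, chunks = build_faiss_index("report.pdf")  # assumed helper, not in this diff
    query = "What are the key findings?"
    hits = retrieve_chunks(query, index, chunks, top_k=3)
    print(generate_answer(query, hits))

One small caveat: with do_sample=False, generation is greedy, so the temperature=0.2 argument has no effect (recent transformers versions warn about exactly this combination).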