Shubham170793 committed on
Commit 6d7ba5b · verified · 1 Parent(s): 32a3bbb

Update src/qa.py

Files changed (1)
  1. src/qa.py +51 -21
src/qa.py CHANGED
@@ -25,9 +25,9 @@ _query_model = SentenceTransformer(
 )
 
 # ----------------------------
-# LLM for answers (manual load)
+# LLM for answers (FLAN)
 # ----------------------------
-MODEL_NAME = "google/flan-t5-large"
+MODEL_NAME = "google/flan-t5-large"  # you can switch to flan-t5-base if Codespace is low on RAM
 _tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
 _model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
 
@@ -37,31 +37,61 @@ _answer_model = pipeline(
     tokenizer=_tokenizer
 )
 
+# ----------------------------
+# Prompt Template
+# ----------------------------
+PROMPT_CONCISE = """
+You are an expert analyst. Using ONLY the CONTEXT below, answer the QUESTION clearly and concisely.
+If the answer cannot be found in the context, reply exactly: "I don't know based on the provided document."
+
+Instructions:
+• Start with a one-sentence answer.
+• Then give up to 3 short numbered supporting points (each ≤ 25 words).
+• After that, list the sources referenced as [Chunk N].
+
+Context:
+{context}
+
+Question:
+{query}
+
+Answer:
+"""
+
 # ----------------------------
 # Functions
 # ----------------------------
 def retrieve_chunks(query, index, chunks, top_k=3):
-    """Embed the query and retrieve top-k chunks from FAISS."""
+    """
+    Embed the query and retrieve top-k chunks from FAISS.
+    """
     q_emb = _query_model.encode([query], convert_to_numpy=True)[0]
     return search_faiss(q_emb, index, chunks, top_k)
 
+
 def generate_answer(query, retrieved_chunks):
-    """Generate an answer using retrieved chunks as context."""
+    """
+    Generate an answer using FLAN and the retrieved chunks as context.
+    """
     if not retrieved_chunks:
-        return "Sorry, I could not find relevant information."
-
-    context = " ".join(retrieved_chunks)
-    prompt = (
-        "You are an assistant. Use the context below to answer the question clearly.\n\n"
-        f"Context:\n{context}\n\n"
-        f"Question:\n{query}\n\n"
-        "Answer:"
-    )
-
-    # ✅ Safe call: no cache_dir leaks here
-    result = _answer_model(
-        prompt,
-        max_new_tokens=300,
-        do_sample=False
-    )
-    return result[0]["generated_text"].strip()
+        return "Sorry, I couldn’t find relevant information in the document."
+
+    # Format chunks for context clarity
+    context = "\n\n".join([f"[Chunk {i+1}]: {chunk}" for i, chunk in enumerate(retrieved_chunks)])
+
+    # Build prompt using the concise structured template
+    prompt = PROMPT_CONCISE.format(context=context, query=query)
+
+    try:
+        result = _answer_model(
+            prompt,
+            max_new_tokens=300,
+            do_sample=False,
+            temperature=0.2
+        )
+        answer = result[0]["generated_text"].strip()
+    except Exception as e:
+        print("⚠️ FLAN generation failed:", e)
+        answer = "Sorry, I couldn’t generate an answer at the moment."
+
+    return answer
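
Usage note (illustrative sketch, not part of this commit): the snippet below shows how retrieve_chunks and generate_answer might be exercised end to end. It assumes search_faiss, imported elsewhere in src/qa.py and not shown in this diff, searches a standard FAISS index and returns the matching chunk texts; it also assumes src is importable as a package and reuses the module's _query_model so the demo index is built with the same embedding model the query path uses. The sample chunks and the IndexFlatL2 setup are placeholders only.

import faiss

from src.qa import _query_model, retrieve_chunks, generate_answer

# Toy chunks standing in for whatever the ingestion step normally produces.
chunks = [
    "Revenue grew 12% year over year, driven mainly by subscription sales.",
    "Operating expenses fell after the company consolidated its data centers.",
    "Management plans to enter two new regional markets next year.",
]

# Build a small demo index with the same model retrieve_chunks() uses for queries,
# so query and chunk embeddings live in the same vector space.
chunk_embs = _query_model.encode(chunks, convert_to_numpy=True).astype("float32")
index = faiss.IndexFlatL2(chunk_embs.shape[1])
index.add(chunk_embs)

query = "How much did revenue grow, and why?"
top_chunks = retrieve_chunks(query, index, chunks, top_k=2)
print(generate_answer(query, top_chunks))

The flat L2 index here is only a stand-in; whatever index type the repository's ingestion step actually builds should work as long as search_faiss accepts it and the chunk texts passed in match the indexed vectors.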