Shubham170793 committed on
Commit 9466a37 · verified · 1 Parent(s): fbd4778

Update src/qa.py

Files changed (1)
  1. src/qa.py +36 -44
src/qa.py CHANGED
@@ -1,9 +1,9 @@
  """
- qa.py — Phi-2 FAST + SMART RETRIEVAL (Stable)
- ---------------------------------------------
- intfloat/e5-small-v2 embeddings
- microsoft/phi-2 generation
- Optimized for: speed, factual accuracy, low hallucination
  """

  import os
@@ -13,10 +13,10 @@ from sklearn.metrics.pairwise import cosine_similarity
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
  import torch

- print("✅ qa.py (Phi-2 FAST + Smart Retrieval) loaded from:", __file__)

  # ==========================================================
- # 1️⃣ Cache Setup (Hugging Face /tmp cache)
  # ==========================================================
  CACHE_DIR = "/tmp/hf_cache"
  os.makedirs(CACHE_DIR, exist_ok=True)
@@ -38,7 +38,7 @@ except Exception as e:
      _query_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=CACHE_DIR)

  # ==========================================================
- # 3️⃣ Phi-2 LLM Setup
  # ==========================================================
  MODEL_NAME = "microsoft/phi-2"
  print(f"✅ Loading LLM: {MODEL_NAME}")
@@ -61,63 +61,58 @@ _answer_model = pipeline(
  print("✅ Phi-2 text-generation pipeline ready (optimized).")

  # ==========================================================
- # 4️⃣ Prompt Templates
  # ==========================================================
  STRICT_PROMPT = (
      "You are an enterprise documentation assistant.\n"
-     "Use ONLY the CONTEXT below to answer the QUESTION.\n"
-     "If the answer isn’t present, reply exactly:\n"
      "'I don't know based on the provided document.'\n\n"
      "Context:\n{context}\n\nQuestion: {query}\nAnswer:"
  )

  REASONING_PROMPT = (
-     "You are an enterprise assistant with reasoning ability.\n"
-     "Think carefully, but use the document context first.\n"
-     "If you must infer, say so explicitly.\n"
-     "If answer not in the document, reply exactly:\n"
      "'I don't know based on the provided document.'\n\n"
-     "Context:\n{context}\n\nQuestion: {query}\nAnswer:"
  )

  # ==========================================================
- # 5️⃣ Smart Retrieval (Re-rank + Neighbor Fill)
  # ==========================================================
  def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5,
                      min_similarity: float = 0.6, candidate_multiplier: int = 3):
-     """FAISS → Re-rank by cosine sim → Filter → Neighbor fill (only if needed)."""
      if not index or not chunks:
          return []

      try:
-         # 1️⃣ Encode query
          q_emb = _query_model.encode(
-             [f"query: {query.strip()}"],
-             convert_to_numpy=True,
-             normalize_embeddings=True
          )[0]

-         # 2️⃣ Initial FAISS retrieval (larger candidate pool)
-         num_candidates = top_k * candidate_multiplier
-         distances, indices = index.search(np.array([q_emb]).astype("float32"), num_candidates)
-         candidate_indices = list(dict.fromkeys(indices[0])) # dedup, preserve order

-         # 3️⃣ Re-rank by cosine similarity
-         candidate_texts = [chunks[i] for i in candidate_indices]
          doc_embs = _query_model.encode(
-             [f"passage: {c}" for c in candidate_texts],
              convert_to_numpy=True,
-             normalize_embeddings=True
          )
          sims = cosine_similarity([q_emb], doc_embs)[0]
          ranked = sorted(zip(candidate_indices, sims), key=lambda x: x[1], reverse=True)

-         # 4️⃣ Filter low-similarity
          filtered = [idx for idx, sim in ranked if sim >= min_similarity]
          if len(filtered) > top_k:
              filtered = filtered[:top_k]

-         # 5️⃣ Neighbor fill (only if fewer than top_k)
          if len(filtered) < top_k:
              expanded = set(filtered)
              for idx in filtered:
@@ -130,7 +125,6 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5,
                      break
              filtered = sorted(expanded)[:top_k]

-         print(f"✅ Retrieved {len(filtered)} chunks (top_k={top_k}, min_sim={min_similarity})")
          return [chunks[i] for i in filtered]

      except Exception as e:
@@ -138,25 +132,22 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5,
          return []

  # ==========================================================
- # 6️⃣ Answer Generation
  # ==========================================================
  def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False):
-     """Generate concise, factual or reasoning-based answers using Phi-2."""
      if not retrieved_chunks:
          return "Sorry, I couldn’t find relevant information in the document."

-     # Include [Chunk N] markers
      context = "\n".join(f"[Chunk {i+1}] {chunk.strip()}" for i, chunk in enumerate(retrieved_chunks))
-
-     prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(
-         context=context, query=query
-     )

      try:
          result = _answer_model(
              prompt,
-             max_new_tokens=180 if reasoning_mode else 140,
-             temperature=0.5 if reasoning_mode else 0.2,
              do_sample=reasoning_mode,
              pad_token_id=_tokenizer.eos_token_id,
              early_stopping=True,
@@ -166,12 +157,13 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = F
          if "Answer:" in text:
              text = text.split("Answer:")[-1].strip()

-         return text or "⚠️ No answer generated."

      except Exception as e:
          print(f"⚠️ Generation failed: {e}")
          return "⚠️ Error: Could not generate an answer."

  # ==========================================================
  # 7️⃣ Local Test
  # ==========================================================
@@ -193,4 +185,4 @@ if __name__ == "__main__":
      query = "How do I create a communication user?"
      retrieved = retrieve_chunks(query, index, dummy_chunks)
      print("🔍 Retrieved:", retrieved)
-     print("💬 Answer:", generate_answer(query, retrieved))
 
  """
+ qa.py — Phi-2 FAST + ReRank (with FULL Reasoning Mode)
+ -------------------------------------------------------
+ Semantic retrieval (FAISS + cosine re-rank + neighbor-fill)
+ Smart factual mode
+ Deep reasoning mode (ChatGPT-like)
  """

  import os
 
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
  import torch

+ print("✅ qa.py (Phi-2 FAST + ReRank + Full Reasoning) loaded from:", __file__)

  # ==========================================================
+ # 1️⃣ Cache Setup
  # ==========================================================
  CACHE_DIR = "/tmp/hf_cache"
  os.makedirs(CACHE_DIR, exist_ok=True)
 
      _query_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=CACHE_DIR)

  # ==========================================================
+ # 3️⃣ Phi-2 Model Setup
  # ==========================================================
  MODEL_NAME = "microsoft/phi-2"
  print(f"✅ Loading LLM: {MODEL_NAME}")
 
  print("✅ Phi-2 text-generation pipeline ready (optimized).")

  # ==========================================================
+ # 4️⃣ Prompts
  # ==========================================================
  STRICT_PROMPT = (
      "You are an enterprise documentation assistant.\n"
+     "Use ONLY the CONTEXT below to answer the QUESTION clearly and factually.\n"
+     "If the answer isn’t in the document, reply exactly:\n"
      "'I don't know based on the provided document.'\n\n"
      "Context:\n{context}\n\nQuestion: {query}\nAnswer:"
  )

  REASONING_PROMPT = (
+     "You are an expert enterprise assistant capable of deep reasoning.\n"
+     "Think step by step before answering. Use the CONTEXT below first, but also apply your world knowledge logically.\n"
+     "Explain your reasoning concisely if it helps clarity.\n"
+     "Avoid hallucination — if the document does not include the answer, say:\n"
      "'I don't know based on the provided document.'\n\n"
+     "Context:\n{context}\n\nQuestion: {query}\nLet's reason this out carefully:\nAnswer:"
  )

  # ==========================================================
+ # 5️⃣ Retrieval — FAISS + Re-rank + Neighbor Fill
  # ==========================================================
  def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5,
                      min_similarity: float = 0.6, candidate_multiplier: int = 3):
+     """Re-rank and optionally fill with neighbors for context continuity."""
      if not index or not chunks:
          return []

      try:
          q_emb = _query_model.encode(
+             [f"query: {query.strip()}"], convert_to_numpy=True, normalize_embeddings=True
          )[0]

+         # Initial FAISS search
+         distances, indices = index.search(np.array([q_emb]).astype("float32"), top_k * candidate_multiplier)
+         candidate_indices = list(dict.fromkeys(indices[0])) # dedup

+         # Re-rank by cosine similarity
          doc_embs = _query_model.encode(
+             [f"passage: {chunks[i]}" for i in candidate_indices],
              convert_to_numpy=True,
+             normalize_embeddings=True,
          )
          sims = cosine_similarity([q_emb], doc_embs)[0]
          ranked = sorted(zip(candidate_indices, sims), key=lambda x: x[1], reverse=True)

+         # Filter by min_similarity
          filtered = [idx for idx, sim in ranked if sim >= min_similarity]
          if len(filtered) > top_k:
              filtered = filtered[:top_k]

+         # Neighbor fill if needed
          if len(filtered) < top_k:
              expanded = set(filtered)
              for idx in filtered:
 
                      break
              filtered = sorted(expanded)[:top_k]

          return [chunks[i] for i in filtered]

      except Exception as e:
 
          return []

  # ==========================================================
+ # 6️⃣ Answer Generation (Restored Full Reasoning)
  # ==========================================================
  def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False):
+     """Generate detailed, human-like reasoning when enabled."""
      if not retrieved_chunks:
          return "Sorry, I couldn’t find relevant information in the document."

      context = "\n".join(f"[Chunk {i+1}] {chunk.strip()}" for i, chunk in enumerate(retrieved_chunks))
+     prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(context=context, query=query)

      try:
          result = _answer_model(
              prompt,
+             max_new_tokens=260 if reasoning_mode else 140,
+             temperature=0.7 if reasoning_mode else 0.2,
+             top_p=0.95 if reasoning_mode else 1.0,
              do_sample=reasoning_mode,
              pad_token_id=_tokenizer.eos_token_id,
              early_stopping=True,
 
          if "Answer:" in text:
              text = text.split("Answer:")[-1].strip()

+         return text

      except Exception as e:
          print(f"⚠️ Generation failed: {e}")
          return "⚠️ Error: Could not generate an answer."

+
  # ==========================================================
  # 7️⃣ Local Test
  # ==========================================================
 
      query = "How do I create a communication user?"
      retrieved = retrieve_chunks(query, index, dummy_chunks)
      print("🔍 Retrieved:", retrieved)
+     print("💬 Answer:", generate_answer(query, retrieved, reasoning_mode=True))