raviix46 committed on
Commit
a6ba136
·
verified ·
1 Parent(s): bc87aad

Update rag_retrieval.py

Browse files
Files changed (1) hide show
  1. rag_retrieval.py +45 -8
rag_retrieval.py CHANGED
@@ -1,9 +1,9 @@
1
- # rag_retrieval.py
2
  import json
3
  import time
4
  import uuid
5
  import numpy as np
6
  import re
 
7
 
8
  from rag_config import RUNS_DIR, ROOT_DIR
9
  from rag_data import chunks, bm25, embeddings, sem_model, THREAD_OPTIONS
@@ -106,11 +106,10 @@ def retrieve_chunks(rewrite: str, session: dict, search_outside_thread: bool):
106
 
107
  def build_answer(user_text: str, rewrite: str, retrieved):
108
  """
109
- Answer builder with a simple 'no clear answer' heuristic.
110
-
111
- - If scores are very low OR none of the retrieved snippets share
112
- meaningful words with the question, we return a graceful fallback.
113
- - Otherwise, we list relevant snippets with citations.
114
  """
115
  if not retrieved:
116
  return (
@@ -136,9 +135,42 @@ def build_answer(user_text: str, rewrite: str, retrieved):
136
  []
137
  )
138
 
139
- # ---- Normal snippet-based answer ----
140
- lines = [f"**Question:** {user_text}", "", "**Relevant information:**"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  citations = []
 
142
 
143
  for r in retrieved:
144
  msg_id = r["message_id"]
@@ -146,6 +178,11 @@ def build_answer(user_text: str, rewrite: str, retrieved):
146
  snippet = r["text"].replace("\n", " ")
147
  snippet = (snippet[:300] + "…") if len(snippet) > 300 else snippet
148
 
 
 
 
 
 
149
  if page_no is not None:
150
  cite = f"[msg: {msg_id}, page: {page_no}]"
151
  else:
 
 
1
  import json
2
  import time
3
  import uuid
4
  import numpy as np
5
  import re
6
+ from datetime import datetime
7
 
8
  from rag_config import RUNS_DIR, ROOT_DIR
9
  from rag_data import chunks, bm25, embeddings, sem_model, THREAD_OPTIONS
 
106
 
107
  def build_answer(user_text: str, rewrite: str, retrieved):
108
  """
109
+ Answer builder with:
110
+ - 'no clear answer' heuristic
111
+ - special handling for simple 'when' questions using email dates
112
+ - snippet list with citations for grounding
 
113
  """
114
  if not retrieved:
115
  return (
 
135
  []
136
  )
137
 
138
+ # ---- Optional: direct answer for 'when' questions ----
139
+ direct_answer_line = None
140
+ if "when" in user_text.lower():
141
+ dated = []
142
+ for r in retrieved:
143
+ date_str = r.get("date")
144
+ if not date_str:
145
+ continue
146
+ try:
147
+ dt = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
148
+ dated.append((dt, r))
149
+ except Exception:
150
+ continue
151
+
152
+ if dated:
153
+ # pick the latest email as the likely final approval/confirmation
154
+ dt_best, r_best = max(dated, key=lambda x: x[0])
155
+ nice_date = dt_best.strftime("%Y-%m-%d %H:%M")
156
+ direct_answer_line = (
157
+ f"**Answer:** The most relevant approval email in this thread "
158
+ f"was sent on **{nice_date}** "
159
+ f"[msg: {r_best['message_id']}]."
160
+ )
161
+
162
+ # ---- Build snippet-based explanation ----
163
+ lines = []
164
+ if direct_answer_line:
165
+ lines.append(direct_answer_line)
166
+ lines.append("")
167
+
168
+ lines.append(f"**Question:** {user_text}")
169
+ lines.append("")
170
+ lines.append("**Relevant information:**")
171
+
172
  citations = []
173
+ seen = set() # avoid exact duplicate snippet+msg combos
174
 
175
  for r in retrieved:
176
  msg_id = r["message_id"]
 
178
  snippet = r["text"].replace("\n", " ")
179
  snippet = (snippet[:300] + "…") if len(snippet) > 300 else snippet
180
 
181
+ key = (msg_id, snippet)
182
+ if key in seen:
183
+ continue
184
+ seen.add(key)
185
+
186
  if page_no is not None:
187
  cite = f"[msg: {msg_id}, page: {page_no}]"
188
  else: