MichaelEdou Claude Opus 4.6 committed on
Commit
6178d50
·
1 Parent(s): ec6d762

Add post-fetch date filter for precise email matching

Browse files

The Gmail API filters only at day granularity, so emails at the boundary
can slip through. Now each fetched email's actual timestamp is checked
against the exact requested range (startDate to endDate, inclusive), and
emails outside the range are counted as skipped and not passed on to parsing.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

packages/server/src/services/scanService.ts CHANGED
@@ -209,7 +209,7 @@ async function runScan(jobId: string, scanRequest: ScanRequest, userId: string,
209
  const batch = newIds.slice(i, i + BATCH_SIZE);
210
 
211
  // Fetch emails with concurrency limit
212
- const emails: EmailMessage[] = [];
213
  for (let j = 0; j < batch.length; j += CONCURRENT_FETCH) {
214
  const chunk = batch.slice(j, j + CONCURRENT_FETCH);
215
  const results = await Promise.allSettled(
@@ -218,7 +218,7 @@ async function runScan(jobId: string, scanRequest: ScanRequest, userId: string,
218
 
219
  for (const result of results) {
220
  if (result.status === 'fulfilled') {
221
- emails.push(result.value);
222
  } else {
223
  jobState.progress.emailsErrored++;
224
  const errMsg = result.reason?.message || 'Fetch error';
@@ -237,6 +237,29 @@ async function runScan(jobId: string, scanRequest: ScanRequest, userId: string,
237
  }
238
  }
239
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  // Phase 3: AI parsing with concurrency limit
241
  for (let j = 0; j < emails.length; j += CONCURRENT_PARSE) {
242
  const parseChunk = emails.slice(j, j + CONCURRENT_PARSE);
 
209
  const batch = newIds.slice(i, i + BATCH_SIZE);
210
 
211
  // Fetch emails with concurrency limit
212
+ const fetchedEmails: EmailMessage[] = [];
213
  for (let j = 0; j < batch.length; j += CONCURRENT_FETCH) {
214
  const chunk = batch.slice(j, j + CONCURRENT_FETCH);
215
  const results = await Promise.allSettled(
 
218
 
219
  for (const result of results) {
220
  if (result.status === 'fulfilled') {
221
+ fetchedEmails.push(result.value);
222
  } else {
223
  jobState.progress.emailsErrored++;
224
  const errMsg = result.reason?.message || 'Fetch error';
 
237
  }
238
  }
239
 
240
+ // Post-fetch date filter: Gmail only filters by day, so we enforce exact range here
241
+ const rangeStart = new Date(dateRange.startDate).getTime();
242
+ const rangeEnd = new Date(dateRange.endDate).getTime();
243
+ const emails: EmailMessage[] = [];
244
+
245
+ for (const email of fetchedEmails) {
246
+ const emailTime = new Date(email.date).getTime();
247
+ if (emailTime >= rangeStart && emailTime <= rangeEnd) {
248
+ emails.push(email);
249
+ } else {
250
+ jobState.progress.emailsSkipped++;
251
+ logger.info({ emailId: email.emailId, emailDate: email.date, rangeStart: dateRange.startDate, rangeEnd: dateRange.endDate }, 'Skipping email outside date range');
252
+
253
+ emitToUser(userId, 'scan:progress', {
254
+ jobId,
255
+ processed: jobState.progress.emailsProcessed,
256
+ total: newIds.length,
257
+ skipped: jobState.progress.emailsSkipped,
258
+ errored: jobState.progress.emailsErrored,
259
+ });
260
+ }
261
+ }
262
+
263
  // Phase 3: AI parsing with concurrency limit
264
  for (let j = 0; j < emails.length; j += CONCURRENT_PARSE) {
265
  const parseChunk = emails.slice(j, j + CONCURRENT_PARSE);