Spaces:
Sleeping
Sleeping
MichaelEdou Claude Opus 4.6 committed on
Commit ·
6178d50
1
Parent(s): ec6d762
Add post-fetch date filter for precise email matching
Browse files
The Gmail API only filters at day granularity, so emails at the boundary
can slip through. Now each fetched email's actual timestamp is checked
against the exact requested range (startDate to endDate), and emails
outside the range are skipped.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
packages/server/src/services/scanService.ts
CHANGED
|
@@ -209,7 +209,7 @@ async function runScan(jobId: string, scanRequest: ScanRequest, userId: string,
|
|
| 209 |
const batch = newIds.slice(i, i + BATCH_SIZE);
|
| 210 |
|
| 211 |
// Fetch emails with concurrency limit
|
| 212 |
-
const
|
| 213 |
for (let j = 0; j < batch.length; j += CONCURRENT_FETCH) {
|
| 214 |
const chunk = batch.slice(j, j + CONCURRENT_FETCH);
|
| 215 |
const results = await Promise.allSettled(
|
|
@@ -218,7 +218,7 @@ async function runScan(jobId: string, scanRequest: ScanRequest, userId: string,
|
|
| 218 |
|
| 219 |
for (const result of results) {
|
| 220 |
if (result.status === 'fulfilled') {
|
| 221 |
-
|
| 222 |
} else {
|
| 223 |
jobState.progress.emailsErrored++;
|
| 224 |
const errMsg = result.reason?.message || 'Fetch error';
|
|
@@ -237,6 +237,29 @@ async function runScan(jobId: string, scanRequest: ScanRequest, userId: string,
|
|
| 237 |
}
|
| 238 |
}
|
| 239 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
// Phase 3: AI parsing with concurrency limit
|
| 241 |
for (let j = 0; j < emails.length; j += CONCURRENT_PARSE) {
|
| 242 |
const parseChunk = emails.slice(j, j + CONCURRENT_PARSE);
|
|
|
|
| 209 |
const batch = newIds.slice(i, i + BATCH_SIZE);
|
| 210 |
|
| 211 |
// Fetch emails with concurrency limit
|
| 212 |
+
const fetchedEmails: EmailMessage[] = [];
|
| 213 |
for (let j = 0; j < batch.length; j += CONCURRENT_FETCH) {
|
| 214 |
const chunk = batch.slice(j, j + CONCURRENT_FETCH);
|
| 215 |
const results = await Promise.allSettled(
|
|
|
|
| 218 |
|
| 219 |
for (const result of results) {
|
| 220 |
if (result.status === 'fulfilled') {
|
| 221 |
+
fetchedEmails.push(result.value);
|
| 222 |
} else {
|
| 223 |
jobState.progress.emailsErrored++;
|
| 224 |
const errMsg = result.reason?.message || 'Fetch error';
|
|
|
|
| 237 |
}
|
| 238 |
}
|
| 239 |
|
| 240 |
+
// Post-fetch date filter: Gmail only filters by day, so we enforce exact range here
|
| 241 |
+
const rangeStart = new Date(dateRange.startDate).getTime();
|
| 242 |
+
const rangeEnd = new Date(dateRange.endDate).getTime();
|
| 243 |
+
const emails: EmailMessage[] = [];
|
| 244 |
+
|
| 245 |
+
for (const email of fetchedEmails) {
|
| 246 |
+
const emailTime = new Date(email.date).getTime();
|
| 247 |
+
if (emailTime >= rangeStart && emailTime <= rangeEnd) {
|
| 248 |
+
emails.push(email);
|
| 249 |
+
} else {
|
| 250 |
+
jobState.progress.emailsSkipped++;
|
| 251 |
+
logger.info({ emailId: email.emailId, emailDate: email.date, rangeStart: dateRange.startDate, rangeEnd: dateRange.endDate }, 'Skipping email outside date range');
|
| 252 |
+
|
| 253 |
+
emitToUser(userId, 'scan:progress', {
|
| 254 |
+
jobId,
|
| 255 |
+
processed: jobState.progress.emailsProcessed,
|
| 256 |
+
total: newIds.length,
|
| 257 |
+
skipped: jobState.progress.emailsSkipped,
|
| 258 |
+
errored: jobState.progress.emailsErrored,
|
| 259 |
+
});
|
| 260 |
+
}
|
| 261 |
+
}
|
| 262 |
+
|
| 263 |
// Phase 3: AI parsing with concurrency limit
|
| 264 |
for (let j = 0; j < emails.length; j += CONCURRENT_PARSE) {
|
| 265 |
const parseChunk = emails.slice(j, j + CONCURRENT_PARSE);
|