MichaelEdou
Fix Gmail scan date range: stop scanning emails from day before start date
d2cc9d2
import { google } from 'googleapis';
import type { ScanDateRange } from '@icc/shared';
import { db } from '../db/index.js';
import { users } from '../db/schema.js';
import { eq } from 'drizzle-orm';
import { config } from '../config/env.js';
/**
 * Flattened view of a single Gmail message as produced by `fetchMessage`.
 */
export interface EmailMessage {
  /** Gmail message id the message was fetched with. */
  emailId: string;

  /** Plain-text body; derived from the HTML body when no text part has data. */
  body: string;

  /** Decoded HTML body, or an empty string when the message has none. */
  bodyHtml: string;

  /** Value of the `Subject` header, or an empty string when absent. */
  subject: string;

  /** Value of the `From` header, or an empty string when absent. */
  from: string;

  /** Value of the `To` header, or an empty string when absent. */
  to: string;

  /** Raw value of the `Date` header (not parsed), or an empty string when absent. */
  date: string;
}
/**
 * Builds the Gmail search query used to find Interac e-Transfer notifications
 * inside an inclusive calendar date range.
 *
 * @param dateRange - Range whose `startDate` and `endDate` are both meant to be
 *   included in the scan.
 * @returns A Gmail query string combining the sender/subject filters with
 *   `after:` / `before:` date operators formatted as `YYYY/M/D`.
 */
export function buildGmailQuery(dateRange: ScanDateRange): string {
  // `new Date('YYYY-MM-DD')` is parsed as UTC midnight, but the formatter below
  // reads local-time fields. On a server west of UTC that shifts the result to
  // the previous calendar day, so date-only strings are read as local calendar
  // dates instead. Any other input keeps the regular `Date` parsing.
  const toCalendarDate = (value: string | number | Date): Date => {
    if (typeof value === 'string') {
      const match = /^(\d{4})-(\d{2})-(\d{2})$/.exec(value.trim());
      if (match) {
        return new Date(Number(match[1]), Number(match[2]) - 1, Number(match[3]));
      }
    }
    return new Date(value);
  };

  // Gmail after: is inclusive (includes emails from that date onward).
  // Gmail before: is exclusive (excludes the specified date).
  // So the start date is used as-is and the end date is pushed one day forward.
  const afterDate = toCalendarDate(dateRange.startDate);
  const beforeDate = toCalendarDate(dateRange.endDate);
  beforeDate.setDate(beforeDate.getDate() + 1);

  const fmt = (d: Date): string => `${d.getFullYear()}/${d.getMonth() + 1}/${d.getDate()}`;

  // Broader query: match Interac notification sender OR subject keywords
  return `(from:notify@payments.interac.ca OR from:interac.ca OR subject:"virement Interac" OR subject:"Interac e-Transfer") after:${fmt(afterDate)} before:${fmt(beforeDate)}`;
}
/**
 * Creates a fresh Google OAuth2 client from the application configuration.
 *
 * The redirect URI points at this server's `/api/auth/google/callback` route
 * on `localhost`, using the configured port.
 */
function getOAuth2Client() {
  const clientId = config.VITE_GOOGLE_CLIENT_ID;
  const clientSecret = config.GOOGLE_CLIENT_SECRET;
  const redirectUri = `http://localhost:${config.PORT}/api/auth/google/callback`;

  return new google.auth.OAuth2(clientId, clientSecret, redirectUri);
}
/**
 * Loads the stored Google credentials for a user and returns an OAuth2 client
 * configured with them.
 *
 * Any tokens the client later emits through its `'tokens'` event are written
 * back to the user's row so that subsequent calls start from the newest values.
 *
 * @param userId - Primary key of the user in the `users` table.
 * @returns An OAuth2 client carrying the user's access/refresh tokens.
 * @throws Error when the user does not exist or has no stored access token.
 */
async function getAuthenticatedClient(userId: string) {
  const user = await db.select().from(users).where(eq(users.id, userId)).get();
  if (!user || !user.accessToken) {
    throw new Error('Vous devez vous connecter via Google OAuth pour scanner vos courriels. Aucun token trouvé pour votre compte.');
  }

  const oauth2Client = getOAuth2Client();
  oauth2Client.setCredentials({
    access_token: user.accessToken,
    refresh_token: user.refreshToken,
    expiry_date: user.tokenExpires ? new Date(user.tokenExpires).getTime() : undefined,
  });

  // Persist refreshed tokens. Fields missing from the event fall back to the
  // values that were loaded above.
  oauth2Client.on('tokens', async (tokens) => {
    try {
      await db.update(users).set({
        accessToken: tokens.access_token ?? user.accessToken,
        refreshToken: tokens.refresh_token ?? user.refreshToken,
        tokenExpires: tokens.expiry_date ? new Date(tokens.expiry_date).toISOString() : user.tokenExpires,
        updatedAt: new Date().toISOString(),
      }).where(eq(users.id, userId));
    } catch (error: unknown) {
      // An event emitter ignores the promise returned by an async listener, so
      // a failed write would otherwise surface as an unhandled rejection.
      // The in-memory client still holds the new tokens; only persistence failed.
      console.error(`Failed to persist refreshed Google OAuth tokens for user ${userId}`, error);
    }
  });

  return oauth2Client;
}
/**
 * Collects the ids of every message in the user's mailbox that matches a
 * Gmail search query, following pagination until no next-page token is left.
 *
 * @param userId - Application user whose stored Google credentials are used.
 * @param query - Gmail search query passed as `q` to `users.messages.list`.
 * @returns All matching message ids, in the order the API returned them.
 */
export async function fetchAllMessageIds(userId: string, query: string): Promise<string[]> {
  const auth = await getAuthenticatedClient(userId);
  const gmail = google.gmail({ version: 'v1', auth });

  const collected: string[] = [];
  let cursor: string | undefined;

  for (;;) {
    const page = await gmail.users.messages.list({
      userId: 'me',
      q: query,
      maxResults: 500,
      pageToken: cursor,
    });

    // Entries without an id are skipped.
    for (const entry of page.data.messages ?? []) {
      if (entry.id) {
        collected.push(entry.id);
      }
    }

    cursor = page.data.nextPageToken ?? undefined;
    if (!cursor) {
      break;
    }
  }

  return collected;
}
/**
 * Fetches one Gmail message in `full` format and flattens it into an
 * `EmailMessage`.
 *
 * @param userId - Application user whose stored Google credentials are used.
 * @param messageId - Gmail id of the message to load.
 * @returns The message's selected headers plus its text and HTML bodies;
 *   missing headers are reported as empty strings.
 */
export async function fetchMessage(userId: string, messageId: string): Promise<EmailMessage> {
  const auth = await getAuthenticatedClient(userId);
  const gmail = google.gmail({ version: 'v1', auth });

  const { data } = await gmail.users.messages.get({
    userId: 'me',
    id: messageId,
    format: 'full',
  });

  const headerList = data.payload?.headers ?? [];

  // Header names are compared case-insensitively; the first match wins.
  const readHeader = (wanted: string): string =>
    headerList.find((header) => header.name?.toLowerCase() === wanted)?.value ?? '';

  const subject = readHeader('subject');
  const from = readHeader('from');
  const to = readHeader('to');
  const date = readHeader('date');

  const bodies = extractBodies(data.payload);

  return {
    emailId: messageId,
    body: bodies.body,
    bodyHtml: bodies.bodyHtml,
    subject,
    from,
    to,
    date,
  };
}
/**
 * Pulls the plain-text and HTML bodies out of a Gmail message payload.
 *
 * - A payload without `parts` is treated as single-part: its data is decoded
 *   and classified as HTML only when `mimeType` is `text/html`.
 * - A payload with `parts` is searched for `text/plain` and `text/html`
 *   parts; when only HTML is available the text body is derived from it.
 * - If nothing was found, each sub-part that itself has `parts` is tried in
 *   turn and the first non-empty result is returned.
 *
 * @param payload - Message payload (or nested part) from the Gmail API; may be nullish.
 * @returns Both bodies, each an empty string when not available.
 */
function extractBodies(payload: any): { body: string; bodyHtml: string } {
  if (!payload) {
    return { body: '', bodyHtml: '' };
  }

  const children = payload.parts;

  // Single-part message: the content sits directly on the payload.
  if (!children) {
    const raw = payload.body?.data;
    if (!raw) {
      return { body: '', bodyHtml: '' };
    }
    const content = decodeBase64Url(raw);
    return payload.mimeType === 'text/html'
      ? { body: stripHtml(content), bodyHtml: content }
      : { body: content, bodyHtml: '' };
  }

  // Multipart message: decode the first part of a given type, if it has data.
  const decodeFirst = (mimeType: string): string => {
    const data = findPart(children, mimeType)?.body?.data;
    return data ? decodeBase64Url(data) : '';
  };

  const plain = decodeFirst('text/plain');
  const html = decodeFirst('text/html');
  // Fall back to a tag-stripped version of the HTML when there is no text part.
  const text = plain || (html ? stripHtml(html) : '');

  if (text || html) {
    return { body: text, bodyHtml: html };
  }

  // Nothing usable at this level: try each nested multipart container.
  for (const child of children) {
    if (!child.parts) {
      continue;
    }
    const inner = extractBodies(child);
    if (inner.body || inner.bodyHtml) {
      return inner;
    }
  }

  return { body: '', bodyHtml: '' };
}
/**
 * Simple HTML-to-text fallback: drops `<style>` and `<script>` blocks, replaces
 * every remaining tag with a space, decodes a small set of entities
 * (`&nbsp;`, `&amp;`, `&lt;`, `&gt;`) and collapses whitespace.
 *
 * @param html - HTML markup to convert.
 * @returns The trimmed text content with runs of whitespace reduced to one space.
 */
function stripHtml(html: string): string {
  const entityText: Record<string, string> = { nbsp: ' ', amp: '&', lt: '<', gt: '>' };

  return html
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
    .replace(/<[^>]+>/g, ' ')
    // Decode all entities in a single pass. Decoding `&amp;` in an earlier
    // pass than `&lt;`/`&gt;` would double-decode `&amp;lt;` into `<`.
    .replace(/&(nbsp|amp|lt|gt);/g, (whole: string, name: string) => entityText[name] ?? whole)
    .replace(/\s+/g, ' ')
    .trim();
}
/**
 * Depth-first search for the first MIME part with the given type.
 *
 * A part is checked before its own children, and siblings are visited in
 * array order.
 *
 * @param parts - Parts to search; each may carry nested `parts`.
 * @param mimeType - Exact MIME type to look for (e.g. `text/plain`).
 * @returns The first matching part, or `undefined` when none matches.
 */
function findPart(parts: any[], mimeType: string): any | undefined {
  for (const candidate of parts) {
    const hit =
      candidate.mimeType === mimeType
        ? candidate
        : candidate.parts
          ? findPart(candidate.parts, mimeType)
          : undefined;

    if (hit) {
      return hit;
    }
  }
  return undefined;
}
/**
 * Decodes a base64url-encoded string into UTF-8 text.
 *
 * The URL-safe characters `-` and `_` are mapped back to `+` and `/` before
 * the data is handed to the base64 decoder.
 *
 * @param data - Base64url-encoded payload.
 * @returns The decoded bytes interpreted as UTF-8.
 */
function decodeBase64Url(data: string): string {
  const standardAlphabet = data.replace(/[-_]/g, (ch) => (ch === '-' ? '+' : '/'));
  const bytes = Buffer.from(standardAlphabet, 'base64');
  return bytes.toString('utf-8');
}