// Heaven K
// fix: AI must only extract actual sender message, not org names
// 4f1f8fe
import type { ParsedTransaction } from '@icc/shared';
import { AIProviderFactory } from '../providers/index.js';
import { AIProviderPool } from '../providers/pool.js';
/**
 * Contract for a single AI backend that can turn an email body into a
 * `ParsedTransaction`.
 */
export interface AIProvider {
/** Provider name. Presumably the same identifier accepted by `AIProviderFactory.create` — TODO confirm. */
name: string;
/**
 * Extract transaction fields from the given email body.
 *
 * @param emailBody - Email text to parse. `AIService` in this file passes the
 *   output of `cleanEmailBody`, not the raw HTML.
 * @returns A promise resolving to the parsed transaction.
 */
parse(emailBody: string): Promise<ParsedTransaction>;
}
/**
 * Instruction prompt describing how to extract an Interac e-Transfer
 * notification into a JSON object with the fields `sender`, `amount`,
 * `currency`, `reference`, `message`, `recipient_email`, `date` and `status`.
 *
 * The text is runtime data: any edit changes what the model is told, so treat
 * wording changes as behavior changes. It is not referenced elsewhere in this
 * file; presumably the provider implementations consume it — verify against
 * the providers module.
 *
 * NOTE(review): the French label for the sender's message is given as "dime",
 * which looks unusual — confirm against real French-language Interac emails.
 */
export const EXTRACTION_PROMPT = `You are a financial data extraction assistant. Given the raw text/HTML of an Interac e-Transfer notification email from notify@payments.interac.ca, extract the following fields into a JSON object:
- sender: The name of the person who sent the money
- amount: The dollar amount (numeric, no $ sign)
- currency: Always "CAD"
- reference: The Interac reference number
- message: ONLY the personal message or memo written by the sender (sometimes labeled "Message:" or "dime" in French in the email). This is a short note the sender typed when sending the transfer. Do NOT confuse it with the organization name, email subject, sender name, or any other text in the email. If there is no explicit personal message from the sender, set this to null.
- recipient_email: The email address the transfer was sent TO (the ICC branch email)
- date: The date/time of the transfer in ISO 8601 format
- status: One of "deposited", "pending", "expired", "cancelled"
Rules:
- NEVER translate any text. Extract names, messages, emails, and all fields EXACTLY as they appear in the original email. Keep French, English, or any other language as-is. For example "Trois-Rivières" must stay "Trois-Rivières", NOT "Three Rivers".
- If a field is not found, set it to null.
- The amount must be a number (e.g., 150.00 not "$150.00").
- If the email is NOT an Interac e-Transfer notification, return all fields as null with amount as 0.
- Return ONLY valid JSON, no markdown, no backticks, no explanation.`;
/** Named entities decoded by `cleanEmailBody`; any other named entity is left as written. */
const NAMED_HTML_ENTITIES: Readonly<Record<string, string>> = {
  nbsp: ' ',
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'",
};

/**
 * Decode the supported named entities plus decimal (`&#39;`) and hexadecimal
 * (`&#x27;`) character references.
 *
 * Decoding happens in a single pass so the output of one replacement is never
 * re-scanned: `&amp;lt;` becomes the literal text `&lt;`, not `<`.
 */
function decodeHtmlEntities(input: string): string {
  return input.replace(
    /&(?:#x([0-9a-f]+)|#(\d+)|(nbsp|amp|lt|gt|quot|apos));/gi,
    (match: string, hex?: string, dec?: string, named?: string): string => {
      if (named !== undefined) {
        return NAMED_HTML_ENTITIES[named.toLowerCase()] ?? match;
      }
      const codePoint =
        hex !== undefined ? Number.parseInt(hex, 16) : Number.parseInt(dec ?? '', 10);
      // String.fromCodePoint throws a RangeError outside the Unicode range, so
      // leave out-of-range references exactly as they were written.
      if (!Number.isInteger(codePoint) || codePoint > 0x10ffff) {
        return match;
      }
      // fromCodePoint (unlike fromCharCode) handles astral characters such as emoji.
      return String.fromCodePoint(codePoint);
    },
  );
}

/**
 * Strip HTML tags, decode entities, collapse whitespace, and truncate so the
 * text fits within token limits.
 *
 * @param html - Raw email body (HTML or plain text).
 * @param maxChars - Maximum number of characters (UTF-16 code units) kept
 *   before the `...[truncated]` marker is appended. Defaults to 3000; negative
 *   values are treated as 0.
 * @returns The cleaned, possibly truncated, plain-text body.
 */
export function cleanEmailBody(html: string, maxChars = 3000): string {
  let text = html
    // Drop <style> and <script> blocks together with their contents.
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
    // <br> and the closing tags of block-level elements become line breaks.
    .replace(/<\s*(br|\/p|\/div|\/tr|\/li)[^>]*>/gi, '\n')
    // Every remaining tag becomes a space so adjacent words do not fuse.
    .replace(/<[^>]+>/g, ' ');

  // Entities are decoded only after tags are removed, so an escaped "&lt;b&gt;"
  // survives as visible text instead of being stripped as a tag.
  text = decodeHtmlEntities(text);

  text = text
    // Normalise CRLF / lone CR so the newline handling below sees them.
    .replace(/\r\n?/g, '\n')
    .replace(/[ \t]+/g, ' ')
    // Remove the space hugging each newline; otherwise lines that contain only
    // whitespace would keep runs of blank lines from collapsing.
    .replace(/ ?\n ?/g, '\n')
    .replace(/\n{3,}/g, '\n\n')
    .trim();

  // A negative limit would make slice() count from the end of the string.
  const limit = Math.max(0, Math.floor(maxChars));
  if (text.length > limit) {
    let end = limit;
    if (end > 0) {
      const lastUnit = text.charCodeAt(end - 1);
      // Do not cut between the two halves of a surrogate pair.
      if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        end -= 1;
      }
    }
    text = text.slice(0, end) + '\n...[truncated]';
  }
  return text;
}
// Module-wide provider pool; stays null until getPool() is first called.
let _pool: AIProviderPool | null = null;

/**
 * Return the shared provider pool, building it with
 * `AIProviderFactory.createFreePool()` on first use and reusing that same
 * instance on every later call.
 */
function getPool(): AIProviderPool {
  if (_pool !== null) {
    return _pool;
  }
  const created = AIProviderFactory.createFreePool();
  _pool = created;
  return created;
}
export class AIService {
  /**
   * Clean an Interac email body and parse it through the shared provider pool.
   *
   * Per the original author's note, the pool auto-switches between Groq and
   * Mistral free-tier models on rate limits; that logic lives in the pool and
   * is not visible here.
   *
   * @param emailBody - Raw email body; it is passed through `cleanEmailBody` first.
   * @returns The transaction produced by the pool's `parse`.
   */
  static async parseEmail(emailBody: string): Promise<ParsedTransaction> {
    return getPool().parse(cleanEmailBody(emailBody));
  }

  /**
   * Clean an email body and parse it with one explicitly chosen provider,
   * bypassing the shared pool.
   *
   * @param providerName - Name handed to `AIProviderFactory.create`.
   * @param emailBody - Raw email body; it is passed through `cleanEmailBody` first.
   * @returns The transaction produced by that provider's `parse`.
   */
  static async parseEmailWith(providerName: string, emailBody: string): Promise<ParsedTransaction> {
    return AIProviderFactory.create(providerName).parse(cleanEmailBody(emailBody));
  }

  /** Report the shared pool's `getStatus()` result (used by the dashboard widget). */
  static getProviderStatus() {
    return getPool().getStatus();
  }

  /** Invoke `resetDaily()` on the shared pool; meant to be called at midnight UTC. */
  static resetDailyCounters() {
    getPool().resetDaily();
  }
}