|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import { SystemMessage, HumanMessage, Runnable, LlamaCppLLM } from './src/index.js';
|
|
|
import { BaseCallback } from './src/utils/callbacks.js';
|
|
|
import { readFileSync } from 'fs';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Canonical category labels an email can be classified into.
 *
 * The table is frozen so the shared labels cannot be reassigned or extended
 * by accident at runtime.
 */
const CATEGORIES = Object.freeze({
  SPAM: 'Spam',
  INVOICE: 'Invoice',
  MEETING: 'Meeting Request',
  URGENT: 'Urgent',
  PERSONAL: 'Personal',
  OTHER: 'Other',
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Pipeline stage that validates a raw email object and normalizes its text
 * fields before classification.
 */
class EmailParserRunnable extends Runnable {
  /**
   * Validate and normalize one email.
   *
   * @param {{subject: string, body: string, from: string}} input - Raw email.
   * @param {object} [config] - Run configuration; not used by this stage.
   * @returns {Promise<{subject: string, body: string, from: string, timestamp: string}>}
   *   The trimmed fields plus the ISO-8601 time at which parsing happened.
   * @throws {Error} When `input` is missing, or when `subject`, `body` or
   *   `from` is absent, not a string, or blank after trimming.
   */
  async _call(input, config) {
    const normalized = {};

    for (const field of ['subject', 'body', 'from']) {
      const value = input?.[field];
      // Trim before validating so whitespace-only values are rejected too,
      // and non-string values fail validation instead of crashing on .trim().
      const trimmed = typeof value === 'string' ? value.trim() : '';
      if (trimmed === '') {
        throw new Error('Email must have subject, body, and from fields');
      }
      normalized[field] = trimmed;
    }

    return {
      ...normalized,
      timestamp: new Date().toISOString(),
    };
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Pipeline stage that asks an LLM to assign a parsed email to one of the
 * labels in CATEGORIES and merges the verdict into the email object.
 */
class EmailClassifierRunnable extends Runnable {
  /**
   * @param {object} llm - Chat model; this class only calls its
   *   `invoke(messages, config)` method and reads `content` from the result.
   */
  constructor(llm) {
    super();
    this.llm = llm;
  }

  /**
   * Classify one parsed email.
   *
   * @param {{subject: string, body: string, from: string}} input - Parsed email.
   * @param {object} [config] - Run configuration, forwarded to the LLM call.
   * @returns {Promise<object>} A copy of `input` extended with `category`,
   *   `confidence` and `reason`.
   */
  async _call(input, config) {
    const messages = this._buildPrompt(input);
    const response = await this.llm.invoke(messages, config);
    const { category, confidence, reason } = this._parseClassification(response.content);

    return {
      ...input,
      category,
      confidence,
      reason,
    };
  }

  /**
   * Build the system/user message pair sent to the LLM.
   *
   * @param {{subject: string, body: string, from: string}} email - Parsed email.
   * @returns {Array} `[SystemMessage, HumanMessage]`, in that order.
   */
  _buildPrompt(email) {
    const systemPrompt = new SystemMessage(`You are an email classification assistant. Your task is to classify emails into one of these categories:

Categories:
- Spam: Unsolicited promotional emails, advertisements with excessive punctuation/caps, phishing attempts, scams
- Invoice: Bills, payment requests, financial documents, receipts
- Meeting Request: Meeting invitations, calendar requests, scheduling, availability inquiries
- Urgent: Time-sensitive matters requiring immediate attention, security alerts, critical notifications
- Personal: Personal correspondence from friends/family (look for personal tone and familiar email addresses)
- Other: Legitimate newsletters, updates, informational content, everything else that doesn't fit above

Important distinctions:
- Legitimate newsletters (tech updates, subscriptions) should be "Other", not Spam
- Spam has excessive punctuation (!!!, ALL CAPS), pushy language, or suspicious intent
- Personal emails have familiar sender addresses and casual tone

Respond in this exact JSON format:
{
"category": "Category Name",
"confidence": 0.95,
"reason": "Brief explanation"
}

Confidence should be between 0 and 1.`);

    const userPrompt = new HumanMessage(`Classify this email:

From: ${email.from}
Subject: ${email.subject}
Body: ${email.body}

Provide your classification in JSON format.`);

    return [systemPrompt, userPrompt];
  }

  /**
   * Extract the classification object from the raw LLM text.
   *
   * Never throws: any problem (no JSON, malformed JSON, missing fields,
   * non-numeric confidence, unknown category) is logged with console.warn
   * and answered with a neutral fallback classification.
   *
   * @param {string} response - Raw text returned by the LLM.
   * @returns {{category: string, confidence: number, reason: string}}
   *   `category` is always one of the CATEGORIES labels and `confidence` is
   *   always a number within [0, 1].
   */
  _parseClassification(response) {
    try {
      // The model may wrap the JSON in extra prose; take the outermost {...} span.
      const jsonMatch = response.match(/\{[\s\S]*\}/);
      if (!jsonMatch) {
        throw new Error('No JSON found in response');
      }

      const parsed = JSON.parse(jsonMatch[0]);

      if (!parsed.category || parsed.confidence === undefined || !parsed.reason) {
        throw new Error('Invalid classification format');
      }

      // parseFloat yields NaN for values such as null or "high"; NaN would
      // pass straight through the clamp below, so reject it explicitly.
      const numericConfidence = Number.parseFloat(parsed.confidence);
      if (Number.isNaN(numericConfidence)) {
        throw new Error('Invalid classification format');
      }
      const confidence = Math.max(0, Math.min(1, numericConfidence));

      // Map the model's label onto the canonical spelling, ignoring case and
      // surrounding whitespace; labels outside CATEGORIES are rejected.
      const wanted = typeof parsed.category === 'string'
        ? parsed.category.trim().toLowerCase()
        : '';
      const category = Object.values(CATEGORIES).find(
        (known) => known.toLowerCase() === wanted,
      );
      if (category === undefined) {
        throw new Error(`Unknown category: ${parsed.category}`);
      }

      return {
        category,
        confidence,
        reason: parsed.reason,
      };
    } catch (error) {
      console.warn('Failed to parse LLM response, using fallback:', error.message);
      return {
        category: CATEGORIES.OTHER,
        confidence: 0.5,
        reason: 'Failed to parse classification',
      };
    }
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Callback that records every classification produced by the classifier
 * stage and derives summary statistics from the recorded entries.
 */
class ClassificationHistoryCallback extends BaseCallback {
  constructor() {
    super();
    // Classification records, in the order they were observed.
    this.history = [];
  }

  /**
   * Record the output of a finished classifier run.
   *
   * Outputs from other runnables, and missing outputs or outputs without a
   * category, are ignored.
   * NOTE(review): relies on `runnable.name` being 'EmailClassifierRunnable'
   * for the classifier stage — confirm how the Runnable base class sets it.
   *
   * @param {object} runnable - The runnable that finished.
   * @param {object} output - Value that runnable produced.
   * @param {object} [config] - Run configuration; not used here.
   * @returns {Promise<void>}
   */
  async onEnd(runnable, output, config) {
    if (runnable?.name !== 'EmailClassifierRunnable' || !output?.category) {
      return;
    }

    const { timestamp, from, subject, category, confidence, reason } = output;
    this.history.push({ timestamp, from, subject, category, confidence, reason });
  }

  /**
   * @returns {Array<object>} The live history array (not a copy).
   */
  getHistory() {
    return this.history;
  }

  /**
   * Summarize the recorded classifications.
   *
   * @returns {{total: number, byCategory: Object<string, number>, averageConfidence: number}}
   *   `averageConfidence` is the mean over entries whose confidence is a
   *   finite number, or 0 when there is no such entry.
   */
  getStatistics() {
    // Count in a Map so labels that collide with Object.prototype members
    // (e.g. "constructor") are tallied like any other label.
    const counts = new Map();
    let confidenceSum = 0;
    let scoredEntries = 0;

    for (const { category, confidence } of this.history) {
      const label = String(category);
      counts.set(label, (counts.get(label) ?? 0) + 1);

      // Skip missing/NaN confidences so one bad entry cannot poison the mean.
      if (Number.isFinite(confidence)) {
        confidenceSum += confidence;
        scoredEntries += 1;
      }
    }

    return {
      total: this.history.length,
      byCategory: Object.fromEntries(counts),
      averageConfidence: scoredEntries === 0 ? 0 : confidenceSum / scoredEntries,
    };
  }

  /** Print every recorded classification to the console. */
  printHistory() {
    console.log('\nπ§ Classification History:');
    console.log('β'.repeat(70));

    for (const entry of this.history) {
      console.log(`\nβοΈ From: ${entry.from}`);
      console.log(` Subject: ${entry.subject}`);
      console.log(` Category: ${entry.category}`);
      console.log(` Confidence: ${(entry.confidence * 100).toFixed(1)}%`);
      console.log(` Reason: ${entry.reason}`);
    }
  }

  /** Print totals, the per-category breakdown and the average confidence. */
  printStatistics() {
    const stats = this.getStatistics();

    console.log('\nπ Classification Statistics:');
    console.log('β'.repeat(70));
    console.log(`Total Emails: ${stats.total}\n`);

    if (stats.total > 0) {
      console.log('By Category:');
      for (const [category, count] of Object.entries(stats.byCategory)) {
        const percentage = ((count / stats.total) * 100).toFixed(1);
        console.log(` ${category}: ${count} (${percentage}%)`);
      }

      console.log(`\nAverage Confidence: ${(stats.averageConfidence * 100).toFixed(1)}%`);
    }
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Facade over the two-stage classification chain (parser piped into
 * classifier) that also owns the history callback used for reporting.
 */
class EmailClassificationPipeline {
  /**
   * @param {object} llm - Model instance handed to the classifier stage.
   */
  constructor(llm) {
    const parserStage = new EmailParserRunnable();
    const classifierStage = new EmailClassifierRunnable(llm);
    const recorder = new ClassificationHistoryCallback();

    this.parser = parserStage;
    this.classifier = classifierStage;
    this.historyCallback = recorder;
    // Parser output feeds straight into the classifier.
    this.pipeline = parserStage.pipe(classifierStage);
  }

  /**
   * Run one email through the chain with the history callback attached.
   *
   * @param {object} email - Raw email passed to the first stage.
   * @returns {Promise<object>} Whatever the chained pipeline resolves to.
   */
  async classify(email) {
    const runConfig = { callbacks: [this.historyCallback] };
    const outcome = await this.pipeline.invoke(email, runConfig);
    return outcome;
  }

  /** @returns {Array<object>} Recorded classifications, from the history callback. */
  getHistory() {
    const { historyCallback } = this;
    return historyCallback.getHistory();
  }

  /** @returns {object} Summary statistics, from the history callback. */
  getStatistics() {
    const { historyCallback } = this;
    return historyCallback.getStatistics();
  }

  /** Print the recorded classifications via the history callback. */
  printHistory() {
    const { historyCallback } = this;
    historyCallback.printHistory();
  }

  /** Print the summary statistics via the history callback. */
  printStatistics() {
    const { historyCallback } = this;
    historyCallback.printStatistics();
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Sample emails for the demo run, read synchronously at module load from
// test-emails.json, resolved relative to this module's own URL.
const testEmailsUrl = new URL('./test-emails.json', import.meta.url);
const testEmailsJson = readFileSync(testEmailsUrl, 'utf-8');
const TEST_EMAILS = JSON.parse(testEmailsJson);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Demo entry point: constructs the LLM, classifies every email in
 * TEST_EMAILS, then prints the recorded history and statistics.
 *
 * A failure on a single email is logged and the loop continues. The LLM is
 * disposed in a `finally` block, so it is released even when something after
 * its construction throws.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('=== Part 1 Capstone: Smart Email Classifier ===\n');

  const llm = new LlamaCppLLM({
    modelPath: './models/Qwen3-1.7B-Q8_0.gguf',
    temperature: 0.1,
    maxTokens: 200,
  });

  try {
    const pipeline = new EmailClassificationPipeline(llm);

    console.log('π¬ Processing emails...\n');

    // Emails are classified one at a time, in file order.
    for (const email of TEST_EMAILS) {
      try {
        const result = await pipeline.classify(email);

        console.log(`βοΈ Email from: ${result.from}`);
        console.log(` Subject: ${result.subject}`);
        console.log(` Category: ${result.category}`);
        console.log(` Confidence: ${(result.confidence * 100).toFixed(1)}%`);
        console.log(` Reason: ${result.reason}\n`);
      } catch (error) {
        // Optional chaining keeps a malformed (null) entry from throwing
        // again inside the handler and aborting the whole run.
        console.error(`β Error classifying email from ${email?.from}:`, error.message);
      }
    }

    pipeline.printHistory();
    pipeline.printStatistics();
  } finally {
    await llm.dispose();
  }

  console.log('\nβ Capstone Project Complete!');
}
|
|
|
|
|
|
|
|
|
// Run the demo. A fatal error is logged and reported through a non-zero exit
// code, so shells and CI do not mistake a failed run for a success.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|
|
|
|