Spaces:

lenzcom
/

Email

Running

App Files Files Community

Email / run_classifier.js

lenzcom's picture

Upload folder using huggingface_hub

e706de2 verified about 18 hours ago

history blame contribute delete

11.5 kB

	/**
	* Part 1 Capstone Solution: Smart Email Classifier
	*
	* Build an AI system that organizes your inbox by classifying emails into categories.
	*
	* Skills Used:
	* - Runnables for processing pipeline
	* - Messages for structured classification
	* - LLM wrapper for flexible model switching
	* - Context for classification history
	*
	* Difficulty: ⭐⭐☆☆☆
	*/

	import { SystemMessage, HumanMessage, Runnable, LlamaCppLLM } from './src/index.js';
	import { BaseCallback } from './src/utils/callbacks.js';
	import { readFileSync } from 'fs';

	// ============================================================================
	// EMAIL CLASSIFICATION CATEGORIES
	// ============================================================================

	/**
	 * Canonical category labels an email can be assigned.
	 *
	 * Frozen so that no step can accidentally add, remove, or rename a label
	 * while the script is running.
	 */
	const CATEGORIES = Object.freeze({
	  SPAM: 'Spam',
	  INVOICE: 'Invoice',
	  MEETING: 'Meeting Request',
	  URGENT: 'Urgent',
	  PERSONAL: 'Personal',
	  OTHER: 'Other'
	});

	// ============================================================================
	// Email Parser Runnable
	// ============================================================================

	/**
	 * Validates a raw email object and normalizes it for the next pipeline step.
	 *
	 * Input:  { subject: string, body: string, from: string }
	 * Output: { subject, body, from, timestamp } - the three text fields trimmed,
	 *         plus an ISO-8601 timestamp taken at the moment of parsing.
	 */
	class EmailParserRunnable extends Runnable {
	  /**
	   * @param {{subject: string, body: string, from: string}} input - Raw email.
	   * @param {object} [config] - Not used by this step.
	   * @returns {Promise<{subject: string, body: string, from: string, timestamp: string}>}
	   * @throws {Error} When input is not an object, or when subject, body, or
	   *   from is missing, empty, or not a string.
	   */
	  async _call(input, config) {
	    const isFilledString = (value) => typeof value === 'string' && value.length > 0;

	    // Reject null/non-object input and non-string fields up front so callers
	    // always get the same validation error rather than a stray TypeError
	    // from property access or .trim().
	    const isValid =
	      input !== null &&
	      typeof input === 'object' &&
	      [input.subject, input.body, input.from].every(isFilledString);

	    if (!isValid) {
	      throw new Error('Email must have subject, body, and from fields');
	    }

	    return {
	      subject: input.subject.trim(),
	      body: input.body.trim(),
	      from: input.from.trim(),
	      timestamp: new Date().toISOString()
	    };
	  }
	}

	// ============================================================================
	// Email Classifier Runnable
	// ============================================================================

	/**
	 * Classifies a parsed email by asking an LLM for a JSON verdict.
	 *
	 * Input:  { subject, body, from, timestamp }
	 * Output: { ...email, category, confidence, reason }
	 */
	class EmailClassifierRunnable extends Runnable {
	  /**
	   * @param {object} llm - Object exposing `invoke(messages, config)`; the
	   *   resolved value is read through its `content` property.
	   */
	  constructor(llm) {
	    super();
	    this.llm = llm;
	  }

	  /**
	   * Builds the prompt, queries the LLM, and merges the verdict into the email.
	   *
	   * @param {{subject: string, body: string, from: string}} input - Parsed email.
	   * @param {object} [config] - Forwarded unchanged to `llm.invoke`.
	   * @returns {Promise<object>} The input fields plus category, confidence, reason.
	   */
	  async _call(input, config) {
	    const messages = this._buildPrompt(input);
	    const response = await this.llm.invoke(messages, config);

	    // A missing response is handed to the parser as undefined so that it
	    // yields the fallback classification instead of crashing here.
	    const { category, confidence, reason } = this._parseClassification(response?.content);

	    return { ...input, category, confidence, reason };
	  }

	  /**
	   * Creates the system + user message pair sent to the LLM.
	   *
	   * @param {{subject: string, body: string, from: string}} email
	   * @returns {Array} `[SystemMessage, HumanMessage]`
	   */
	  _buildPrompt(email) {
	    const systemPrompt = new SystemMessage(`You are an email classification assistant. Your task is to classify emails into one of these categories:

	Categories:
	- Spam: Unsolicited promotional emails, advertisements with excessive punctuation/caps, phishing attempts, scams
	- Invoice: Bills, payment requests, financial documents, receipts
	- Meeting Request: Meeting invitations, calendar requests, scheduling, availability inquiries
	- Urgent: Time-sensitive matters requiring immediate attention, security alerts, critical notifications
	- Personal: Personal correspondence from friends/family (look for personal tone and familiar email addresses)
	- Other: Legitimate newsletters, updates, informational content, everything else that doesn't fit above

	Important distinctions:
	- Legitimate newsletters (tech updates, subscriptions) should be "Other", not Spam
	- Spam has excessive punctuation (!!!, ALL CAPS), pushy language, or suspicious intent
	- Personal emails have familiar sender addresses and casual tone

	Respond in this exact JSON format:
	{
	"category": "Category Name",
	"confidence": 0.95,
	"reason": "Brief explanation"
	}

	Confidence should be between 0 and 1.`);

	    const userPrompt = new HumanMessage(`Classify this email:

	From: ${email.from}
	Subject: ${email.subject}
	Body: ${email.body}

	Provide your classification in JSON format.`);

	    return [systemPrompt, userPrompt];
	  }

	  /**
	   * Extracts `{ category, confidence, reason }` from the raw LLM text.
	   *
	   * Never throws: any problem (non-text response, no JSON, missing fields,
	   * non-numeric confidence) is logged with console.warn and replaced by the
	   * fallback `{ category: 'Other', confidence: 0.5, reason: ... }`.
	   *
	   * @param {string} response - Raw text produced by the LLM.
	   * @returns {{category: string, confidence: number, reason: string}}
	   */
	  _parseClassification(response) {
	    try {
	      if (typeof response !== 'string') {
	        throw new Error('No JSON found in response');
	      }

	      // Greedy match from the first "{" to the last "}" so that any prose
	      // around the JSON object is ignored.
	      const jsonMatch = response.match(/\{[\s\S]*\}/);
	      if (!jsonMatch) {
	        throw new Error('No JSON found in response');
	      }

	      const parsed = JSON.parse(jsonMatch[0]);

	      if (!parsed.category || parsed.confidence === undefined || !parsed.reason) {
	        throw new Error('Invalid classification format');
	      }

	      // Clamping NaN still yields NaN, so a non-numeric confidence has to be
	      // rejected explicitly before the clamp.
	      const rawConfidence = Number.parseFloat(parsed.confidence);
	      if (Number.isNaN(rawConfidence)) {
	        throw new Error('Invalid classification format');
	      }
	      const confidence = Math.max(0, Math.min(1, rawConfidence));

	      return {
	        category: this._normalizeCategory(parsed.category),
	        confidence,
	        reason: parsed.reason
	      };
	    } catch (error) {
	      console.warn('Failed to parse LLM response, using fallback:', error.message);
	      return {
	        category: CATEGORIES.OTHER,
	        confidence: 0.5,
	        reason: 'Failed to parse classification'
	      };
	    }
	  }

	  /**
	   * Maps a label that differs from a known category only by letter case or
	   * surrounding whitespace onto the canonical label; anything else is
	   * returned unchanged.
	   *
	   * @param {*} category - Category value taken from the LLM JSON.
	   * @returns {*} Canonical label when recognised, otherwise the input value.
	   */
	  _normalizeCategory(category) {
	    if (typeof category !== 'string') {
	      return category;
	    }
	    const wanted = category.trim().toLowerCase();
	    const canonical = Object.values(CATEGORIES).find(
	      (label) => label.toLowerCase() === wanted
	    );
	    return canonical ?? category;
	  }
	}

	// ============================================================================
	// Classification History Callback
	// ============================================================================

	/**
	 * Callback that records every classification produced by
	 * EmailClassifierRunnable and derives simple statistics from the record.
	 */
	class ClassificationHistoryCallback extends BaseCallback {
	  constructor() {
	    super();
	    // Entries are appended in the order onEnd receives them.
	    this.history = [];
	  }

	  /**
	   * Stores the classification carried by `output` when it comes from the
	   * classifier step; every other invocation is ignored.
	   *
	   * NOTE(review): presumably invoked by the Runnable framework after each
	   * step finishes - confirm against BaseCallback.
	   *
	   * @param {{name: string}} runnable - Step that produced the output.
	   * @param {object} output - Step result; may be absent.
	   * @param {object} [config] - Not used here.
	   * @returns {Promise<void>}
	   */
	  async onEnd(runnable, output, config) {
	    // Optional chaining keeps a step that produced no output (null or
	    // undefined) from crashing the callback.
	    if (runnable?.name !== 'EmailClassifierRunnable' || !output?.category) {
	      return;
	    }

	    const { timestamp, from, subject, category, confidence, reason } = output;
	    this.history.push({ timestamp, from, subject, category, confidence, reason });
	  }

	  /**
	   * @returns {Array<object>} The live history array (not a copy).
	   */
	  getHistory() {
	    return this.history;
	  }

	  /**
	   * Aggregates the history.
	   *
	   * @returns {{total: number, byCategory: Object<string, number>, averageConfidence: number}}
	   *   Counts per category and the mean confidence; all zero/empty when
	   *   nothing has been recorded.
	   */
	  getStatistics() {
	    const total = this.history.length;
	    if (total === 0) {
	      return {
	        total: 0,
	        byCategory: {},
	        averageConfidence: 0
	      };
	    }

	    // Count in a Map so labels that collide with Object.prototype members
	    // (e.g. "constructor") are tallied like any other label.
	    const counts = new Map();
	    let totalConfidence = 0;

	    for (const { category, confidence } of this.history) {
	      const key = String(category);
	      counts.set(key, (counts.get(key) ?? 0) + 1);
	      totalConfidence += confidence;
	    }

	    return {
	      total,
	      byCategory: Object.fromEntries(counts),
	      averageConfidence: totalConfidence / total
	    };
	  }

	  /**
	   * Prints every recorded classification to the console.
	   */
	  printHistory() {
	    console.log('\n📧 Classification History:');
	    console.log('─'.repeat(70));

	    for (const entry of this.history) {
	      console.log(`\n✉️ From: ${entry.from}`);
	      console.log(` Subject: ${entry.subject}`);
	      console.log(` Category: ${entry.category}`);
	      console.log(` Confidence: ${(entry.confidence * 100).toFixed(1)}%`);
	      console.log(` Reason: ${entry.reason}`);
	    }
	  }

	  /**
	   * Prints the totals, the per-category breakdown, and the mean confidence.
	   */
	  printStatistics() {
	    const stats = this.getStatistics();

	    console.log('\n📊 Classification Statistics:');
	    console.log('─'.repeat(70));
	    console.log(`Total Emails: ${stats.total}\n`);

	    if (stats.total > 0) {
	      console.log('By Category:');
	      for (const [category, count] of Object.entries(stats.byCategory)) {
	        const percentage = ((count / stats.total) * 100).toFixed(1);
	        console.log(` ${category}: ${count} (${percentage}%)`);
	      }

	      console.log(`\nAverage Confidence: ${(stats.averageConfidence * 100).toFixed(1)}%`);
	    }
	  }
	}

	// ============================================================================
	// Email Classification Pipeline
	// ============================================================================

	/**
	 * Facade over the full flow: parse the email, classify it, and record the
	 * result through a history callback.
	 */
	class EmailClassificationPipeline {
	  /**
	   * @param {object} llm - Model handed to the classifier step.
	   */
	  constructor(llm) {
	    const parser = new EmailParserRunnable();
	    const classifier = new EmailClassifierRunnable(llm);
	    const historyCallback = new ClassificationHistoryCallback();

	    this.parser = parser;
	    this.classifier = classifier;
	    this.historyCallback = historyCallback;

	    // Chain the two steps: parser output feeds the classifier.
	    this.pipeline = parser.pipe(classifier);
	  }

	  /**
	   * Sends one email through the chained steps, with the history callback
	   * attached to the invocation.
	   *
	   * @param {object} email - Raw email passed to the pipeline as-is.
	   * @returns {Promise<object>} Whatever the pipeline resolves to.
	   */
	  async classify(email) {
	    const { pipeline, historyCallback } = this;
	    const outcome = await pipeline.invoke(email, { callbacks: [historyCallback] });
	    return outcome;
	  }

	  /** @returns {Array<object>} History as reported by the callback. */
	  getHistory() {
	    const { historyCallback } = this;
	    return historyCallback.getHistory();
	  }

	  /** @returns {object} Statistics as reported by the callback. */
	  getStatistics() {
	    const { historyCallback } = this;
	    return historyCallback.getStatistics();
	  }

	  /** Delegates to the callback's history printer. */
	  printHistory() {
	    const { historyCallback } = this;
	    historyCallback.printHistory();
	  }

	  /** Delegates to the callback's statistics printer. */
	  printStatistics() {
	    const { historyCallback } = this;
	    historyCallback.printStatistics();
	  }
	}

	// ============================================================================
	// TEST DATA
	// ============================================================================

	// Fixture emails, read synchronously at module load from the JSON file
	// located next to this script (resolved against import.meta.url).
	const TEST_EMAILS = (() => {
	  const fixtureUrl = new URL('./test-emails.json', import.meta.url);
	  const fixtureText = readFileSync(fixtureUrl, 'utf-8');
	  return JSON.parse(fixtureText);
	})();

	// ============================================================================
	// MAIN FUNCTION
	// ============================================================================

	/**
	 * Demo entry point: loads the model, classifies every email in TEST_EMAILS,
	 * prints per-email results followed by the history and statistics, and
	 * releases the model.
	 *
	 * A failure while classifying a single email is logged and the loop
	 * continues. The model is disposed even when a later step throws.
	 *
	 * @returns {Promise<void>}
	 */
	async function main() {
	  console.log('=== Part 1 Capstone: Smart Email Classifier ===\n');

	  // Initialize the LLM
	  const llm = new LlamaCppLLM({
	    modelPath: './models/Qwen3-1.7B-Q8_0.gguf', // Adjust to your model
	    temperature: 0.1, // Low temperature for consistent classification
	    maxTokens: 200
	  });

	  try {
	    const pipeline = new EmailClassificationPipeline(llm);

	    console.log('📬 Processing emails...\n');

	    for (const email of TEST_EMAILS) {
	      try {
	        const result = await pipeline.classify(email);

	        console.log(`✉️ Email from: ${result.from}`);
	        console.log(` Subject: ${result.subject}`);
	        console.log(` Category: ${result.category}`);
	        console.log(` Confidence: ${(result.confidence * 100).toFixed(1)}%`);
	        console.log(` Reason: ${result.reason}\n`);
	      } catch (error) {
	        // Optional chaining: a malformed fixture entry (e.g. null) must not
	        // make the error handler itself throw.
	        console.error(`❌ Error classifying email from ${email?.from}:`, error?.message ?? error);
	      }
	    }

	    pipeline.printHistory();
	    pipeline.printStatistics();
	  } finally {
	    // Runs on success and on failure, so the model is not left undisposed.
	    await llm.dispose();
	  }

	  console.log('\n✓ Capstone Project Complete!');
	}

	// Run the project. A fatal error is logged and reflected in the exit status
	// so shells and CI can detect the failure.
	main().catch((error) => {
	  console.error(error);
	  process.exitCode = 1;
	});