/**
 * Email Classifier — 3-Tier Decision System
 *
 * Tier 1: Hard REJECT (noreply, support, jobs → instant discard)
 * Tier 2: LLM Context Check (operations, admin, system → depends on company size/industry)
 * Tier 3: High confidence KEEP (personal format, ceo@, partnerships@)
 *
 * Key insight: admin@ at a 5-person dental clinic reaches the owner.
 * admin@ at a 500-person corp reaches an assistant. Context matters.
 *
 * Evaluation order matters: known role prefixes (reject / authority /
 * outsourcing / context-check) are matched BEFORE the personal-name pattern,
 * because that pattern accepts any all-letter local part and would otherwise
 * classify "admin@" or "info@" as a personal mailbox.
 */

import { MODELS, callLLM } from "../../shared/llm/nvidia-client";
import { SYSTEM_PROMPTS, buildEmailClassifyPrompt } from "../../shared/llm/prompts";
import { logger } from "../../shared/utils/logger";

export type EmailTier = "reject" | "context_check" | "keep";

export type EmailVerdict =
  | "personal"
  | "authority"
  | "context_verified"
  | "outsourcing"
  | "rejected";

/** Outcome of classifying a single email address. */
export interface ClassificationResult {
  /** The address exactly as it was passed in. */
  email: string;
  /** Which tier of the decision system produced the result. */
  tier: EmailTier;
  verdict: EmailVerdict;
  /** Confidence in the verdict, in the range [0, 1]. */
  confidence: number;
  /** Human-readable guess at who reads this inbox. */
  likelyReaches: string;
  /** Human-readable explanation of the verdict. */
  reason: string;
}

/** Company facts forwarded to the LLM prompt and used by the size fallback. */
export interface CompanyContext {
  name: string;
  /** Head count, or `null` when unknown. */
  employeeCount: number | null;
  industry: string;
  websiteSnippet: string;
}

// NOTE: all prefix tables below are looked up with the letters-only form of
// the local part (see `classifyEmail`), so "no-reply" and "no_reply" are
// matched through their normalized spelling "noreply".

// ─── Tier 1: ALWAYS REJECT ──────────────────────────────────
const HARD_REJECT_PREFIXES = new Set([
  // Automated / system
  "noreply", "no-reply", "no_reply", "donotreply", "do-not-reply",
  "notifications", "automated", "bounces", "mailer", "postmaster",
  "unsubscribe", "spam", "abuse",
  // Support (never reaches decision-maker)
  "support", "helpdesk", "tickets", "complaints", "feedback",
  // Jobs (irrelevant)
  "jobs", "careers", "apply", "recruitment", "hiring", "talent",
]);

// ─── Tier 2: CONTEXT-DEPENDENT (LLM decides) ────────────────
const CONTEXT_CHECK_PREFIXES = new Set([
  "operations", "admin", "system", "info", "office", "hello",
  "contact", "enquiries", "general", "team", "accounts", "finance",
  "billing", "sales", "marketing", "hr", "legal", "compliance",
  "reception", "manager",
]);

// ─── Tier 3: HIGH CONFIDENCE KEEP ───────────────────────────
const AUTHORITY_PREFIXES = new Set([
  "ceo", "owner", "founder", "president", "cto", "coo",
  "partner", "principal", "director", "md", "gm", "head",
]);

const OUTSOURCING_PREFIXES = new Set([
  "partnerships", "vendors", "procurement", "outsource",
  "collaborate", "projects", "business", "growth",
]);

// ─── Personal email pattern (firstname, firstname.lastname) ─
const PERSONAL_PATTERN = /^[a-z]{2,}(\.[a-z]{2,})?$/;
const INITIAL_PATTERN = /^[a-z]\.[a-z]{2,}$/; // j.smith

/** Confidence used when the LLM gives no usable confidence value. */
const DEFAULT_LLM_CONFIDENCE = 0.5;

/** Companies below this head count are treated as "small" by the fallback heuristic. */
const SMALL_COMPANY_MAX_EMPLOYEES = 30;

/**
 * Main classifier — determines if email is worth pursuing.
 *
 * @param email - Address to classify; only the part before the first "@" is inspected.
 * @param companyContext - Company facts passed to the LLM for ambiguous prefixes.
 * @param traceId - Correlation id forwarded to the LLM call.
 * @returns The classification; rule-based tiers resolve without calling the LLM.
 */
export async function classifyEmail(
  email: string,
  companyContext: CompanyContext,
  traceId: string
): Promise<ClassificationResult> {
  // Local part as written (lower-cased, trimmed) — used for messages and the
  // personal-name patterns.
  const fullPrefix = (email.split("@")[0] ?? "").trim().toLowerCase();
  // Letters-only form — used for role-table lookups so "no-reply", "info2",
  // "sales_team" style variants normalize to a comparable key.
  const prefix = fullPrefix.replace(/[^a-z]/g, "");

  // ── Malformed: nothing before "@" → nothing to classify ────
  if (fullPrefix === "") {
    return {
      email,
      tier: "reject",
      verdict: "rejected",
      confidence: 1.0,
      likelyReaches: "nobody (malformed address)",
      reason: 'email has no local part before "@"',
    };
  }

  // ── Tier 1: Hard reject ────────────────────────────────────
  if (HARD_REJECT_PREFIXES.has(prefix)) {
    return {
      email,
      tier: "reject",
      verdict: "rejected",
      confidence: 1.0,
      likelyReaches: "automated inbox or department queue",
      reason: `"${fullPrefix}@" is a known non-personal email type`,
    };
  }

  // ── Tier 3: Authority prefix → instant keep ────────────────
  if (AUTHORITY_PREFIXES.has(prefix)) {
    return {
      email,
      tier: "keep",
      verdict: "authority",
      confidence: 0.90,
      likelyReaches: `${prefix.toUpperCase()} or equivalent executive`,
      reason: `"${fullPrefix}@" is a known decision-maker prefix`,
    };
  }

  // ── Tier 3: Outsourcing signal → keep ──────────────────────
  if (OUTSOURCING_PREFIXES.has(prefix)) {
    return {
      email,
      tier: "keep",
      verdict: "outsourcing",
      confidence: 0.80,
      likelyReaches: "vendor/partnership manager (purchasing authority likely)",
      reason: `"${fullPrefix}@" signals company outsources services`,
    };
  }

  // ── Tier 2: Context check needed → ask LLM ─────────────────
  // Must run before the personal-pattern test: "admin", "info", … are plain
  // lowercase words and would otherwise match PERSONAL_PATTERN.
  if (CONTEXT_CHECK_PREFIXES.has(prefix)) {
    return contextCheckWithLLM(email, companyContext, traceId);
  }

  // ── Tier 3: Personal format → instant keep ─────────────────
  if (PERSONAL_PATTERN.test(fullPrefix) || INITIAL_PATTERN.test(fullPrefix)) {
    return {
      email,
      tier: "keep",
      verdict: "personal",
      confidence: 0.95,
      likelyReaches: "individual person (personal email format)",
      reason: `"${fullPrefix}@" matches personal email pattern`,
    };
  }

  // ── Unknown prefix → default to LLM context check ─────────
  return contextCheckWithLLM(email, companyContext, traceId);
}

/**
 * Converts the LLM-reported confidence into a finite number in [0, 1].
 * Missing or non-numeric values fall back to the default confidence.
 */
function normalizeConfidence(raw: unknown): number {
  const value = Number(raw ?? DEFAULT_LLM_CONFIDENCE);
  if (!Number.isFinite(value)) {
    return DEFAULT_LLM_CONFIDENCE;
  }
  return Math.min(1, Math.max(0, value));
}

/**
 * LLM-powered context check for ambiguous email prefixes.
 * Uses FAST model (8B) to save tokens — this is a simple classification.
 *
 * Errors thrown inside the `try` (including a rejected LLM call) are caught
 * and replaced by a company-size heuristic.
 *
 * @param email - Address being classified (echoed back in the result).
 * @param context - Company facts interpolated into the prompt.
 * @param traceId - Correlation id forwarded to the LLM call.
 * @returns A `context_check`-tier result.
 */
async function contextCheckWithLLM(
  email: string,
  context: CompanyContext,
  traceId: string
): Promise<ClassificationResult> {
  try {
    const response = await callLLM({
      operation: "email_classify",
      model: MODELS.FAST, // 8B model — fast + cheap for simple classification
      systemPrompt: SYSTEM_PROMPTS.EMAIL_CLASSIFIER,
      userPrompt: buildEmailClassifyPrompt({
        email,
        company_name: context.name,
        company_size: context.employeeCount,
        industry: context.industry,
        website_snippet: context.websiteSnippet,
      }),
      temperature: 0.1,
      maxTokens: 200,
      jsonMode: true,
      traceId,
    });

    const parsed = response.parsed;
    if (parsed) {
      // Only a literal boolean `true` counts as "keep"; anything else rejects.
      const keep = parsed.keep === true;
      return {
        email,
        tier: "context_check",
        verdict: keep ? "context_verified" : "rejected",
        confidence: normalizeConfidence(parsed.confidence),
        likelyReaches: String(parsed.likely_reaches ?? "unknown"),
        reason: String(parsed.reason ?? "LLM context check"),
      };
    }

    // LLM failed to respond properly → conservative: keep it, low confidence
    return {
      email,
      tier: "context_check",
      verdict: "context_verified",
      confidence: DEFAULT_LLM_CONFIDENCE,
      likelyReaches: "unknown — LLM parse failed",
      reason: "LLM context check failed — keeping with low confidence",
    };
  } catch (err: unknown) {
    logger.warn(
      { email, err },
      "Email LLM classify failed — falling back to company-size heuristic"
    );

    // Fallback: rule-based size heuristic.
    // An unknown head count (null) is treated as 0, i.e. small → keep.
    const isSmall = (context.employeeCount ?? 0) < SMALL_COMPANY_MAX_EMPLOYEES;
    return {
      email,
      tier: "context_check",
      verdict: isSmall ? "context_verified" : "rejected",
      confidence: 0.4,
      likelyReaches: isSmall
        ? "likely owner/manager (small company)"
        : "likely department inbox (large company)",
      reason: `Fallback: company size ${context.employeeCount ?? "unknown"} → ${isSmall ? "small=keep" : "large=reject"}`,
    };
  }
}