Spaces:
Running
Running
/**
 * Email Classifier — 3-Tier Decision System
 *
 * Tier 1: Hard REJECT (noreply, support, jobs → instant discard)
 * Tier 2: LLM Context Check (operations, admin, system → depends on company size/industry)
 * Tier 3: High confidence KEEP (personal format, ceo@, partnerships@)
 *
 * Key insight: admin@ at a 5-person dental clinic reaches the owner.
 * admin@ at a 500-person corp reaches an assistant. Context matters.
 */
| import { callLLM } from "../../shared/llm/nvidia-client"; | |
| import { SYSTEM_PROMPTS, buildEmailClassifyPrompt } from "../../shared/llm/prompts"; | |
| import { MODELS } from "../../shared/llm/nvidia-client"; | |
| import { logger } from "../../shared/utils/logger"; | |
/** Stage of the decision system that produced a result. */
export type EmailTier =
  | "reject"
  | "context_check"
  | "keep";

/** Final label attached to a classified address. */
export type EmailVerdict =
  | "personal"
  | "authority"
  | "context_verified"
  | "outsourcing"
  | "rejected";

/** Outcome of classifying a single email address. */
export interface ClassificationResult {
  /** The address that was classified, as supplied by the caller. */
  email: string;
  /** Which tier made the decision. */
  tier: EmailTier;
  /** The decision itself. */
  verdict: EmailVerdict;
  /** Confidence score attached to the verdict. */
  confidence: number;
  /** Human-readable guess at who receives mail sent to this address. */
  likelyReaches: string;
  /** Human-readable explanation of why the verdict was chosen. */
  reason: string;
}
// ─── Tier 1: ALWAYS REJECT ───────────────────────────────────
const HARD_REJECT_PREFIXES = new Set<string>([
  // Automated / system mailboxes
  "noreply",
  "no-reply",
  "no_reply",
  "donotreply",
  "do-not-reply",
  "notifications",
  "automated",
  "bounces",
  "mailer",
  "postmaster",
  "unsubscribe",
  "spam",
  "abuse",
  // Support queues (never reach a decision-maker)
  "support",
  "helpdesk",
  "tickets",
  "complaints",
  "feedback",
  // Job applications (irrelevant)
  "jobs",
  "careers",
  "apply",
  "recruitment",
  "hiring",
  "talent",
]);

// ─── Tier 2: CONTEXT-DEPENDENT (the LLM decides) ─────────────
const CONTEXT_CHECK_PREFIXES = new Set<string>([
  "operations",
  "admin",
  "system",
  "info",
  "office",
  "hello",
  "contact",
  "enquiries",
  "general",
  "team",
  "accounts",
  "finance",
  "billing",
  "sales",
  "marketing",
  "hr",
  "legal",
  "compliance",
  "reception",
  "manager",
]);

// ─── Tier 3: HIGH CONFIDENCE KEEP ────────────────────────────
// Role mailboxes that name a decision-maker directly.
const AUTHORITY_PREFIXES = new Set<string>([
  "ceo",
  "owner",
  "founder",
  "president",
  "cto",
  "coo",
  "partner",
  "principal",
  "director",
  "md",
  "gm",
  "head",
]);

// Mailboxes that signal the company works with outside vendors/partners.
const OUTSOURCING_PREFIXES = new Set<string>([
  "partnerships",
  "vendors",
  "procurement",
  "outsource",
  "collaborate",
  "projects",
  "business",
  "growth",
]);

// ─── Personal local-part shapes ──────────────────────────────
// "firstname" or "firstname.lastname": lowercase letters only, each part 2+ chars.
const PERSONAL_PATTERN = /^[a-z]{2,}(\.[a-z]{2,})?$/;
// Initial plus surname, e.g. "j.smith".
const INITIAL_PATTERN = /^[a-z]\.[a-z]{2,}$/;
/**
 * Main classifier — determines if an email address is worth pursuing.
 *
 * Rules are evaluated in order and the first match wins:
 *  1. Hard-reject prefix                                   → `reject` / `rejected`
 *  2. Personal-looking local part that is NOT a role word  → `keep` / `personal`
 *  3. Authority prefix                                     → `keep` / `authority`
 *  4. Outsourcing prefix                                   → `keep` / `outsourcing`
 *  5. Anything else (context-check or unknown prefix)      → LLM context check
 *
 * @param email - Address to classify; only the text before the first `@` is inspected.
 * @param companyContext - Company details forwarded to the LLM context check.
 * @param traceId - Correlation id forwarded to the LLM context check.
 * @returns The classification. Rules 1-4 resolve without calling the LLM.
 */
export async function classifyEmail(
  email: string,
  companyContext: {
    name: string;
    employeeCount: number | null;
    industry: string;
    websiteSnippet: string;
  },
  traceId: string
): Promise<ClassificationResult> {
  // Local part as written (lowercased): used for the pattern tests and in messages.
  const fullPrefix = (email.split("@")[0] ?? "").toLowerCase();
  // Letters-only form used for set lookups, so "no-reply", "no_reply" and
  // "noreply2" all normalize to "noreply".
  const prefix = fullPrefix.replace(/[^a-z]/g, "");

  // ── Tier 1: Hard reject ────────────────────────────────────
  if (HARD_REJECT_PREFIXES.has(prefix)) {
    return {
      email,
      tier: "reject",
      verdict: "rejected",
      confidence: 1.0,
      likelyReaches: "automated inbox or department queue",
      reason: `"${fullPrefix}@" is a known non-personal email type`,
    };
  }

  // A bare role word such as "ceo", "info" or "partnerships" is all lowercase
  // letters, so it also satisfies PERSONAL_PATTERN. Without this guard every
  // role mailbox would be returned as "personal" and the role tiers below
  // could never be reached for the prefixes they list.
  const isRoleWord =
    AUTHORITY_PREFIXES.has(fullPrefix) ||
    OUTSOURCING_PREFIXES.has(fullPrefix) ||
    CONTEXT_CHECK_PREFIXES.has(fullPrefix);

  // ── Tier 3: Personal format → instant keep ─────────────────
  if (!isRoleWord && (PERSONAL_PATTERN.test(fullPrefix) || INITIAL_PATTERN.test(fullPrefix))) {
    return {
      email,
      tier: "keep",
      verdict: "personal",
      confidence: 0.95,
      likelyReaches: "individual person (personal email format)",
      reason: `"${fullPrefix}@" matches personal email pattern`,
    };
  }

  // ── Tier 3: Authority prefix → instant keep ────────────────
  if (AUTHORITY_PREFIXES.has(prefix)) {
    return {
      email,
      tier: "keep",
      verdict: "authority",
      confidence: 0.90,
      likelyReaches: `${prefix.toUpperCase()} or equivalent executive`,
      reason: `"${fullPrefix}@" is a known decision-maker prefix`,
    };
  }

  // ── Tier 3: Outsourcing signal → keep ──────────────────────
  if (OUTSOURCING_PREFIXES.has(prefix)) {
    return {
      email,
      tier: "keep",
      verdict: "outsourcing",
      confidence: 0.80,
      likelyReaches: "vendor/partnership manager (purchasing authority likely)",
      reason: `"${fullPrefix}@" signals company outsources services`,
    };
  }

  // ── Tier 2 / unknown prefix → ask the LLM ──────────────────
  // Context-check prefixes and prefixes matching no rule take the same path.
  return contextCheckWithLLM(email, companyContext, traceId);
}
/**
 * LLM-powered context check for ambiguous email prefixes.
 * Requests `MODELS.FAST` in JSON mode with a low temperature and a small
 * token budget, since this is a simple keep/reject classification.
 *
 * Outcomes:
 *  - Parsed response: verdict follows `parsed.keep === true`. The reported
 *    confidence is sanitized: 0.5 when missing or non-numeric, otherwise
 *    clamped to the range [0, 1].
 *  - No parsed payload: the address is kept with confidence 0.5.
 *  - `callLLM` throws: a size heuristic decides. Fewer than 30 employees is
 *    kept, anything else is rejected; an unknown employee count is treated
 *    as 0 and therefore kept.
 *
 * @param email - Address being classified.
 * @param context - Company details passed into the prompt.
 * @param traceId - Correlation id passed through to `callLLM`.
 * @returns A `context_check` tier result; this function does not reject on LLM failure.
 */
async function contextCheckWithLLM(
  email: string,
  context: {
    name: string;
    employeeCount: number | null;
    industry: string;
    websiteSnippet: string;
  },
  traceId: string
): Promise<ClassificationResult> {
  try {
    const response = await callLLM({
      operation: "email_classify",
      model: MODELS.FAST, // fast + cheap model for simple classification
      systemPrompt: SYSTEM_PROMPTS.EMAIL_CLASSIFIER,
      userPrompt: buildEmailClassifyPrompt({
        email,
        company_name: context.name,
        company_size: context.employeeCount,
        industry: context.industry,
        website_snippet: context.websiteSnippet,
      }),
      temperature: 0.1,
      maxTokens: 200,
      jsonMode: true,
      traceId,
    });

    if (response.parsed) {
      const keep = response.parsed.keep === true;

      // Model output is untrusted: "high" would become NaN and 85 would leave
      // the 0..1 scale, so fall back to neutral 0.5 or clamp into range.
      const rawConfidence = Number(response.parsed.confidence ?? 0.5);
      const confidence = Number.isFinite(rawConfidence)
        ? Math.min(1, Math.max(0, rawConfidence))
        : 0.5;

      return {
        email,
        tier: "context_check",
        verdict: keep ? "context_verified" : "rejected",
        confidence,
        likelyReaches: String(response.parsed.likely_reaches ?? "unknown"),
        reason: String(response.parsed.reason ?? "LLM context check"),
      };
    }

    // LLM failed to respond properly — conservative: keep it, low confidence
    return {
      email,
      tier: "context_check",
      verdict: "context_verified",
      confidence: 0.5,
      likelyReaches: "unknown — LLM parse failed",
      reason: "LLM context check failed — keeping with low confidence",
    };
  } catch (err) {
    logger.warn({ email, err }, "Email LLM classify failed — keeping conservatively");

    // Fallback: rule-based size heuristic. A null employee count counts as 0,
    // so companies of unknown size land on the "small" (keep) side.
    const isSmall = (context.employeeCount ?? 0) < 30;
    return {
      email,
      tier: "context_check",
      verdict: isSmall ? "context_verified" : "rejected",
      confidence: 0.4,
      likelyReaches: isSmall ? "likely owner/manager (small company)" : "likely department inbox (large company)",
      reason: `Fallback: company size ${context.employeeCount ?? "unknown"} → ${isSmall ? "small=keep" : "large=reject"}`,
    };
  }
}