clienttarget / src /discovery /lib /email-classifier.ts
iDevBuddy
feat: Phase 1 — AI Client Acquisition System
bd28470
/**
* Email Classifier — 3-Tier Decision System
*
* Tier 1: Hard REJECT (noreply, support, jobs → instant discard)
* Tier 2: LLM Context Check (operations, admin, system → depends on company size/industry)
* Tier 3: High confidence KEEP (personal format, ceo@, partnerships@)
*
* Key insight: admin@ at a 5-person dental clinic reaches the owner.
* admin@ at a 500-person corp reaches an assistant. Context matters.
*/
import { callLLM } from "../../shared/llm/nvidia-client";
import { SYSTEM_PROMPTS, buildEmailClassifyPrompt } from "../../shared/llm/prompts";
import { MODELS } from "../../shared/llm/nvidia-client";
import { logger } from "../../shared/utils/logger";
/** Which decision tier produced a classification. */
export type EmailTier =
  | "reject"
  | "context_check"
  | "keep";

/** Outcome label attached to a classified address. */
export type EmailVerdict =
  | "personal"
  | "authority"
  | "context_verified"
  | "outsourcing"
  | "rejected";

/** Result returned by the classifier for a single email address. */
export interface ClassificationResult {
  /** The address that was classified, as passed in by the caller. */
  email: string;
  /** Decision tier that produced this result. */
  tier: EmailTier;
  /** Outcome label for the address. */
  verdict: EmailVerdict;
  /** Confidence score attached to the verdict. */
  confidence: number;
  /** Human-readable guess at who reads this mailbox. */
  likelyReaches: string;
  /** Human-readable explanation of why the verdict was chosen. */
  reason: string;
}
// ─── Tier 1: ALWAYS REJECT ──────────────────────────────────
// Note: classifyEmail strips every non-letter from the local part before
// looking it up here, so the hyphen/underscore spellings below are matched
// through their letters-only siblings ("noreply", "donotreply").
const HARD_REJECT_PREFIXES = new Set<string>([
  // Automated / system mailboxes
  "noreply",
  "no-reply",
  "no_reply",
  "donotreply",
  "do-not-reply",
  "notifications",
  "automated",
  "bounces",
  "mailer",
  "postmaster",
  "unsubscribe",
  "spam",
  "abuse",
  // Support queues (never reach a decision-maker)
  "support",
  "helpdesk",
  "tickets",
  "complaints",
  "feedback",
  // Recruiting mailboxes (irrelevant for outreach)
  "jobs",
  "careers",
  "apply",
  "recruitment",
  "hiring",
  "talent",
]);

// ─── Tier 2: CONTEXT-DEPENDENT (LLM decides) ────────────────
const CONTEXT_CHECK_PREFIXES = new Set<string>([
  "operations",
  "admin",
  "system",
  "info",
  "office",
  "hello",
  "contact",
  "enquiries",
  "general",
  "team",
  "accounts",
  "finance",
  "billing",
  "sales",
  "marketing",
  "hr",
  "legal",
  "compliance",
  "reception",
  "manager",
]);

// ─── Tier 3: HIGH CONFIDENCE KEEP ───────────────────────────
// Titles that name a decision-maker directly.
const AUTHORITY_PREFIXES = new Set<string>([
  "ceo",
  "owner",
  "founder",
  "president",
  "cto",
  "coo",
  "partner",
  "principal",
  "director",
  "md",
  "gm",
  "head",
]);

// Mailboxes that signal the company buys services from outside vendors.
const OUTSOURCING_PREFIXES = new Set<string>([
  "partnerships",
  "vendors",
  "procurement",
  "outsource",
  "collaborate",
  "projects",
  "business",
  "growth",
]);

// ─── Personal email patterns ────────────────────────────────
// "firstname" or "firstname.lastname": lowercase letters only, each part
// at least two characters. Note that a bare all-letter role word such as
// "info" also satisfies this pattern.
const PERSONAL_PATTERN = /^[a-z]{2,}(\.[a-z]{2,})?$/;
// Single initial plus surname, e.g. "j.smith".
const INITIAL_PATTERN = /^[a-z]\.[a-z]{2,}$/;
/**
 * Main classifier — decides whether an email address is worth pursuing.
 *
 * Decision order (first match wins):
 *  1. Hard-reject prefix (noreply@, support@, jobs@, …) → rejected, no LLM call.
 *  2. Authority prefix (ceo@, founder@, …) → keep.
 *  3. Outsourcing prefix (partnerships@, vendors@, …) → keep.
 *  4. Known role prefix (admin@, info@, sales@, …) → LLM context check.
 *  5. Personal-looking local part (john@, john.smith@, j.smith@) → keep.
 *  6. Anything else → LLM context check.
 *
 * The role-prefix lookups deliberately run before the personal-format test:
 * a bare role word such as "admin" or "ceo" is all letters and therefore
 * also satisfies PERSONAL_PATTERN. Testing the personal format first would
 * short-circuit every plain role address to a 0.95-confidence "personal"
 * verdict and make the authority, outsourcing and context-check branches
 * unreachable for them.
 *
 * @param email - Address to classify. Only the text before the first "@" is
 *   inspected; an input without "@" is treated entirely as the local part.
 * @param companyContext - Company details forwarded to the LLM context check.
 * @param traceId - Correlation id forwarded to the LLM call.
 * @returns The classification for `email`.
 */
export async function classifyEmail(
  email: string,
  companyContext: {
    name: string;
    employeeCount: number | null;
    industry: string;
    websiteSnippet: string;
  },
  traceId: string
): Promise<ClassificationResult> {
  const [localPart = ""] = email.split("@");
  // Lower-cased local part, used for display and for the personal-format test.
  const fullPrefix = localPart.toLowerCase();
  // Letters-only form used for the set lookups, so "no-reply", "no_reply"
  // and "info1" normalise to "noreply" / "info".
  const prefix = fullPrefix.replace(/[^a-z]/g, "");

  // ── Tier 1: Hard reject ────────────────────────────────────
  if (HARD_REJECT_PREFIXES.has(prefix)) {
    return {
      email,
      tier: "reject",
      verdict: "rejected",
      confidence: 1.0,
      likelyReaches: "automated inbox or department queue",
      reason: `"${fullPrefix}@" is a known non-personal email type`,
    };
  }

  // ── Tier 3: Authority prefix → instant keep ────────────────
  if (AUTHORITY_PREFIXES.has(prefix)) {
    return {
      email,
      tier: "keep",
      verdict: "authority",
      confidence: 0.90,
      likelyReaches: `${prefix.toUpperCase()} or equivalent executive`,
      reason: `"${fullPrefix}@" is a known decision-maker prefix`,
    };
  }

  // ── Tier 3: Outsourcing signal → keep ──────────────────────
  if (OUTSOURCING_PREFIXES.has(prefix)) {
    return {
      email,
      tier: "keep",
      verdict: "outsourcing",
      confidence: 0.80,
      likelyReaches: "vendor/partnership manager (purchasing authority likely)",
      reason: `"${fullPrefix}@" signals company outsources services`,
    };
  }

  // ── Tier 3: Personal format → instant keep ─────────────────
  // Skipped for known role words (admin, info, …): those are ambiguous and
  // fall through to the LLM context check below.
  const looksPersonal =
    PERSONAL_PATTERN.test(fullPrefix) || INITIAL_PATTERN.test(fullPrefix);
  if (!CONTEXT_CHECK_PREFIXES.has(prefix) && looksPersonal) {
    return {
      email,
      tier: "keep",
      verdict: "personal",
      confidence: 0.95,
      likelyReaches: "individual person (personal email format)",
      reason: `"${fullPrefix}@" matches personal email pattern`,
    };
  }

  // ── Tier 2: Known role prefix or unknown prefix → ask LLM ──
  return contextCheckWithLLM(email, companyContext, traceId);
}
/**
 * LLM-powered context check for ambiguous email prefixes.
 * Uses MODELS.FAST to keep token cost low — this is a simple classification.
 *
 * Outcomes:
 *  - Parsed response: the verdict follows `parsed.keep` (only a literal
 *    `true` keeps the address). The reported confidence is coerced to a
 *    number, falls back to 0.5 when it is missing or not numeric, and is
 *    clamped to [0, 1].
 *  - Response without a parsed payload: kept with confidence 0.5.
 *  - Anything in the call path throws: size heuristic with confidence 0.4.
 *    Fewer than 30 employees keeps the address, otherwise it is rejected.
 *    An unknown (null) employee count is treated as 0, i.e. small.
 *
 * @param email - Address being classified.
 * @param context - Company details used to build the LLM prompt and the fallback.
 * @param traceId - Correlation id forwarded to the LLM call.
 * @returns A "context_check"-tier classification; this function does not reject its promise.
 */
async function contextCheckWithLLM(
  email: string,
  context: {
    name: string;
    employeeCount: number | null;
    industry: string;
    websiteSnippet: string;
  },
  traceId: string
): Promise<ClassificationResult> {
  try {
    const response = await callLLM({
      operation: "email_classify",
      model: MODELS.FAST, // fast + cheap model for a simple classification
      systemPrompt: SYSTEM_PROMPTS.EMAIL_CLASSIFIER,
      userPrompt: buildEmailClassifyPrompt({
        email,
        company_name: context.name,
        company_size: context.employeeCount,
        industry: context.industry,
        website_snippet: context.websiteSnippet,
      }),
      temperature: 0.1,
      maxTokens: 200,
      jsonMode: true,
      traceId,
    });

    if (response.parsed) {
      const keep = response.parsed.keep === true;
      // The model output is untrusted: guard against non-numeric values
      // (NaN) and out-of-range scores before exposing the confidence.
      const rawConfidence = Number(response.parsed.confidence ?? 0.5);
      const confidence = Number.isFinite(rawConfidence)
        ? Math.min(1, Math.max(0, rawConfidence))
        : 0.5;
      return {
        email,
        tier: "context_check",
        verdict: keep ? "context_verified" : "rejected",
        confidence,
        likelyReaches: String(response.parsed.likely_reaches ?? "unknown"),
        reason: String(response.parsed.reason ?? "LLM context check"),
      };
    }

    // LLM did not return a parsable payload → conservative: keep it, low confidence
    return {
      email,
      tier: "context_check",
      verdict: "context_verified",
      confidence: 0.5,
      likelyReaches: "unknown — LLM parse failed",
      reason: "LLM context check failed — keeping with low confidence",
    };
  } catch (err) {
    logger.warn({ email, err }, "Email LLM classify failed — falling back to company-size heuristic");
    // Fallback: rule-based size heuristic. Unknown size counts as 0 → small.
    const isSmall = (context.employeeCount ?? 0) < 30;
    return {
      email,
      tier: "context_check",
      verdict: isSmall ? "context_verified" : "rejected",
      confidence: 0.4,
      likelyReaches: isSmall ? "likely owner/manager (small company)" : "likely department inbox (large company)",
      reason: `Fallback: company size ${context.employeeCount ?? "unknown"} → ${isSmall ? "small=keep" : "large=reject"}`,
    };
  }
}