clienttarget-python / src /discovery /lib /contact-enricher.ts
iDevBuddy
feat: Phase 1 — AI Client Acquisition System
bd28470
/**
 * Contact Enricher v2 — Full Pipeline
 *
 * Step 1: Find emails (Hunter.io + Pattern Generator + SMTP verify)
 *         Snov.io REMOVED — replaced by FREE email pattern generation
 * Step 2: Classify emails (Tier 1/2/3)
 * Step 3: Verify emails (7-layer deep)
 * Step 4: Find personal LinkedIn
 * Step 5: Find social profiles
 * Step 6: Authority check — flag likely decision-makers (they are sorted first)
 *
 * Output: Verified, classified contacts ready for Phase 2
 */
import { searchHunterContacts, type HunterContact } from "../providers/hunter";
import { generateAndVerifyEmails, findEmailForPerson } from "./email-pattern-generator";
import { classifyEmail, type ClassificationResult } from "./email-classifier";
import { verifyEmailDeep, type VerificationResult } from "./email-verifier";
import { findPersonalLinkedIn, type PersonalLinkedIn } from "./linkedin-person-finder";
import { findSocialProfiles, type SocialProfiles } from "./social-finder";
import { getSupabaseClient } from "../../shared/supabase/client";
import { logger } from "../../shared/utils/logger";
import { randomUUID } from "crypto";
/**
 * A contact produced by `enrichContacts`: a raw provider contact that was not
 * rejected by the classifier or the verifier, plus the LinkedIn, social and
 * authority signals gathered for it.
 */
export interface EnrichedContact {
// Fresh UUID generated by the pipeline for each enriched contact.
id: string;
// Company identifier passed in by the caller of `enrichContacts`.
companyId: string;
fullName: string;
title: string | null;
seniority: string | null;
// Email intelligence
email: string | null;
// Copied from the classifier verdict. Verdicts referenced in this file:
// 'personal' | 'authority' | 'context_verified' | 'outsourcing'.
// 'rejected' contacts are dropped before an EnrichedContact is built.
emailTier: string;
// Result of the deep verifier for `email`.
emailVerification: VerificationResult | null;
// Full classifier result that `emailTier` was taken from.
emailClassification: ClassificationResult | null;
// LinkedIn (personal profile only; the company LinkedIn URL is an input to the lookup)
linkedinPersonalUrl: string | null;
// 0 when no personal profile was found or no lookup was attempted.
linkedinPersonalConfidence: number;
// Social
// Looked up once per company; every contact of that company references the same object.
socialProfiles: SocialProfiles | null;
// Authority
authorityConfirmed: boolean;
// Human-readable explanation of the authority decision.
authorityReason: string;
// Source tracking
// NOTE(review): "combined" is allowed by the type but never assigned in this file.
source: "hunter" | "pattern" | "combined";
// Confidence reported by the provider that supplied the email (before deep verification).
providerConfidence: number;
}
/**
 * Full contact enrichment pipeline for a company.
 *
 * Contacts are processed one at a time: each email is classified, then deeply
 * verified, then (when a real person name is known) matched to a personal
 * LinkedIn profile, and finally given an authority verdict. Contacts whose
 * classification verdict is "rejected" or whose verification status is
 * "rejected_invalid" are dropped. Social profiles are looked up once per
 * company and the same result is attached to every surviving contact.
 *
 * @param companyId          Identifier stored on every returned contact.
 * @param domain             Company domain used for provider lookups and verification.
 * @param companyName        Company name passed to the classifier and the finders.
 * @param employeeCount      Head count passed to the classifier, or null when unknown.
 * @param industry           Industry label passed to the classifier.
 * @param websiteSnippet     Website text excerpt passed to the classifier.
 * @param websiteHtml        Website HTML passed to the social profile finder.
 * @param companyLinkedInUrl Company LinkedIn URL passed to the personal LinkedIn finder.
 * @param traceId            Trace identifier forwarded to the classifier.
 * @returns Enriched contacts, authority-confirmed first, then by descending
 *          verification confidence. Empty when no provider returned a contact.
 */
export async function enrichContacts(
  companyId: string,
  domain: string,
  companyName: string,
  employeeCount: number | null,
  industry: string,
  websiteSnippet: string,
  websiteHtml: string,
  companyLinkedInUrl: string | null,
  traceId: string
): Promise<EnrichedContact[]> {
  // Name that findAllContacts assigns to generic mailboxes (info@, contact@, ...).
  const PLACEHOLDER_FULL_NAME = "Unknown";

  logger.info({ domain, companyName }, "Starting contact enrichment pipeline");

  // ── Step 1: Find emails from all providers ─────────────────
  const rawContacts = await findAllContacts(domain);
  if (rawContacts.length === 0) {
    logger.info({ domain }, "No contacts found from any provider");
    return [];
  }
  logger.info({ domain, found: rawContacts.length }, "Raw contacts from providers");

  // ── Step 2-6: Process each contact ─────────────────────────
  const enriched: EnrichedContact[] = [];
  for (const raw of rawContacts) {
    if (!raw.email) continue;

    // Step 2: Classify email (Tier 1/2/3)
    const classification = await classifyEmail(
      raw.email,
      { name: companyName, employeeCount, industry, websiteSnippet },
      traceId
    );

    // Rejected by classifier → skip entirely
    if (classification.verdict === "rejected") {
      logger.debug({ email: raw.email, reason: classification.reason }, "Email rejected by classifier");
      continue;
    }

    // Step 3: Deep verification (7 layers)
    const verification = await verifyEmailDeep(
      raw.email,
      domain,
      raw.confidence
    );

    // Hard invalid → skip
    if (verification.status === "rejected_invalid") {
      logger.debug({ email: raw.email }, "Email rejected by 7-layer verifier");
      continue;
    }

    // Step 4: Find personal LinkedIn.
    // Only search when we have an actual person name: the "Unknown" placeholder
    // used for generic mailboxes would otherwise trigger a lookup and could
    // attach an unrelated profile to the contact.
    let linkedin: PersonalLinkedIn | null = null;
    const hasPersonName =
      Boolean(raw.fullName) &&
      raw.fullName.length > 3 &&
      raw.fullName !== PLACEHOLDER_FULL_NAME;
    if (hasPersonName) {
      linkedin = await findPersonalLinkedIn(
        raw.fullName,
        companyName,
        domain,
        companyLinkedInUrl
      );
    }

    // Step 5 (social profiles) is done once per company after this loop.

    // Step 6: Authority check
    const { confirmed, reason } = checkAuthority(raw, classification);

    const contact: EnrichedContact = {
      id: randomUUID(),
      companyId,
      fullName: raw.fullName,
      title: raw.title,
      seniority: raw.seniority,
      email: raw.email,
      emailTier: classification.verdict,
      emailVerification: verification,
      emailClassification: classification,
      linkedinPersonalUrl: linkedin?.url ?? null,
      linkedinPersonalConfidence: linkedin?.confidence ?? 0,
      socialProfiles: null, // set at company level
      authorityConfirmed: confirmed,
      authorityReason: reason,
      source: raw.source,
      providerConfidence: raw.confidence,
    };
    enriched.push(contact);
  }

  // Sort: authority-confirmed first, then by verification confidence
  enriched.sort((a, b) => {
    if (a.authorityConfirmed !== b.authorityConfirmed) return a.authorityConfirmed ? -1 : 1;
    return (b.emailVerification?.overallConfidence ?? 0) - (a.emailVerification?.overallConfidence ?? 0);
  });

  // Step 5: Social profiles for company (once); skipped when nothing survived.
  if (enriched.length > 0) {
    const social = await findSocialProfiles(domain, companyName, websiteHtml);
    for (const c of enriched) {
      c.socialProfiles = social;
    }
  }

  logger.info({
    domain,
    rawFound: rawContacts.length,
    afterClassification: enriched.length,
    authorityConfirmed: enriched.filter(c => c.authorityConfirmed).length,
    withLinkedIn: enriched.filter(c => c.linkedinPersonalUrl).length,
  }, "Contact enrichment pipeline complete");

  // Save to database
  await saveContacts(enriched);
  return enriched;
}
// ─── Find contacts from all providers ─────────────────────────
// Strategy: Hunter.io (free 25/mo) for names+titles+emails
// Pattern Generator (FREE, unlimited) to find more emails
// Snov.io REMOVED — replaced by pattern generation
/**
 * A candidate contact as returned by `findAllContacts`, before classification
 * and verification.
 */
interface RawContact {
// "first last" built from provider data, or "Unknown" for generic mailboxes.
fullName: string;
// Hunter addresses are lower-cased before being stored here.
email: string;
title: string | null;
seniority: string | null;
// Hunter's confidence (0 when missing), the pattern generator's confidence
// multiplied by 100, or 20 for generic mailboxes.
confidence: number;
// Which finder produced the email address.
source: "hunter" | "pattern";
}
/**
 * Collects candidate contacts for a domain from every available source.
 *
 * 1. Hunter: names, titles and emails. A Hunter failure is logged and treated
 *    as "no results".
 * 2. Pattern generator: for people Hunter named but for whom no email was
 *    kept, generate an address and keep it only when SMTP reports it
 *    deliverable. A failure for one person is logged and does not discard
 *    contacts already collected.
 * 3. Generic mailboxes (info@, contact@, hello@, admin@): only when the first
 *    two sources produced nothing.
 *
 * Emails are de-duplicated case-insensitively.
 *
 * @param domain Company domain to search.
 * @returns Raw contacts in discovery order (Hunter first, then pattern-based).
 */
async function findAllContacts(domain: string): Promise<RawContact[]> {
  const contacts: RawContact[] = [];
  const seenEmails = new Set<string>();
  // Normalised "first last" names that already have an email attached.
  // Exact matching avoids the false positives of substring checks
  // (e.g. "Jo Lee" being considered covered by "John Leeds").
  const namesWithEmail = new Set<string>();
  const namesFromHunter: { firstName: string; lastName: string; title: string | null; seniority: string | null }[] = [];

  const normaliseName = (name: string): string => name.trim().toLowerCase();

  // ── Source 1: Hunter.io (25 free/month) ─────────────────────
  // Hunter gives us NAMES + TITLES + EMAILS
  try {
    const hunterResults = await searchHunterContacts(domain);
    for (const h of hunterResults) {
      const email = h.value?.toLowerCase();
      const firstName = h.first_name ?? "";
      const lastName = h.last_name ?? "";
      const fullName = `${firstName} ${lastName}`.trim();

      // Save name for pattern generation later
      if (firstName && lastName) {
        namesFromHunter.push({
          firstName,
          lastName,
          title: h.position ?? null,
          seniority: h.seniority ?? null,
        });
      }

      if (email && !seenEmails.has(email)) {
        seenEmails.add(email);
        namesWithEmail.add(normaliseName(fullName));
        contacts.push({
          fullName,
          email,
          title: h.position ?? null,
          seniority: h.seniority ?? null,
          confidence: h.confidence ?? 0,
          source: "hunter",
        });
      }
    }
  } catch (err) {
    logger.warn({ domain, err }, "Hunter search failed β€” falling back to pattern generation");
  }

  // ── Source 2: Pattern Generator (FREE, UNLIMITED) ──────────
  // Only for people Hunter named but for whom no email was kept.
  for (const person of namesFromHunter) {
    const fullName = `${person.firstName} ${person.lastName}`;
    const nameKey = normaliseName(fullName);
    if (namesWithEmail.has(nameKey)) continue;

    try {
      // Generate email patterns and SMTP verify (FREE)
      const generated = await findEmailForPerson(fullName, domain);
      if (!generated || generated.smtpStatus !== "deliverable") continue;

      // Normalise like Hunter addresses so the dedupe set is case-insensitive.
      const email = generated.email.toLowerCase();
      if (seenEmails.has(email)) continue;

      seenEmails.add(email);
      namesWithEmail.add(nameKey);
      contacts.push({
        fullName,
        email,
        title: person.title,
        seniority: person.seniority,
        // The generator's confidence is scaled by 100 to sit alongside Hunter's score.
        confidence: generated.confidence * 100,
        source: "pattern",
      });
    } catch (err) {
      // Best-effort, like the Hunter lookup: one failed probe must not throw
      // away contacts that were already found.
      logger.warn({ domain, person: fullName, err }, "Pattern generation failed for person; skipping");
    }
  }

  // ── Source 3: If still no contacts, try common owner patterns ─
  if (contacts.length === 0) {
    // Try generic owner/manager patterns
    const ownerPatterns = ["info", "contact", "hello", "admin"];
    for (const prefix of ownerPatterns) {
      const email = `${prefix}@${domain}`;
      if (!seenEmails.has(email)) {
        seenEmails.add(email);
        contacts.push({
          fullName: "Unknown",
          email,
          title: null,
          seniority: null,
          confidence: 20,
          source: "pattern",
        });
      }
    }
  }

  logger.info({
    domain,
    hunterContacts: contacts.filter(c => c.source === "hunter").length,
    patternContacts: contacts.filter(c => c.source === "pattern").length,
    total: contacts.length,
  }, "Contact finding complete (Hunter + Pattern Generator)");

  return contacts;
}
// ─── Authority check ─────────────────────────────────────────
/**
 * Decides whether a contact is likely a decision maker, based on the email
 * classifier's verdict and, for personal addresses, the contact's job title.
 *
 * @param contact        Raw contact; only `title` and `email` are read.
 * @param classification Classifier result; `verdict`, `confidence` and `reason` are read.
 * @returns `confirmed` plus a human-readable `reason` for the decision.
 */
function checkAuthority(
  contact: RawContact,
  classification: ClassificationResult
): { confirmed: boolean; reason: string } {
  const seniorTitles = /\b(ceo|cto|coo|cfo|cmo|founder|co-founder|owner|partner|director|vp|vice\s*president|president|head|principal|managing|general\s*manager)\b/i;

  switch (classification.verdict) {
    case "personal": {
      // A personal address is always confirmed; a senior title only yields a
      // more specific reason.
      const jobTitle = contact.title;
      if (jobTitle && seniorTitles.test(jobTitle)) {
        return { confirmed: true, reason: `Personal email + senior title: ${jobTitle}` };
      }
      return { confirmed: true, reason: "Personal email format β€” likely individual decision maker" };
    }

    case "authority": {
      const [localPart] = contact.email.split("@");
      return { confirmed: true, reason: `Authority email prefix: ${localPart}` };
    }

    case "context_verified":
      // Only trusted when the classifier confidence is at least 0.7;
      // otherwise fall through to the unconfirmed default below.
      if (classification.confidence >= 0.7) {
        return { confirmed: true, reason: classification.reason };
      }
      break;

    case "outsourcing":
      return { confirmed: false, reason: "Outsourcing/vendor email β€” may reach procurement, not decision maker" };
  }

  return { confirmed: false, reason: "Authority not confirmed" };
}
// ─── Save to database ────────────────────────────────────────
/**
 * Persists enriched contacts to the `contacts` table, one upsert per contact,
 * keyed on (company_id, email). Saving is best-effort: a failure for one
 * contact is logged and the remaining contacts are still written.
 *
 * @param contacts Contacts to persist; an empty list performs no writes.
 */
async function saveContacts(contacts: EnrichedContact[]): Promise<void> {
  const db = getSupabaseClient();

  for (const c of contacts) {
    try {
      // NOTE(review): `id` is a UUID generated per pipeline run, so a conflict on
      // (company_id, email) would overwrite the stored row's id — confirm this is intended.
      const { error } = await db.from("contacts").upsert({
        id: c.id,
        company_id: c.companyId,
        full_name: c.fullName,
        title: c.title,
        seniority: c.seniority,
        email: c.email,
        email_verified: c.emailVerification?.status === "verified_deliverable",
        email_tier: c.emailTier,
        email_verification_layers: c.emailVerification?.layers ?? {},
        linkedin_personal_url: c.linkedinPersonalUrl,
        social_profiles: c.socialProfiles ?? {},
        authority_confirmed: c.authorityConfirmed,
        confidence: c.emailVerification?.overallConfidence ?? c.providerConfidence,
        source: c.source,
      }, { onConflict: "company_id,email" });

      // The Supabase client reports query failures through the resolved
      // `error` field instead of throwing, so it has to be checked explicitly;
      // otherwise failed writes go unnoticed.
      if (error) {
        logger.warn({ email: c.email, err: error }, "Contact save failed β€” continuing");
      }
    } catch (err) {
      // Still guard against unexpected exceptions from the client itself.
      logger.warn({ email: c.email, err }, "Contact save failed β€” continuing");
    }
  }
}