/**
 * Contact Enricher v2 — Full Pipeline
 *
 * Step 1: Find emails (Hunter.io + Pattern Generator + SMTP verify)
 *         Snov.io REMOVED — replaced by FREE email pattern generation
 * Step 2: Classify emails (Tier 1/2/3)
 * Step 3: Verify emails (7-layer deep)
 * Step 4: Find personal LinkedIn
 * Step 5: Find social profiles
 * Step 6: Filter for decision-makers only
 *
 * Output: Verified, classified contacts ready for Phase 2
 */

import { searchHunterContacts, type HunterContact } from "../providers/hunter";
import { generateAndVerifyEmails, findEmailForPerson } from "./email-pattern-generator";
import { classifyEmail, type ClassificationResult } from "./email-classifier";
import { verifyEmailDeep, type VerificationResult } from "./email-verifier";
import { findPersonalLinkedIn, type PersonalLinkedIn } from "./linkedin-person-finder";
import { findSocialProfiles, type SocialProfiles } from "./social-finder";
import { getSupabaseClient } from "../../shared/supabase/client";
import { logger } from "../../shared/utils/logger";
import { randomUUID } from "crypto";

/** A contact that made it through classification and verification. */
export interface EnrichedContact {
  id: string;
  companyId: string;
  fullName: string;
  title: string | null;
  seniority: string | null;

  // Email intelligence
  email: string | null;
  emailTier: string; // 'personal' | 'authority' | 'context_verified' | 'rejected'
  emailVerification: VerificationResult | null;
  emailClassification: ClassificationResult | null;

  // LinkedIn (both company and personal)
  linkedinPersonalUrl: string | null;
  linkedinPersonalConfidence: number;

  // Social
  socialProfiles: SocialProfiles | null;

  // Authority
  authorityConfirmed: boolean;
  authorityReason: string;

  // Source tracking
  source: "hunter" | "pattern" | "combined";
  providerConfidence: number;
}

/** Placeholder name given to generic-mailbox contacts that have no known person behind them. */
const UNKNOWN_CONTACT_NAME = "Unknown";

/** Titles treated as senior enough to confirm authority when paired with a personal email. */
const SENIOR_TITLE_PATTERN =
  /\b(ceo|cto|coo|cfo|cmo|founder|co-founder|owner|partner|director|vp|vice\s*president|president|head|principal|managing|general\s*manager)\b/i;

/**
 * Full contact enrichment pipeline for a company.
 *
 * Finds raw contacts for `domain`, then for each one: classifies the email,
 * deep-verifies it, looks up a personal LinkedIn profile and runs the
 * authority check. Contacts whose classification verdict is `"rejected"` or
 * whose verification status is `"rejected_invalid"` are dropped. Company-level
 * social profiles are fetched once and attached to every surviving contact.
 * The result is sorted (authority-confirmed first, then by verification
 * confidence, descending), saved via `saveContacts`, and returned.
 *
 * @param companyId - Identifier stored on every resulting contact.
 * @param domain - Company domain used for provider lookups and verification.
 * @param companyName - Passed to the classifier, LinkedIn finder and social finder.
 * @param employeeCount - Classifier context; may be `null` when unknown.
 * @param industry - Classifier context.
 * @param websiteSnippet - Classifier context.
 * @param websiteHtml - Passed to the social-profile finder.
 * @param companyLinkedInUrl - Passed to the personal-LinkedIn finder; may be `null`.
 * @param traceId - Forwarded to the classifier.
 * @returns The enriched contacts, or an empty array when no raw contacts were found.
 */
export async function enrichContacts(
  companyId: string,
  domain: string,
  companyName: string,
  employeeCount: number | null,
  industry: string,
  websiteSnippet: string,
  websiteHtml: string,
  companyLinkedInUrl: string | null,
  traceId: string
): Promise<EnrichedContact[]> {
  logger.info({ domain, companyName }, "Starting contact enrichment pipeline");

  // ── Step 1: Find emails from all providers ─────────────────
  const rawContacts = await findAllContacts(domain);

  if (rawContacts.length === 0) {
    logger.info({ domain }, "No contacts found from any provider");
    return [];
  }

  logger.info({ domain, found: rawContacts.length }, "Raw contacts from providers");

  // ── Step 2-6: Process each contact ─────────────────────────
  const enriched: EnrichedContact[] = [];

  for (const raw of rawContacts) {
    if (!raw.email) continue;

    // Step 2: Classify email (Tier 1/2/3)
    const classification = await classifyEmail(
      raw.email,
      { name: companyName, employeeCount, industry, websiteSnippet },
      traceId
    );

    // Rejected by classifier → skip entirely
    if (classification.verdict === "rejected") {
      logger.debug({ email: raw.email, reason: classification.reason }, "Email rejected by classifier");
      continue;
    }

    // Step 3: Deep verification (7 layers)
    const verification = await verifyEmailDeep(raw.email, domain, raw.confidence);

    // Hard invalid → skip
    if (verification.status === "rejected_invalid") {
      logger.debug({ email: raw.email }, "Email rejected by 7-layer verifier");
      continue;
    }

    // Step 4: Find personal LinkedIn.
    // Generic-mailbox contacts carry the placeholder name, which is not a
    // person; searching for it would waste a lookup and risk a false match.
    let linkedin: PersonalLinkedIn | null = null;
    if (raw.fullName !== UNKNOWN_CONTACT_NAME && raw.fullName.length > 3) {
      linkedin = await findPersonalLinkedIn(raw.fullName, companyName, domain, companyLinkedInUrl);
    }

    // Step 5 (social profiles) is done once per company after this loop.

    // Step 6: Authority check
    const { confirmed, reason } = checkAuthority(raw, classification);

    const contact: EnrichedContact = {
      id: randomUUID(),
      companyId,
      fullName: raw.fullName,
      title: raw.title,
      seniority: raw.seniority,
      email: raw.email,
      emailTier: classification.verdict,
      emailVerification: verification,
      emailClassification: classification,
      linkedinPersonalUrl: linkedin?.url ?? null,
      linkedinPersonalConfidence: linkedin?.confidence ?? 0,
      socialProfiles: null, // set at company level
      authorityConfirmed: confirmed,
      authorityReason: reason,
      source: raw.source,
      providerConfidence: raw.confidence,
    };

    enriched.push(contact);
  }

  // Sort: authority-confirmed first, then by verification confidence
  enriched.sort((a, b) => {
    if (a.authorityConfirmed !== b.authorityConfirmed) {
      return a.authorityConfirmed ? -1 : 1;
    }
    return (b.emailVerification?.overallConfidence ?? 0) - (a.emailVerification?.overallConfidence ?? 0);
  });

  // Step 5: Social profiles for company (once)
  if (enriched.length > 0) {
    const social = await findSocialProfiles(domain, companyName, websiteHtml);
    for (const c of enriched) {
      c.socialProfiles = social;
    }
  }

  logger.info({
    domain,
    rawFound: rawContacts.length,
    afterClassification: enriched.length,
    authorityConfirmed: enriched.filter(c => c.authorityConfirmed).length,
    withLinkedIn: enriched.filter(c => c.linkedinPersonalUrl).length,
  }, "Contact enrichment pipeline complete");

  // Save to database
  await saveContacts(enriched);

  return enriched;
}

// ─── Find contacts from all providers ─────────────────────────
// Strategy: Hunter.io (free 25/mo) for names+titles+emails
//           Pattern Generator (FREE, unlimited) to find more emails
//           Snov.io REMOVED — replaced by pattern generation

/** A contact as returned by a provider, before classification and verification. */
interface RawContact {
  fullName: string;
  email: string;
  title: string | null;
  seniority: string | null;
  confidence: number;
  source: "hunter" | "pattern";
}

/** A named person reported by Hunter, kept for pattern-based email generation. */
interface HunterPerson {
  firstName: string;
  lastName: string;
  title: string | null;
  seniority: string | null;
}

/** Canonical form of a person's name used for "do we already have an email for them" checks. */
function normalizeName(name: string): string {
  return name.trim().toLowerCase();
}

/**
 * Collects raw contacts for a domain from every available source.
 *
 * 1. Hunter.io results (a Hunter failure is logged and does not abort).
 * 2. For Hunter-reported people who ended up without an email, a generated
 *    address is added when `findEmailForPerson` reports it as deliverable.
 * 3. If nothing was found at all, generic mailboxes (`info@`, `contact@`,
 *    `hello@`, `admin@`) are added with a low confidence of 20.
 *
 * Emails are de-duplicated case-insensitively.
 *
 * @param domain - Company domain to search.
 * @returns Contacts in discovery order; never `null`.
 */
async function findAllContacts(domain: string): Promise<RawContact[]> {
  const contacts: RawContact[] = [];
  const seenEmails = new Set<string>();
  // Normalised full names of people that already have a contact entry.
  const namesWithEmail = new Set<string>();
  const namesFromHunter: HunterPerson[] = [];

  // ── Source 1: Hunter.io (25 free/month) ─────────────────────
  // Hunter gives us NAMES + TITLES + EMAILS
  try {
    const hunterResults = await searchHunterContacts(domain);
    for (const h of hunterResults) {
      const email = h.value?.toLowerCase();
      const firstName = h.first_name ?? "";
      const lastName = h.last_name ?? "";
      const fullName = `${firstName} ${lastName}`.trim();

      // Save name for pattern generation later
      if (firstName && lastName) {
        namesFromHunter.push({
          firstName,
          lastName,
          title: h.position ?? null,
          seniority: h.seniority ?? null,
        });
      }

      if (email && !seenEmails.has(email)) {
        seenEmails.add(email);
        if (fullName) namesWithEmail.add(normalizeName(fullName));
        contacts.push({
          fullName,
          email,
          title: h.position ?? null,
          seniority: h.seniority ?? null,
          confidence: h.confidence ?? 0,
          source: "hunter",
        });
      }
    }
  } catch (err) {
    logger.warn({ domain, err }, "Hunter search failed — falling back to pattern generation");
  }

  // ── Source 2: Pattern Generator (FREE, UNLIMITED) ──────────
  // Only for people Hunter named but for whom we hold no email yet.
  // (Without any Hunter names this step has nothing to work with.)
  for (const person of namesFromHunter) {
    const fullName = `${person.firstName} ${person.lastName}`;
    const nameKey = normalizeName(fullName);

    // Exact-name match: substring matching would wrongly treat
    // "Al Lee" as covered by an existing "Alan Leeds".
    if (namesWithEmail.has(nameKey)) continue;

    // Generate email patterns and SMTP verify (FREE)
    const generated = await findEmailForPerson(fullName, domain);
    if (!generated || generated.smtpStatus !== "deliverable") continue;

    // Lowercase so de-duplication agrees with the Hunter branch above.
    const email = generated.email.toLowerCase();
    if (seenEmails.has(email)) continue;

    seenEmails.add(email);
    namesWithEmail.add(nameKey);
    contacts.push({
      fullName,
      email,
      title: person.title,
      seniority: person.seniority,
      // NOTE(review): presumably generator confidence is 0–1 and is scaled to
      // line up with Hunter's confidence values — confirm against the generator.
      confidence: generated.confidence * 100,
      source: "pattern",
    });
  }

  // ── Source 3: If still no contacts, try common owner patterns ─
  if (contacts.length === 0) {
    // Try generic owner/manager patterns
    const ownerPatterns = ["info", "contact", "hello", "admin"];
    for (const prefix of ownerPatterns) {
      const email = `${prefix}@${domain}`;
      if (!seenEmails.has(email)) {
        seenEmails.add(email);
        contacts.push({
          fullName: UNKNOWN_CONTACT_NAME,
          email,
          title: null,
          seniority: null,
          confidence: 20,
          source: "pattern",
        });
      }
    }
  }

  logger.info({
    domain,
    hunterContacts: contacts.filter(c => c.source === "hunter").length,
    patternContacts: contacts.filter(c => c.source === "pattern").length,
    total: contacts.length,
  }, "Contact finding complete (Hunter + Pattern Generator)");

  return contacts;
}

// ─── Authority check ─────────────────────────────────────────

/**
 * Decides whether a contact counts as a decision-maker.
 *
 * Rules are evaluated in order; the first match wins:
 * - `personal` verdict with a senior-looking title → confirmed
 * - `authority` verdict → confirmed
 * - `personal` verdict (any title) → confirmed
 * - `context_verified` verdict with confidence ≥ 0.7 → confirmed
 * - `outsourcing` verdict → not confirmed
 * - anything else → not confirmed
 *
 * @param contact - Raw provider contact (title and email are read).
 * @param classification - Result of the email classifier.
 * @returns Whether authority is confirmed, plus a human-readable reason.
 */
function checkAuthority(
  contact: RawContact,
  classification: ClassificationResult
): { confirmed: boolean; reason: string } {
  // Personal email with senior title → confirmed
  if (classification.verdict === "personal" && contact.title && SENIOR_TITLE_PATTERN.test(contact.title)) {
    return { confirmed: true, reason: `Personal email + senior title: ${contact.title}` };
  }

  if (classification.verdict === "authority") {
    return { confirmed: true, reason: `Authority email prefix: ${contact.email.split("@")[0]}` };
  }

  if (classification.verdict === "personal") {
    return { confirmed: true, reason: "Personal email format — likely individual decision maker" };
  }

  if (classification.verdict === "context_verified" && classification.confidence >= 0.7) {
    return { confirmed: true, reason: classification.reason };
  }

  if (classification.verdict === "outsourcing") {
    return { confirmed: false, reason: "Outsourcing/vendor email — may reach procurement, not decision maker" };
  }

  return { confirmed: false, reason: "Authority not confirmed" };
}

// ─── Save to database ────────────────────────────────────────

/**
 * Upserts each contact into the `contacts` table, keyed on `company_id,email`.
 *
 * Saving is best-effort: a failure for one contact is logged and the loop
 * continues with the next one. Nothing is thrown to the caller.
 *
 * @param contacts - Contacts to persist.
 */
async function saveContacts(contacts: EnrichedContact[]): Promise<void> {
  const db = getSupabaseClient();

  for (const c of contacts) {
    try {
      // NOTE(review): `id` is a fresh UUID on every run; with the
      // `company_id,email` conflict target this may overwrite the id of an
      // existing row — confirm that is intended.
      const { error } = await db.from("contacts").upsert({
        id: c.id,
        company_id: c.companyId,
        full_name: c.fullName,
        title: c.title,
        seniority: c.seniority,
        email: c.email,
        email_verified: c.emailVerification?.status === "verified_deliverable",
        email_tier: c.emailTier,
        email_verification_layers: c.emailVerification?.layers ?? {},
        linkedin_personal_url: c.linkedinPersonalUrl,
        social_profiles: c.socialProfiles ?? {},
        authority_confirmed: c.authorityConfirmed,
        confidence: c.emailVerification?.overallConfidence ?? c.providerConfidence,
        source: c.source,
      }, { onConflict: "company_id,email" });

      // The Supabase client reports query failures through the resolved
      // `error` field instead of throwing, so it has to be checked explicitly.
      if (error) {
        logger.warn({ email: c.email, err: error }, "Contact save failed — continuing");
      }
    } catch (err) {
      // Still guard against thrown errors (e.g. network-level failures).
      logger.warn({ email: c.email, err }, "Contact save failed — continuing");
    }
  }
}