Spaces:
Running
Running
/**
 * Contact Enricher v2 — Full Pipeline
 *
 * Step 1: Find emails (Hunter.io + Pattern Generator + SMTP verify)
 *         Snov.io REMOVED — replaced by FREE email pattern generation
 * Step 2: Classify emails (Tier 1/2/3)
 * Step 3: Verify emails (7-layer deep)
 * Step 4: Find personal LinkedIn
 * Step 5: Find social profiles
 * Step 6: Filter for decision-makers only
 *
 * Output: Verified, classified contacts ready for Phase 2
 */
| import { searchHunterContacts, type HunterContact } from "../providers/hunter"; | |
| import { generateAndVerifyEmails, findEmailForPerson } from "./email-pattern-generator"; | |
| import { classifyEmail, type ClassificationResult } from "./email-classifier"; | |
| import { verifyEmailDeep, type VerificationResult } from "./email-verifier"; | |
| import { findPersonalLinkedIn, type PersonalLinkedIn } from "./linkedin-person-finder"; | |
| import { findSocialProfiles, type SocialProfiles } from "./social-finder"; | |
| import { getSupabaseClient } from "../../shared/supabase/client"; | |
| import { logger } from "../../shared/utils/logger"; | |
| import { randomUUID } from "crypto"; | |
/**
 * A contact that passed classification and verification, together with
 * everything the enrichment pipeline learned about it.
 */
export interface EnrichedContact {
  /** Identifier of this contact record. */
  id: string;
  /** Identifier of the company this contact belongs to. */
  companyId: string;
  /** Person's name; generic-mailbox fallbacks carry the placeholder "Unknown". */
  fullName: string;
  title: string | null;
  seniority: string | null;

  // Email intelligence
  email: string | null;
  /**
   * Classifier verdict for the email, e.g. 'personal' | 'authority' |
   * 'context_verified' | 'outsourcing'. Contacts with a 'rejected' verdict are
   * dropped by the pipeline, so that value should not appear here.
   * NOTE(review): confirm the full verdict set against `ClassificationResult`.
   */
  emailTier: string;
  /** Result of the deep email verification, or null when none is attached. */
  emailVerification: VerificationResult | null;
  /** Full classifier output behind `emailTier`, or null when none is attached. */
  emailClassification: ClassificationResult | null;

  // LinkedIn (personal profile of the contact)
  linkedinPersonalUrl: string | null;
  /** Confidence of the LinkedIn match; 0 when no profile was found. */
  linkedinPersonalConfidence: number;

  // Social
  /** Company-level social profiles, shared by all contacts of the same company. */
  socialProfiles: SocialProfiles | null;

  // Authority
  /** Whether the contact is considered a decision maker. */
  authorityConfirmed: boolean;
  /** Human-readable explanation of the authority decision. */
  authorityReason: string;

  // Source tracking
  /** Provider that produced the email. */
  source: "hunter" | "pattern" | "combined";
  /** Confidence reported by the provider that found the email. */
  providerConfidence: number;
}
/**
 * Returns true when `fullName` is worth a personal-LinkedIn lookup.
 *
 * Rejects very short names and the "Unknown" placeholder given to
 * generic-mailbox contacts (info@, contact@, ...), which do not correspond to
 * a real person and would otherwise trigger a pointless, misleading search.
 */
function isSearchablePersonName(fullName: string): boolean {
  const name = fullName.trim();
  return name.length > 3 && name.toLowerCase() !== "unknown";
}

/**
 * Full contact enrichment pipeline for a company.
 *
 * Finds raw contacts for `domain`, then for each one classifies the email,
 * deep-verifies it, looks up a personal LinkedIn profile and runs the
 * authority check. Contacts whose classifier verdict is "rejected" or whose
 * verification status is "rejected_invalid" are dropped. The remaining
 * contacts are sorted (authority-confirmed first, then by descending
 * verification confidence), given the company-level social profiles, saved,
 * and returned.
 *
 * Errors thrown by the awaited classification / verification / lookup helpers
 * are not caught here and reject the returned promise.
 *
 * @param companyId - Stored on every returned contact.
 * @param domain - Company domain used for contact discovery, verification and lookups.
 * @param companyName - Company name passed to the classifier and profile finders.
 * @param employeeCount - Company size passed to the classifier, or null if unknown.
 * @param industry - Industry passed to the classifier.
 * @param websiteSnippet - Website text passed to the classifier as context.
 * @param websiteHtml - Website HTML passed to the social profile finder.
 * @param companyLinkedInUrl - Company LinkedIn URL passed to the personal-LinkedIn finder.
 * @param traceId - Trace identifier forwarded to the classifier.
 * @returns The enriched contacts in sorted order; empty when no raw contacts were found.
 */
export async function enrichContacts(
  companyId: string,
  domain: string,
  companyName: string,
  employeeCount: number | null,
  industry: string,
  websiteSnippet: string,
  websiteHtml: string,
  companyLinkedInUrl: string | null,
  traceId: string
): Promise<EnrichedContact[]> {
  logger.info({ domain, companyName }, "Starting contact enrichment pipeline");

  // ── Step 1: Find emails from all providers ─────────────────
  const rawContacts = await findAllContacts(domain);
  if (rawContacts.length === 0) {
    logger.info({ domain }, "No contacts found from any provider");
    return [];
  }
  logger.info({ domain, found: rawContacts.length }, "Raw contacts from providers");

  // ── Step 2-6: Process each contact ─────────────────────────
  const enriched: EnrichedContact[] = [];
  for (const raw of rawContacts) {
    if (!raw.email) continue;

    // Step 2: Classify email (Tier 1/2/3)
    const classification = await classifyEmail(
      raw.email,
      { name: companyName, employeeCount, industry, websiteSnippet },
      traceId
    );

    // Rejected by classifier — skip entirely
    if (classification.verdict === "rejected") {
      logger.debug({ email: raw.email, reason: classification.reason }, "Email rejected by classifier");
      continue;
    }

    // Step 3: Deep verification (7 layers)
    const verification = await verifyEmailDeep(raw.email, domain, raw.confidence);

    // Hard invalid — skip
    if (verification.status === "rejected_invalid") {
      logger.debug({ email: raw.email }, "Email rejected by 7-layer verifier");
      continue;
    }

    // Step 4: Find personal LinkedIn — only for contacts that name a real person
    let linkedin: PersonalLinkedIn | null = null;
    if (isSearchablePersonName(raw.fullName)) {
      linkedin = await findPersonalLinkedIn(
        raw.fullName,
        companyName,
        domain,
        companyLinkedInUrl
      );
    }

    // Step 5 (social profiles) is fetched once per company after this loop.

    // Step 6: Authority check
    const { confirmed, reason } = checkAuthority(raw, classification);

    enriched.push({
      id: randomUUID(),
      companyId,
      fullName: raw.fullName,
      title: raw.title,
      seniority: raw.seniority,
      email: raw.email,
      emailTier: classification.verdict,
      emailVerification: verification,
      emailClassification: classification,
      linkedinPersonalUrl: linkedin?.url ?? null,
      linkedinPersonalConfidence: linkedin?.confidence ?? 0,
      socialProfiles: null, // set at company level below
      authorityConfirmed: confirmed,
      authorityReason: reason,
      source: raw.source,
      providerConfidence: raw.confidence,
    });
  }

  // Sort: authority-confirmed first, then by verification confidence (descending)
  enriched.sort((a, b) => {
    if (a.authorityConfirmed !== b.authorityConfirmed) return a.authorityConfirmed ? -1 : 1;
    return (b.emailVerification?.overallConfidence ?? 0) - (a.emailVerification?.overallConfidence ?? 0);
  });

  // Step 5: Social profiles for the company (fetched once, shared by all contacts)
  if (enriched.length > 0) {
    const social = await findSocialProfiles(domain, companyName, websiteHtml);
    for (const c of enriched) {
      c.socialProfiles = social;
    }
  }

  logger.info({
    domain,
    rawFound: rawContacts.length,
    afterClassification: enriched.length,
    authorityConfirmed: enriched.filter(c => c.authorityConfirmed).length,
    withLinkedIn: enriched.filter(c => c.linkedinPersonalUrl).length,
  }, "Contact enrichment pipeline complete");

  // Save to database
  await saveContacts(enriched);
  return enriched;
}
// ─── Find contacts from all providers ─────────────────────────
// Strategy: Hunter.io (free 25/mo) for names+titles+emails
//           Pattern Generator (FREE, unlimited) to find more emails
//           Snov.io REMOVED — replaced by pattern generation

/** A contact candidate produced by `findAllContacts`, before classification and verification. */
interface RawContact {
  /** "First Last" from Hunter (may be empty), or "Unknown" for generic-mailbox fallbacks. */
  fullName: string;
  email: string;
  title: string | null;
  seniority: string | null;
  /** Hunter's confidence, the generator's confidence × 100, or 20 for generic mailboxes. */
  confidence: number;
  source: "hunter" | "pattern";
}

/** Case- and whitespace-insensitive key used to compare person names exactly. */
function personNameKey(firstName: string, lastName: string): string {
  return `${firstName} ${lastName}`.trim().replace(/\s+/g, " ").toLowerCase();
}

/**
 * Collects contact candidates for `domain`.
 *
 * 1. Hunter.io results: every result with an email becomes a contact
 *    (emails lowercased and de-duplicated).
 * 2. Pattern generator: for each person Hunter named for whom no contact was
 *    recorded, tries to generate a deliverable email.
 * 3. If nothing was found at all, falls back to generic mailboxes
 *    (info@, contact@, hello@, admin@) with low confidence.
 *
 * A Hunter failure is logged and treated as "no Hunter results"; errors from
 * the pattern generator are not caught here.
 *
 * @param domain - Company domain to search.
 * @returns Contact candidates with unique (case-insensitive) emails.
 */
async function findAllContacts(domain: string): Promise<RawContact[]> {
  const contacts: RawContact[] = [];
  const seenEmails = new Set<string>();
  // Names (normalized) that already have a contact, so we don't probe them again.
  const namesWithEmail = new Set<string>();
  const namesFromHunter: { firstName: string; lastName: string; title: string | null; seniority: string | null }[] = [];

  // ── Source 1: Hunter.io (25 free/month) ─────────────────────
  // Hunter gives us NAMES + TITLES + EMAILS
  try {
    const hunterResults = await searchHunterContacts(domain);
    for (const h of hunterResults) {
      const email = h.value?.toLowerCase();
      const firstName = h.first_name ?? "";
      const lastName = h.last_name ?? "";
      const fullName = `${firstName} ${lastName}`.trim();
      const hasFullName = Boolean(firstName && lastName);

      // Save name for pattern generation later
      if (hasFullName) {
        namesFromHunter.push({
          firstName,
          lastName,
          title: h.position ?? null,
          seniority: h.seniority ?? null,
        });
      }

      if (email && !seenEmails.has(email)) {
        seenEmails.add(email);
        if (hasFullName) namesWithEmail.add(personNameKey(firstName, lastName));
        contacts.push({
          fullName,
          email,
          title: h.position ?? null,
          seniority: h.seniority ?? null,
          confidence: h.confidence ?? 0,
          source: "hunter",
        });
      }
    }
  } catch (err) {
    logger.warn({ domain, err }, "Hunter search failed — falling back to pattern generation");
  }

  // ── Source 2: Pattern Generator (FREE, UNLIMITED) ──────────
  // Only for people Hunter named but for whom we have no contact yet. If Hunter
  // returned no names, this step does nothing and Source 3 takes over.
  for (const person of namesFromHunter) {
    // Exact (normalized) name match: a substring test would treat "Ann Lee"
    // as already covered by "Joanne Leeds".
    const nameKey = personNameKey(person.firstName, person.lastName);
    if (namesWithEmail.has(nameKey)) continue;

    // Generate email patterns and SMTP verify (FREE)
    const fullName = `${person.firstName} ${person.lastName}`;
    const generated = await findEmailForPerson(fullName, domain);
    if (!generated || generated.smtpStatus !== "deliverable") continue;

    // Lowercase so de-duplication agrees with the lowercased Hunter emails.
    const email = generated.email.toLowerCase();
    if (seenEmails.has(email)) continue;

    seenEmails.add(email);
    namesWithEmail.add(nameKey);
    contacts.push({
      fullName,
      email,
      title: person.title,
      seniority: person.seniority,
      // presumably the generator reports confidence on a 0..1 scale — TODO confirm
      confidence: generated.confidence * 100,
      source: "pattern",
    });
  }

  // ── Source 3: If still no contacts, try common owner patterns ──
  if (contacts.length === 0) {
    // Try generic owner/manager patterns
    const ownerPatterns = ["info", "contact", "hello", "admin"];
    for (const prefix of ownerPatterns) {
      const email = `${prefix}@${domain}`;
      if (!seenEmails.has(email)) {
        seenEmails.add(email);
        contacts.push({
          fullName: "Unknown",
          email,
          title: null,
          seniority: null,
          confidence: 20,
          source: "pattern",
        });
      }
    }
  }

  logger.info({
    domain,
    hunterContacts: contacts.filter(c => c.source === "hunter").length,
    patternContacts: contacts.filter(c => c.source === "pattern").length,
    total: contacts.length,
  }, "Contact finding complete (Hunter + Pattern Generator)");
  return contacts;
}
// ─── Authority check ──────────────────────────────────────────

/** Whole-word, case-insensitive match for job titles that indicate seniority. */
const SENIOR_TITLE_PATTERN = /\b(ceo|cto|coo|cfo|cmo|founder|co-founder|owner|partner|director|vp|vice\s*president|president|head|principal|managing|general\s*manager)\b/i;

/**
 * Decides whether a contact counts as a decision maker, based on the
 * classifier verdict for its email and (for the reason text) its job title.
 *
 * - "personal": always confirmed; the reason mentions the title when it is senior.
 * - "authority": confirmed, citing the email's local part.
 * - "context_verified": confirmed only when classifier confidence is at least 0.7.
 * - "outsourcing" and anything else: not confirmed.
 *
 * @param contact - Raw contact whose title and email are inspected.
 * @param classification - Classifier output for the contact's email.
 * @returns Whether authority is confirmed, plus a human-readable reason.
 */
function checkAuthority(
  contact: RawContact,
  classification: ClassificationResult
): { confirmed: boolean; reason: string } {
  switch (classification.verdict) {
    case "personal":
      // Personal email with senior title → confirmed, with a more specific reason
      if (contact.title && SENIOR_TITLE_PATTERN.test(contact.title)) {
        return { confirmed: true, reason: `Personal email + senior title: ${contact.title}` };
      }
      return { confirmed: true, reason: "Personal email format — likely individual decision maker" };

    case "authority":
      return { confirmed: true, reason: `Authority email prefix: ${contact.email.split("@")[0]}` };

    case "context_verified":
      if (classification.confidence >= 0.7) {
        return { confirmed: true, reason: classification.reason };
      }
      break;

    case "outsourcing":
      return { confirmed: false, reason: "Outsourcing/vendor email — may reach procurement, not decision maker" };
  }
  return { confirmed: false, reason: "Authority not confirmed" };
}
// ─── Save to database ─────────────────────────────────────────

/**
 * Upserts each contact into the "contacts" table, one row at a time,
 * resolving conflicts on (company_id, email).
 *
 * Saving is best-effort: a failure for one contact is logged and the loop
 * continues with the next. Both failure modes are handled — an `error`
 * returned in the upsert result (the Supabase client reports database errors
 * this way instead of throwing) and an exception thrown by the call itself.
 *
 * NOTE(review): the payload includes `id`, so a conflict on
 * (company_id, email) may overwrite the stored row's id with the new one —
 * confirm this is intended.
 *
 * @param contacts - Enriched contacts to persist.
 */
async function saveContacts(contacts: EnrichedContact[]): Promise<void> {
  const db = getSupabaseClient();
  for (const c of contacts) {
    try {
      const { error } = await db.from("contacts").upsert({
        id: c.id,
        company_id: c.companyId,
        full_name: c.fullName,
        title: c.title,
        seniority: c.seniority,
        email: c.email,
        email_verified: c.emailVerification?.status === "verified_deliverable",
        email_tier: c.emailTier,
        email_verification_layers: c.emailVerification?.layers ?? {},
        linkedin_personal_url: c.linkedinPersonalUrl,
        social_profiles: c.socialProfiles ?? {},
        authority_confirmed: c.authorityConfirmed,
        confidence: c.emailVerification?.overallConfidence ?? c.providerConfidence,
        source: c.source,
      }, { onConflict: "company_id,email" });

      // Database errors come back in the result rather than as exceptions.
      if (error) {
        logger.warn({ email: c.email, err: error }, "Contact save failed — continuing");
      }
    } catch (err) {
      logger.warn({ email: c.email, err }, "Contact save failed — continuing");
    }
  }
}