/**
 * 7-Layer Email Verification
 *
 * Layer 1: RFC 5322 format check (instant, free)
 * Layer 2: Domain ownership — email domain = company domain (instant, free)
 * Layer 3: MX record lookup (free, DNS)
 * Layer 4: Catch-all detection (free SMTP probe first, Reoon API as fallback)
 * Layer 5: SMTP handshake — ask mail server "does this user exist?" (free, direct)
 * Layer 6: Disposable email check (free, local list)
 * Layer 7: Provider confidence score (Hunter/Snov score)
 *
 * Each layer records its outcome in `VerificationResult.layers`; the final
 * status is computed from all 7.
 */

import dns from "dns/promises";
import net from "net";
import axios from "axios";
import { getEnv } from "../../shared/config/env";
import { logger } from "../../shared/utils/logger";

export type EmailStatus =
  | "verified_deliverable" // all layers pass
  | "verified_catch_all" // valid but catch-all domain
  | "pattern_smtp_confirmed" // pattern-generated + SMTP confirmed
  | "uncertain" // some layers pass, some unknown
  | "rejected_invalid"; // hard failure

export interface VerificationResult {
  email: string;
  status: EmailStatus;
  layers: {
    format: boolean;
    domainMatch: boolean;
    mxRecord: boolean;
    catchAll: boolean | null; // null = couldn't determine
    smtpHandshake: boolean | null; // null = couldn't determine
    disposable: boolean; // true = IS disposable (bad)
    providerConfidence: number; // 0-100 from Hunter/Snov
  };
  overallConfidence: number; // 0-100 computed from layers
}

/**
 * Run all 7 verification layers on an email.
 *
 * The cheap local checks (format, domain match, disposable list) run before any
 * network probe, so an address that is going to be rejected anyway never costs
 * a DNS lookup, an SMTP connection or a Reoon credit.
 *
 * @param email - Address to verify.
 * @param companyDomain - Domain the address is expected to belong to.
 * @param providerConfidence - 0-100 score from Hunter/Snov; 0 means "no provider data".
 * @returns The per-layer outcomes plus the derived status and confidence.
 */
export async function verifyEmailDeep(
  email: string,
  companyDomain: string,
  providerConfidence: number = 0
): Promise<VerificationResult> {
  const layers: VerificationResult["layers"] = {
    format: false,
    domainMatch: false,
    mxRecord: false,
    catchAll: null,
    smtpHandshake: null,
    disposable: false,
    providerConfidence,
  };

  const emailDomain = email.split("@")[1]?.toLowerCase();
  if (!emailDomain) {
    return makeResult(email, "rejected_invalid", layers, 0);
  }

  // ── Layer 1: Format check ──────────────────────────────────
  layers.format = isValidFormat(email);
  if (!layers.format) {
    return makeResult(email, "rejected_invalid", layers, 0);
  }

  // ── Layer 2: Domain ownership ──────────────────────────────
  layers.domainMatch = isDomainMatch(emailDomain, companyDomain);
  if (!layers.domainMatch) {
    logger.warn({ email, emailDomain, companyDomain }, "Domain mismatch — rejecting");
    return makeResult(email, "rejected_invalid", layers, 0);
  }

  // ── Layer 6: Disposable check ──────────────────────────────
  // Evaluated ahead of layers 3-5 because it is a local set lookup and a hit
  // is a hard rejection regardless of what the network probes would say.
  layers.disposable = isDisposable(emailDomain);
  if (layers.disposable) {
    return makeResult(email, "rejected_invalid", layers, 0);
  }

  // ── Layer 3: MX record ────────────────────────────────────
  layers.mxRecord = await hasMxRecord(emailDomain);
  if (!layers.mxRecord) {
    return makeResult(email, "rejected_invalid", layers, 5);
  }

  // ── Layer 4: Catch-all detection (SMTP probe, then Reoon) ──
  layers.catchAll = await checkCatchAll(emailDomain);

  // ── Layer 5: SMTP handshake ─────────────────────────────────
  layers.smtpHandshake = await smtpHandshake(email, emailDomain);

  // ── Layer 7: Provider confidence ──────────────────────────
  // Already set from Hunter/Snov response

  // ── Compute final status ───────────────────────────────────
  return computeFinalStatus(email, layers);
}

// ─── Layer 1: RFC 5322 Format ────────────────────────────────

// Strict-ish RFC 5322 check: unquoted local part, dot-separated DNS labels of
// at most 63 characters that neither start nor end with a hyphen.
const EMAIL_PATTERN =
  /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/;

/**
 * Syntactic validation of an address.
 *
 * @returns `true` when the address matches the pattern, is at most 254
 * characters long, and its local part is at most 64 characters with no
 * leading, trailing or consecutive dots.
 */
function isValidFormat(email: string): boolean {
  if (!EMAIL_PATTERN.test(email)) return false;
  if (email.length > 254) return false;

  const local = email.split("@")[0] ?? "";
  if (local.length > 64) return false;
  if (local.startsWith(".") || local.endsWith(".")) return false;
  if (local.includes("..")) return false;

  return true;
}

// ─── Layer 2: Domain Match ──────────────────────────────────

/**
 * Decide whether the email's domain belongs to the company domain.
 *
 * Both sides are trimmed, lower-cased and stripped of a leading `www.` before
 * comparison. An empty domain on either side never matches.
 *
 * @returns `true` for an exact match or when the email domain is a subdomain
 * of the company domain (e.g., mail.company.com → company.com).
 */
function isDomainMatch(emailDomain: string, companyDomain: string): boolean {
  // Trim first: otherwise leading whitespace hides the "www." prefix from the regex.
  const normalize = (d: string): string => d.trim().toLowerCase().replace(/^www\./, "");

  const eDomain = normalize(emailDomain);
  const cDomain = normalize(companyDomain);
  if (!eDomain || !cDomain) return false;

  // Exact match
  if (eDomain === cDomain) return true;

  // Subdomain match (e.g., mail.company.com → company.com)
  return eDomain.endsWith(`.${cDomain}`);
}

// ─── Layer 3: MX Record ─────────────────────────────────────

/**
 * Check that the domain publishes at least one MX record.
 *
 * @returns `false` when the lookup fails for any reason or returns no records.
 */
async function hasMxRecord(domain: string): Promise<boolean> {
  try {
    const records = await dns.resolveMx(domain);
    return records.length > 0;
  } catch {
    return false;
  }
}

// ─── Layer 4: Catch-All Detection (CREDIT-OPTIMIZED) ────────
// Strategy: Try FREE SMTP probe first → only use Reoon if SMTP can't determine
// This saves Reoon credits (only 20/day) for when they're truly needed

let _reoonUsedToday = 0;
let _reoonResetDate = new Date().toDateString();
const REOON_DAILY_LIMIT = 18; // keep 2 credits as buffer

/**
 * Determine whether a domain accepts mail for any local part.
 *
 * @returns `true` = catch-all, `false` = not catch-all, `null` = couldn't
 * determine (SMTP inconclusive and Reoon unavailable or over budget).
 */
async function checkCatchAll(domain: string): Promise<boolean | null> {
  // ── Attempt 1: FREE SMTP catch-all probe ───────────────────
  // Send RCPT TO with a random gibberish address.
  // If server accepts it → catch-all. If 550 → NOT catch-all.
  try {
    const fakeEmail = `xqz7k2m4n_test_${Date.now() % 10000}@${domain}`;
    const smtpResult = await smtpHandshake(fakeEmail, domain);

    if (smtpResult === true) {
      // Server accepted gibberish email → CATCH-ALL
      logger.debug({ domain }, "Catch-all detected via FREE SMTP probe (Reoon credit saved)");
      return true;
    }
    if (smtpResult === false) {
      // Server rejected gibberish email → NOT catch-all
      logger.debug({ domain }, "NOT catch-all — confirmed via FREE SMTP probe");
      return false;
    }
    // smtpResult === null → SMTP couldn't determine, fall through to Reoon
  } catch {
    // SMTP probe failed, fall through to Reoon
  }

  // ── Attempt 2: Reoon API (only if SMTP couldn't determine) ─
  // Reset counter if new day
  const today = new Date().toDateString();
  if (_reoonResetDate !== today) {
    _reoonUsedToday = 0;
    _reoonResetDate = today;
  }

  // Check budget
  if (_reoonUsedToday >= REOON_DAILY_LIMIT) {
    logger.warn({ domain, used: _reoonUsedToday }, "Reoon daily limit reached — skipping");
    return null;
  }

  try {
    const env = getEnv();
    // Counted before the request on purpose: a call that times out on our side
    // may still have been billed, so over-counting is the safe direction.
    _reoonUsedToday++;
    const response = await axios.get("https://emailverifier.reoon.com/api/v1/verify", {
      params: {
        email: `definitely_not_real_${Date.now()}@${domain}`,
        key: env.REOON_API_KEY,
        mode: "quick",
      },
      timeout: 8_000,
    });

    logger.debug({ domain, reoonUsed: _reoonUsedToday }, "Reoon credit used for catch-all check");
    // NOTE(review): this treats "valid" for a made-up address as catch-all.
    // Confirm against the Reoon docs that "quick" mode actually probes the
    // mailbox; if it only checks syntax/MX this would flag every domain.
    return response.data?.status === "valid";
  } catch (err: unknown) {
    logger.warn(
      { domain, error: err instanceof Error ? err.message : String(err) },
      "Reoon catch-all check failed"
    );
    return null;
  }
}

// ─── Layer 5: SMTP Handshake ────────────────────────────────

const SMTP_PORT = 25;
const SMTP_TIMEOUT_MS = 10_000;
const SMTP_HELO_NAME = "verify.local";
// MAIL FROM requires a reverse-path in angle brackets; a bare "MAIL FROM:" is a
// syntax error that servers may answer with 501.
const SMTP_PROBE_SENDER = "verify@verify.local";
// Last line of an SMTP reply: three digits followed by a space or end of line.
// Continuation lines use "NNN-" and are skipped.
const SMTP_FINAL_REPLY_LINE = /^\d{3}(?: |$)/;

/**
 * Ask the domain's preferred mail server whether it would accept `email`,
 * without sending a message (EHLO → MAIL FROM → RCPT TO → QUIT).
 *
 * @param email - Recipient address to probe.
 * @param domain - Domain whose MX records select the server to contact.
 * @returns `true` when RCPT TO is answered with 250, `false` when it is
 * answered with 550 or 553, and `null` whenever the outcome can't be
 * determined: no MX, DNS or connection error, timeout, the server closing the
 * connection, an unexpected reply earlier in the dialogue, or any other RCPT
 * reply code (e.g. a temporary 4xx).
 */
async function smtpHandshake(email: string, domain: string): Promise<boolean | null> {
  let mailServer: string;
  try {
    // Resolve MX to get mail server
    const mxRecords = await dns.resolveMx(domain);
    // Pick highest priority (lowest number); sort a copy, not the resolver's array.
    const preferred = [...mxRecords].sort((a, b) => a.priority - b.priority)[0];
    if (!preferred) return null;
    mailServer = preferred.exchange;
  } catch {
    return null; // can't determine
  }

  return new Promise<boolean | null>((resolve) => {
    const socket = new net.Socket();
    let step = 0;
    let buffered = "";
    let settled = false;

    // Single exit point: resolves once and always releases the socket.
    const finish = (result: boolean | null, sayGoodbye = false): void => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      if (sayGoodbye) {
        // Destroy only after QUIT has been flushed so it is actually sent.
        socket.end("QUIT\r\n", () => socket.destroy());
      } else {
        socket.destroy();
      }
      resolve(result);
    };

    const timer = setTimeout(() => finish(null), SMTP_TIMEOUT_MS);

    // Advance the dialogue on one complete reply.
    const handleReply = (code: string): void => {
      if (step === 0 && code === "220") {
        // Server greeting → send EHLO
        socket.write(`EHLO ${SMTP_HELO_NAME}\r\n`);
        step = 1;
      } else if (step === 1 && code === "250") {
        // EHLO accepted → send MAIL FROM
        socket.write(`MAIL FROM:<${SMTP_PROBE_SENDER}>\r\n`);
        step = 2;
      } else if (step === 2 && code === "250") {
        // MAIL FROM accepted → send RCPT TO (the actual check)
        socket.write(`RCPT TO:<${email}>\r\n`);
        step = 3;
      } else if (step === 3) {
        if (code === "250") {
          finish(true, true); // 250 = user exists!
        } else if (code === "550" || code === "553") {
          finish(false, true); // 550/553 = user doesn't exist
        } else {
          finish(null, true); // greylisting, policy block, etc. — not a verdict
        }
      } else {
        // Unexpected reply before RCPT TO: no point waiting for the timeout.
        finish(null, true);
      }
    };

    socket.on("data", (chunk) => {
      // Replies can be multi-line and split across TCP chunks, so buffer the
      // stream and react only to the final line of each reply.
      buffered += chunk.toString();
      let newlineAt = buffered.indexOf("\n");
      while (newlineAt !== -1 && !settled) {
        const line = buffered.slice(0, newlineAt).replace(/\r$/, "");
        buffered = buffered.slice(newlineAt + 1);
        if (SMTP_FINAL_REPLY_LINE.test(line)) {
          handleReply(line.slice(0, 3));
        }
        newlineAt = buffered.indexOf("\n");
      }
    });

    socket.on("error", () => finish(null)); // can't determine
    socket.on("close", () => finish(null)); // server hung up mid-dialogue

    socket.connect(SMTP_PORT, mailServer);
  });
}

// ─── Layer 6: Disposable Email ──────────────────────────────

const DISPOSABLE_DOMAINS: ReadonlySet<string> = new Set<string>([
  "mailinator.com",
  "tempmail.com",
  "throwaway.email",
  "guerrillamail.com",
  "guerrillamail.info",
  "yopmail.com",
  "trashmail.com",
  "maildrop.cc",
  "10minutemail.com",
  "temp-mail.org",
  "fakeinbox.com",
  "sharklasers.com",
  "guerrillamail.net",
  "grr.la",
  "dispostable.com",
  "tempr.email",
  "mohmal.com",
  "burpcollaborator.net",
  "mailnesia.com",
]);

/** @returns `true` when the domain (case-insensitive, exact match) is on the local disposable list. */
function isDisposable(domain: string): boolean {
  return DISPOSABLE_DOMAINS.has(domain.toLowerCase());
}

// ─── Final Status Computation ────────────────────────────────

/**
 * Fold the per-layer outcomes into a status and a 0-100 confidence.
 * Rules are evaluated top to bottom; the first one that matches wins.
 */
function computeFinalStatus(
  email: string,
  layers: VerificationResult["layers"]
): VerificationResult {
  // All layers pass (including SMTP); an undetermined catch-all (null) counts as "not catch-all"
  if (
    layers.format &&
    layers.domainMatch &&
    layers.mxRecord &&
    layers.smtpHandshake === true &&
    !layers.disposable &&
    !layers.catchAll
  ) {
    const confidence = Math.min(
      95,
      60 + (layers.providerConfidence > 0 ? Math.round(layers.providerConfidence * 0.35) : 15)
    );
    return makeResult(email, "verified_deliverable", layers, confidence);
  }

  // Catch-all domain — uncertain but not invalid
  if (layers.catchAll === true && layers.mxRecord) {
    return makeResult(email, "verified_catch_all", layers, 45);
  }

  // SMTP confirmed but no provider data
  // NOTE(review): when called from verifyEmailDeep (format, domainMatch and
  // mxRecord true, disposable false) a confirmed SMTP result is always claimed
  // by one of the two rules above, so this rule never fires. If
  // "pattern_smtp_confirmed" is meant to be produced, it has to move above the
  // first rule — confirm intent before reordering.
  if (layers.smtpHandshake === true && layers.providerConfidence === 0) {
    return makeResult(email, "pattern_smtp_confirmed", layers, 70);
  }

  // MX exists, provider says good, SMTP unknown
  if (layers.mxRecord && layers.providerConfidence >= 70 && layers.smtpHandshake === null) {
    return makeResult(email, "verified_deliverable", layers, layers.providerConfidence);
  }

  // MX exists but everything else uncertain
  if (layers.mxRecord && !layers.disposable) {
    return makeResult(email, "uncertain", layers, 30);
  }

  return makeResult(email, "rejected_invalid", layers, 0);
}

/** Assemble a `VerificationResult` from its four parts. */
function makeResult(
  email: string,
  status: EmailStatus,
  layers: VerificationResult["layers"],
  overallConfidence: number
): VerificationResult {
  return { email, status, layers, overallConfidence };
}