Spaces:
Running
Running
/**
 * 7-Layer Email Verification
 *
 * Layer 1: RFC 5322 format check (instant, free)
 * Layer 2: Domain ownership — email domain = company domain (instant, free)
 * Layer 3: MX record lookup (free, DNS)
 * Layer 4: Catch-all detection (free SMTP probe first, Reoon API as fallback)
 * Layer 5: SMTP handshake — ask mail server "does this user exist?" (free, direct)
 * Layer 6: Disposable email check (free, local list)
 * Layer 7: Provider confidence score (Hunter/Snov score)
 *
 * Layers 1, 2, 3 and 6 produce a boolean; layers 4 and 5 produce a boolean or
 * `null` when the answer could not be determined; layer 7 is a 0-100 score.
 * The final status is computed from all 7.
 */
| import dns from "dns/promises"; | |
| import net from "net"; | |
| import axios from "axios"; | |
| import { getEnv } from "../../shared/config/env"; | |
| import { logger } from "../../shared/utils/logger"; | |
/** Final classification assigned to an email once verification has run. */
export type EmailStatus =
  /** All layers pass. */
  | "verified_deliverable"
  /** Valid, but the domain is a catch-all. */
  | "verified_catch_all"
  /** Pattern-generated address that was confirmed over SMTP. */
  | "pattern_smtp_confirmed"
  /** Some layers pass, some are unknown. */
  | "uncertain"
  /** Hard failure. */
  | "rejected_invalid";
/** Outcome of running the verification layers against one email address. */
export interface VerificationResult {
  /** The address that was checked. */
  email: string;
  /** Final classification. */
  status: EmailStatus;
  /** Raw per-layer outcomes the status was derived from. */
  layers: {
    format: boolean;
    domainMatch: boolean;
    mxRecord: boolean;
    /** `null` means it could not be determined. */
    catchAll: boolean | null;
    smtpHandshake: boolean | null;
    /** `true` means the domain IS disposable (bad). */
    disposable: boolean;
    /** 0-100 score from Hunter/Snov. */
    providerConfidence: number;
  };
  /** 0-100 score computed from the layers. */
  overallConfidence: number;
}
/**
 * Run all 7 verification layers on an email.
 *
 * The cheap, local checks (format, domain ownership, disposable list) run
 * first and short-circuit with `rejected_invalid`, so no DNS/SMTP traffic and
 * no Reoon credit is spent on an address that is going to be rejected anyway.
 *
 * @param email              Address to verify.
 * @param companyDomain      Domain the address is expected to belong to.
 * @param providerConfidence 0-100 score reported by Hunter/Snov; `0` means
 *                           "no provider data". Non-finite values are treated
 *                           as 0 and out-of-range values are clamped to 0-100.
 * @returns The per-layer outcomes together with the final status and an
 *          overall 0-100 confidence.
 */
export async function verifyEmailDeep(
  email: string,
  companyDomain: string,
  providerConfidence: number = 0
): Promise<VerificationResult> {
  const layers: VerificationResult["layers"] = {
    format: false,
    domainMatch: false,
    mxRecord: false,
    catchAll: null,
    smtpHandshake: null,
    disposable: false,
    // Keep the documented 0-100 contract even when the caller passes junk.
    providerConfidence: Number.isFinite(providerConfidence)
      ? Math.min(100, Math.max(0, providerConfidence))
      : 0,
  };

  const emailDomain = email.split("@")[1]?.toLowerCase();
  if (!emailDomain) {
    return makeResult(email, "rejected_invalid", layers, 0);
  }

  // ── Layer 1: Format check ──
  layers.format = isValidFormat(email);
  if (!layers.format) {
    return makeResult(email, "rejected_invalid", layers, 0);
  }

  // ── Layer 2: Domain ownership ──
  layers.domainMatch = isDomainMatch(emailDomain, companyDomain);
  if (!layers.domainMatch) {
    logger.warn({ email, emailDomain, companyDomain }, "Domain mismatch — rejecting");
    return makeResult(email, "rejected_invalid", layers, 0);
  }

  // ── Layer 6: Disposable check ──
  // Runs before any network layer: it is a local lookup and a hit is a hard
  // rejection, so doing it late only wastes SMTP probes and Reoon credits.
  layers.disposable = isDisposable(emailDomain);
  if (layers.disposable) {
    return makeResult(email, "rejected_invalid", layers, 0);
  }

  // ── Layer 3: MX record ──
  layers.mxRecord = await hasMxRecord(emailDomain);
  if (!layers.mxRecord) {
    return makeResult(email, "rejected_invalid", layers, 5);
  }

  // ── Layer 4: Catch-all detection (SMTP probe, then Reoon) ──
  layers.catchAll = await checkCatchAll(emailDomain);

  // ── Layer 5: SMTP handshake ──
  layers.smtpHandshake = await smtpHandshake(email, emailDomain);

  // ── Layer 7: Provider confidence ──
  // Already set from the Hunter/Snov response passed in by the caller.

  // ── Compute final status ──
  return computeFinalStatus(email, layers);
}
// ─── Layer 1: RFC 5322 Format ───
/**
 * Strict-ish RFC 5322 syntax check.
 *
 * Accepts an address only when it is at most 254 characters long, matches the
 * `local@domain` shape below, and its local part is at most 64 characters
 * with no leading, trailing or consecutive dots.
 */
function isValidFormat(email: string): boolean {
  const MAX_ADDRESS_LENGTH = 254;
  const MAX_LOCAL_LENGTH = 64;
  const addressShape = /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/;

  const wellFormed = email.length <= MAX_ADDRESS_LENGTH && addressShape.test(email);
  if (!wellFormed) {
    return false;
  }

  // The shape check guarantees exactly one "@", preceded by a non-empty local part.
  const localPart = email.slice(0, email.indexOf("@"));
  const dotsMisplaced =
    localPart.startsWith(".") || localPart.endsWith(".") || localPart.includes("..");

  return localPart.length <= MAX_LOCAL_LENGTH && !dotsMisplaced;
}
// ─── Layer 2: Domain Match ───
/**
 * Decide whether an email's domain belongs to the given company domain.
 *
 * Both inputs are normalised before comparison: surrounding whitespace is
 * removed first (so a padded "www." prefix is still stripped), then case, a
 * URL scheme, any path/query/fragment, a port, a leading "www." and a trailing
 * dot are dropped. This lets `companyDomain` be a bare domain or a website URL.
 *
 * @param emailDomain   Domain part of the email address.
 * @param companyDomain Company domain or website URL to compare against.
 * @returns `true` for an exact match or when the email domain is a subdomain
 *          of the company domain (e.g. mail.company.com → company.com);
 *          `false` otherwise, including when either side normalises to "".
 */
function isDomainMatch(emailDomain: string, companyDomain: string): boolean {
  const normalize = (raw: string): string =>
    raw
      .trim()
      .toLowerCase()
      .replace(/^[a-z][a-z0-9+.-]*:\/\//, "") // scheme, e.g. "https://"
      .replace(/[/?#].*$/, "") // path, query string, fragment
      .replace(/:\d+$/, "") // port
      .replace(/^www\./, "")
      .replace(/\.$/, ""); // fully-qualified trailing dot

  const eDomain = normalize(emailDomain);
  const cDomain = normalize(companyDomain);

  // Two empty strings must not count as "the same domain".
  if (eDomain === "" || cDomain === "") return false;

  // Exact match
  if (eDomain === cDomain) return true;

  // Subdomain match; the leading dot prevents "evilcompany.com" matching "company.com".
  return eDomain.endsWith(`.${cDomain}`);
}
// ─── Layer 3: MX Record ───
/**
 * Check whether a domain publishes at least one usable MX record.
 *
 * A "Null MX" record (RFC 7505: a single MX whose exchange is the root, seen
 * here as "" or ".") explicitly declares that the domain accepts no mail, so
 * it does not count as a usable record.
 *
 * @param domain Domain to look up.
 * @returns `true` when a usable MX exchange exists; `false` when there is none
 *          or when the lookup fails for any reason.
 */
async function hasMxRecord(domain: string): Promise<boolean> {
  try {
    const records = await dns.resolveMx(domain);
    return records.some((record) => record.exchange !== "" && record.exchange !== ".");
  } catch {
    // NOTE(review): transient resolver errors (timeouts, SERVFAIL) land here
    // too and are reported as "no MX" — the boolean return type has no way to
    // express "unknown". Confirm this is acceptable for callers.
    return false;
  }
}
// ─── Layer 4: Catch-All Detection (CREDIT-OPTIMIZED) ───
// Strategy: try the FREE SMTP probe first → only use Reoon if SMTP can't determine.
// This saves Reoon credits (only 20/day) for when they're truly needed.
let _reoonUsedToday = 0;
let _reoonResetDate = new Date().toDateString();
const REOON_DAILY_LIMIT = 18; // keep 2 credits as buffer

/**
 * Determine whether a domain accepts mail for any recipient (catch-all).
 *
 * Attempt 1 sends an SMTP `RCPT TO` for a gibberish address: accepted means
 * catch-all, rejected means not catch-all. Only when that probe is
 * inconclusive does attempt 2 spend a Reoon credit, subject to the in-process
 * daily budget tracked in the module-level counters above.
 *
 * @param domain Domain to probe.
 * @returns `true` for catch-all, `false` for not catch-all, `null` when
 *          neither method could decide (probe inconclusive and Reoon skipped,
 *          unconfigured, failed, or returned an unrecognised status).
 */
async function checkCatchAll(domain: string): Promise<boolean | null> {
  // ── Attempt 1: FREE SMTP catch-all probe ──
  try {
    const fakeEmail = `xqz7k2m4n_test_${Date.now() % 10000}@${domain}`;
    const smtpResult = await smtpHandshake(fakeEmail, domain);
    if (smtpResult === true) {
      // Server accepted a gibberish address → catch-all
      logger.debug({ domain }, "Catch-all detected via FREE SMTP probe (Reoon credit saved)");
      return true;
    }
    if (smtpResult === false) {
      // Server rejected the gibberish address → not catch-all
      logger.debug({ domain }, "NOT catch-all — confirmed via FREE SMTP probe");
      return false;
    }
    // smtpResult === null → SMTP couldn't determine, fall through to Reoon
  } catch (err: unknown) {
    logger.debug(
      { domain, error: err instanceof Error ? err.message : String(err) },
      "SMTP catch-all probe failed — falling back to Reoon"
    );
  }

  // ── Attempt 2: Reoon API (only if SMTP couldn't determine) ──
  // Reset the counter when the local calendar day changes.
  const today = new Date().toDateString();
  if (_reoonResetDate !== today) {
    _reoonUsedToday = 0;
    _reoonResetDate = today;
  }

  // Check budget
  if (_reoonUsedToday >= REOON_DAILY_LIMIT) {
    logger.warn({ domain, used: _reoonUsedToday }, "Reoon daily limit reached — skipping");
    return null;
  }

  try {
    const env = getEnv();
    if (!env.REOON_API_KEY) {
      // Without a key the call cannot succeed; don't count it against the budget.
      logger.warn({ domain }, "REOON_API_KEY not configured — skipping Reoon catch-all check");
      return null;
    }

    // Counted before the await so concurrent callers cannot overshoot the budget.
    _reoonUsedToday++;
    const response = await axios.get("https://emailverifier.reoon.com/api/v1/verify", {
      params: {
        email: `definitely_not_real_${Date.now()}@${domain}`,
        key: env.REOON_API_KEY,
        mode: "quick",
      },
      timeout: 8_000,
    });
    logger.debug({ domain, reoonUsed: _reoonUsedToday }, "Reoon credit used for catch-all check");

    // NOTE(review): this assumes Reoon's "quick" mode actually tests mailbox
    // acceptance; if quick mode only checks syntax/MX, a fake address would
    // come back "valid" for every domain — confirm against the Reoon API docs.
    const status: unknown = response.data?.status;
    if (status === "valid") return true; // fake address accepted → catch-all
    if (status === "invalid") return false; // fake address rejected → not catch-all
    return null; // any other status tells us nothing about catch-all behaviour
  } catch (err: unknown) {
    // Log the message only: the full axios error carries the request config,
    // which includes the API key.
    logger.warn(
      { domain, error: err instanceof Error ? err.message : String(err) },
      "Reoon catch-all check failed"
    );
    return null;
  }
}
// ─── Layer 5: SMTP Handshake ───
/**
 * Ask a domain's primary mail server whether it would accept mail for `email`,
 * without sending a message (EHLO → MAIL FROM → RCPT TO → QUIT).
 *
 * Replies are buffered and only acted on once a complete reply has arrived (a
 * line of the form "NNN text"; "NNN-text" lines are continuations). Acting on
 * raw TCP chunks would let a multi-line EHLO reply split across chunks advance
 * the conversation early and mistake an earlier "250" for the RCPT verdict.
 *
 * @param email  Address to test. Addresses containing whitespace, CR/LF, "<"
 *               or ">" are refused up front so they cannot inject SMTP commands.
 * @param domain Domain whose MX records are used to pick the server.
 * @returns `true` when RCPT TO is accepted (250/251), `false` when it is
 *          rejected with 550/553, and `null` whenever the answer cannot be
 *          determined (no usable MX, connection failure, timeout, unexpected
 *          or temporary reply such as greylisting).
 */
async function smtpHandshake(email: string, domain: string): Promise<boolean | null> {
  const SMTP_PORT = 25;
  const CONVERSATION_TIMEOUT_MS = 10_000;
  const MAX_BUFFERED_CHARS = 64 * 1024;

  // The address is interpolated into an SMTP command line below.
  if (/[\s<>]/.test(email)) return null;

  let mailServer: string;
  try {
    const mxRecords = await dns.resolveMx(domain);
    // Ignore Null MX entries (RFC 7505): there is no host to connect to.
    const usable = mxRecords.filter((mx) => mx.exchange !== "" && mx.exchange !== ".");
    if (usable.length === 0) return null;
    // Highest priority = lowest preference number; the first one wins on ties.
    mailServer = usable.reduce((best, mx) => (mx.priority < best.priority ? mx : best)).exchange;
  } catch {
    return null; // can't determine
  }

  return new Promise<boolean | null>((resolve) => {
    const socket = new net.Socket();
    let step = 0; // 0 = awaiting greeting, 1 = EHLO sent, 2 = MAIL FROM sent, 3 = RCPT TO sent
    let buffered = "";
    let settled = false;

    const timer = setTimeout(() => finish(null, false), CONVERSATION_TIMEOUT_MS);

    /** Resolve exactly once and always release the timer and the socket. */
    function finish(verdict: boolean | null, sayGoodbye: boolean): void {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      if (sayGoodbye && socket.writable) {
        socket.write("QUIT\r\n");
      }
      socket.destroy();
      resolve(verdict);
    }

    /** Remove one complete reply from the buffer and return its code, or null if none is complete yet. */
    function takeReplyCode(): number | null {
      let offset = 0;
      for (;;) {
        const eol = buffered.indexOf("\n", offset);
        if (eol === -1) return null;
        const line = buffered.slice(offset, eol);
        offset = eol + 1;
        const finalLine = /^(\d{3})(?:[ \r]|$)/.exec(line);
        if (finalLine) {
          buffered = buffered.slice(offset);
          return Number(finalLine[1]);
        }
        // Otherwise it is a continuation line ("250-...") — keep scanning.
      }
    }

    /** Advance the conversation according to the reply code for the current step. */
    function onReply(code: number): void {
      if (step === 0) {
        if (code !== 220) {
          finish(null, true);
          return;
        }
        socket.write("EHLO verify.local\r\n");
        step = 1;
      } else if (step === 1) {
        if (code !== 250) {
          finish(null, true);
          return;
        }
        socket.write("MAIL FROM:<verify@verify.local>\r\n");
        step = 2;
      } else if (step === 2) {
        if (code !== 250) {
          finish(null, true);
          return;
        }
        // The actual check
        socket.write(`RCPT TO:<${email}>\r\n`);
        step = 3;
      } else if (code === 250 || code === 251) {
        finish(true, true); // recipient accepted
      } else if (code === 550 || code === 553) {
        finish(false, true); // mailbox rejected
      } else {
        // 4xx greylisting, policy blocks, etc. say nothing about the mailbox.
        finish(null, true);
      }
    }

    socket.on("data", (chunk) => {
      buffered += chunk.toString();
      if (buffered.length > MAX_BUFFERED_CHARS) {
        finish(null, false); // misbehaving server; don't buffer without bound
        return;
      }
      while (!settled) {
        const code = takeReplyCode();
        if (code === null) break;
        onReply(code);
      }
    });
    socket.on("error", () => finish(null, false)); // can't determine
    socket.on("close", () => finish(null, false)); // server hung up before a verdict

    socket.connect(SMTP_PORT, mailServer);
  });
}
// ─── Layer 6: Disposable Email ───
const DISPOSABLE_DOMAINS = new Set([
  "mailinator.com", "tempmail.com", "throwaway.email", "guerrillamail.com",
  "guerrillamail.info", "yopmail.com", "trashmail.com", "maildrop.cc",
  "10minutemail.com", "temp-mail.org", "fakeinbox.com", "sharklasers.com",
  "guerrillamail.net", "grr.la", "dispostable.com", "tempr.email",
  "mohmal.com", "burpcollaborator.net", "mailnesia.com",
]);

/**
 * Check a domain against the local disposable-provider list.
 *
 * The domain itself and each of its parent domains are looked up, so
 * subdomains of a listed provider (e.g. `abc123.burpcollaborator.net`) are
 * caught as well. Comparison is case-insensitive and ignores surrounding
 * whitespace and a trailing dot.
 *
 * @param domain Domain part of an email address.
 * @returns `true` when the domain is, or sits under, a listed disposable domain.
 */
function isDisposable(domain: string): boolean {
  const labels = domain.trim().toLowerCase().replace(/\.$/, "").split(".");
  for (let start = 0; start < labels.length; start++) {
    if (DISPOSABLE_DOMAINS.has(labels.slice(start).join("."))) {
      return true;
    }
  }
  return false;
}
// ─── Final Status Computation ───
/**
 * Derive the final status and overall confidence from the layer outcomes.
 *
 * Hard failures (bad format, domain mismatch, no MX, disposable domain) are
 * rejected up front so that none of the positive branches below can be reached
 * with a failed prerequisite. The provider score is clamped to 0-100 (and a
 * non-finite value treated as 0) before it is used.
 *
 * @param email  Address the layers were computed for.
 * @param layers Per-layer outcomes.
 * @returns The result object built by `makeResult`.
 */
function computeFinalStatus(
  email: string,
  layers: VerificationResult["layers"]
): VerificationResult {
  // Any failed prerequisite is a hard rejection, whatever the other layers say.
  if (!layers.format || !layers.domainMatch || !layers.mxRecord || layers.disposable) {
    return makeResult(email, "rejected_invalid", layers, 0);
  }

  const providerScore = Number.isFinite(layers.providerConfidence)
    ? Math.min(100, Math.max(0, layers.providerConfidence))
    : 0;
  const smtpConfirmed = layers.smtpHandshake === true;

  // All layers pass (including SMTP). An undetermined catch-all (null) is
  // treated the same as "not catch-all" here.
  if (smtpConfirmed && !layers.catchAll) {
    const providerBonus = providerScore > 0 ? Math.round(providerScore * 0.35) : 15;
    return makeResult(email, "verified_deliverable", layers, Math.min(95, 60 + providerBonus));
  }

  // Catch-all domain — uncertain but not invalid
  if (layers.catchAll === true) {
    return makeResult(email, "verified_catch_all", layers, 45);
  }

  // SMTP confirmed but no provider data.
  // NOTE(review): unreachable as written — every input with a confirmed SMTP
  // handshake is already returned by one of the two branches above, so
  // "pattern_smtp_confirmed" is never produced. Confirm whether this case was
  // meant to take precedence over "verified_deliverable".
  if (smtpConfirmed && providerScore === 0) {
    return makeResult(email, "pattern_smtp_confirmed", layers, 70);
  }

  // MX exists, provider says good, SMTP unknown
  if (providerScore >= 70 && layers.smtpHandshake === null) {
    return makeResult(email, "verified_deliverable", layers, providerScore);
  }

  // MX exists but everything else uncertain
  return makeResult(email, "uncertain", layers, 30);
}
/**
 * Bundle an email, its final status, the layer outcomes and the overall
 * confidence into a `VerificationResult`. The `layers` object is stored by
 * reference, not copied.
 */
function makeResult(
  email: string,
  status: EmailStatus,
  layers: VerificationResult["layers"],
  overallConfidence: number
): VerificationResult {
  const result: VerificationResult = {
    email: email,
    status: status,
    layers: layers,
    overallConfidence: overallConfidence,
  };
  return result;
}