clienttarget-python / src /discovery /lib /email-verifier.ts
iDevBuddy
feat: Phase 1 — AI Client Acquisition System
bd28470
/**
* 7-Layer Email Verification
*
* Layer 1: RFC 5322 format check (instant, free)
* Layer 2: Domain ownership — email domain = company domain (instant, free)
* Layer 3: MX record lookup (free, DNS)
* Layer 4: Catch-all detection (free SMTP probe first, Reoon API as fallback)
* Layer 5: SMTP handshake — ask mail server "does this user exist?" (free, direct)
* Layer 6: Disposable email check (free, local list)
* Layer 7: Provider confidence score (Hunter/Snov score)
*
* Layers 1-3 and 6 produce a boolean, layers 4-5 a boolean or null (undetermined), and layer 7 a 0-100 score. Final status is computed from all 7.
*/
import dns from "dns/promises";
import net from "net";
import axios from "axios";
import { getEnv } from "../../shared/config/env";
import { logger } from "../../shared/utils/logger";
/**
 * Final verdict for a verified address, as carried in `VerificationResult.status`.
 */
export type EmailStatus =
| "verified_deliverable" // all layers pass, or MX exists and provider score >= 70 while SMTP was inconclusive
| "verified_catch_all" // MX exists but the domain is catch-all, so the mailbox itself is unconfirmed
| "pattern_smtp_confirmed" // pattern-generated + SMTP confirmed (provider score is 0)
| "uncertain" // MX exists and the domain is not disposable, but nothing stronger was established
| "rejected_invalid"; // hard failure (bad format, domain mismatch, no MX, disposable domain)
/**
 * Outcome of a deep verification run for a single address: the final status,
 * the raw per-layer results it was derived from, and a 0-100 confidence.
 */
export interface VerificationResult {
email: string; // the address that was verified, as passed in
status: EmailStatus; // final verdict
layers: {
format: boolean; // layer 1: passed the format check
domainMatch: boolean; // layer 2: email domain equals, or is a subdomain of, the company domain
mxRecord: boolean; // layer 3: MX lookup found a mail exchanger for the domain
catchAll: boolean | null; // layer 4: true = catch-all domain; null = couldn't determine
smtpHandshake: boolean | null; // layer 5: true = recipient accepted; null = couldn't determine
disposable: boolean; // layer 6: true = IS disposable (bad)
providerConfidence: number; // layer 7: 0-100 from Hunter/Snov
};
overallConfidence: number; // 0-100 computed from layers
}
/**
 * Run all 7 verification layers on an email.
 *
 * Layers run cheapest-first and the first hard failure short-circuits with
 * `rejected_invalid`. In particular the local disposable-domain lookup (layer 6)
 * runs right after the MX check, so a disposable address never costs an SMTP
 * connection or a rate-limited Reoon credit; for such addresses `catchAll` and
 * `smtpHandshake` are left as `null`.
 *
 * @param email - Address to verify.
 * @param companyDomain - Domain the address is expected to belong to.
 * @param providerConfidence - 0-100 score from the upstream provider (Hunter/Snov);
 *   defaults to 0 when no provider score is available.
 * @returns Per-layer outcomes plus the final status and overall confidence.
 */
export async function verifyEmailDeep(
  email: string,
  companyDomain: string,
  providerConfidence: number = 0
): Promise<VerificationResult> {
  const layers = {
    format: false,
    domainMatch: false,
    mxRecord: false,
    catchAll: null as boolean | null,
    smtpHandshake: null as boolean | null,
    disposable: false,
    providerConfidence,
  };

  // Without a domain part nothing else can be checked.
  const emailDomain = email.split("@")[1]?.toLowerCase();
  if (!emailDomain) {
    return makeResult(email, "rejected_invalid", layers, 0);
  }

  // ── Layer 1: Format check ──────────────────────────────────
  layers.format = isValidFormat(email);
  if (!layers.format) {
    return makeResult(email, "rejected_invalid", layers, 0);
  }

  // ── Layer 2: Domain ownership ──────────────────────────────
  layers.domainMatch = isDomainMatch(emailDomain, companyDomain);
  if (!layers.domainMatch) {
    logger.warn({ email, emailDomain, companyDomain }, "Domain mismatch — rejecting");
    return makeResult(email, "rejected_invalid", layers, 0);
  }

  // ── Layer 3: MX record ────────────────────────────────────
  layers.mxRecord = await hasMxRecord(emailDomain);
  if (!layers.mxRecord) {
    return makeResult(email, "rejected_invalid", layers, 5);
  }

  // ── Layer 6: Disposable check ──────────────────────────────
  // Local and free, so it runs before the slow / metered layers 4 and 5.
  layers.disposable = isDisposable(emailDomain);
  if (layers.disposable) {
    return makeResult(email, "rejected_invalid", layers, 0);
  }

  // ── Layer 4: Catch-all detection (SMTP probe, then Reoon) ──
  layers.catchAll = await checkCatchAll(emailDomain);

  // ── Layer 5: SMTP handshake ─────────────────────────────────
  layers.smtpHandshake = await smtpHandshake(email, emailDomain);

  // ── Layer 7: Provider confidence ──────────────────────────
  // Already set from the Hunter/Snov response passed in by the caller.

  // ── Compute final status ───────────────────────────────────
  return computeFinalStatus(email, layers);
}
// ─── Layer 1: RFC 5322 Format ────────────────────────────────
/**
 * Syntactic check of an email address (a strict-ish subset of RFC 5322).
 *
 * Accepts an address only when it matches the overall shape
 * `local@label(.label)*`, is at most 254 characters long, and its local part is
 * at most 64 characters with no leading, trailing, or consecutive dots.
 *
 * @param email - Address to check.
 * @returns `true` when every rule above holds.
 */
function isValidFormat(email: string): boolean {
  const addressShape = /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/;

  if (email.length > 254 || !addressShape.test(email)) {
    return false;
  }

  // The shape check guarantees exactly one "@", so everything before it is the local part.
  const localPart = email.slice(0, email.indexOf("@"));
  const dotsMisplaced =
    localPart.startsWith(".") || localPart.endsWith(".") || localPart.includes("..");

  return localPart.length <= 64 && !dotsMisplaced;
}
// ─── Layer 2: Domain Match ──────────────────────────────────
/**
 * Check that an email's domain belongs to the company's domain.
 *
 * Both values are normalized before comparison: surrounding whitespace, letter
 * case, a URL scheme (`https://`), any path/query/fragment, a port, a leading
 * `www.` and a trailing dot are all ignored. This lets the company domain be
 * supplied as a bare host (`acme.com`) or as a website URL
 * (`https://www.acme.com/about`).
 *
 * @param emailDomain - Domain part of the email address.
 * @param companyDomain - Company domain or website URL to compare against.
 * @returns `true` when the email domain equals the company domain or is one of
 *   its subdomains (e.g. `mail.acme.com` → `acme.com`); `false` otherwise,
 *   including when either value is empty after normalization.
 */
function isDomainMatch(emailDomain: string, companyDomain: string): boolean {
  const normalize = (raw: string): string =>
    raw
      .trim() // must happen first, otherwise a leading space defeats the anchored patterns below
      .toLowerCase()
      .replace(/^[a-z][a-z0-9+.-]*:\/\//, "") // URL scheme
      .replace(/[/?#].*$/, "") // path, query string, fragment
      .replace(/:\d+$/, "") // port
      .replace(/^www\./, "")
      .replace(/\.$/, ""); // fully-qualified trailing dot

  const eDomain = normalize(emailDomain);
  const cDomain = normalize(companyDomain);

  // An empty domain can never be "owned"; two empty strings must not count as a match.
  if (!eDomain || !cDomain) return false;

  // Exact match, or subdomain of the company domain. The leading dot keeps
  // "notacme.com" from matching "acme.com".
  return eDomain === cDomain || eDomain.endsWith(`.${cDomain}`);
}
// ─── Layer 3: MX Record ─────────────────────────────────────
/**
 * Check whether a domain publishes at least one usable MX record.
 *
 * A "null MX" (RFC 7505) — a record whose exchange is the DNS root, seen as
 * `"."` or an empty string — is an explicit statement that the domain accepts
 * no mail, so such records are not counted.
 *
 * @param domain - Domain to look up.
 * @returns `true` when a usable MX record exists; `false` when there is none or
 *   the lookup fails for any reason.
 */
async function hasMxRecord(domain: string): Promise<boolean> {
  try {
    const records = await dns.resolveMx(domain);
    return records.some((record) => record.exchange.trim().replace(/\.$/, "") !== "");
  } catch {
    // Lookup errors (no such domain, no MX data, resolver failure) all mean "no MX found".
    return false;
  }
}
// ─── Layer 4: Catch-All Detection (CREDIT-OPTIMIZED) ────────
// Strategy: try the FREE SMTP probe first → only use Reoon if SMTP can't determine.
// This saves Reoon credits (only 20/day) for when they're truly needed.
let _reoonUsedToday = 0; // Reoon requests attempted on _reoonResetDate (per process)
let _reoonResetDate = new Date().toDateString(); // local calendar day the counter belongs to
const REOON_DAILY_LIMIT = 18; // keep 2 credits as buffer

/**
 * Detect whether a domain is catch-all (accepts mail for any recipient).
 *
 * 1. Free SMTP probe: RCPT TO a gibberish address. Accepted → catch-all,
 *    rejected → not catch-all.
 * 2. Only if the probe is inconclusive: ask the Reoon API about a made-up
 *    address, subject to the in-process daily budget.
 *
 * @param domain - Domain to test.
 * @returns `true` for catch-all, `false` for not catch-all, `null` when it could
 *   not be determined (probe inconclusive and Reoon skipped or failed).
 */
async function checkCatchAll(domain: string): Promise<boolean | null> {
  // ── Attempt 1: FREE SMTP catch-all probe ───────────────────
  try {
    const fakeEmail = `xqz7k2m4n_test_${Date.now() % 10000}@${domain}`;
    const smtpResult = await smtpHandshake(fakeEmail, domain);
    if (smtpResult === true) {
      // Server accepted a gibberish recipient → CATCH-ALL
      logger.debug({ domain }, "Catch-all detected via FREE SMTP probe (Reoon credit saved)");
      return true;
    }
    if (smtpResult === false) {
      // Server rejected the gibberish recipient → NOT catch-all
      logger.debug({ domain }, "NOT catch-all — confirmed via FREE SMTP probe");
      return false;
    }
    // smtpResult === null → SMTP couldn't determine, fall through to Reoon
  } catch {
    // SMTP probe failed, fall through to Reoon
  }

  // ── Attempt 2: Reoon API (only if SMTP couldn't determine) ─
  // Start a fresh budget when the local calendar day has changed.
  const today = new Date().toDateString();
  if (_reoonResetDate !== today) {
    _reoonUsedToday = 0;
    _reoonResetDate = today;
  }

  if (_reoonUsedToday >= REOON_DAILY_LIMIT) {
    logger.warn({ domain, used: _reoonUsedToday }, "Reoon daily limit reached — skipping");
    return null;
  }

  try {
    const env = getEnv();
    if (!env.REOON_API_KEY) {
      // Without a key the request cannot succeed; don't spend a budget slot on it.
      logger.warn({ domain }, "REOON_API_KEY not configured — skipping Reoon catch-all check");
      return null;
    }

    // Count the attempt up front so a failing request can never exceed the budget.
    _reoonUsedToday++;
    const response = await axios.get("https://emailverifier.reoon.com/api/v1/verify", {
      params: {
        email: `definitely_not_real_${Date.now()}@${domain}`,
        key: env.REOON_API_KEY,
        mode: "quick",
      },
      timeout: 8_000,
    });
    logger.debug({ domain, reoonUsed: _reoonUsedToday }, "Reoon credit used for catch-all check");
    // A made-up address reported as "valid" is taken to mean the domain accepts anything.
    // NOTE(review): this relies on Reoon's "quick" mode checking the mailbox itself;
    // if quick mode only validates syntax/MX, every domain would look catch-all — confirm.
    return response.data?.status === "valid";
  } catch (error: unknown) {
    logger.warn(
      { domain, error: error instanceof Error ? error.message : String(error) },
      "Reoon catch-all check failed"
    );
    return null;
  }
}
// ─── Layer 5: SMTP Handshake ────────────────────────────────
/**
 * Ask the domain's primary mail server whether it would accept mail for `email`.
 *
 * Connects to port 25 of the highest-priority MX host and walks through
 * greeting → EHLO → MAIL FROM → RCPT TO, then reads the reply to RCPT TO.
 * No message is ever sent.
 *
 * Replies are parsed line by line and only the final line of a (possibly
 * multi-line) reply advances the conversation, so an EHLO capability list
 * split across several TCP chunks cannot be mistaken for later replies.
 *
 * @param email - Recipient address to probe.
 * @param domain - Domain whose MX records select the server to contact.
 * @returns `true` when the server accepts the recipient (250/251), `false` when
 *   it rejects the mailbox (550/553), `null` when no verdict could be reached
 *   (DNS failure, connection error, early close, 10 s timeout, or any other
 *   reply such as a 4xx temporary failure).
 */
async function smtpHandshake(email: string, domain: string): Promise<boolean | null> {
  // Never interpolate whitespace (incl. CR/LF) or angle brackets into the RCPT TO command.
  if (/[\s<>]/.test(email)) return null;

  let mailServer: string;
  try {
    const mxRecords = await dns.resolveMx(domain);
    // Highest priority = lowest number.
    const primary = [...mxRecords].sort((a, b) => a.priority - b.priority)[0];
    if (!primary || !primary.exchange) return null;
    mailServer = primary.exchange;
  } catch {
    return null; // can't determine
  }

  return new Promise<boolean | null>((resolve) => {
    const socket = new net.Socket();
    let step = 0; // 0 = awaiting greeting, 1 = EHLO sent, 2 = MAIL FROM sent, 3 = RCPT TO sent
    let pending = ""; // received text not yet terminated by a newline
    let settled = false;

    // Single exit point: resolves once, stops the timer and releases the socket.
    const finish = (outcome: boolean | null): void => {
      if (settled) return;
      settled = true;
      clearTimeout(timeout);
      socket.destroy();
      resolve(outcome);
    };

    const timeout = setTimeout(() => finish(null), 10_000);

    // React to the final line of one server reply, identified by its 3-digit code.
    const handleReply = (code: string): void => {
      if (step === 0) {
        if (code !== "220") {
          finish(null);
          return;
        }
        socket.write("EHLO verify.local\r\n");
        step = 1;
      } else if (step === 1) {
        if (code !== "250") {
          finish(null);
          return;
        }
        socket.write("MAIL FROM:<verify@verify.local>\r\n");
        step = 2;
      } else if (step === 2) {
        if (code !== "250") {
          finish(null);
          return;
        }
        socket.write(`RCPT TO:<${email}>\r\n`); // the actual check
        step = 3;
      } else {
        let verdict: boolean | null = null; // anything unrecognised stays "can't determine"
        if (code === "250" || code === "251") {
          verdict = true; // recipient accepted
        } else if (code === "550" || code === "553") {
          verdict = false; // mailbox rejected
        }
        socket.write("QUIT\r\n"); // best-effort courtesy before the socket is torn down
        finish(verdict);
      }
    };

    socket.on("data", (chunk) => {
      pending += chunk.toString();
      let lineEnd = pending.indexOf("\n");
      while (lineEnd !== -1 && !settled) {
        const line = pending.slice(0, lineEnd);
        pending = pending.slice(lineEnd + 1);
        // "250-..." marks a continuation line; "250 ..." (or a bare "250") ends the reply.
        const reply = /^(\d{3})(-?)/.exec(line);
        if (reply && reply[2] !== "-") {
          handleReply(reply[1] ?? "");
        }
        lineEnd = pending.indexOf("\n");
      }
    });

    socket.on("error", () => finish(null)); // can't determine
    socket.on("close", () => finish(null)); // server hung up before giving a verdict

    socket.connect(25, mailServer);
  });
}
// ─── Layer 6: Disposable Email ──────────────────────────────
// Known throwaway-mailbox providers (lower-case registrable domains).
const DISPOSABLE_DOMAINS = new Set([
  "mailinator.com", "tempmail.com", "throwaway.email", "guerrillamail.com",
  "guerrillamail.info", "yopmail.com", "trashmail.com", "maildrop.cc",
  "10minutemail.com", "temp-mail.org", "fakeinbox.com", "sharklasers.com",
  "guerrillamail.net", "grr.la", "dispostable.com", "tempr.email",
  "mohmal.com", "burpcollaborator.net", "mailnesia.com",
]);

/**
 * Check whether a domain belongs to a known disposable-email provider.
 *
 * The domain itself and each of its parent domains are looked up, so a
 * subdomain such as `abc123.burpcollaborator.net` is caught as well as the bare
 * provider domain. Matching ignores case, surrounding whitespace and a trailing dot.
 *
 * @param domain - Domain part of the email address.
 * @returns `true` when the domain, or a parent of it, is on the disposable list.
 */
function isDisposable(domain: string): boolean {
  const labels = domain.trim().toLowerCase().replace(/\.$/, "").split(".");

  // Try "a.b.c", then "b.c"; stop before the bare top-level label.
  for (let start = 0; start < labels.length - 1; start++) {
    if (DISPOSABLE_DOMAINS.has(labels.slice(start).join("."))) {
      return true;
    }
  }
  return false;
}
// ─── Final Status Computation ────────────────────────────────
/**
 * Collapse the per-layer outcomes into a final status and 0-100 confidence.
 *
 * Rules are evaluated top to bottom and the first match wins:
 * 1. every layer passes and SMTP confirmed → `verified_deliverable`
 *    (60 plus a provider bonus, capped at 95)
 * 2. catch-all domain with MX → `verified_catch_all` (45)
 * 3. SMTP confirmed, provider score 0 → `pattern_smtp_confirmed` (70)
 * 4. MX present, provider score >= 70, SMTP inconclusive → `verified_deliverable`
 *    (confidence = provider score)
 * 5. MX present, not disposable → `uncertain` (30)
 * 6. otherwise → `rejected_invalid` (0)
 *
 * @param email - Address the layers belong to.
 * @param layers - Outcome of each verification layer.
 * @returns The assembled verification result.
 */
function computeFinalStatus(
  email: string,
  layers: VerificationResult["layers"]
): VerificationResult {
  const {
    format,
    domainMatch,
    mxRecord,
    catchAll,
    smtpHandshake: smtpOutcome,
    disposable,
    providerConfidence,
  } = layers;
  const smtpConfirmed = smtpOutcome === true;

  // Rule 1 — an unknown (null) catch-all result is treated like "not catch-all" here.
  const everyLayerPassed =
    format && domainMatch && mxRecord && smtpConfirmed && !disposable && !catchAll;
  if (everyLayerPassed) {
    const providerBonus = providerConfidence > 0 ? Math.round(providerConfidence * 0.35) : 15;
    return makeResult(email, "verified_deliverable", layers, Math.min(95, 60 + providerBonus));
  }

  // Rule 2 — catch-all domain: uncertain but not invalid.
  if (mxRecord && catchAll === true) {
    return makeResult(email, "verified_catch_all", layers, 45);
  }

  // Rule 3 — SMTP confirmed but no provider data.
  // NOTE(review): whenever format, domainMatch and mxRecord are true and the domain
  // is not disposable, rules 1 and 2 already capture every SMTP-confirmed case, so
  // this rule cannot fire for such input — confirm the intended precedence.
  if (smtpConfirmed && providerConfidence === 0) {
    return makeResult(email, "pattern_smtp_confirmed", layers, 70);
  }

  // Rule 4 — trust a strong provider score when SMTP could not give an answer.
  if (mxRecord && smtpOutcome === null && providerConfidence >= 70) {
    return makeResult(email, "verified_deliverable", layers, providerConfidence);
  }

  // Rule 5 — the domain can receive mail, everything else is unknown.
  if (mxRecord && !disposable) {
    return makeResult(email, "uncertain", layers, 30);
  }

  // Rule 6
  return makeResult(email, "rejected_invalid", layers, 0);
}
/**
 * Bundle an address, its final status, the per-layer outcomes and the overall
 * confidence into a `VerificationResult`. Values are stored as given.
 *
 * @param email - Address the result describes.
 * @param status - Final verdict.
 * @param layers - Per-layer outcomes (stored by reference, not copied).
 * @param overallConfidence - Confidence score for the verdict.
 * @returns The assembled result object.
 */
function makeResult(
  email: string,
  status: EmailStatus,
  layers: VerificationResult["layers"],
  overallConfidence: number
): VerificationResult {
  const result: VerificationResult = {
    email: email,
    status: status,
    layers: layers,
    overallConfidence: overallConfidence,
  };
  return result;
}