File size: 12,333 Bytes
bd28470
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
/**
 * 7-Layer Email Verification
 * 
 * Layer 1: RFC 5322 format check (instant, free)
 * Layer 2: Domain ownership — email domain = company domain (instant, free)
 * Layer 3: MX record lookup (free, DNS)
 * Layer 4: Catch-all detection (free SMTP probe first, Reoon API as fallback)
 * Layer 5: SMTP handshake — ask mail server "does this user exist?" (free, direct)
 * Layer 6: Disposable email check (free, local list)
 * Layer 7: Provider confidence score (Hunter/Snov score)
 * 
 * Each layer records its outcome: a boolean, `null` when the layer could not
 * decide, or a 0-100 score for layer 7. Final status is computed from all 7.
 */

import dns from "dns/promises";
import net from "net";
import axios from "axios";
import { getEnv } from "../../shared/config/env";
import { logger } from "../../shared/utils/logger";

/**
 * Final verdict attached to a verified email (the `status` field of
 * `VerificationResult`).
 */
export type EmailStatus =
  | "verified_deliverable"     // all layers pass
  | "verified_catch_all"       // valid but catch-all domain
  | "pattern_smtp_confirmed"   // pattern-generated + SMTP confirmed
  | "uncertain"                // some layers pass, some unknown
  | "rejected_invalid";        // hard failure

/**
 * Result returned by `verifyEmailDeep`: the final status together with the raw
 * outcome of every verification layer.
 */
export interface VerificationResult {
  // The address that was checked, exactly as the caller supplied it.
  email: string;
  // Final verdict derived from the layer outcomes below.
  status: EmailStatus;
  // Per-layer outcomes. A layer that was never reached (because an earlier
  // layer rejected the address) keeps its initial value: false / null.
  layers: {
    format: boolean;
    domainMatch: boolean;
    mxRecord: boolean;
    catchAll: boolean | null;   // null = couldn't determine
    smtpHandshake: boolean | null;
    disposable: boolean;        // true = IS disposable (bad)
    providerConfidence: number; // 0-100 from Hunter/Snov
  };
  overallConfidence: number;    // 0-100 computed from layers
}

/**
 * Run all 7 verification layers on an email.
 *
 * The layers are evaluated cheapest-first and the function returns early with
 * `rejected_invalid` as soon as a hard failure is found (bad format, domain
 * mismatch, no MX record, disposable domain). The disposable-domain lookup is a
 * local set lookup, so it runs before the two network layers (catch-all
 * detection and SMTP handshake) instead of after them: a disposable address is
 * rejected without opening any SMTP connection or spending a Reoon credit.
 *
 * @param email              Address to verify.
 * @param companyDomain      Domain the address is expected to belong to.
 * @param providerConfidence Score reported by the upstream provider
 *                           (Hunter/Snov); `0` means no provider data.
 * @returns The final status, the per-layer outcomes and an overall confidence.
 *          Layers that were not reached keep their initial value (`false`/`null`).
 */
export async function verifyEmailDeep(
  email: string,
  companyDomain: string,
  providerConfidence: number = 0
): Promise<VerificationResult> {
  const layers: VerificationResult["layers"] = {
    format: false,
    domainMatch: false,
    mxRecord: false,
    catchAll: null,
    smtpHandshake: null,
    disposable: false,
    providerConfidence,
  };

  const emailDomain = email.split("@")[1]?.toLowerCase();
  if (!emailDomain) {
    return makeResult(email, "rejected_invalid", layers, 0);
  }

  // ── Layer 1: Format check ──────────────────────────────────
  layers.format = isValidFormat(email);
  if (!layers.format) {
    return makeResult(email, "rejected_invalid", layers, 0);
  }

  // ── Layer 2: Domain ownership ──────────────────────────────
  layers.domainMatch = isDomainMatch(emailDomain, companyDomain);
  if (!layers.domainMatch) {
    logger.warn({ email, emailDomain, companyDomain }, "Domain mismatch — rejecting");
    return makeResult(email, "rejected_invalid", layers, 0);
  }

  // ── Layer 3: MX record ────────────────────────────────────
  layers.mxRecord = await hasMxRecord(emailDomain);
  if (!layers.mxRecord) {
    return makeResult(email, "rejected_invalid", layers, 5);
  }

  // ── Layer 6: Disposable check ──────────────────────────────
  // Evaluated here, ahead of layers 4 and 5, because it is free and its
  // verdict does not depend on them.
  layers.disposable = isDisposable(emailDomain);
  if (layers.disposable) {
    return makeResult(email, "rejected_invalid", layers, 0);
  }

  // ── Layer 4: Catch-all detection (SMTP probe, then Reoon) ──
  layers.catchAll = await checkCatchAll(emailDomain);

  // ── Layer 5: SMTP handshake ─────────────────────────────────
  layers.smtpHandshake = await smtpHandshake(email, emailDomain);

  // ── Layer 7: Provider confidence ──────────────────────────
  // Already set from Hunter/Snov response

  // ── Compute final status ───────────────────────────────────
  return computeFinalStatus(email, layers);
}

// ─── Layer 1: RFC 5322 Format ────────────────────────────────

/**
 * Layer 1: syntactic check of an email address.
 *
 * Accepts an address when it matches the (strict-ish) RFC 5322 shape below, is
 * at most 254 characters long, and its local part is at most 64 characters
 * with no leading, trailing or consecutive dots.
 *
 * @param email Address to check.
 * @returns `true` when every rule holds, otherwise `false`.
 */
function isValidFormat(email: string): boolean {
  const addressShape = /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/;

  if (email.length > 254 || !addressShape.test(email)) {
    return false;
  }

  // The shape check guarantees exactly one "@", so everything before it is the local part.
  const localPart = email.slice(0, email.indexOf("@"));
  const hasMisplacedDot =
    localPart.startsWith(".") || localPart.endsWith(".") || localPart.includes("..");

  return localPart.length <= 64 && !hasMisplacedDot;
}

// ─── Layer 2: Domain Match ──────────────────────────────────

/**
 * Layer 2: check that the email's domain belongs to the company.
 *
 * Both inputs are normalised before comparison: surrounding whitespace is
 * trimmed first (so a padded " www.company.com" still loses its `www.`), the
 * value is lower-cased, and a URL scheme, path/query/fragment, port, leading
 * `www.` and trailing dot are removed. This lets `companyDomain` be given as a
 * bare domain or as a website URL.
 *
 * @param emailDomain   Domain part of the email address.
 * @param companyDomain Company domain or website URL.
 * @returns `true` when the email domain equals the company domain or is a
 *          subdomain of it; `false` otherwise, including when either value is
 *          empty after normalisation.
 */
function isDomainMatch(emailDomain: string, companyDomain: string): boolean {
  const normalize = (raw: string): string =>
    raw
      .trim()
      .toLowerCase()
      .replace(/^[a-z][a-z0-9+.-]*:\/\//, "") // scheme, e.g. "https://"
      .replace(/[/?#].*$/, "")                // path, query string, fragment
      .replace(/:\d+$/, "")                   // port
      .replace(/^www\./, "")
      .replace(/\.$/, "");                    // fully-qualified trailing dot

  const eDomain = normalize(emailDomain);
  const cDomain = normalize(companyDomain);

  // An empty side can never be a meaningful match.
  if (eDomain === "" || cDomain === "") return false;

  // Exact match
  if (eDomain === cDomain) return true;

  // Subdomain match (e.g., mail.company.com → company.com). The leading dot
  // keeps "notcompany.com" from matching "company.com".
  return eDomain.endsWith(`.${cDomain}`);
}

// ─── Layer 3: MX Record ─────────────────────────────────────

/**
 * Layer 3: report whether `domain` publishes at least one usable MX record.
 *
 * A "null MX" record (RFC 7505: an MX whose exchange is the DNS root) is an
 * explicit statement that the domain accepts no mail, so such records are
 * ignored rather than counted as a mail server.
 *
 * @param domain Domain to look up.
 * @returns `true` when a usable MX record exists; `false` when there is none
 *          or when the lookup fails for any reason.
 */
async function hasMxRecord(domain: string): Promise<boolean> {
  try {
    const records = await dns.resolveMx(domain);
    // The root exchange may surface as "" or "." depending on the resolver.
    return records.some((record) => record.exchange !== "" && record.exchange !== ".");
  } catch {
    // Any resolver error is treated as "no MX record".
    return false;
  }
}

// ─── Layer 4: Catch-All Detection (CREDIT-OPTIMIZED) ────────
// Strategy: try the FREE SMTP probe first → only use Reoon if SMTP can't determine
// This saves Reoon credits (only 20/day) for when they're truly needed

// Reoon credit accounting. Held in module-level variables, so the count lives
// only in this process's memory.
let _reoonUsedToday = 0;
let _reoonResetDate = new Date().toDateString();
const REOON_DAILY_LIMIT = 18; // keep 2 credits as buffer

/**
 * Layer 4: decide whether `domain` is a catch-all domain (accepts mail for any
 * local part).
 *
 * First tries a free SMTP probe with a made-up recipient. Only when that probe
 * is inconclusive does it spend a Reoon credit, subject to `REOON_DAILY_LIMIT`.
 *
 * @param domain Domain to test.
 * @returns `true` for catch-all, `false` for not catch-all, `null` when it
 *          could not be determined (probe inconclusive and Reoon unavailable,
 *          over budget, failing, or returning an unusable response).
 */
async function checkCatchAll(domain: string): Promise<boolean | null> {
  // ── Attempt 1: FREE SMTP catch-all probe ───────────────────
  // Send RCPT TO with a random gibberish address.
  // If server accepts it → catch-all. If it rejects it → NOT catch-all.
  try {
    const fakeEmail = `xqz7k2m4n_test_${Date.now() % 10000}@${domain}`;
    const smtpResult = await smtpHandshake(fakeEmail, domain);

    if (smtpResult === true) {
      // Server accepted gibberish email → CATCH-ALL
      logger.debug({ domain }, "Catch-all detected via FREE SMTP probe (Reoon credit saved)");
      return true;
    }
    if (smtpResult === false) {
      // Server rejected gibberish email → NOT catch-all
      logger.debug({ domain }, "NOT catch-all — confirmed via FREE SMTP probe");
      return false;
    }
    // smtpResult === null → SMTP couldn't determine, fall through to Reoon
  } catch (err: unknown) {
    // SMTP probe failed, fall through to Reoon
    logger.debug(
      { domain, error: err instanceof Error ? err.message : String(err) },
      "SMTP catch-all probe failed — falling back to Reoon"
    );
  }

  // ── Attempt 2: Reoon API (only if SMTP couldn't determine) ─
  // Reset counter if new day
  const today = new Date().toDateString();
  if (_reoonResetDate !== today) {
    _reoonUsedToday = 0;
    _reoonResetDate = today;
  }

  // Check budget
  if (_reoonUsedToday >= REOON_DAILY_LIMIT) {
    logger.warn({ domain, used: _reoonUsedToday }, "Reoon daily limit reached — skipping");
    return null;
  }

  try {
    const env = getEnv();
    if (!env.REOON_API_KEY) {
      // Without a key the request cannot succeed; don't count a credit for it.
      logger.warn({ domain }, "Reoon API key not configured — skipping catch-all check");
      return null;
    }

    // Counted before the request so a failed call still counts against the
    // budget (conservative: never exceed the daily allowance).
    _reoonUsedToday++;

    // NOTE(review): confirm that Reoon's "quick" mode really probes the mailbox.
    // If it only validates syntax/MX, "valid" for a made-up address does not
    // imply a catch-all domain.
    const response = await axios.get("https://emailverifier.reoon.com/api/v1/verify", {
      params: {
        email: `definitely_not_real_${Date.now()}@${domain}`,
        key: env.REOON_API_KEY,
        mode: "quick",
      },
      timeout: 8_000,
    });

    logger.debug({ domain, reoonUsed: _reoonUsedToday }, "Reoon credit used for catch-all check");

    const status: unknown = response.data?.status;
    if (typeof status !== "string") {
      // A response without a status tells us nothing; don't report "not catch-all".
      logger.warn({ domain }, "Reoon response had no status — catch-all unknown");
      return null;
    }
    return status === "valid";
  } catch (err: unknown) {
    // Log only the message: the full error object could carry the request
    // parameters, which include the API key.
    logger.warn(
      { domain, error: err instanceof Error ? err.message : String(err) },
      "Reoon catch-all check failed — result unknown"
    );
    return null;
  }
}

// ─── Layer 5: SMTP Handshake ────────────────────────────────

/**
 * Layer 5: ask the domain's preferred mail server whether it would accept
 * `email`, without sending a message.
 *
 * Opens an SMTP session on port 25 and walks through
 * greeting → EHLO → MAIL FROM → RCPT TO, then reads the reply to RCPT TO.
 * Incoming data is buffered and parsed line by line, so multi-line replies
 * ("250-..." continuation lines) and replies split across TCP chunks cannot
 * advance the conversation early.
 *
 * @param email  Address placed in the RCPT TO command.
 * @param domain Domain whose MX records select the server to contact.
 * @returns `true` when the server accepts the recipient (250/251), `false`
 *          when it rejects the mailbox (550/553), and `null` when nothing can
 *          be concluded: no usable MX, DNS or socket failure, connection closed
 *          early, 10 s timeout, or any other reply (e.g. 4xx greylisting).
 */
async function smtpHandshake(email: string, domain: string): Promise<boolean | null> {
  // Never interpolate line breaks into an SMTP command: they would let the
  // address smuggle extra commands into the session.
  if (/[\r\n]/.test(email)) return null;

  let mailServer: string;
  try {
    // Resolve MX to get mail server
    const mxRecords = await dns.resolveMx(domain);
    if (!mxRecords.length) return null;

    // Pick highest priority (lowest number); the first one wins on ties.
    mailServer = mxRecords.reduce((best, candidate) =>
      candidate.priority < best.priority ? candidate : best
    ).exchange;
  } catch {
    return null; // can't determine
  }

  // An empty or root exchange (RFC 7505 null MX) names no server; connecting
  // with an empty host would silently target localhost.
  if (mailServer === "" || mailServer === ".") return null;

  return new Promise<boolean | null>((resolve) => {
    const socket = new net.Socket();
    let step = 0;      // 0 = awaiting greeting, 1 = EHLO sent, 2 = MAIL FROM sent, 3 = RCPT TO sent
    let pending = "";  // received bytes not yet terminated by a newline
    let settled = false;

    // Single exit point: resolves once, stops the timer and releases the socket.
    const finish = (outcome: boolean | null): void => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      socket.destroy();
      resolve(outcome);
    };

    const timer = setTimeout(() => finish(null), 10_000);

    // Reacts to the status code of one complete server reply.
    const handleReply = (code: string): void => {
      if (step === 0) {
        if (code !== "220") return finish(null);
        // Server greeting → send EHLO
        socket.write("EHLO verify.local\r\n");
        step = 1;
      } else if (step === 1) {
        if (code !== "250") return finish(null);
        // EHLO accepted → send MAIL FROM
        socket.write("MAIL FROM:<verify@verify.local>\r\n");
        step = 2;
      } else if (step === 2) {
        if (code !== "250") return finish(null);
        // MAIL FROM accepted → send RCPT TO (the actual check)
        socket.write(`RCPT TO:<${email}>\r\n`);
        step = 3;
      } else {
        socket.write("QUIT\r\n");
        if (code === "250" || code === "251") {
          finish(true);   // recipient accepted
        } else if (code === "550" || code === "553") {
          finish(false);  // mailbox rejected
        } else {
          finish(null);   // temporary failure, policy block, ... → unknown
        }
      }
    };

    socket.on("data", (chunk) => {
      pending += chunk.toString();

      let newlineAt = pending.indexOf("\n");
      while (!settled && newlineAt !== -1) {
        const line = pending.slice(0, newlineAt).replace(/\r$/, "");
        pending = pending.slice(newlineAt + 1);
        newlineAt = pending.indexOf("\n");

        if (line === "") continue;
        // "NNN-text" is a continuation line; the reply ends with "NNN text".
        if (/^\d{3}-/.test(line)) continue;

        handleReply(line.slice(0, 3));
      }
    });

    socket.on("error", () => finish(null)); // can't determine
    socket.on("close", () => finish(null)); // server hung up before answering

    try {
      socket.connect(25, mailServer);
    } catch {
      finish(null);
    }
  });
}

// ─── Layer 6: Disposable Email ──────────────────────────────

// Known disposable / throwaway mail providers (local list, no network lookup).
const DISPOSABLE_DOMAINS = new Set<string>([
  "mailinator.com", "tempmail.com", "throwaway.email", "guerrillamail.com",
  "guerrillamail.info", "yopmail.com", "trashmail.com", "maildrop.cc",
  "10minutemail.com", "temp-mail.org", "fakeinbox.com", "sharklasers.com",
  "guerrillamail.net", "grr.la", "dispostable.com", "tempr.email",
  "mohmal.com", "burpcollaborator.net", "mailnesia.com",
]);

/**
 * Layer 6: report whether `domain` is a known disposable mail domain.
 *
 * The comparison is case-insensitive and also matches subdomains of a listed
 * domain (e.g. `inbox.mailinator.com`), because every parent suffix of the
 * domain is checked against the list.
 *
 * @param domain Domain part of the email address.
 * @returns `true` when the domain or one of its parent domains is listed.
 */
function isDisposable(domain: string): boolean {
  const labels = domain.trim().toLowerCase().replace(/\.$/, "").split(".");

  // Check "a.b.c", then "b.c", then "c".
  for (let start = 0; start < labels.length; start++) {
    if (DISPOSABLE_DOMAINS.has(labels.slice(start).join("."))) {
      return true;
    }
  }
  return false;
}

// ─── Final Status Computation ────────────────────────────────

/**
 * Turn the per-layer outcomes into a final status and overall confidence.
 *
 * Rules are evaluated top to bottom and the first match wins. The provider
 * score is clamped to 0-100 (non-finite values count as 0) before use, so the
 * returned `overallConfidence` always stays within 0-100.
 *
 * @param email  Address the layers belong to.
 * @param layers Outcomes collected by the verification layers.
 * @returns The verification result; `layers` is passed through unchanged.
 */
function computeFinalStatus(
  email: string,
  layers: VerificationResult["layers"]
): VerificationResult {
  const verdict = (status: EmailStatus, overallConfidence: number): VerificationResult => ({
    email,
    status,
    layers,
    overallConfidence,
  });

  const providerScore = Number.isFinite(layers.providerConfidence)
    ? Math.min(100, Math.max(0, layers.providerConfidence))
    : 0;

  // All layers pass (including SMTP). An unknown catch-all result (null) does
  // not block this branch.
  const allLayersPass =
    layers.format &&
    layers.domainMatch &&
    layers.mxRecord &&
    layers.smtpHandshake === true &&
    !layers.disposable &&
    !layers.catchAll;
  if (allLayersPass) {
    // 60 base + up to 35 from the provider score (or a flat 15 without one), capped at 95.
    const bonus = providerScore > 0 ? Math.round(providerScore * 0.35) : 15;
    return verdict("verified_deliverable", Math.min(95, 60 + bonus));
  }

  // Catch-all domain — uncertain but not invalid
  if (layers.catchAll === true && layers.mxRecord) {
    return verdict("verified_catch_all", 45);
  }

  // SMTP confirmed but no provider data.
  // NOTE(review): when format, domain, MX and disposable checks have all passed
  // (as they have for layers produced by verifyEmailDeep), a confirmed SMTP
  // handshake is already consumed by one of the two branches above, so this
  // branch looks unreachable from that caller. Confirm whether it was meant to
  // run before the all-pass branch.
  if (layers.smtpHandshake === true && providerScore === 0) {
    return verdict("pattern_smtp_confirmed", 70);
  }

  // MX exists, provider says good, SMTP unknown
  if (layers.mxRecord && providerScore >= 70 && layers.smtpHandshake === null) {
    return verdict("verified_deliverable", providerScore);
  }

  // MX exists but everything else uncertain
  if (layers.mxRecord && !layers.disposable) {
    return verdict("uncertain", 30);
  }

  return verdict("rejected_invalid", 0);
}

/**
 * Assemble a `VerificationResult` from its four parts.
 *
 * @param email             Address that was checked.
 * @param status            Final verdict.
 * @param layers            Per-layer outcomes, stored by reference (not copied).
 * @param overallConfidence Overall confidence to report.
 * @returns The assembled result object.
 */
function makeResult(
  email: string,
  status: EmailStatus,
  layers: VerificationResult["layers"],
  overallConfidence: number
): VerificationResult {
  const assembled: VerificationResult = {
    email: email,
    status: status,
    layers: layers,
    overallConfidence: overallConfidence,
  };
  return assembled;
}