# app.py - Advanced Email Verifier (Optimized for 2026 Cloud Limitations + Excel Export)
"""Gradio app that classifies email addresses without sending any mail.

Pipeline per address: syntax check -> MX lookup -> TCP reachability probe on
port 25 -> SMTP ``RCPT TO`` probe -> optional catch-all detection.  Results are
shown in a table and exported to an Excel workbook.
"""
import os
import random
import re
import smtplib
import socket
import string
import tempfile
from typing import Dict

import dns.resolver
import gradio as gr
import pandas as pd

# Cache for domain catch-all status
domain_catchall_cache: Dict[str, bool] = {}

# Compiled once; used by is_syntax_valid().
_EMAIL_PATTERN = re.compile(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$')

# Envelope sender used for SMTP probes (no message is ever transmitted).
_PROBE_SENDER = 'verifier@example.com'

# Providers treated as "Likely Real" when verification is inconclusive.
BIG_PROVIDERS = (
    "gmail.com", "googlemail.com", "outlook.com", "hotmail.com", "live.com",
    "msn.com", "yahoo.com", "ymail.com", "rocketmail.com", "icloud.com",
    "me.com", "mac.com", "protonmail.com", "proton.me", "aol.com",
)

# Statuses counted as "Deliverable/Likely Real" in the summary line.
# "Likely Fake" is deliberately absent.
GOOD_STATUSES = (
    "Real (Confirmed)",
    "Likely Real",
    "Deliverable (Catch-all)",
    "Possibly Real",
)


def is_syntax_valid(email: str) -> bool:
    """Return True if *email* (ignoring surrounding whitespace) looks like ``local@domain.tld``."""
    return bool(_EMAIL_PATTERN.fullmatch(email.strip()))


def get_mx_host(domain: str) -> str | None:
    """Return the hostname of the lowest-preference MX record for *domain*.

    Returns ``None`` when the lookup fails, yields no records, or the best
    record has an empty exchange name.
    """
    try:
        answers = dns.resolver.resolve(domain, 'MX')
        if answers:
            best = min(answers, key=lambda r: r.preference)
            return str(best.exchange).rstrip('.') or None
    except Exception:
        # Best-effort lookup: any resolver failure means "no usable MX".
        return None
    return None


def smtp_rcpt_check(mx_host: str, email: str) -> str:
    """Probe *mx_host* with ``RCPT TO:<email>``.

    Returns: 'true', 'false', or 'unknown'
      - 'true'    -> server answered 250/251 to RCPT
      - 'false'   -> server answered with a 5xx code to RCPT
      - 'unknown' -> anything else, including connection errors or a
                     rejected MAIL FROM
    """
    try:
        with smtplib.SMTP(mx_host, port=25, timeout=15) as server:
            server.ehlo_or_helo_if_needed()
            mail_code, _ = server.mail(_PROBE_SENDER)
            if mail_code != 250:
                # The sender was refused, so a following RCPT would fail with
                # a 5xx for reasons unrelated to the mailbox itself.
                return "unknown"
            code, _ = server.rcpt(email)
    except Exception:
        # Best-effort probe: network/protocol failures are inconclusive.
        return "unknown"

    if code in (250, 251):
        return "true"
    if code >= 500:
        return "false"
    return "unknown"


def is_catchall_domain(domain: str, mx_host: str) -> bool:
    """Return True if *mx_host* accepts two random, almost certainly nonexistent recipients at *domain*.

    The result is memoised per domain in ``domain_catchall_cache``.
    """
    if domain in domain_catchall_cache:
        return domain_catchall_cache[domain]

    def _random_probe_accepted() -> bool:
        random_local = ''.join(random.choices(string.ascii_lowercase + string.digits, k=16))
        return smtp_rcpt_check(mx_host, f"{random_local}@{domain}") == "true"

    # all() short-circuits: a single non-accepted probe already proves the
    # domain is not catch-all, so the second connection is skipped.
    is_catchall = all(_random_probe_accepted() for _ in range(2))
    domain_catchall_cache[domain] = is_catchall
    return is_catchall


def _is_big_provider(domain: str) -> bool:
    """Return True if *domain* is a listed provider or a subdomain of one."""
    # Match on a label boundary; a plain suffix test would treat "some.com"
    # as "me.com" and "notgmail.com" as "gmail.com".
    return any(domain == p or domain.endswith("." + p) for p in BIG_PROVIDERS)


def _can_connect(host: str, port: int = 25, timeout: float = 10) -> bool:
    """Return True if a TCP connection to *host*:*port* can be opened; the socket is closed immediately."""
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except Exception:
        return False


def verify_email(email: str) -> Dict:
    """Classify a single address.

    Returns a dict with keys ``email`` (stripped, lower-cased input),
    ``status`` and ``comments``.
    """
    email = email.strip().lower()
    result = {
        "email": email,
        "status": "Fake/Invalid",
        "comments": ""
    }

    if not is_syntax_valid(email):
        result["comments"] = "Invalid email format"
        return result

    domain = email.split('@')[1]
    mx_host = get_mx_host(domain)
    if not mx_host:
        result["comments"] = "No MX record - domain doesn't accept emails"
        return result

    is_big_provider = _is_big_provider(domain)

    # Check connectivity
    if not _can_connect(mx_host):
        if is_big_provider:
            result["status"] = "Likely Real"
            result["comments"] = "Major provider (Gmail/Outlook/etc.) blocks cloud verifiers → almost always valid"
        else:
            result["status"] = "Likely Fake"
            result["comments"] = "Mail server unreachable (may be blocked or down)"
        return result

    # Server is reachable → do full SMTP check
    mailbox_status = smtp_rcpt_check(mx_host, email)

    if mailbox_status == "true":
        # An accepted RCPT proves nothing if the server accepts every
        # recipient, so rule out catch-all before calling it confirmed.
        if is_catchall_domain(domain, mx_host):
            result["status"] = "Deliverable (Catch-all)"
            result["comments"] = "Domain accepts all emails → deliverable but existence unknown"
        else:
            result["status"] = "Real (Confirmed)"
            result["comments"] = "Server confirmed mailbox exists"
    elif mailbox_status == "false":
        result["status"] = "Fake (Rejected)"
        result["comments"] = "Server rejected - mailbox does not exist"
    elif is_big_provider:  # unknown
        result["status"] = "Likely Real"
        result["comments"] = "Inconclusive (major providers block deep verification)"
    elif is_catchall_domain(domain, mx_host):
        result["status"] = "Deliverable (Catch-all)"
        result["comments"] = "Domain accepts all emails → deliverable but existence unknown"
    else:
        result["status"] = "Possibly Real"
        result["comments"] = "Inconclusive - server didn't confirm or reject"

    return result


def verify_emails(input_text: str):
    """Verify every non-blank line of *input_text*.

    Returns ``(dataframe, summary, excel_path)``; for empty input returns
    ``(None, warning_message, None)``.
    """
    emails = [e.strip() for e in input_text.splitlines() if e.strip()]
    if not emails:
        return None, "⚠️ Please enter at least one email.", None

    results = [verify_email(email) for email in emails]
    df = pd.DataFrame(results)

    # Count deliverable/likely using an explicit whitelist so that
    # "Likely Fake" is not counted as deliverable.
    deliverable = int(df["status"].isin(GOOD_STATUSES).sum())
    invalid = len(df) - deliverable

    summary = f"Processed: {len(emails)} | Deliverable/Likely Real: {deliverable} | Fake/Invalid: {invalid}"

    # Excel export — a fresh directory per call keeps concurrent requests
    # from overwriting each other's workbook while preserving the file name.
    df_to_save = df[["email", "status", "comments"]]
    temp_dir = tempfile.mkdtemp(prefix="email_verifier_")
    excel_filename = "email_verification_results.xlsx"
    excel_path = os.path.join(temp_dir, excel_filename)
    df_to_save.to_excel(excel_path, index=False, sheet_name="Results")

    return df_to_save, summary, excel_path


# Gradio Interface
with gr.Blocks(title="Advanced Email Verifier 2026", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🛡️ Advanced Email Verifier (2026 Edition)")
    gr.Markdown("""
    **Works great on custom/business domains**
    For Gmail, Outlook, Yahoo, etc.: Shows "Likely Real" because these providers block cloud-based verification tools (common in 2026).

    Paste emails (one per line) → get accurate results + Excel download.
    """)

    input_box = gr.Textbox(
        label="Emails to Verify",
        lines=15,
        placeholder="example@gmail.com\ninfo@business.com",
        info="One email per line"
    )

    btn = gr.Button("🚀 Verify Emails", variant="primary", size="lg")

    output_table = gr.Dataframe(
        label="Verification Results",
        headers=["Email", "Status", "Comments"],
        datatype=["str", "str", "str"],
        wrap=True
    )

    summary_text = gr.Textbox(label="Summary", interactive=False)

    excel_download = gr.File(
        label="📥 Download Results as Excel (.xlsx)",
        visible=False
    )

    # Run the verification, then reveal the download component.
    btn.click(
        fn=verify_emails,
        inputs=input_box,
        outputs=[output_table, summary_text, excel_download]
    ).then(
        fn=lambda: gr.update(visible=True),
        outputs=excel_download
    )

    gr.Markdown("""
    ### Status Meanings:
    - **Real (Confirmed)** → Server confirmed it exists
    - **Likely Real** → Gmail/Outlook/Yahoo/etc. or blocked server → 99% valid in practice
    - **Deliverable (Catch-all)** → Domain accepts everything
    - **Possibly Real** → Inconclusive on custom domain
    - **Fake (Rejected)** → Explicitly doesn't exist
    - **Fake/Invalid** → Bad format or no mail server

    🔒 No emails sent • Runs on Hugging Face (cloud IPs blocked by big providers — hence "Likely Real")
    """)

if __name__ == "__main__":
    demo.launch()