Spaces:
Sleeping
Sleeping
# app.py - Advanced Email Verifier (Optimized for 2026 Cloud Limitations + Excel Export)
| import re | |
| import socket | |
| import smtplib | |
| import pandas as pd | |
| import random | |
| import string | |
| import tempfile | |
| import os | |
| from typing import Dict | |
| import gradio as gr | |
| import dns.resolver | |
# Cache for domain catch-all status.
# Maps a domain name to the catch-all verdict computed for it; it is read and
# written by is_catchall_domain() and lives for the lifetime of the process.
domain_catchall_cache: Dict[str, bool] = {}
# Local part: dot-separated atoms (no leading, trailing or doubled dots).
# Domain: dot-separated hostname labels that neither start nor end with a
# hyphen, followed by an alphabetic top-level domain of two or more letters.
_EMAIL_PATTERN = re.compile(
    r"[a-zA-Z0-9_%+-]+(?:\.[a-zA-Z0-9_%+-]+)*"
    r"@"
    r"(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\.)+"
    r"[a-zA-Z]{2,}"
)


def is_syntax_valid(email: str) -> bool:
    """Return True when *email* looks like a well-formed address.

    Surrounding whitespace is ignored. Compared with a plain character-class
    check, this also rejects empty labels and atoms such as
    ``user@example..com``, ``.user@example.com`` and ``user@-example.com``,
    which can never be delivered and would only waste a DNS lookup.

    Args:
        email: The candidate address.

    Returns:
        True if the whole (stripped) string matches the address pattern.
    """
    return _EMAIL_PATTERN.fullmatch(email.strip()) is not None
def get_mx_host(domain: str) -> str | None:
    """Look up the preferred mail exchanger for *domain*.

    Args:
        domain: The domain part of an email address.

    Returns:
        The exchange host name of the MX record with the lowest preference
        value, without its trailing dot, or None when the answer is empty or
        the lookup raises for any reason.
    """
    try:
        records = dns.resolver.resolve(domain, 'MX')
        if not records:
            return None
        # The lowest preference number is the most preferred exchanger.
        preferred = min(records, key=lambda record: record.preference)
        return str(preferred.exchange).rstrip('.')
    except Exception:
        return None
def smtp_rcpt_check(mx_host: str, email: str) -> str:
    """Ask *mx_host* whether it would accept mail for *email*.

    Opens an SMTP session on port 25, issues EHLO/HELO, MAIL FROM and
    RCPT TO, and classifies the RCPT reply. No message data is sent.

    Args:
        mx_host: Host name of the mail exchanger to contact.
        email: The recipient address to probe.

    Returns:
        'true' if the server replied 250 or 251 to RCPT, 'false' if it
        replied with a 5xx code, and 'unknown' for any other reply, when the
        envelope sender was refused, or when the session failed.
    """
    try:
        with smtplib.SMTP(mx_host, port=25, timeout=15) as server:
            server.ehlo_or_helo_if_needed()
            mail_code, _ = server.mail('verifier@example.com')
            if mail_code != 250:
                # The server refused our envelope sender. A following RCPT
                # would only yield "503 bad sequence", which says nothing
                # about the mailbox, so do not report it as rejected.
                return "unknown"
            # SMTP.rcpt() reports refusals through the reply code; it does
            # not raise SMTPRecipientsRefused (only sendmail() does).
            code, _ = server.rcpt(email)
    except Exception:
        # Best effort: connection, timeout and protocol errors are all
        # treated as "could not determine".
        return "unknown"

    if code in (250, 251):
        return "true"
    if code >= 500:
        return "false"
    return "unknown"
def is_catchall_domain(domain: str, mx_host: str) -> bool:
    """Detect whether *domain* accepts mail for arbitrary local parts.

    Two random 16-character mailboxes are probed with smtp_rcpt_check(); the
    domain is considered catch-all only if both are accepted.

    Only definitive outcomes are stored in domain_catchall_cache. When a
    probe comes back 'unknown' (timeout, temporary failure, blocked
    connection) the function answers False for this call but does not cache
    it, so a transient error cannot label the domain for the rest of the
    process.

    Args:
        domain: Domain to test.
        mx_host: Mail exchanger to contact for that domain.

    Returns:
        True if both random recipients were accepted, otherwise False.
    """
    cached = domain_catchall_cache.get(domain)
    if cached is not None:
        return cached

    alphabet = string.ascii_lowercase + string.digits
    for _ in range(2):
        random_local = ''.join(random.choices(alphabet, k=16))
        outcome = smtp_rcpt_check(mx_host, f"{random_local}@{domain}")
        if outcome == "true":
            continue
        if outcome == "false":
            # An explicit rejection of a random mailbox is conclusive.
            domain_catchall_cache[domain] = False
        # Either rejected or inconclusive: no need for a second probe.
        return False

    domain_catchall_cache[domain] = True
    return True
# Mailbox providers this tool reports as "Likely Real" when their servers
# cannot be probed or give an inconclusive answer.
_BIG_PROVIDERS = frozenset({
    "gmail.com", "googlemail.com",
    "outlook.com", "hotmail.com", "live.com", "msn.com",
    "yahoo.com", "ymail.com", "rocketmail.com",
    "icloud.com", "me.com", "mac.com",
    "protonmail.com", "proton.me",
    "aol.com",
})


def _is_big_provider(domain: str) -> bool:
    """Return True when *domain* is a listed provider or a subdomain of one."""
    if domain in _BIG_PROVIDERS:
        return True
    # Match on a label boundary so "awesome.com" is not mistaken for "me.com".
    return any(domain.endswith("." + provider) for provider in _BIG_PROVIDERS)


def _can_reach_smtp(mx_host: str) -> bool:
    """Return True when a TCP connection to port 25 of *mx_host* succeeds."""
    try:
        # The context manager closes the probe socket immediately.
        with socket.create_connection((mx_host, 25), timeout=10):
            return True
    except (OSError, ValueError):
        # OSError covers refusals, timeouts and resolution failures;
        # ValueError covers host names that cannot be encoded.
        return False


def verify_email(email: str) -> Dict:
    """Classify a single email address.

    The address is normalised (stripped, lower-cased) and then checked in
    order: syntax, MX lookup, TCP reachability of the mail server, and an
    SMTP RCPT probe. An accepted RCPT is cross-checked against catch-all
    detection, because a server that accepts every recipient does not
    confirm that this particular mailbox exists.

    Args:
        email: The address to verify.

    Returns:
        A dict with the keys 'email' (normalised address), 'status' (one of
        'Fake/Invalid', 'Likely Real', 'Likely Fake', 'Real (Confirmed)',
        'Fake (Rejected)', 'Deliverable (Catch-all)', 'Possibly Real') and
        'comments' (a short human-readable explanation).
    """
    email = email.strip().lower()
    result = {
        "email": email,
        "status": "Fake/Invalid",
        "comments": ""
    }

    if not is_syntax_valid(email):
        result["comments"] = "Invalid email format"
        return result

    domain = email.split('@')[1]
    mx_host = get_mx_host(domain)
    if not mx_host:
        result["comments"] = "No MX record - domain doesn't accept emails"
        return result

    is_big_provider = _is_big_provider(domain)

    if not _can_reach_smtp(mx_host):
        if is_big_provider:
            result["status"] = "Likely Real"
            result["comments"] = "Major provider (Gmail/Outlook/etc.) blocks cloud verifiers — almost always valid"
        else:
            result["status"] = "Likely Fake"
            result["comments"] = "Mail server unreachable (may be blocked or down)"
        return result

    # Server is reachable: do the full SMTP check.
    mailbox_status = smtp_rcpt_check(mx_host, email)

    if mailbox_status == "true":
        # An acceptance only confirms the mailbox if the server would have
        # rejected a made-up recipient.
        if is_catchall_domain(domain, mx_host):
            result["status"] = "Deliverable (Catch-all)"
            result["comments"] = "Domain accepts all emails — deliverable but existence unknown"
        else:
            result["status"] = "Real (Confirmed)"
            result["comments"] = "Server confirmed mailbox exists"
    elif mailbox_status == "false":
        result["status"] = "Fake (Rejected)"
        result["comments"] = "Server rejected - mailbox does not exist"
    elif is_big_provider:
        result["status"] = "Likely Real"
        result["comments"] = "Inconclusive (major providers block deep verification)"
    elif is_catchall_domain(domain, mx_host):
        result["status"] = "Deliverable (Catch-all)"
        result["comments"] = "Domain accepts all emails — deliverable but existence unknown"
    else:
        result["status"] = "Possibly Real"
        result["comments"] = "Inconclusive - server didn't confirm or reject"

    return result
| def verify_emails(input_text: str): | |
| emails = [e.strip() for e in input_text.splitlines() if e.strip()] | |
| if not emails: | |
| return None, "β οΈ Please enter at least one email.", None | |
| results = [verify_email(email) for email in emails] | |
| df = pd.DataFrame(results) | |
| # Count deliverable/likely | |
| good_statuses = df["status"].str.contains("Real|Deliverable|Likely", case=False) | |
| deliverable = len(df[good_statuses]) | |
| invalid = len(df) - deliverable | |
| summary = f"Processed: {len(emails)} | Deliverable/Likely Real: {deliverable} | Fake/Invalid: {invalid}" | |
| # Excel export | |
| df_to_save = df[["email", "status", "comments"]] | |
| temp_dir = tempfile.gettempdir() | |
| excel_filename = "email_verification_results.xlsx" | |
| excel_path = os.path.join(temp_dir, excel_filename) | |
| df_to_save.to_excel(excel_path, index=False, sheet_name="Results") | |
| return df_to_save, summary, excel_path | |
# Gradio Interface
# Layout: instructions, input box, verify button, results table, summary line
# and a download slot for the Excel export, followed by a status legend.
with gr.Blocks(title="Advanced Email Verifier 2026", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🛡️ Advanced Email Verifier (2026 Edition)")
    gr.Markdown("""
    **Works great on custom/business domains**
    For Gmail, Outlook, Yahoo, etc.: Shows "Likely Real" because these providers block cloud-based verification tools (common in 2026).
    Paste emails (one per line) → get accurate results + Excel download.
    """)

    input_box = gr.Textbox(
        label="Emails to Verify",
        lines=15,
        placeholder="example@gmail.com\ninfo@business.com",
        info="One email per line"
    )
    btn = gr.Button("🔍 Verify Emails", variant="primary", size="lg")

    output_table = gr.Dataframe(
        label="Verification Results",
        headers=["Email", "Status", "Comments"],
        datatype=["str", "str", "str"],
        wrap=True
    )
    summary_text = gr.Textbox(label="Summary", interactive=False)
    # Hidden until the first verification run has finished.
    excel_download = gr.File(
        label="📥 Download Results as Excel (.xlsx)",
        visible=False
    )

    # Run the verification, then reveal the download component.
    btn.click(
        fn=verify_emails,
        inputs=input_box,
        outputs=[output_table, summary_text, excel_download]
    ).then(
        fn=lambda: gr.update(visible=True),
        outputs=excel_download
    )

    gr.Markdown("""
    ### Status Meanings:
    - **Real (Confirmed)** — Server confirmed it exists
    - **Likely Real** — Gmail/Outlook/Yahoo/etc. or blocked server → 99% valid in practice
    - **Deliverable (Catch-all)** — Domain accepts everything
    - **Possibly Real** — Inconclusive on custom domain
    - **Fake (Rejected)** — Explicitly doesn't exist
    - **Fake/Invalid** — Bad format or no mail server
    🔒 No emails sent • Runs on Hugging Face (cloud IPs blocked by big providers → hence "Likely Real")
    """)

# Launch only when executed as a script, so importing this module (for
# example from a test) does not start a web server.
if __name__ == "__main__":
    demo.launch()