Email_Verifier / app.py
Amaanali01's picture
Update app.py
e09d341 verified
# app.py - Advanced Email Verifier (Optimized for 2026 Cloud Limitations + Excel Export)
import re
import socket
import smtplib
import pandas as pd
import random
import string
import tempfile
import os
from typing import Dict
import gradio as gr
import dns.resolver
# Process-wide cache: domain -> whether it was detected as catch-all.
# Filled and read by is_catchall_domain(); entries are never evicted here,
# so a verdict sticks for the lifetime of the process.
domain_catchall_cache: Dict[str, bool] = {}
# Dot-separated atoms for the local part (no leading/trailing/double dots),
# then one or more DNS labels (alphanumeric at both ends, at most 63 chars)
# and an alphabetic TLD of at least two letters.
_EMAIL_PATTERN = re.compile(
    r"[a-zA-Z0-9_%+-]+(?:\.[a-zA-Z0-9_%+-]+)*"
    r"@"
    r"(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+"
    r"[a-zA-Z]{2,}"
)

# Length limits from RFC 5321 section 4.5.3.1.
_MAX_LOCAL_PART_LENGTH = 64
_MAX_ADDRESS_LENGTH = 254


def is_syntax_valid(email: str) -> bool:
    """Return True when *email* looks like a well-formed ``local@domain`` address.

    Surrounding whitespace is ignored. Compared with a plain character-class
    check this also rejects addresses that can never be delivered but would
    otherwise reach the DNS/SMTP stages: empty local-part atoms
    (``a..b@x.com``, ``.a@x.com``), empty domain labels (``a@x..com``),
    labels starting or ending with a hyphen, and over-long addresses.

    Args:
        email: Candidate address, possibly padded with whitespace.

    Returns:
        True if the address is syntactically acceptable, False otherwise.
    """
    candidate = email.strip()
    if len(candidate) > _MAX_ADDRESS_LENGTH:
        return False
    if _EMAIL_PATTERN.fullmatch(candidate) is None:
        return False
    # The pattern guarantees exactly one "@", so partition is unambiguous.
    local_part = candidate.partition("@")[0]
    return len(local_part) <= _MAX_LOCAL_PART_LENGTH
def get_mx_host(domain: str) -> str | None:
    """Look up the preferred mail exchanger for *domain*.

    Queries the domain's MX records and picks the one with the lowest
    preference value. The trailing dot of the returned host name is removed.

    Args:
        domain: Domain part of an email address.

    Returns:
        The MX host name, or None when the lookup yields no records or
        fails for any reason.
    """
    try:
        records = dns.resolver.resolve(domain, 'MX')
        if not records:
            return None
        # Lowest preference value wins; ties keep the first record seen.
        preferred = min(records, key=lambda record: record.preference)
        return str(preferred.exchange).rstrip('.')
    except Exception:
        # Best-effort lookup: any resolver error is reported as "no MX host".
        return None
def smtp_rcpt_check(mx_host: str, email: str) -> str:
    """Ask *mx_host* whether it would accept mail for *email*.

    Opens an SMTP session on port 25, announces a sender with MAIL FROM and
    issues RCPT TO for the address. No message data is sent.

    Args:
        mx_host: Mail exchanger to connect to.
        email: Recipient address to probe.

    Returns:
        ``"true"`` when the server accepts the recipient (250/251),
        ``"false"`` when it permanently rejects the recipient (5xx), and
        ``"unknown"`` for everything else: temporary replies, a refused
        sender, or any connection/protocol error.
    """
    try:
        with smtplib.SMTP(mx_host, port=25, timeout=15) as server:
            server.ehlo_or_helo_if_needed()
            mail_code, _ = server.mail('verifier@example.com')
            if mail_code != 250:
                # The server refused our sender (policy/IP block). A following
                # RCPT would fail with a 5xx sequencing error that says
                # nothing about the mailbox, so do not report it as rejected.
                return "unknown"
            # rcpt() reports refusals through its reply code, not by raising.
            code, _ = server.rcpt(email)
    except Exception:
        # Deliberate best-effort: unreachable hosts, timeouts and protocol
        # errors all mean the probe was inconclusive.
        return "unknown"

    if code in (250, 251):
        return "true"
    if code >= 500:
        return "false"
    return "unknown"
def is_catchall_domain(domain: str, mx_host: str) -> bool:
    """Detect whether *domain* accepts mail for arbitrary local parts.

    Probes up to two random 16-character addresses through
    ``smtp_rcpt_check``; the domain counts as catch-all only if both are
    accepted. The verdict is stored in ``domain_catchall_cache`` and reused
    on later calls for the same domain.

    Args:
        domain: Domain to test.
        mx_host: Mail exchanger used for the probes.

    Returns:
        True when every probe address was accepted, False otherwise.
    """
    if domain in domain_catchall_cache:
        return domain_catchall_cache[domain]

    alphabet = string.ascii_lowercase + string.digits
    is_catchall = True
    for _ in range(2):
        random_local = ''.join(random.choices(alphabet, k=16))
        if smtp_rcpt_check(mx_host, f"{random_local}@{domain}") != "true":
            # One non-accepted probe already decides the outcome; skip the
            # second SMTP session, which could otherwise block on a timeout.
            is_catchall = False
            break

    domain_catchall_cache[domain] = is_catchall
    return is_catchall
# Consumer mail providers for which an unreachable or inconclusive server is
# reported as "Likely Real" instead of fake.
_BIG_PROVIDER_DOMAINS = (
    "gmail.com", "googlemail.com",
    "outlook.com", "hotmail.com", "live.com", "msn.com",
    "yahoo.com", "ymail.com", "rocketmail.com",
    "icloud.com", "me.com", "mac.com",
    "protonmail.com", "proton.me",
    "aol.com",
)


def _is_big_provider(domain: str) -> bool:
    """Return True when *domain* is a listed provider or one of its subdomains."""
    # Match on label boundaries so look-alikes such as "notgmail.com" are
    # not mistaken for "gmail.com".
    return any(
        domain == provider or domain.endswith("." + provider)
        for provider in _BIG_PROVIDER_DOMAINS
    )


def _can_reach_smtp(mx_host: str) -> bool:
    """Return True when a TCP connection to *mx_host* port 25 can be opened."""
    try:
        # The context manager closes the probe socket immediately.
        with socket.create_connection((mx_host, 25), timeout=10):
            return True
    except (OSError, ValueError):
        return False


def verify_email(email: str) -> Dict:
    """Classify a single email address.

    The address is normalised (stripped, lower-cased) and then checked in
    stages: syntax, MX lookup, TCP reachability of the MX host, and finally
    an SMTP RCPT probe with a catch-all test as fallback.

    Args:
        email: Address to verify.

    Returns:
        A dict with the keys ``"email"`` (normalised address), ``"status"``
        (one of "Fake/Invalid", "Likely Real", "Likely Fake",
        "Real (Confirmed)", "Fake (Rejected)", "Deliverable (Catch-all)",
        "Possibly Real") and ``"comments"`` (human-readable explanation).
    """
    email = email.strip().lower()
    result = {
        "email": email,
        "status": "Fake/Invalid",
        "comments": "",
    }

    if not is_syntax_valid(email):
        result["comments"] = "Invalid email format"
        return result

    domain = email.split('@')[1]
    mx_host = get_mx_host(domain)
    if not mx_host:
        result["comments"] = "No MX record - domain doesn't accept emails"
        return result

    server_reachable = _can_reach_smtp(mx_host)
    is_big_provider = _is_big_provider(domain)

    if not server_reachable:
        if is_big_provider:
            result["status"] = "Likely Real"
            result["comments"] = "Major provider (Gmail/Outlook/etc.) blocks cloud verifiers → almost always valid"
        else:
            result["status"] = "Likely Fake"
            result["comments"] = "Mail server unreachable (may be blocked or down)"
        return result

    # Server is reachable → do the full SMTP check.
    mailbox_status = smtp_rcpt_check(mx_host, email)
    if mailbox_status == "true":
        result["status"] = "Real (Confirmed)"
        result["comments"] = "Server confirmed mailbox exists"
    elif mailbox_status == "false":
        result["status"] = "Fake (Rejected)"
        result["comments"] = "Server rejected - mailbox does not exist"
    elif is_big_provider:
        # Inconclusive probe against a major provider.
        result["status"] = "Likely Real"
        result["comments"] = "Inconclusive (major providers block deep verification)"
    elif is_catchall_domain(domain, mx_host):
        result["status"] = "Deliverable (Catch-all)"
        result["comments"] = "Domain accepts all emails → deliverable but existence unknown"
    else:
        result["status"] = "Possibly Real"
        result["comments"] = "Inconclusive - server didn't confirm or reject"
    return result
def verify_emails(input_text: str):
    """Verify every address in *input_text* and build the UI outputs.

    Args:
        input_text: Addresses separated by newlines; blank lines are
            ignored. ``None`` is treated like empty input.

    Returns:
        A ``(table, summary, excel_path)`` tuple. ``table`` is a DataFrame
        with the columns ``email``, ``status`` and ``comments``; ``summary``
        is a one-line count string; ``excel_path`` points to an ``.xlsx``
        export of the table. When no address was supplied, ``table`` and
        ``excel_path`` are None and ``summary`` holds a warning.
    """
    emails = [line.strip() for line in (input_text or "").splitlines() if line.strip()]
    if not emails:
        return None, "⚠️ Please enter at least one email.", None

    results = [verify_email(email) for email in emails]
    df_to_save = pd.DataFrame(results)[["email", "status", "comments"]]

    # "Likely" is intentionally not part of the pattern: it would also match
    # the negative "Likely Fake" status, while "Likely Real" is still
    # covered by "Real".
    good_statuses = df_to_save["status"].str.contains("Real|Deliverable", case=False)
    deliverable = int(good_statuses.sum())
    invalid = len(df_to_save) - deliverable
    summary = f"Processed: {len(emails)} | Deliverable/Likely Real: {deliverable} | Fake/Invalid: {invalid}"

    # Excel export: each call writes into its own fresh directory so that
    # concurrent requests cannot overwrite each other's results, while the
    # download keeps a stable, friendly file name.
    export_dir = tempfile.mkdtemp(prefix="email_verifier_")
    excel_path = os.path.join(export_dir, "email_verification_results.xlsx")
    df_to_save.to_excel(excel_path, index=False, sheet_name="Results")
    return df_to_save, summary, excel_path
# Gradio interface: a textbox for the addresses, a button that runs
# verify_emails, and three outputs (result table, summary line, Excel file).
with gr.Blocks(title="Advanced Email Verifier 2026", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🛡️ Advanced Email Verifier (2026 Edition)")
    gr.Markdown("""
**Works great on custom/business domains**
For Gmail, Outlook, Yahoo, etc.: Shows "Likely Real" because these providers block cloud-based verification tools (common in 2026).
Paste emails (one per line) → get accurate results + Excel download.
""")
    input_box = gr.Textbox(
        label="Emails to Verify",
        lines=15,
        placeholder="example@gmail.com\ninfo@business.com",
        info="One email per line"
    )
    btn = gr.Button("🚀 Verify Emails", variant="primary", size="lg")
    output_table = gr.Dataframe(
        label="Verification Results",
        headers=["Email", "Status", "Comments"],
        datatype=["str", "str", "str"],
        wrap=True
    )
    summary_text = gr.Textbox(label="Summary", interactive=False)
    # The download component starts hidden and is revealed after the first
    # verification run has finished.
    excel_download = gr.File(
        label="📥 Download Results as Excel (.xlsx)",
        visible=False
    )
    btn.click(
        fn=verify_emails,
        inputs=input_box,
        outputs=[output_table, summary_text, excel_download]
    ).then(
        fn=lambda: gr.update(visible=True),
        outputs=excel_download
    )
    gr.Markdown("""
### Status Meanings:
- **Real (Confirmed)** → Server confirmed it exists
- **Likely Real** → Gmail/Outlook/Yahoo/etc. or blocked server → 99% valid in practice
- **Deliverable (Catch-all)** → Domain accepts everything
- **Possibly Real** → Inconclusive on custom domain
- **Fake (Rejected)** → Explicitly doesn't exist
- **Fake/Invalid** → Bad format or no mail server
🔒 No emails sent • Runs on Hugging Face (cloud IPs blocked by big providers — hence "Likely Real")
""")
demo.launch()