Spaces:
Sleeping
Sleeping
Commit ·
26082dc
1
Parent(s): 377a2c0
Updated
Browse files
__pycache__/email_finder.cpython-311.pyc
ADDED
|
Binary file (2.89 kB). View file
|
|
|
__pycache__/emailfinder_wrapper.cpython-311.pyc
ADDED
|
Binary file (1.18 kB). View file
|
|
|
email_finder.py
CHANGED
|
@@ -1,12 +1,56 @@
|
|
| 1 |
# email_finder.py
|
|
|
|
| 2 |
import requests
|
| 3 |
import re
|
|
|
|
|
|
|
|
|
|
| 4 |
def find_emails(domain):
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# email_finder.py
|
| 2 |
+
|
| 3 |
import requests
|
| 4 |
import re
|
| 5 |
+
from urllib.parse import urljoin
|
| 6 |
+
from bs4 import BeautifulSoup
|
| 7 |
+
|
| 8 |
def find_emails(domain):
    """Scrape publicly visible email addresses from a site's common pages.

    Fetches the homepage plus likely contact-style paths (/about, /contact,
    /team, ...), harvests anything matching an email pattern, then follows
    in-page links whose href looks contact-related and harvests those pages
    too (one level deep).

    Args:
        domain: Bare host name, e.g. "example.com" (no scheme); "https://"
            is prepended here.

    Returns:
        A list of unique email addresses found, or the sentinel list
        ["No emails found."] when nothing was discovered (kept for
        backward compatibility with existing callers).
    """
    base_url = f"https://{domain}"
    candidate_paths = ["", "about", "contact", "team", "staff", "leadership", "info"]
    visited_pages = set()
    found_emails = set()

    headers = {
        "User-Agent": "Mozilla/5.0 (compatible; EmailFinderBot/1.0; +https://example.com/bot)"
    }

    # Compile once instead of letting re.findall re-fetch the pattern on every
    # page. IGNORECASE also catches upper-case TLDs (e.g. USER@HOST.COM),
    # which the original lowercase-only [a-z]{2,} class missed.
    email_re = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-z]{2,}", re.IGNORECASE)
    # Href keywords suggesting a page likely to list people/contacts;
    # hoisted so the tuple is not rebuilt for every link.
    contact_words = ("about", "contact", "team", "staff", "leadership")

    for path in candidate_paths:
        url = urljoin(base_url, path)
        if url in visited_pages:
            continue
        visited_pages.add(url)
        try:
            response = requests.get(url, timeout=10, headers=headers)
            if response.status_code != 200:
                continue
            content = response.text

            # Extract emails from the page itself.
            found_emails.update(email_re.findall(content))

            # Follow nested about/contact/team-style links in case the site
            # keeps contact info one level deeper.
            soup = BeautifulSoup(content, "html.parser")
            for link in soup.find_all("a", href=True):
                href = link["href"]
                if not any(word in href.lower() for word in contact_words):
                    continue
                sub_url = urljoin(base_url, href)
                if sub_url in visited_pages:
                    continue
                visited_pages.add(sub_url)
                try:
                    sub_resp = requests.get(sub_url, timeout=10, headers=headers)
                    # Mirror the top-level status check: the original parsed
                    # 404/500 error bodies here, which can yield junk matches.
                    if sub_resp.status_code != 200:
                        continue
                    found_emails.update(email_re.findall(sub_resp.text))
                except requests.RequestException:
                    # Best-effort crawl: an unreachable subpage is not fatal.
                    continue

        except Exception:
            # Deliberate best-effort: network errors, bad TLS, or a parser
            # failure on one page should not abort the whole crawl.
            continue

    # Sentinel string preserved for callers that display the list directly.
    return list(found_emails) if found_emails else ["No emails found."]