Spaces:
Sleeping
Sleeping
Commit
·
2f92c87
1
Parent(s):
ef3a8a7
Updated Combined Program
Browse files- combined_email_finder.py +47 -0
- emailfinder_wrapper.py +9 -4
combined_email_finder.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# combined_email_finder.py
|
| 2 |
+
|
| 3 |
+
from emailfinder.core import processing
|
| 4 |
+
from io import StringIO
|
| 5 |
+
import sys
|
| 6 |
+
import requests
|
| 7 |
+
import re
|
| 8 |
+
|
| 9 |
+
from email_finder import find_emails
|
| 10 |
+
from emailfinder_wrapper import run_emailfinder
|
| 11 |
+
|
| 12 |
+
# FILTERING FUNCTION
|
| 13 |
+
def filter_emails(email_list):
    """Drop template/placeholder addresses and tidy up the remaining ones.

    Each address is processed in three steps:

    1. It is discarded if it starts with one of the known placeholder
       prefixes (``jdoe``, ``John.Doe``, ``First``, ...).
    2. A leading ``u003`` (four characters) or ``x3e`` (three characters)
       is stripped.  NOTE(review): these look like remnants of escaped
       characters in scraped markup -- confirm against the scraper output.
    3. It is discarded if, after stripping, the local part is a single
       ASCII letter (e.g. ``a@example.com``).

    Args:
        email_list: Iterable of address strings.

    Returns:
        A new list with the surviving addresses, in their input order.
        Duplicates are preserved.
    """
    placeholder_prefixes = (
        '22', 'u0027', 'jsmith', 'jdoe', 'jane.doe', 'First',
        'John.Doe', 'FLast', 'doe', 'johnsmith', 'janedoe',
        'Last', 'j-doe', 'LFirst', 'Jane.Doe', 'Doe',
        'John_Smith', 'JSmith', 'JDoe', 'j_doe', 'J.Smith',
        'JohnSmith', 'John_Doe', 'j.doe', 'Smith.John',
        'jane@', 'd_jane', 'd-jane', 'jane_doe', 'jane.d', 'john.d',
    )

    cleaned_emails = []
    for email in email_list:
        # The placeholder check runs on the raw address, before any
        # prefix stripping below.
        if email.startswith(placeholder_prefixes):
            continue

        if email.startswith('u003'):
            email = email[4:]
        elif email.startswith('x3e'):
            email = email[3:]

        # Single-letter local parts are rejected after stripping, so an
        # address reduced to e.g. "a@example.com" is dropped as well.
        if re.match(r'^[a-zA-Z]@', email):
            continue

        cleaned_emails.append(email)

    return cleaned_emails
|
| 36 |
+
|
| 37 |
+
# METHOD COMBINATION
|
| 38 |
+
|
| 39 |
+
def get_combined_emails(domain):
    """Return a merged, deduplicated list of emails from both methods.

    Calls ``find_emails(domain)`` and ``run_emailfinder(domain)`` and takes
    the union of their results.

    Args:
        domain: Domain passed unchanged to both lookup functions.

    Returns:
        A sorted list of the distinct entries returned by either method, or
        ``["No emails found."]`` when both methods return nothing.  Entries
        are passed through as-is; no filtering is applied here.
    """
    direct_emails = find_emails(domain)
    finder_emails = run_emailfinder(domain)

    combined = set(direct_emails) | set(finder_emails)
    if not combined:
        return ["No emails found."]

    # Set iteration order is arbitrary and varies between runs; sort so the
    # caller gets a stable, reproducible list.
    return sorted(combined)
|
emailfinder_wrapper.py
CHANGED
|
@@ -1,19 +1,24 @@
|
|
| 1 |
# emailfinder_wrapper.py
|
|
|
|
| 2 |
from emailfinder.core import processing
|
| 3 |
from io import StringIO
|
| 4 |
import sys
|
|
|
|
| 5 |
|
| 6 |
def run_emailfinder(domain):
|
| 7 |
# Redirect stdout temporarily
|
| 8 |
old_stdout = sys.stdout
|
| 9 |
sys.stdout = mystdout = StringIO()
|
|
|
|
| 10 |
|
| 11 |
try:
|
| 12 |
processing(domain, proxies=None)
|
| 13 |
output = mystdout.getvalue()
|
|
|
|
| 14 |
except Exception as e:
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
-
|
| 18 |
-
sys.stdout = old_stdout
|
| 19 |
-
return output
|
|
|
|
| 1 |
# emailfinder_wrapper.py
|
| 2 |
+
|
| 3 |
from emailfinder.core import processing
|
| 4 |
from io import StringIO
|
| 5 |
import sys
|
| 6 |
+
import re
|
| 7 |
|
| 8 |
def run_emailfinder(domain):
    """Run EmailFinder for ``domain`` and return the addresses it printed.

    ``processing`` is called with stdout redirected into an in-memory
    buffer; the captured text is then scanned for email-shaped substrings.

    Args:
        domain: Domain passed to ``processing``.

    Returns:
        A list of the email-like strings found in the captured output (in
        order of appearance, duplicates included).  If ``processing`` raises,
        a single-element list ``["EmailFinder error: <message>"]`` is
        returned instead.
    """
    from contextlib import redirect_stdout

    captured = StringIO()
    try:
        # redirect_stdout restores sys.stdout on exit, including when
        # processing() raises.
        with redirect_stdout(captured):
            processing(domain, proxies=None)
    except Exception as e:
        return [f"EmailFinder error: {e}"]

    # The TLD class accepts both cases, matching the rest of the pattern;
    # a lowercase-only TLD would miss "user@EXAMPLE.COM" and truncate
    # "user@sub.example.COM" to "user@sub.example".
    return re.findall(
        r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
        captured.getvalue(),
    )
|
|
|
|
|
|