zanegraper committed on
Commit
2f92c87
·
1 Parent(s): ef3a8a7

Updated Combined Program

Browse files
Files changed (2) hide show
  1. combined_email_finder.py +47 -0
  2. emailfinder_wrapper.py +9 -4
combined_email_finder.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # combined_email_finder.py
2
+
3
+ from emailfinder.core import processing
4
+ from io import StringIO
5
+ import sys
6
+ import requests
7
+ import re
8
+
9
+ from email_finder import find_emails
10
+ from emailfinder_wrapper import run_emailfinder
11
+
12
+ # FILTERING FUNCTION
13
# Prefixes that mark an address as a template/placeholder (e.g. "jdoe@",
# "First.Last@"). '22' and 'u0027' are presumably residue of an escaped
# quote character glued to the address -- TODO confirm against real input.
_PLACEHOLDER_PREFIXES = (
    '22', 'u0027', 'jsmith', 'jdoe', 'jane.doe', 'First',
    'John.Doe', 'FLast', 'doe', 'johnsmith', 'janedoe',
    'Last', 'j-doe', 'LFirst', 'Jane.Doe', 'Doe',
    'John_Smith', 'JSmith', 'JDoe', 'j_doe', 'J.Smith',
    'JohnSmith', 'John_Doe', 'j.doe', 'Smith.John',
    'jane@', 'd_jane', 'd-jane', 'jane_doe', 'jane.d', 'john.d',
)

# Leading residue of a "\u003X" escape (5 characters once the backslash is
# lost, e.g. "u003e" for ">") or of "\x3e" (3 characters).
_ESCAPE_RESIDUE = re.compile(r'^(?:u003[0-9a-fA-F]|x3e)')

# An address whose local part is a single ASCII letter, e.g. "a@example.com".
_SINGLE_LETTER_LOCAL = re.compile(r'^[a-zA-Z]@')


def filter_emails(email_list):
    """Drop placeholder-looking addresses and strip escape residue.

    Each address is processed in three steps:

    1. It is discarded if it starts with one of ``_PLACEHOLDER_PREFIXES``.
    2. A leading ``u003X`` (X = hex digit) or ``x3e`` fragment is removed.
       The whole 5-character ``u003X`` fragment is removed; removing only
       4 characters would leave the trailing hex digit stuck to the
       address (``u003ealice@...`` -> ``ealice@...``).
    3. It is discarded if, after step 2, its local part is one letter.

    Args:
        email_list: Iterable of address strings.

    Returns:
        A new list of the surviving (cleaned) addresses, in input order.
        Duplicates are kept.
    """
    cleaned_emails = []
    for email in email_list:
        if email.startswith(_PLACEHOLDER_PREFIXES):
            continue
        # The placeholder check above intentionally runs on the raw string,
        # before the residue is stripped.
        email = _ESCAPE_RESIDUE.sub('', email, count=1)
        if _SINGLE_LETTER_LOCAL.match(email):
            continue
        cleaned_emails.append(email)
    return cleaned_emails
36
+
37
+ # METHOD COMBINATION
38
+
39
def get_combined_emails(domain):
    """Return a merged, de-duplicated list of results from both finders.

    Calls ``find_emails(domain)`` and ``run_emailfinder(domain)`` and unions
    their results. Entries are not validated here, so anything either finder
    returns is merged as-is.

    Args:
        domain: The domain passed unchanged to both finders.

    Returns:
        A sorted list of the unique entries, or ``["No emails found."]``
        when both finders return nothing.
    """
    # `or ()` keeps a None/empty result from one finder from breaking set().
    direct_emails = find_emails(domain) or ()
    finder_emails = run_emailfinder(domain) or ()

    combined = set(direct_emails) | set(finder_emails)
    if not combined:
        return ["No emails found."]
    # Sets iterate in arbitrary order; sort so repeated calls with the same
    # inputs return the same list.
    return sorted(combined)
emailfinder_wrapper.py CHANGED
@@ -1,19 +1,24 @@
1
  # emailfinder_wrapper.py
 
2
  from emailfinder.core import processing
3
  from io import StringIO
4
  import sys
 
5
 
6
  def run_emailfinder(domain):
7
  # Redirect stdout temporarily
8
  old_stdout = sys.stdout
9
  sys.stdout = mystdout = StringIO()
 
10
 
11
  try:
12
  processing(domain, proxies=None)
13
  output = mystdout.getvalue()
 
14
  except Exception as e:
15
- output = f"Error: {e}"
 
 
 
16
 
17
- # Restore stdout
18
- sys.stdout = old_stdout
19
- return output
 
1
  # emailfinder_wrapper.py
2
+
3
  from emailfinder.core import processing
4
  from io import StringIO
5
  import sys
6
+ import re
7
 
8
  def run_emailfinder(domain):
9
  # Redirect stdout temporarily
10
  old_stdout = sys.stdout
11
  sys.stdout = mystdout = StringIO()
12
+ results = []
13
 
14
  try:
15
  processing(domain, proxies=None)
16
  output = mystdout.getvalue()
17
+ results = re.findall(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-z]{2,}", output)
18
  except Exception as e:
19
+ results = [f"EmailFinder error: {e}"]
20
+
21
+ finally:
22
+ sys.stdout = old_stdout
23
 
24
+ return results