# Viraj0112's picture
# Upload folder using huggingface_hub
# 03a907a verified
import re
# Pragmatic e-mail shape check (deliberately not the full RFC 5322 grammar):
#   * local part: dot-separated runs of letters, digits and "_%+-", so it can
#     neither start nor end with a dot nor contain two dots in a row;
#   * domain: one or more DNS labels (alphanumeric, inner hyphens allowed)
#     followed by an alphabetic top-level domain of at least two letters.
# Neither side can contain "@", so a full match implies exactly one "@".
_EMAIL_PATTERN = re.compile(
    r"[A-Za-z0-9_%+-]+(?:\.[A-Za-z0-9_%+-]+)*"
    r"@"
    r"(?:[A-Za-z0-9](?:[A-Za-z0-9-]*[A-Za-z0-9])?\.)+"
    r"[A-Za-z]{2,}"
)


def parse_and_validate_emails(
    email_list: list[str], domains_allowed: set[str]
) -> dict:
    """Validate e-mail addresses against a set of allowed domains.

    Each entry is stripped of surrounding whitespace, checked against a
    pragmatic e-mail pattern, and then accepted only if its domain is one
    of ``domains_allowed``. Domains are compared case-insensitively.

    Args:
        email_list: Candidate addresses, e.g.
            ``["user@example.com", "admin@test.org"]``.
        domains_allowed: Allowed domains, e.g. ``{"example.com", "test.org"}``.

    Returns:
        A dict with three keys:

        - ``'valid_emails'``: accepted addresses, whitespace-stripped, in
          input order.
        - ``'invalid_emails'``: rejected entries (bad format, domain not
          allowed, or not a string), exactly as they were passed in.
        - ``'domain_counts'``: dict mapping each lower-cased domain to the
          number of valid addresses that use it.
    """
    # Normalise once, outside the loop, so every lookup is case-insensitive.
    allowed = {domain.lower() for domain in domains_allowed}

    valid_emails: list[str] = []
    invalid_emails: list = []
    domain_counts: dict[str, int] = {}

    for raw in email_list:
        # Non-string entries cannot be e-mails; report them instead of crashing.
        email = raw.strip() if isinstance(raw, str) else None

        # fullmatch (rather than match + "$") also rejects a trailing newline.
        if email is None or not _EMAIL_PATTERN.fullmatch(email):
            invalid_emails.append(raw)
            continue

        # Take everything after the LAST "@" so the domain is always the
        # final segment of the address.
        domain = email.rpartition("@")[2].lower()
        if domain not in allowed:
            invalid_emails.append(raw)
            continue

        valid_emails.append(email)
        domain_counts[domain] = domain_counts.get(domain, 0) + 1

    return {
        'valid_emails': valid_emails,
        'invalid_emails': invalid_emails,
        'domain_counts': domain_counts,
    }