| from datetime import datetime
|
| from bson import ObjectId
|
| import re
|
| import copy
|
| import random
|
| import string
|
|
|
# Matches an e-mail address; capture group 1 is the domain after the '@'.
EMAIL_REGEX = re.compile(r'[A-Za-z0-9._%+-]+@([A-Za-z0-9.-]+\.[A-Za-z]{2,})')

# Matches, case-insensitively, an http(s)/ftp URL or a bare "www." host that
# contains at least one dot, together with any trailing non-whitespace text.
URL_REGEX = re.compile(r'(?i)\b((?:https?://|ftp://|www\.)[a-z0-9-]+(\.[a-z0-9-]+)+[^\s]*)')

# Matches text holding at least three runs of digits. The nine groups alternate
# non-digit / digit runs; the fourth digit run (group 8) may be empty.
PHONE_PARTS_REGEX = re.compile(r'(\D*)(\d+)(\D*)(\d+)(\D*)(\d+)(\D*)(\d*)(\D*)')
|
|
|
|
|
def rand_name():
    """Return a random lowercase first name followed by three random digits."""
    first_names = ['john', 'jane', 'alex', 'mike', 'sara', 'chris', 'emma', 'liam']
    # The name is drawn before the digits, so the RNG is consumed in that order.
    picked = random.choice(first_names)
    suffix = random.choices(string.digits, k=3)
    return picked + ''.join(suffix)
|
|
|
def fake_email(match):
    """Build a replacement address with a random local part.

    The domain is taken from capture group 1 of ``match`` and kept unchanged;
    only the part before the '@' is replaced by a ``rand_name()`` value.
    """
    kept_domain = match.group(1)
    return '@'.join((rand_name(), kept_domain))
|
|
|
def fake_url(match):
    """Return the fixed placeholder URL; the matched text is ignored."""
    placeholder = 'https://example.com'
    return placeholder
|
|
|
def fake_phone(match):
    """Replace every digit run of a phone-like match with random digits.

    Non-digit groups (spaces, '+', '-', parentheses, ...) are copied through
    unchanged, and each digit group keeps its original length.

    Example: '+1 555-123-4567' -> '+1 987-654-3210'

    Args:
        match: A regex match whose capture groups hold the non-digit and
            digit runs of the text (e.g. a PHONE_PARTS_REGEX match).

    Returns:
        The concatenation of all groups, with decimal-digit groups replaced
        by random ASCII digits of the same length.
    """
    rebuilt = []
    for part in match.groups():
        # isdecimal() mirrors what regex \d matches. isdigit() is broader: it
        # is also true for characters such as superscript digits, which \D
        # captured as punctuation and which must therefore be preserved.
        if part and part.isdecimal():
            rebuilt.append(''.join(random.choices(string.digits, k=len(part))))
        else:
            # Unmatched optional groups are None; treat them as empty text.
            rebuilt.append(part or '')
    return ''.join(rebuilt)
|
|
|
|
|
|
|
def anonymize_value(val):
    """Anonymize e-mail addresses, URLs and digit runs in a scalar value.

    Args:
        val: Any value. Only ``str`` and non-bool ``int`` values are
            processed; everything else is returned untouched.

    Returns:
        * ``val`` itself when it is not a ``str``/``int`` (or is a ``bool``);
        * an ``int`` when ``val`` is an ``int`` (sign preserved; digits are
          randomized only when PHONE_PARTS_REGEX matches its text);
        * otherwise the anonymized string.
    """
    # bool is a subclass of int; without this guard True/False would be
    # stringified and returned as 'True'/'False'.
    if isinstance(val, bool) or not isinstance(val, (str, int)):
        return val

    text = str(val)

    # E-mails are replaced before URLs. In the opposite order, an address such
    # as 'john@www.example.com' has its domain rewritten to a URL first, after
    # which the e-mail pattern no longer matches and the real local part
    # survives. sub() returns the input unchanged when nothing matches, so no
    # separate search() is needed.
    text = EMAIL_REGEX.sub(fake_email, text)
    text = URL_REGEX.sub(fake_url, text)
    text = PHONE_PARTS_REGEX.sub(fake_phone, text)

    if isinstance(val, int):
        # The text of an int is an optional '-' followed by digits, and the
        # substitutions above only swap digits for digits, so int() always
        # succeeds here -- including for negative numbers.
        return int(text)
    return text
|
|
|
|
|
def anonymize_deep(obj):
    """Recursively anonymize the leaf values of nested dicts and lists.

    Dict keys are kept as they are. Anything that is neither a dict nor a
    list is passed to ``anonymize_value``. New containers are built; the
    input containers are not modified.
    """
    if isinstance(obj, list):
        cleaned_items = []
        for item in obj:
            cleaned_items.append(anonymize_deep(item))
        return cleaned_items

    if isinstance(obj, dict):
        cleaned = {}
        for key, value in obj.items():
            cleaned[key] = anonymize_deep(value)
        return cleaned

    return anonymize_value(obj)
|
|
|
|
|
|
|
|
|
|
|
|
|