"""Regex patterns and column-name heuristics for PII detection.

Used by catalog/pii_detector.py at ingestion time. Default policy:
when in doubt, set pii_flag=True. False positives cost nothing; false
negatives leak data.
"""


import re


# Lowercase column-name tokens treated as PII indicators.
# NOTE(review): how the detector compares a column name against these
# tokens (exact, substring, case-folded) is decided by the caller and is
# not visible here -- confirm before adding short tokens such as "tin".
PII_NAME_PATTERNS = frozenset({
    # Contact details.
    "email",
    "phone", "mobile", "telp", "telephone",
    # Identifier-style tokens (presumably government / tax ID numbers;
    # "ktp" and "nik" look like Indonesian ID terms -- TODO confirm).
    "ssn", "tin", "passport", "ktp", "nik",
    # Personal names.
    "name", "fullname", "first_name", "last_name", "surname",
    # Postal address parts.
    "address", "street", "zipcode", "postal",
    # Date of birth.
    "birthdate", "dob", "birthday",
})


# Whole-value email shape: one or more of [word chars . + -], an "@",
# one or more of [word chars -], a literal ".", then one or more of
# [word chars . -]. Anchored at both ends; note that "$" also matches
# just before a single trailing newline.
EMAIL_REGEX = re.compile(r"^[\w.+-]+@[\w-]+\.[\w.-]+$")

# Whole-value phone shape: optional leading "+", then at least seven
# characters drawn from digits, whitespace, "-", "(" and ")". The pattern
# does not require any digit, so it is deliberately loose -- in line with
# the flag-when-in-doubt policy in the module docstring.
PHONE_REGEX = re.compile(r"^\+?[\d\s\-()]{7,}$")