Sparkonix committed on
Commit
7f4c4f8
·
1 Parent(s): 5e08f57

change regex for phone numbers

Browse files
Files changed (1) hide show
  1. utils.py +6 -1
utils.py CHANGED
@@ -50,7 +50,12 @@ class PIIMasker:
50
  # Define regex patterns for different entity types
51
  self.patterns = {
52
  "email": r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b',
53
- "phone_number": r'\b(\+\d{1,2}\s?)?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}\b',
 
 
 
 
 
54
  # Card number regex: common formats, allows optional spaces/hyphens
55
  "credit_debit_no": r'\b(?:(?:\d{4}[\s-]?){3}\d{4}|\d{13,19})\b',
56
  # CVV: 3 or 4 digits, ensuring it's a standalone number (word boundary)
 
50
  # Define regex patterns for different entity types
51
  self.patterns = {
52
  "email": r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b',
53
+ # Enhanced phone number regex that handles international formats:
54
+ # - Optional international prefix: '+' or '00' followed by a 1-4 digit country code
55
+ # - Various delimiter formats: spaces, hyphens, periods, or nothing
56
+ # - Different grouping patterns for various countries
57
+ # - Intended overall length of 8-15 digits (excluding formatting characters); NOTE: the pattern itself does not enforce this bound
58
+ "phone_number": r'\b(?:(?:\+|00)[1-9]\d{0,3}[\s.-]?)?(?:\(?\d{1,5}\)?[\s.-]?)?(?:\d{1,5}[\s.-]?)??(?:\d{1,5}[\s.-]?)??(?:\d{1,5}[\s.-]?)?\d{1,5}(?:[\s.-]?\d{1,5})?\b',
59
  # Card number regex: common formats, allows optional spaces/hyphens
60
  "credit_debit_no": r'\b(?:(?:\d{4}[\s-]?){3}\d{4}|\d{13,19})\b',
61
  # CVV: 3 or 4 digits, ensuring it's a standalone number (word boundary)