darwinkernelpanic committed on
Commit
ec2830c
·
verified ·
1 Parent(s): e23e46b

Upload pii_extension.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. pii_extension.py +29 -25
pii_extension.py CHANGED
@@ -37,14 +37,15 @@ class PIIDetector:
37
  re.compile(r'\b\d{7,10}\b'), # Plain digits 7-10 chars
38
  ]
39
 
40
- # Address patterns (basic street address detection)
41
  self.address_patterns = [
42
- re.compile(r'\b\d+\s+[A-Za-z]+\s+(?:Street|St|Avenue|Ave|Road|Rd|Boulevard|Blvd|Lane|Ln|Drive|Dr|Court|Ct|Way|Place|Pl)\b', re.IGNORECASE),
43
  re.compile(r'\b(?:PO|P\.O\.)\s*Box\s*\d+\b', re.IGNORECASE),
 
44
  ]
45
 
46
- # Credit card (basic pattern - matches common formats)
47
- self.cc_pattern = re.compile(r'\b(?:\d{4}[-\s]?){3}\d{4}\b')
48
 
49
  # SSN (US Social Security Number)
50
  self.ssn_pattern = re.compile(r'\b\d{3}[-\s]?\d{2}[-\s]?\d{4}\b')
@@ -225,38 +226,41 @@ class PIIDetector:
225
  # Check grooming context for social media
226
  grooming_risk = 0.0
227
  grooming_keywords = []
228
- social_media_allowed = True
229
 
230
- if has_social_media:
 
 
 
 
 
 
 
231
  is_grooming, grooming_risk, grooming_keywords = self.detect_grooming_context(text)
232
 
233
- # Rules by age
234
  if age < 13:
235
  # Under 13: Block ALL social media sharing
236
- social_media_allowed = False
237
  action = "block"
238
  reason = "Social media sharing not permitted under 13"
 
 
 
 
239
  else:
240
- # 13+: Allow but check for grooming
241
- if is_grooming:
242
- social_media_allowed = False
243
- action = "block"
244
- reason = f"Potential grooming detected (risk: {grooming_risk:.0%})"
245
- else:
246
- social_media_allowed = True
247
- action = "allow"
248
- reason = "Social media permitted for 13+ (no grooming signals)"
249
-
250
- # Check other PII (blocked for all ages)
251
- critical_pii = pii_types.intersection({PIILabel.EMAIL, PIILabel.PHONE, PIILabel.ADDRESS, PIILabel.CREDIT_CARD, PIILabel.SSN})
252
-
253
- if critical_pii:
254
- action = "block"
255
- reason = f"PII detected: {', '.join([p.value for p in critical_pii])}"
256
- elif not has_social_media and not pii_types:
257
  action = "allow"
258
  reason = "No PII detected"
259
 
 
 
 
 
 
 
 
 
260
  return {
261
  "has_pii": len(pii_types) > 0,
262
  "pii_types": [p.value for p in pii_types],
 
37
  re.compile(r'\b\d{7,10}\b'), # Plain digits 7-10 chars
38
  ]
39
 
40
+ # Address patterns (enhanced street address detection)
41
  self.address_patterns = [
42
+ re.compile(r'\b\d+\s+\d*[A-Za-z]+(?:\s+[A-Za-z]+)?\s+(?:Street|St|Avenue|Ave|Road|Rd|Boulevard|Blvd|Lane|Ln|Drive|Dr|Court|Ct|Way|Place|Pl|Circle|Cir|Trail|Trl|Parkway|Pkwy)\b', re.IGNORECASE),
43
  re.compile(r'\b(?:PO|P\.O\.)\s*Box\s*\d+\b', re.IGNORECASE),
44
+ re.compile(r'\b\d+\s+[A-Za-z]+\s+(?:Street|St|Ave|Road|Rd)\b', re.IGNORECASE),
45
  ]
46
 
47
+ # Credit card (enhanced pattern)
48
+ self.cc_pattern = re.compile(r'\b(?:\d{4}[-\s]?){3}\d{4}\b|\b\d{16}\b')
49
 
50
  # SSN (US Social Security Number)
51
  self.ssn_pattern = re.compile(r'\b\d{3}[-\s]?\d{2}[-\s]?\d{4}\b')
 
226
  # Check grooming context for social media
227
  grooming_risk = 0.0
228
  grooming_keywords = []
 
229
 
230
+ # Check other PII first (blocked for all ages)
231
+ critical_pii = pii_types.intersection({PIILabel.EMAIL, PIILabel.PHONE, PIILabel.ADDRESS, PIILabel.CREDIT_CARD, PIILabel.SSN})
232
+
233
+ if critical_pii:
234
+ action = "block"
235
+ reason = f"PII detected: {', '.join([p.value for p in critical_pii])}"
236
+ elif has_social_media:
237
+ # Social media rules
238
  is_grooming, grooming_risk, grooming_keywords = self.detect_grooming_context(text)
239
 
 
240
  if age < 13:
241
  # Under 13: Block ALL social media sharing
 
242
  action = "block"
243
  reason = "Social media sharing not permitted under 13"
244
+ elif is_grooming:
245
+ # 13+: Block if grooming detected
246
+ action = "block"
247
+ reason = f"Potential grooming detected (risk: {grooming_risk:.0%})"
248
  else:
249
+ # 13+: Allow social media, no grooming
250
+ action = "allow"
251
+ reason = "Social media permitted for 13+ (no grooming signals)"
252
+ else:
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  action = "allow"
254
  reason = "No PII detected"
255
 
256
+ # Determine if social media is allowed for return value
257
+ social_media_allowed = True
258
+ if has_social_media:
259
+ if age < 13:
260
+ social_media_allowed = False
261
+ elif grooming_risk > 0:
262
+ social_media_allowed = False
263
+
264
  return {
265
  "has_pii": len(pii_types) > 0,
266
  "pii_types": [p.value for p in pii_types],