darwinkernelpanic committed on
Commit
e3bc6f2
·
verified ·
1 Parent(s): d59eb2d

Upload pii_extension.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. pii_extension.py +376 -0
pii_extension.py ADDED
@@ -0,0 +1,376 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ PII (Personally Identifiable Information) Detection Extension
4
+ Integrates with dual-mode content moderation
5
+ """
6
+
7
+ import re
8
+ from enum import Enum
9
+ from typing import Dict, List, Tuple
10
+
11
class PIILabel(Enum):
    """Categories of personally identifiable information.

    Each member's value is the lowercase string used for that category
    in scan results.
    """

    # No PII present.
    SAFE = "safe"

    # Contact details.
    EMAIL = "email"
    PHONE = "phone"
    ADDRESS = "address"

    # Financial / government identifiers.
    CREDIT_CARD = "credit_card"
    SSN = "ssn"

    # Online presence.
    SOCIAL_MEDIA = "social_media"
    URL = "url"
20
+
21
class PIIDetector:
    """Detect PII in text with context awareness.

    Detection is purely regex/keyword based. Each ``detect_*`` method
    returns match tuples carrying the matched text and its ``start``/``end``
    character offsets in the input; ``scan`` combines them and applies the
    age-based social-media rules.
    """

    def __init__(self):
        # Email pattern (TLD is letters only; a literal '|' is not accepted).
        self.email_pattern = re.compile(
            r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
        )

        # Phone patterns (various formats).
        # Patterns that may begin with a non-word character ('(' or '+') use
        # a (?<!\w) lookbehind instead of \b: \b needs a word character on
        # one side, so it can never match before '(' / '+' at the start of
        # the text or after whitespace.
        self.phone_patterns = [
            re.compile(r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b'),  # US: 123-456-7890
            re.compile(r'(?<!\w)\(\d{3}\)\s?\d{3}[-.]?\d{4}\b'),  # (123) 456-7890
            re.compile(r'(?<!\w)\+?\d{1,3}[-.\s]?\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b'),  # International
            re.compile(r'\b\d{4}\s?\d{3}\s?\d{3}\b'),  # AU: 0412 345 678
            re.compile(r'\b\d{3}[-.]?\d{4}\b'),  # Short: 555-1234
            re.compile(r'\b\d{7,10}\b'),  # Plain digits 7-10 chars
        ]

        # Address patterns (basic street address detection)
        self.address_patterns = [
            re.compile(r'\b\d+\s+[A-Za-z]+\s+(?:Street|St|Avenue|Ave|Road|Rd|Boulevard|Blvd|Lane|Ln|Drive|Dr|Court|Ct|Way|Place|Pl)\b', re.IGNORECASE),
            re.compile(r'\b(?:PO|P\.O\.)\s*Box\s*\d+\b', re.IGNORECASE),
        ]

        # Credit card (basic pattern - matches common formats)
        self.cc_pattern = re.compile(r'\b(?:\d{4}[-\s]?){3}\d{4}\b')

        # SSN (US Social Security Number)
        self.ssn_pattern = re.compile(r'\b\d{3}[-\s]?\d{2}[-\s]?\d{4}\b')

        # Social media links/platforms
        self.social_media_domains = [
            'instagram.com', 'instagr.am',
            'twitter.com', 'x.com',
            'tiktok.com',
            'snapchat.com', 'snap.com',
            'discord.com', 'discord.gg',
            'facebook.com', 'fb.com',
            'reddit.com',
            'youtube.com', 'youtu.be',
            'twitch.tv',
            'steamcommunity.com',
            'roblox.com',
        ]

        # Platform mentions such as "instagram @user" or "discord: user".
        # Compiled once here rather than on every detect_social_media call.
        #  * The platform keyword must be a whole word, so ordinary words
        #    that merely start with an alias ("xylophone", "ignore",
        #    "discordant") are not reported.
        #  * A full platform name counts even without a handle
        #    ("add me on instagram, ..."); short aliases (insta, ig, x, snap)
        #    only count when a handle follows.
        # Exactly one capture group participates in a match: the keyword.
        self.username_patterns = [
            re.compile(r'\b(?:(instagram)\b(?:[:\s]*@?\w+)?|(insta|ig)\b[:\s]*@?\w+)', re.IGNORECASE),
            re.compile(r'\b(?:(twitter)\b(?:[:\s]*@?\w+)?|(x)\b[:\s]*@?\w+)', re.IGNORECASE),
            re.compile(r'\b(discord)\b(?:[:\s]*@?\w+)?', re.IGNORECASE),
            re.compile(r'\b(?:(snapchat)\b(?:[:\s]*@?\w+)?|(snap)\b[:\s]*@?\w+)', re.IGNORECASE),
            re.compile(r'\b(tiktok)\b(?:[:\s]*@?\w+)?', re.IGNORECASE),
        ]

        # Grooming/suspicious keywords (context for social media sharing).
        # Stored without apostrophes; detect_grooming_context strips
        # apostrophes from the text before comparing.
        self.grooming_keywords = [
            'dm me', 'message me privately', 'private chat', 'secret',
            'dont tell your parents', 'our little secret', 'just between us',
            'send me pics', 'send pictures', 'photo of you', 'what do you look like',
            'how old are you', 'where do you live', 'home alone', 'parents gone',
            'meet up', 'meet in person', 'come over', 'visit you',
            'boyfriend', 'girlfriend', 'dating', 'relationship',
            'trust me', 'special friend', 'mature for your age',
            'youre different', 'understand you', 'only one who gets you',
        ]

        # URL pattern
        self.url_pattern = re.compile(
            r'https?://(?:[-\w.])+(?:[:\d]+)?(?:/(?:[\w/_.])*(?:\?(?:[\w&=%.])*)?(?:#(?:[\w.])*)?)?',
            re.IGNORECASE
        )

    @staticmethod
    def _drop_nested(matches):
        """Remove duplicate hits and hits lying entirely inside another hit.

        ``matches`` is a list of ``(text, start, end)`` tuples. The result
        is ordered by start offset; partially overlapping hits are kept.
        """
        kept = []
        # Longest-first at each start offset so the enclosing span is seen
        # before anything nested inside it.
        for item in sorted(set(matches), key=lambda m: (m[1], -m[2])):
            if any(k[1] <= item[1] and item[2] <= k[2] for k in kept):
                continue
            kept.append(item)
        return kept

    def _social_domain_for(self, url):
        """Return the configured social-media domain hosting ``url``, or None.

        The host must equal a configured domain or be a subdomain of it; a
        plain substring test would treat e.g. ``netflix.com`` as ``x.com``.
        """
        host_match = re.match(r'https?://([-\w.]+)', url, re.IGNORECASE)
        if host_match is None:
            return None
        host = host_match.group(1).lower().rstrip('.')
        for domain in self.social_media_domains:
            wanted = domain.lower()
            if host == wanted or host.endswith('.' + wanted):
                return domain
        return None

    def detect_emails(self, text: str) -> List[Tuple[str, int, int]]:
        """Find all emails in text as ``(email, start, end)`` tuples."""
        return [
            (match.group(), match.start(), match.end())
            for match in self.email_pattern.finditer(text)
        ]

    def detect_phones(self, text: str) -> List[Tuple[str, int, int]]:
        """Find all phone numbers as ``(number, start, end)`` tuples.

        Several patterns can hit the same digits (e.g. ``555-123-4567`` and
        its tail ``123-4567``); only the outermost hit for a span is
        returned, ordered by position.
        """
        matches = []
        for pattern in self.phone_patterns:
            for match in pattern.finditer(text):
                matches.append((match.group(), match.start(), match.end()))
        return self._drop_nested(matches)

    def detect_addresses(self, text: str) -> List[Tuple[str, int, int]]:
        """Find street / PO-box addresses as ``(address, start, end)`` tuples."""
        matches = []
        for pattern in self.address_patterns:
            for match in pattern.finditer(text):
                matches.append((match.group(), match.start(), match.end()))
        return matches

    def detect_credit_cards(self, text: str) -> List[Tuple[str, int, int]]:
        """Find credit card numbers as ``(number, start, end)`` tuples."""
        matches = []
        for match in self.cc_pattern.finditer(text):
            card = match.group().replace('-', '').replace(' ', '')
            if 13 <= len(card) <= 19:  # Valid CC length
                matches.append((match.group(), match.start(), match.end()))
        return matches

    def detect_ssn(self, text: str) -> List[Tuple[str, int, int]]:
        """Find SSNs as ``(ssn, start, end)`` tuples."""
        return [
            (match.group(), match.start(), match.end())
            for match in self.ssn_pattern.finditer(text)
        ]

    def detect_social_media(self, text: str) -> List[Tuple[str, int, int, str]]:
        """Find social media links with platform detection.

        Returns ``(matched_text, start, end, platform)`` tuples. For URLs,
        ``platform`` is the matching entry of ``social_media_domains``; for
        plain mentions it is the lowercased platform keyword that was
        written (e.g. ``"instagram"``, ``"ig"``, ``"x"``).
        """
        matches = []
        url_spans = []

        for url_match in self.url_pattern.finditer(text):
            domain = self._social_domain_for(url_match.group())
            if domain is not None:
                matches.append((url_match.group(), url_match.start(), url_match.end(), domain))
                url_spans.append((url_match.start(), url_match.end()))

        # Also check for plain usernames like "instagram @username" or
        # "discord: username".
        for pattern in self.username_patterns:
            for match in pattern.finditer(text):
                # A keyword inside an already-reported social URL is the
                # same reference, not a second one.
                if any(match.start() < end and start < match.end() for start, end in url_spans):
                    continue
                platform = next(g for g in match.groups() if g is not None).lower()
                matches.append((match.group(), match.start(), match.end(), platform))

        return matches

    def detect_grooming_context(self, text: str) -> Tuple[bool, float, List[str]]:
        """Detect if social media sharing has grooming context.

        Returns ``(is_suspicious, risk_score, found_keywords)``.
        ``risk_score`` is the number of matched keywords divided by 3,
        capped at 1.0; a single keyword is enough to be suspicious.
        """
        # Keywords are stored apostrophe-free ("dont", "youre"), so strip
        # straight and typographic apostrophes before the substring test.
        normalized = text.lower().replace("'", "").replace("\u2019", "")
        found_keywords = [kw for kw in self.grooming_keywords if kw in normalized]

        # Calculate risk score
        risk_score = min(len(found_keywords) / 3.0, 1.0)  # Max at 3+ keywords
        is_suspicious = bool(found_keywords)  # 1+ keywords

        return is_suspicious, risk_score, found_keywords

    def scan(self, text: str, age: int) -> Dict:
        """
        Full PII scan with age-appropriate rules

        Rules applied:
          * email / phone / address / credit card / SSN -> "block" at any age
          * social media reference, age < 13 -> "block"
          * social media reference, age >= 13 -> "block" if any grooming
            keyword is present, otherwise "allow"
          * nothing detected -> "allow"

        Grooming keywords are only evaluated when a social media reference
        was found.

        Returns:
        {
            "has_pii": bool,
            "pii_types": list,          # PIILabel values, in enum order
            "details": list,            # one dict per match
            "social_media_allowed": bool,
            "grooming_risk": float,
            "grooming_keywords": list,
            "action": "allow" | "block",
            "reason": str,
            "age": int
        }
        """
        pii_found = []
        pii_types = set()

        # Detect the PII types that are blocked for every age.
        detectors = (
            (PIILabel.EMAIL, self.detect_emails),
            (PIILabel.PHONE, self.detect_phones),
            (PIILabel.ADDRESS, self.detect_addresses),
            (PIILabel.CREDIT_CARD, self.detect_credit_cards),
            (PIILabel.SSN, self.detect_ssn),
        )
        critical_labels = {label for label, _ in detectors}
        for label, detect in detectors:
            for value, start, end in detect(text):
                pii_types.add(label)
                pii_found.append({"type": label.value, "value": value, "start": start, "end": end})

        # Social media detection
        social_links = self.detect_social_media(text)
        has_social_media = len(social_links) > 0

        for link, start, end, platform in social_links:
            pii_types.add(PIILabel.SOCIAL_MEDIA)
            pii_found.append({"type": "social_media", "value": link, "platform": platform, "start": start, "end": end})

        # Defaults describe the "nothing found" outcome; the branches below
        # override them.
        grooming_risk = 0.0
        grooming_keywords = []
        social_media_allowed = True
        action = "allow"
        reason = "No PII detected"

        if has_social_media:
            is_grooming, grooming_risk, grooming_keywords = self.detect_grooming_context(text)

            # Rules by age
            if age < 13:
                # Under 13: Block ALL social media sharing
                social_media_allowed = False
                action = "block"
                reason = "Social media sharing not permitted under 13"
            elif is_grooming:
                # 13+: Allow but check for grooming
                social_media_allowed = False
                action = "block"
                reason = f"Potential grooming detected (risk: {grooming_risk:.0%})"
            else:
                social_media_allowed = True
                action = "allow"
                reason = "Social media permitted for 13+ (no grooming signals)"

        # Report types in enum definition order so the output does not
        # depend on set iteration order.
        ordered_types = [label for label in PIILabel if label in pii_types]

        # Check other PII (blocked for all ages); this overrides any
        # social-media verdict above.
        critical_pii = [label for label in ordered_types if label in critical_labels]
        if critical_pii:
            action = "block"
            reason = f"PII detected: {', '.join([p.value for p in critical_pii])}"

        return {
            "has_pii": len(pii_types) > 0,
            "pii_types": [p.value for p in ordered_types],
            "details": pii_found,
            "social_media_allowed": social_media_allowed,
            "grooming_risk": grooming_risk,
            "grooming_keywords": grooming_keywords,
            "action": action,
            "reason": reason,
            "age": age
        }
271
+
272
+
273
+ # Integration with main moderation system
274
class CombinedModerationFilter:
    """Combines content moderation + PII detection.

    ``check`` runs the PII scan first and only consults the content model
    when the PII scan did not block the text.
    """

    def __init__(self, content_model_path="./moderation_model_v2.pkl"):
        """Load the content model from ``content_model_path`` and set up rules.

        Raises whatever the ``enhanced_moderation`` import or the model's
        ``load`` call raises.
        """
        # Imported at construction time rather than module level, so this
        # module can be imported without enhanced_moderation installed.
        # NOTE(review): the default path is a .pkl file; if load() unpickles
        # it, only trusted model files should be passed - confirm.
        from enhanced_moderation import EnhancedContentModerator

        self.content_moderator = EnhancedContentModerator()
        self.content_moderator.load(content_model_path)
        self.pii_detector = PIIDetector()

        # Age-based rules, expressed as content label values.
        self.under_13_blocked_content = [1, 2, 3, 4, 5]  # All except SAFE
        self.teen_plus_blocked_content = [1, 3, 4, 5]  # Allow SWEARING_REACTION

    def check(self, text: str, age: int) -> Dict:
        """Full check: content + PII.

        Returns a dict with keys ``allowed`` (bool), ``violation``
        (``"PII"``, ``"CONTENT"`` or ``None``), ``pii_details`` (the PII scan
        result), ``content_details`` (``{"label", "confidence"}`` or ``None``
        when the content model was not consulted), ``reason`` and ``age``.
        """
        # Step 1: PII Check - a PII block short-circuits the content model.
        pii_result = self.pii_detector.scan(text, age)

        if pii_result["action"] == "block":
            return {
                "allowed": False,
                "violation": "PII",
                "pii_details": pii_result,
                "content_details": None,
                "reason": pii_result["reason"],
                "age": age
            }

        # Step 2: Content Moderation Check
        content_label, confidence = self.content_moderator.predict(text)
        content_details = {
            "label": content_label.name,
            "confidence": confidence
        }

        # Label 2 (SWEARING_REACTION) is absent from the 13+ list, so
        # reaction swearing is already permitted for teens by this lookup;
        # no separate special case is needed.
        if age >= 13:
            blocked_labels = self.teen_plus_blocked_content
        else:
            blocked_labels = self.under_13_blocked_content

        if content_label.value in blocked_labels:
            return {
                "allowed": False,
                "violation": "CONTENT",
                "pii_details": pii_result,
                "content_details": content_details,
                "reason": f"{content_label.name} detected",
                "age": age
            }

        # All checks passed
        return {
            "allowed": True,
            "violation": None,
            "pii_details": pii_result,
            "content_details": content_details,
            "reason": "Content and PII checks passed",
            "age": age
        }
348
+
349
+
350
+ # Example usage
351
if __name__ == "__main__":
    # Demo: scan a handful of (message, age) samples and print the verdicts.
    pii_detector = PIIDetector()

    samples = (
        ("My email is john@example.com", 15),
        ("Call me at 555-123-4567", 16),
        ("I'm at 123 Main Street", 14),
        ("Follow me on instagram @cooluser", 10),
        ("Follow me on instagram @cooluser", 15),
        ("DM me on instagram, don't tell your parents", 15),
        ("Check my tiktok @user", 14),
        ("Send me pics on snapchat, it's our secret", 13),
    )

    print("PII Detection Tests")
    print("=" * 70)

    for message, user_age in samples:
        outcome = pii_detector.scan(message, user_age)

        # Anything other than an explicit "allow" is shown as a block.
        if outcome["action"] == "allow":
            verdict = "✅ ALLOW"
        else:
            verdict = "❌ BLOCK"

        print(f"\nAge {user_age}: '{message}'")
        print(f" {verdict} - {outcome['reason']}")

        # Grooming details are only printed when some risk was scored.
        if outcome["grooming_risk"] > 0:
            print(f" Grooming risk: {outcome['grooming_risk']:.0%}")
            print(f" Keywords: {outcome['grooming_keywords']}")