snikhilesh committed on
Commit
303942f
·
verified ·
1 Parent(s): a4fbae6

Deploy phi_deidentifier.py to backend/ directory

Browse files
Files changed (1) hide show
  1. backend/phi_deidentifier.py +469 -0
backend/phi_deidentifier.py ADDED
@@ -0,0 +1,469 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PHI De-identification Pipeline - Phase 2
3
+ HIPAA-compliant protected health information removal and anonymization.
4
+
5
+ This module provides comprehensive PHI detection and removal for medical documents
6
+ before AI processing, ensuring HIPAA compliance and data privacy.
7
+
8
+ Author: MiniMax Agent
9
+ Date: 2025-10-29
10
+ Version: 1.0.0
11
+ """
12
+
13
+ import re
14
+ import hashlib
15
+ import logging
16
+ from typing import Dict, List, Optional, Tuple, Any, Set
17
+ from dataclasses import dataclass
18
+ from datetime import datetime
19
+ from enum import Enum
20
+ import json
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
class PHICategory(Enum):
    """Enumeration of the kinds of protected health information (PHI).

    Each member's value is the short string tag used for that category in
    audit logs and in generated placeholder tokens.
    """

    # Direct personal identifiers
    PATIENT_NAME = "patient_name"
    MEDICAL_RECORD_NUMBER = "mrn"
    DATE_OF_BIRTH = "dob"
    SOCIAL_SECURITY_NUMBER = "ssn"

    # Contact details
    PHONE_NUMBER = "phone"
    EMAIL_ADDRESS = "email"
    ADDRESS = "address"

    # Dates and ages
    DATE = "date"
    AGE_OVER_89 = "age_89_plus"

    # Biometric and image identifiers
    BIO_METRIC_IDENTIFIER = "biometric"
    PHOTO = "photo"

    # Device, account and other record identifiers
    DEVICE_IDENTIFIER = "device_id"
    ACCOUNT_NUMBER = "account"
    CERTIFICATE_NUMBER = "certificate"
    VEHICLE_IDENTIFIER = "vehicle"

    # Network identifiers
    WEB_URL = "web_url"
    IP_ADDRESS = "ip_address"

    # Additional biometric / image categories
    FINGERPRINT = "fingerprint"
    FULL_FACE_PHOTO = "full_face_photo"
46
+
47
+
48
@dataclass
class PHIMatch:
    """A single detected PHI occurrence together with its substitute text."""

    # Kind of PHI this occurrence was detected as
    category: PHICategory
    # Source text associated with the detection
    original_text: str
    # Placeholder that is written in place of the detected span
    replacement: str
    # Character offsets of the span to replace, relative to the scanned text
    start_position: int
    end_position: int
    # Detection confidence score
    confidence: float
    # Snippet of surrounding source text kept for review
    context: str
58
+
59
+
60
@dataclass
class DeidentificationResult:
    """Outcome of one de-identification run over a single text."""

    # Input exactly as it was supplied (still contains PHI)
    original_text: str
    # Output after placeholders have been substituted
    deidentified_text: str
    # Every PHI occurrence that was recorded for this text
    phi_matches: List[PHIMatch]
    # Name of the redaction method taken from the configuration
    anonymization_method: str
    # Hex digest of the original input, kept for the audit trail
    hash_original: str
    # When the result object was produced
    timestamp: datetime
    # Compliance regime label: HIPAA, GDPR, NONE
    compliance_level: str
    # Free-form processing statistics for this run
    audit_log: Dict[str, Any]
71
+
72
+
73
class PHIPatterns:
    """Catalogue of regular expressions used to locate PHI in free text.

    Every ``*_PATTERNS`` attribute is a list of raw regex strings.  Each
    pattern exposes the text that has to be redacted as capture group 1, and
    as the *only* capture group: the consumer replaces group 1, so splitting
    an identifier over several groups would leave part of it in the output.
    Leading labels such as ``MRN:`` are kept outside the group so they remain
    in the text.
    """

    # Building blocks shared by several patterns below.
    _MONTH = (
        r'(?:January|February|March|April|May|June|July|August|'
        r'September|October|November|December)'
    )
    _STREET_SUFFIX = (
        r'(?:Street|St|Avenue|Ave|Road|Rd|Boulevard|Blvd|Lane|Ln|'
        r'Drive|Dr|Court|Ct|Place|Pl)'
    )
    # House number, one or more whitespace-separated words, then the street
    # suffix as a word of its own.  Requiring the separating whitespace stops
    # ordinary words that merely end in a suffix ("2 days post" -> "St") from
    # being treated as addresses.
    _STREET_LINE = r'[0-9]{1,5}\s+(?:[A-Za-z]+\s+)+' + _STREET_SUFFIX

    # Patient name patterns (various formats).
    # NOTE: these rely on capitalisation, so they are only meaningful when
    # matched case-sensitively.
    NAME_PATTERNS = [
        r'\b([A-Z][a-z]+\s+[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\b',  # First Last [More]
        r'\b([A-Z]\.?\s+[A-Z][a-z]+)\b',  # F. Last
        r'\b([A-Z][a-z]+,\s+[A-Z][a-z]+)\b',  # Last, First
        r'Patient Name:\s*([A-Z][a-z]+\s+[A-Z][a-z]+)',
        r'Name:\s*([A-Z][a-z]+\s+[A-Z][a-z]+)',
    ]

    # Medical Record Number patterns
    MRN_PATTERNS = [
        r'\b(?:MRN|Medical Record Number|Patient ID|ID Number|Record #?)[:\s]*([A-Z0-9]{6,12})\b',
        r'\b(?:MRN|ID)[:\s]*([0-9]{6,10})\b',
        r'\bPatient\s*(?:ID|Number)[:\s]*([A-Z0-9]{6,12})\b',
    ]

    # Date of Birth patterns
    DOB_PATTERNS = [
        r'\b(?:DOB|Date of Birth|Birth Date|Born)[:\s]*([0-9]{1,2}[/-][0-9]{1,2}[/-][0-9]{4})\b',
        r'\b([0-9]{1,2}[/-][0-9]{1,2}[/-][0-9]{4})\s*(?:DOB|birth|Born)\b',
        # Month-name form: month, day and year are captured together so the
        # whole date is redacted rather than just the month.
        r'\b(?:DOB|Date of Birth)[:\s]*(' + _MONTH + r'\s+[0-9]{1,2},?\s+[0-9]{4})\b',
    ]

    # Social Security Number patterns
    SSN_PATTERNS = [
        r'\b(?:SSN|Social Security Number)[:\s]*([0-9]{3}-[0-9]{2}-[0-9]{4})\b',
        r'\b([0-9]{3}-[0-9]{2}-[0-9]{4})\b',
    ]

    # Phone number patterns
    PHONE_PATTERNS = [
        r'\b(?:Phone|Tel|Telephone|Mobile|Cell)[:\s]*([0-9]{3}[-.\s]?[0-9]{3}[-.\s]?[0-9]{4})\b',
        r'\b([0-9]{3}[-.\s]?[0-9]{3}[-.\s]?[0-9]{4})\b',
        # "(555) 123-4567": a ``\b`` in front of "(" can never match after
        # whitespace or at the start of the text, so a negative lookbehind is
        # used to anchor the opening parenthesis instead.
        r'(?<!\w)(\([0-9]{3}\)\s*[0-9]{3}[-.\s]?[0-9]{4})\b',
    ]

    # Email address patterns
    EMAIL_PATTERNS = [
        r'\b([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})\b',
        r'\b(?:Email|E-mail)[:\s]*([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})\b',
    ]

    # Address patterns
    ADDRESS_PATTERNS = [
        # Street line only
        r'\b(' + _STREET_LINE + r')\b',
        # Street line, city, state and ZIP (optionally ZIP+4) captured as one
        # span so that city and ZIP are redacted together with the street.
        r'\b(' + _STREET_LINE + r',\s*[A-Za-z\s]+,\s*[A-Z]{2}\s*[0-9]{5}(?:-[0-9]{4})?)\b',
        # Labelled address
        r'\b(?:Address|Addr)[:\s]*([0-9]+\s+(?:[A-Za-z]+\s+)+(?:Street|St|Avenue|Ave|Road|Rd))\b',
    ]

    # IP address patterns
    IP_PATTERNS = [
        r'\b(?:IP Address|IP)[:\s]*([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})\b',
        r'\b([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})\b',
    ]

    # URL patterns
    URL_PATTERNS = [
        r'\b(?:URL|Website|Web)[:\s]*(https?://[^\s]+)\b',
        r'\b(https?://[^\s]+)\b',
    ]

    # Device identifier patterns
    DEVICE_PATTERNS = [
        r'\b(?:Device ID|Device|Serial Number|Serial)[:\s]*([A-Z0-9]{6,20})\b',
        r'\b(?:IMEI|IMSI|MAC Address)[:\s]*([A-F0-9]{15,17})\b',
        # MAC addresses written with ":" or "-" separators, which the
        # undelimited hex pattern above cannot match.
        r'\b(?:MAC Address|MAC)[:\s]*((?:[0-9A-F]{2}[:-]){5}[0-9A-F]{2})\b',
    ]
142
+
143
+
144
class MedicalPHIDeidentifier:
    """Regex-driven remover of protected health information (PHI) from text.

    The de-identifier scans text with the patterns held by ``PHIPatterns``,
    substitutes a placeholder for every detected span, applies a few
    document-type specific substitutions and returns the result together
    with an audit log.

    NOTE(review): detection is purely pattern based, so the output should be
    treated as best-effort redaction, not as proof of HIPAA compliance.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Create a de-identifier.

        Args:
            config: Optional overrides for the values returned by
                ``_default_config``.  Keys that are not supplied fall back to
                their defaults, so a partial configuration is safe.
        """
        # Overlay the caller's options on the defaults; using the caller's
        # dict as-is would raise KeyError later for every key it omits.
        self.config = {**self._default_config(), **(config or {})}
        self.patterns = PHIPatterns()
        # (category, original text) -> placeholder token
        self.anonymization_cache = {}

    def _default_config(self) -> Dict[str, Any]:
        """Return the default de-identification configuration."""
        return {
            "compliance_level": "HIPAA",
            "preserve_medical_context": True,
            "use_hashing": True,
            "redaction_method": "placeholder",
            "date_shift_days": 0,  # For research use
            "preserve_age_category": True,  # Keep age ranges but not exact ages
            "whitelist_terms": ["Dr.", "Mr.", "Ms.", "Mrs.", "MD", "DO"],  # Terms to preserve
        }

    def deidentify_text(self, text: str, document_type: str = "general") -> DeidentificationResult:
        """De-identify ``text`` by replacing detected PHI with placeholders.

        Args:
            text: Text to de-identify.
            document_type: Type of medical document; ``"ecg"``,
                ``"radiology"`` and ``"laboratory"`` enable extra categories
                and extra substitutions, any other value uses the base set.

        Returns:
            DeidentificationResult holding the de-identified text, the
            applied matches (ordered by descending start offset) and an
            audit log.
        """
        categories = self._get_categories_for_doc_type(document_type)

        candidates: List[PHIMatch] = []
        for category in categories:
            candidates.extend(self._detect_phi_category(text, category))

        # Several patterns routinely hit the same characters (for example a
        # labelled and an unlabelled phone pattern).  Splicing overlapping
        # spans one after another corrupts the text, so they are merged into
        # disjoint spans first.
        phi_matches = self._merge_overlapping_matches(text, candidates)

        detected = {match.category for match in phi_matches}
        audit_log = {
            "processing_timestamp": datetime.now().isoformat(),
            "document_type": document_type,
            "original_length": len(text),
            "phi_categories_found": [c.value for c in categories if c in detected],
            "replacements_made": len(phi_matches),
        }

        # Hash of the original input for the audit trail
        hash_original = hashlib.sha256(text.encode()).hexdigest()

        # Matches are ordered from the end of the text backwards, so each
        # splice leaves the offsets of the remaining matches valid.
        deidentified_text = text
        for match in phi_matches:
            deidentified_text = (
                deidentified_text[:match.start_position]
                + match.replacement
                + deidentified_text[match.end_position:]
            )

        # Document-specific processing
        if document_type == "ecg":
            deidentified_text = self._process_ecg_specific(deidentified_text)
        elif document_type == "radiology":
            deidentified_text = self._process_radiology_specific(deidentified_text)
        elif document_type == "laboratory":
            deidentified_text = self._process_laboratory_specific(deidentified_text)

        # Final cleanup and validation
        deidentified_text = self._final_cleanup(deidentified_text)

        audit_log.update({
            "final_length": len(deidentified_text),
            "phi_matches_count": len(phi_matches),
            "compression_ratio": len(deidentified_text) / len(text) if text else 1.0,
        })

        return DeidentificationResult(
            original_text=text,
            deidentified_text=deidentified_text,
            phi_matches=phi_matches,
            anonymization_method=self.config["redaction_method"],
            hash_original=hash_original,
            timestamp=datetime.now(),
            compliance_level=self.config["compliance_level"],
            audit_log=audit_log,
        )

    def _get_categories_for_doc_type(self, document_type: str) -> List[PHICategory]:
        """Return the PHI categories to scan for the given document type."""
        categories = [
            PHICategory.PATIENT_NAME,
            PHICategory.MEDICAL_RECORD_NUMBER,
            PHICategory.DATE_OF_BIRTH,
            # SSN patterns exist in PHIPatterns; without this entry they were
            # never applied and SSNs passed through untouched.
            PHICategory.SOCIAL_SECURITY_NUMBER,
            PHICategory.PHONE_NUMBER,
            PHICategory.EMAIL_ADDRESS,
            PHICategory.ADDRESS,
            PHICategory.IP_ADDRESS,
            PHICategory.WEB_URL,
        ]

        # NOTE(review): ACCOUNT_NUMBER currently has no detection patterns, so
        # listing it has no effect until patterns are added.
        if document_type == "ecg":
            categories.append(PHICategory.DEVICE_IDENTIFIER)
        elif document_type == "radiology":
            categories.extend([PHICategory.DEVICE_IDENTIFIER, PHICategory.ACCOUNT_NUMBER])
        elif document_type == "laboratory":
            categories.append(PHICategory.ACCOUNT_NUMBER)

        return categories

    def _detect_phi_category(self, text: str, category: PHICategory) -> List[PHIMatch]:
        """Return every pattern hit for ``category`` in ``text``.

        Hits from different patterns may overlap; the caller is responsible
        for merging them before applying replacements.
        """
        pattern_map = {
            PHICategory.PATIENT_NAME: self.patterns.NAME_PATTERNS,
            PHICategory.MEDICAL_RECORD_NUMBER: self.patterns.MRN_PATTERNS,
            PHICategory.DATE_OF_BIRTH: self.patterns.DOB_PATTERNS,
            PHICategory.SOCIAL_SECURITY_NUMBER: self.patterns.SSN_PATTERNS,
            PHICategory.PHONE_NUMBER: self.patterns.PHONE_PATTERNS,
            PHICategory.EMAIL_ADDRESS: self.patterns.EMAIL_PATTERNS,
            PHICategory.ADDRESS: self.patterns.ADDRESS_PATTERNS,
            PHICategory.IP_ADDRESS: self.patterns.IP_PATTERNS,
            PHICategory.WEB_URL: self.patterns.URL_PATTERNS,
            PHICategory.DEVICE_IDENTIFIER: self.patterns.DEVICE_PATTERNS,
        }

        # Name patterns recognise names by their capitalisation.  Matching
        # them case-insensitively makes any two adjacent words look like a
        # name, so they alone are matched case-sensitively.
        flags = 0 if category is PHICategory.PATIENT_NAME else re.IGNORECASE

        matches = []
        for pattern in pattern_map.get(category, []):
            for match in re.finditer(pattern, text, flags):
                start_pos, end_pos = self._redaction_span(match)
                if start_pos == end_pos:
                    continue  # nothing to redact
                matches.append(self._build_match(text, category, start_pos, end_pos))

        return matches

    @staticmethod
    def _redaction_span(match) -> Tuple[int, int]:
        """Return the (start, end) offsets that must be redacted for a regex match.

        With capture groups, the span runs from the first to the last group
        that took part in the match, so identifiers split over several groups
        (first/last name, month/day/year) are covered completely.  Without
        groups the whole match is used.
        """
        spans = [
            match.span(index)
            for index in range(1, match.re.groups + 1)
            if match.start(index) != -1
        ]
        if not spans:
            return match.span()
        return min(start for start, _ in spans), max(end for _, end in spans)

    def _build_match(self, text: str, category: PHICategory, start: int, end: int) -> PHIMatch:
        """Create the PHIMatch for ``text[start:end]`` with its replacement and context."""
        span_text = text[start:end]
        return PHIMatch(
            category=category,
            original_text=span_text,
            replacement=self._generate_replacement(category, span_text),
            start_position=start,
            end_position=end,
            confidence=0.8,  # Pattern-based confidence
            context=text[max(0, start - 50):min(len(text), end + 50)],
        )

    def _merge_overlapping_matches(self, text: str, matches: List[PHIMatch]) -> List[PHIMatch]:
        """Collapse overlapping matches into disjoint spans.

        Overlapping spans are replaced by one match covering their union
        (keeping the category of the earliest one), so nothing that was
        detected is left behind.  The result is ordered by descending start
        offset, ready for in-place replacement.
        """
        ordered = sorted(matches, key=lambda m: (m.start_position, -m.end_position))

        merged: List[PHIMatch] = []
        for match in ordered:
            if not merged or match.start_position >= merged[-1].end_position:
                merged.append(match)
                continue
            previous = merged[-1]
            if match.end_position > previous.end_position:
                # Partial overlap: widen the previous span to cover both.
                merged[-1] = self._build_match(
                    text, previous.category, previous.start_position, match.end_position
                )

        merged.reverse()
        return merged

    def _generate_replacement(self, category: PHICategory, original: str) -> str:
        """Return the placeholder that stands in for ``original``.

        With ``use_hashing`` enabled the placeholder embeds a short digest of
        the original text so equal inputs map to equal tokens; otherwise a
        fixed per-category placeholder is returned.
        """
        if self.config["use_hashing"]:
            # Cache per (category, text) so the same text detected under a
            # different category does not reuse a token with the wrong label.
            cache_key = (category, original)
            token = self.anonymization_cache.get(cache_key)
            if token is None:
                # NOTE(review): an unsalted, truncated MD5 of the PHI can be
                # reversed by enumerating small value spaces (SSNs, phone
                # numbers).  Consider a keyed hash with a secret key.
                digest = hashlib.md5(original.encode()).hexdigest()[:8]
                token = f"[{category.value.upper()}_{digest}]"
                self.anonymization_cache[cache_key] = token
            return token

        # Generic placeholders
        placeholder_map = {
            PHICategory.PATIENT_NAME: "[PATIENT_NAME]",
            PHICategory.MEDICAL_RECORD_NUMBER: "[MRN]",
            PHICategory.DATE_OF_BIRTH: "[DOB]",
            PHICategory.SOCIAL_SECURITY_NUMBER: "[SSN]",
            PHICategory.PHONE_NUMBER: "[PHONE]",
            PHICategory.EMAIL_ADDRESS: "[EMAIL]",
            PHICategory.ADDRESS: "[ADDRESS]",
            PHICategory.IP_ADDRESS: "[IP_ADDRESS]",
            PHICategory.WEB_URL: "[URL]",
            PHICategory.DEVICE_IDENTIFIER: "[DEVICE_ID]",
        }
        return placeholder_map.get(category, f"[{category.value.upper()}]")

    def _process_ecg_specific(self, text: str) -> str:
        """Replace device and serial/model labels with their identifiers in ECG text."""
        # The label and the identifier that follows it are replaced together.
        text = re.sub(r'(?:Device|Equipment)[:\s]*([A-Z0-9]+)', '[DEVICE_ID]', text)
        text = re.sub(r'(?:Serial|Model)[:\s]*([A-Z0-9]+)', '[DEVICE_SERIAL]', text)
        return text

    def _process_radiology_specific(self, text: str) -> str:
        """Replace facility and imaging-equipment mentions in radiology text."""
        # NOTE(review): the character classes include whitespace, so each
        # substitution consumes every following run of letters and spaces,
        # not just the facility or device name.
        text = re.sub(r'(?:Facility|Hospital|Clinic)[:\s]*([A-Za-z\s]+)', '[FACILITY]', text)
        text = re.sub(r'(?:Machine|Scanner|Equipment)[:\s]*([A-Za-z0-9\s]+)', '[IMAGING_DEVICE]', text)
        return text

    def _process_laboratory_specific(self, text: str) -> str:
        """Replace laboratory facility names and accession/test identifiers."""
        text = re.sub(r'(?:Lab|Laboratory)[:\s]*([A-Za-z\s]+)', '[LAB_FACILITY]', text)
        text = re.sub(r'(?:Accession|Test)[:\s]*([A-Z0-9]+)', '[TEST_ID]', text)
        return text

    def _final_cleanup(self, text: str) -> str:
        """Normalise whitespace and log a warning if obvious PHI shapes remain."""
        # Collapse every whitespace run (including newlines) to one space
        text = re.sub(r'\s+', ' ', text).strip()

        remaining_phi = self._check_residual_phi(text)
        if remaining_phi:
            logger.warning("Potential PHI detected after de-identification: %s", remaining_phi)

        return text

    def _check_residual_phi(self, text: str) -> List[str]:
        """Return labels for PHI-shaped patterns still present in ``text``."""
        residual_checks = [
            ("phone_number", r'\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b'),
            # "[A-Za-z]" rather than "[A-Z|a-z]": inside a character class the
            # "|" is a literal pipe, not an alternation.
            ("email_address", r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'),
            ("ssn_pattern", r'\b\d{3}-\d{2}-\d{4}\b'),
        ]
        return [label for label, pattern in residual_checks if re.search(pattern, text)]

    def batch_deidentify(self, texts: List[Tuple[str, str]]) -> List[DeidentificationResult]:
        """De-identify several ``(text, document_type)`` pairs in order."""
        return [self.deidentify_text(text, doc_type) for text, doc_type in texts]

    def generate_audit_report(self, results: List[DeidentificationResult]) -> Dict[str, Any]:
        """Summarise a batch of results into an audit report.

        Args:
            results: Results to aggregate.

        Returns:
            Dict with document and match totals, per-category match counts,
            a heuristic score and recommendations.
        """
        total_phi_matches = sum(len(result.phi_matches) for result in results)

        categories_found: Dict[str, int] = {}
        for result in results:
            for match in result.phi_matches:
                tag = match.category.value
                categories_found[tag] = categories_found.get(tag, 0) + 1

        # NOTE(review): for any non-empty batch this formula yields a value
        # in [0.9, 1.0], so the label below is always "HIPAA_COMPLIANT"; it
        # reflects match density, not verified compliance.
        compliance_score = 0.0
        if results:
            avg_phi_per_doc = total_phi_matches / len(results)
            compliance_score = min(1.0, 0.9 + (0.1 * (1.0 - min(avg_phi_per_doc / 10, 1.0))))

        return {
            "audit_timestamp": datetime.now().isoformat(),
            "total_documents": len(results),
            "total_phi_matches": total_phi_matches,
            "phi_categories_found": categories_found,
            "compliance_score": compliance_score,
            "compliance_level": "HIPAA_COMPLIANT" if compliance_score > 0.8 else "NEEDS_REVIEW",
            "recommendations": self._generate_recommendations(categories_found, compliance_score),
        }

    def _generate_recommendations(self, categories_found: Dict[str, int], compliance_score: float) -> List[str]:
        """Derive recommendation strings from category counts and the score.

        ``categories_found`` is keyed by ``PHICategory`` values, so lookups
        use the enum values rather than hand-written strings.
        """
        recommendations = []

        if compliance_score < 0.8:
            recommendations.append("Increase PHI detection patterns for better coverage")

        if categories_found.get(PHICategory.PATIENT_NAME.value, 0) > 5:
            recommendations.append("Consider enhanced name detection patterns")

        if categories_found.get(PHICategory.ADDRESS.value, 0) > 0:
            recommendations.append("Address detection appears effective")

        # The enum value is "device_id"; the previous "device_identifier" key
        # never occurred, so this recommendation could not be produced.
        if categories_found.get(PHICategory.DEVICE_IDENTIFIER.value, 0) > 0:
            recommendations.append("Device identifiers detected - ensure proper anonymization")

        return recommendations
461
+
462
+
463
# Public API: the names exported by a star-import of this module
__all__ = [
    "MedicalPHIDeidentifier",
    "PHICategory",
    "PHIMatch",
    "DeidentificationResult",
]