"""privacy.py: PHI detection, redaction and basic safety filtering for text.

The module exposes:

* ``PHI_PATTERNS`` -- regular-expression strings describing potential
  Protected Health Information (PHI).
* ``redact_text`` -- replace every PHI match with ``[REDACTED]``.
* ``safety_filter`` -- block or redact text depending on ``mode``.
* ``refusal_reply`` -- format a refusal message from a reason string.
"""

import re
from typing import Tuple

# Healthcare-specific PHI patterns.
#
# NOTE: every pattern is applied with re.IGNORECASE by the functions below, so
# classes such as [A-Z] / [a-z] match either case (e.g. "dr smith" is treated
# as a name as well as "Dr Smith").
PHI_PATTERNS = [
    # Names
    r'\b(Mr|Mrs|Ms|Dr|Prof)\.?\s+[A-Z][a-z]+(?:\s+[A-Z][a-z]+)?\b',
    # Medical record numbers
    r'\b(MRN|Patient ID|Medical Record)\s*:?\s*\d+\b',
    # Health IDs
    r'\b(Health Card|Insurance ID)\s*:?\s*[A-Z0-9]+\b',
    # Dates of birth
    r'\b(DOB|Date of Birth)\s*:?\s*\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b',
    # Phone numbers
    r'\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b',
    # Email addresses.  The TLD class is [A-Za-z]; a "|" inside a character
    # class is a literal pipe, not an alternation.
    r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
    # Ages (when combined with other info).  The word boundary is attached to
    # each alternative separately: a trailing \b after "y.o." can never match
    # before a space or at end of text, because "." is not a word character.
    r'\b\d+\s*(years old\b|y\.o(?:\.|\b)|yo\b)',
]

# Patterns for content that must be blocked regardless of PHI.
_HARMFUL_PATTERNS = (
    r'\b(self-harm|suicide|kill myself)\b',
    r'\b(medical advice|diagnosis|treatment)\b.*\b(you should|you must)\b',
)


def _matches_any(patterns, text: str) -> bool:
    """Return True if any of *patterns* matches *text*, ignoring case."""
    return any(re.search(pattern, text, re.IGNORECASE) for pattern in patterns)


def redact_text(text: str) -> str:
    """Redact potential PHI from text.

    Args:
        text: The text to scrub.  Non-string values are returned unchanged.

    Returns:
        ``text`` with every match of every entry in ``PHI_PATTERNS``
        replaced by the literal ``[REDACTED]``.
    """
    if not isinstance(text, str):
        return text

    redacted = text
    # Patterns are applied one after another, each on the previous result.
    for pattern in PHI_PATTERNS:
        redacted = re.sub(pattern, '[REDACTED]', redacted, flags=re.IGNORECASE)
    return redacted


def safety_filter(text: str, mode: str = "input") -> Tuple[str, bool, str]:
    """Enhanced safety filter for healthcare content.

    Args:
        text: The text to check.  Non-string values pass through untouched.
        mode: ``"input"`` blocks text that contains PHI; any other value
            redacts the PHI instead of blocking.

    Returns:
        ``(safe_text, blocked, reason)``.  When ``blocked`` is True,
        ``safe_text`` is the empty string and ``reason`` explains why.
    """
    if not isinstance(text, str):
        return text, False, ""

    has_phi = _matches_any(PHI_PATTERNS, text)

    # Input containing PHI is rejected outright; this takes precedence over
    # the harmful-content check.
    if has_phi and mode == "input":
        return (
            "",
            True,
            "Input contains potential Protected Health Information (PHI). "
            "Please remove any personal information.",
        )

    # General safety checks.  These run before the PHI redaction return so
    # that non-input text containing both PHI and harmful content is blocked
    # rather than merely redacted.  The reason wording is the same for every
    # mode, kept as-is for compatibility with existing callers.
    if _matches_any(_HARMFUL_PATTERNS, text):
        return "", True, "Input contains potentially harmful content."

    if has_phi:
        return (
            redact_text(text),
            False,
            "Output contained PHI which has been redacted.",
        )

    return text, False, ""


def refusal_reply(reason: str) -> str:
    """Generate a refusal message based on the reason."""
    return f"I cannot process this request because: {reason}"