Spaces:
Sleeping
Sleeping
| import re | |
| # privacy.py | |
| import re | |
| from typing import Tuple | |
# Healthcare-specific PHI patterns.
# All patterns are applied with re.IGNORECASE by redact_text / safety_filter,
# so the explicit case classes below mainly document intent.
PHI_PATTERNS = [
    # Names: honorific followed by one or two capitalized words.
    r'\b(Mr|Mrs|Ms|Dr|Prof)\.?\s+[A-Z][a-z]+(?:\s+[A-Z][a-z]+)?\b',
    # Medical record numbers.
    r'\b(MRN|Patient ID|Medical Record)\s*:?\s*\d+\b',
    # Health IDs.
    r'\b(Health Card|Insurance ID)\s*:?\s*[A-Z0-9]+\b',
    # Dates of birth.
    r'\b(DOB|Date of Birth)\s*:?\s*\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b',
    # Phone numbers (NNN NNN NNNN with '-', '.', space, or no separator).
    r'\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b',
    # Email addresses. FIX: the original TLD class '[A-Z|a-z]{2,}' contained
    # a literal '|' (alternation has no meaning inside [...]), so pipe
    # characters were accepted in the top-level domain.
    r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
    # Ages (when combined with other info).
    r'\b\d+\s*(years old|y\.o\.|yo)\b',
]
def redact_text(text: str) -> str:
    """Scrub likely PHI from *text*, replacing each match with '[REDACTED]'.

    Non-string inputs (e.g. None) are returned unchanged.
    """
    if isinstance(text, str):
        scrubbed = text
        # Apply every PHI pattern in turn; later patterns see the
        # already-redacted text, so overlapping matches collapse safely.
        for phi_pattern in PHI_PATTERNS:
            scrubbed = re.sub(phi_pattern, '[REDACTED]', scrubbed, flags=re.IGNORECASE)
        return scrubbed
    return text
def safety_filter(text: str, mode: str = "input") -> Tuple[str, bool, str]:
    """
    Enhanced safety filter for healthcare content.

    Args:
        text: The text to screen. Non-string values pass through untouched.
        mode: "input" when screening user input (PHI blocks the request);
              any other value is treated as model output (PHI is redacted).

    Returns: (safe_text, blocked, reason)
    """
    if not isinstance(text, str):
        return text, False, ""

    # FIX: run the harmful-content check FIRST. Previously it ran after the
    # PHI check, whose output-mode branch returned early — so harmful output
    # that also contained PHI bypassed this check entirely.
    harmful_patterns = [
        r'\b(self-harm|suicide|kill myself)\b',
        r'\b(medical advice|diagnosis|treatment)\b.*\b(you should|you must)\b',
    ]
    for pattern in harmful_patterns:
        if re.search(pattern, text, re.IGNORECASE):
            # FIX: the reason previously always said "Input", even in
            # output mode; name the side that actually triggered the block.
            side = "Input" if mode == "input" else "Output"
            return "", True, f"{side} contains potentially harmful content."

    # Check for PHI.
    if any(re.search(pattern, text, re.IGNORECASE) for pattern in PHI_PATTERNS):
        if mode == "input":
            return "", True, "Input contains potential Protected Health Information (PHI). Please remove any personal information."
        # Output mode: redact rather than block, but tell the caller why.
        redacted = redact_text(text)
        return redacted, False, "Output contained PHI which has been redacted."

    return text, False, ""
def refusal_reply(reason: str) -> str:
    """Build a user-facing refusal message that cites *reason*."""
    return "I cannot process this request because: {}".format(reason)