Spaces:
Sleeping
Sleeping
| from codeInsight.logger import logging | |
| import re | |
class SafetyChecker:
    """Post-processing filter for model output.

    Screens generated text for refusal phrases, profanity, PII
    (SSN / card number / email), and hallucination markers, returning
    either the original text or a replacement message.
    """

    # Phrases indicating the model refused the request (compared lowercase).
    _REFUSAL_PHRASES = ("i cannot", "i am unable", "as an ai model", "i'm sorry")

    # Markers that often precede fabricated citations; logged only, never blocked.
    _HALLUCINATION_MARKERS = ("according to a study", "in recent news", "as per research")

    def __init__(self):
        # Compile every pattern once so check_outputs() does not recompile per call.
        self._bad_word_re = re.compile(
            r"\b(fuck|shit|bitch|asshole|bastard)\b", re.IGNORECASE
        )
        self._pii_patterns = [
            re.compile(r"\b\d{3}-\d{2}-\d{4}\b"),  # US SSN
            re.compile(r"\b\d{16}\b"),             # 16-digit card number, no separators
            # Email. Fixed TLD class: original "[A-Z|a-z]" wrongly allowed '|'.
            re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"),
        ]
        logging.info("SafetyChecker initialized.")

    def check_outputs(self, text: str) -> str:
        """Return ``text`` if it passes all checks, else a replacement message.

        Checks run in priority order: empty -> refusal -> profanity -> PII
        -> hallucination marker (log-only). The first failing check decides
        the returned string.
        """
        if not text:
            return "No response Generated"

        lowered = text.lower()  # lowercase once; shared by two substring checks

        if any(phrase in lowered for phrase in self._REFUSAL_PHRASES):
            # Lazy %-args: message is only formatted if the record is emitted.
            logging.warning("Model refusal detected: %s", text)
            return "I'm sorry, but I cannot fulfill that request."

        if self._bad_word_re.search(text):
            logging.warning('Bad word detected')
            return "[Content removed due to inappropriate language]"

        for pattern in self._pii_patterns:
            if pattern.search(text):
                logging.warning("PII detected in model output.")
                return "[Sensitive information removed for privacy]"

        if any(marker in lowered for marker in self._HALLUCINATION_MARKERS):
            # Informational only: a marker does not block the output.
            logging.info("Potential hallucination detected.")

        logging.info("Output passed all safety checks.")
        return text