File size: 1,546 Bytes
c2af030
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from codeInsight.logger import logging
import re

class SafetyChecker:
    """Screen model-generated text before it is handed back to the caller.

    The checks run in a fixed order (empty output, refusal phrases,
    profanity, PII-shaped substrings) and the first one that triggers
    decides the replacement message. Hallucination markers are only logged;
    they never change the returned text.
    """

    # Stored lower-cased so they can be matched against one lower-cased copy
    # of the text instead of re-lowering the text for every phrase.
    _REFUSAL_PHRASES = ("i cannot", "i am unable", "as an ai model", "i'm sorry")
    _HALLUCINATION_MARKERS = (
        "according to a study",
        "in recent news",
        "as per research",
    )

    _BAD_WORD_RE = re.compile(r"\b(fuck|shit|bitch|asshole|bastard)\b", re.IGNORECASE)

    _PII_RES = (
        # Three, two and four digits separated by dashes (SSN-shaped).
        re.compile(r"\b\d{3}-\d{2}-\d{4}\b"),
        # Sixteen consecutive digits (shape of an unformatted card number).
        re.compile(r"\b\d{16}\b"),
        # Email address. The TLD class is [A-Za-z]; a "|" inside a character
        # class is a literal pipe, not alternation, so it must not appear here.
        re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"),
    )

    def __init__(self):
        logging.info("SafetyChecker initialized.")

    def check_outputs(self, text: str) -> str:
        """Return ``text`` unchanged, or a fixed replacement message.

        Args:
            text: The model output to screen. Any falsy value (``""``,
                ``None``) is treated as "no output".

        Returns:
            ``"No response Generated"`` for falsy input, a canned refusal
            message when a refusal phrase is found, a removal notice when a
            listed bad word or a PII-shaped substring is found, otherwise
            ``text`` itself.
        """
        if not text:
            return "No response Generated"

        lowered = text.lower()

        if any(phrase in lowered for phrase in self._REFUSAL_PHRASES):
            logging.warning(f"Model refusal detected: {text}")
            return "I'm sorry, but I cannot fulfill that request."

        if self._BAD_WORD_RE.search(text):
            logging.warning('Bad word detected')
            return "[Content removed due to inappropriate language]"

        if any(pattern.search(text) for pattern in self._PII_RES):
            logging.warning("PII detected in model output.")
            return "[Sensitive information removed for privacy]"

        # Informational only: the text is still returned as-is below.
        if any(marker in lowered for marker in self._HALLUCINATION_MARKERS):
            logging.info("Potential hallucination detected.")

        logging.info("Output passed all safety checks.")
        return text