File size: 1,600 Bytes
deff797
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from enum import Enum
from typing import List
import re

class ToxicityLevel(Enum):
    """Closed set of severity levels a piece of text can be assigned."""

    SAFE = "safe"          # no flagged language detected
    MILD = "mild"          # mild profanity
    EXPLICIT = "explicit"  # strong/explicit profanity
    SLUR = "slur"          # slur vocabulary
    THREAT = "threat"      # threatening language (highest severity)

class ContextClassifier:
    """Classify text toxicity by matching tokens against severity word lists.

    Categories are checked in fixed priority order:
    threat > slur > explicit > mild; anything unmatched is SAFE.
    """

    def __init__(self):
        # Basic categorization of profanity by severity
        self._mild = {'damn', 'crap', 'hell', 'ass'}
        self._explicit = {'fuck', 'shit', 'bitch', 'piss', 'dick', 'cock', 'pussy'}
        self._slurs = {'bastard'}  # Simplified - real implementation would be more comprehensive
        # Stored as a set, like the other categories, so the whole-word
        # intersection check below is O(1) per token.
        self._threat_keywords = {'kill', 'die', 'death', 'hurt', 'harm'}

    def classify_context(self, text: str) -> ToxicityLevel:
        """
        Classify the toxicity level of text with context awareness.

        Args:
            text: Input text to classify

        Returns:
            ToxicityLevel: The classified toxicity level
        """
        # Tokenize once (case-folded); every category check below matches
        # whole words only.
        words = set(re.findall(r'\b\w+\b', text.lower()))

        # Check for threats first (highest priority).
        # BUG FIX: the original tested each keyword as a raw substring of the
        # text, so benign words were flagged as threats — "skill" contains
        # "kill", "diet" contains "die", "pharmacy" contains "harm".
        # Match against the tokenized word set instead, consistent with the
        # other categories.
        if words & self._threat_keywords:
            return ToxicityLevel.THREAT

        # Check for slurs
        if words & self._slurs:
            return ToxicityLevel.SLUR

        # Check for explicit language
        if words & self._explicit:
            return ToxicityLevel.EXPLICIT

        # Check for mild profanity
        if words & self._mild:
            return ToxicityLevel.MILD

        return ToxicityLevel.SAFE