File size: 2,362 Bytes
deff797
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from enum import Enum
from typing import List, Optional
import re

class ContentCategory(Enum):
    """Category label describing where a piece of text came from.

    Passed as the ``context`` argument of
    ``ProfanityDetector.detect_profanity``, which uses it to choose between
    its context-aware check and its standard check.
    """

    # The three categories below are routed to the context-aware check
    # (unless strict mode is requested).
    ENTITY_NAME = "entity_name"
    SONG_TITLE = "song_title"
    BRAND_NAME = "brand_name"
    # Routed to the standard check.
    USER_INPUT = "user_input"

class ProfanityDetector:
    """Word-level profanity detector with a category-aware entry point.

    Text is lower-cased and split into word tokens, and every token is
    compared against a fixed word list. Matching is exact per token, so a
    listed word that is merely embedded in a longer word (for example
    "class") is not flagged.

    The words found by the most recent check are stored on the instance and
    can be read back with ``get_detected_words``.
    """

    # Compiled once for the class; same pattern every check uses to tokenize.
    _WORD_PATTERN = re.compile(r'\b\w+\b')

    def __init__(self):
        # Initialize with basic profanity list
        # In production, this would be loaded from a curated database
        self._profanity_list = {
            'fuck', 'shit', 'damn', 'ass', 'bitch', 'bastard',
            'crap', 'hell', 'piss', 'dick', 'cock', 'pussy'
        }
        # Words found by the most recent check (empty until one has run).
        self._profanity_words = set()

    def detect_profanity(
        self,
        text: str,
        context: "ContentCategory",
        strict_mode: bool = False
    ) -> bool:
        """
        Detect profanity in text with context awareness.

        Args:
            text: Input text to check
            context: Category of the content (entity name, user input, etc.)
            strict_mode: When True, always use the standard check regardless
                of ``context``

        Returns:
            bool: True if profanity detected, False otherwise
        """
        # Strict mode overrides the category, so settle it before looking at
        # the context at all.
        if strict_mode:
            return self._check_standard(text)

        named_content = (
            ContentCategory.ENTITY_NAME,
            ContentCategory.SONG_TITLE,
            ContentCategory.BRAND_NAME,
        )
        if context in named_content:
            return self._check_with_context(text, context)

        return self._check_standard(text)

    def _check_with_context(self, text: str, context: "ContentCategory") -> bool:
        """Check text that belongs to a named-content category.

        Detection only: the caller decides what to do with a positive result.

        NOTE(review): ``context`` is accepted but not consulted; this path
        currently applies exactly the same matching as ``_check_standard``.
        Any category-specific leniency still has to be implemented here.
        """
        return self._scan(text)

    def _check_standard(self, text: str) -> bool:
        """Check text against the word list with the standard rules."""
        return self._scan(text)

    def _scan(self, text: str) -> bool:
        """Tokenize ``text``, record listed words, and report whether any hit."""
        tokens = self._WORD_PATTERN.findall(text.lower())
        # Assign unconditionally: a clean text must clear the result of the
        # previous check instead of leaving stale words behind.
        self._profanity_words = {
            token for token in tokens if token in self._profanity_list
        }
        return bool(self._profanity_words)

    def get_detected_words(self) -> set:
        """Return the profane words found by the most recent check.

        The result is a copy, so mutating it does not affect the detector.
        It is empty if no check has run yet or the last check found nothing.
        """
        return set(self._profanity_words)