from enum import Enum
from typing import List, Optional
import re


class ContentCategory(Enum):
    """Category of the text being screened; picks how strictly it is checked."""
    ENTITY_NAME = "entity_name"
    SONG_TITLE = "song_title"
    BRAND_NAME = "brand_name"
    USER_INPUT = "user_input"


class ProfanityDetector:
    """Detect profane words in text, with context-aware leniency for names.

    Fixes over the original:
    - ``_check_with_context`` and ``_check_standard`` were byte-identical;
      both now delegate to a single ``_scan`` helper.
    - Detected words are reset on every scan, so :meth:`get_detected_words`
      no longer returns stale matches from an earlier call when the most
      recent text was clean.
    """

    def __init__(self) -> None:
        # Basic profanity list.
        # In production, this would be loaded from a curated database.
        self._profanity_list = {
            'fuck', 'shit', 'damn', 'ass', 'bitch', 'bastard',
            'crap', 'hell', 'piss', 'dick', 'cock', 'pussy'
        }
        # Words found by the most recent detection call (empty if none).
        self._profanity_words: set = set()

    def detect_profanity(
        self,
        text: str,
        context: ContentCategory,
        strict_mode: bool = False
    ) -> bool:
        """
        Detect profanity in text with context awareness.

        Args:
            text: Input text to check
            context: Category of the content (entity name, user input, etc.)
            strict_mode: Whether to apply stricter rules

        Returns:
            bool: True if profanity detected, False otherwise
        """
        # Entity-like content (band names, song titles, brands) is routed
        # through the permissive path unless strict mode is requested.
        lenient_contexts = (
            ContentCategory.ENTITY_NAME,
            ContentCategory.SONG_TITLE,
            ContentCategory.BRAND_NAME,
        )
        if context in lenient_contexts and not strict_mode:
            return self._check_with_context(text, context)
        return self._check_standard(text)

    def _scan(self, text: str) -> bool:
        """Tokenize *text*, record any profane words found, return whether any were.

        Resets the detected-word set on every call so results never carry
        over from a previous detection.
        """
        words = re.findall(r'\b\w+\b', text.lower())
        self._profanity_words = {w for w in words if w in self._profanity_list}
        return bool(self._profanity_words)

    def _check_with_context(self, text: str, context: ContentCategory) -> bool:
        """Context-aware checking - more permissive for entity names.

        NOTE(review): currently identical to the standard check; callers can
        use the context/detection result to flag rather than block. Kept as a
        separate hook so the permissive policy can diverge later.
        """
        return self._scan(text)

    def _check_standard(self, text: str) -> bool:
        """Standard profanity checking - stricter."""
        return self._scan(text)

    def get_detected_words(self) -> set:
        """Return the profane words detected by the most recent check.

        Returns a copy so callers cannot mutate internal state.
        """
        return set(self._profanity_words)