from enum import Enum
from typing import List, Optional
import re
class ContentCategory(Enum):
    """Categories of text content, used to pick a profanity-check policy."""

    ENTITY_NAME = "entity_name"  # names of people, places, organizations
    SONG_TITLE = "song_title"    # titles of musical works
    BRAND_NAME = "brand_name"    # trademarks / product names
    USER_INPUT = "user_input"    # free-form user-submitted text
class ProfanityDetector:
    """Context-aware profanity detector backed by a small built-in word list.

    Entity names, song titles, and brand names are routed through a more
    permissive check unless ``strict_mode`` is requested; everything else
    gets the standard check.  The words found by the most recent check are
    available via :meth:`get_detected_words`.
    """

    # Word tokenizer shared by all checks; compiled once at class creation.
    _WORD_PATTERN = re.compile(r'\b\w+\b')

    def __init__(self) -> None:
        # Initialize with basic profanity list.
        # In production, this would be loaded from a curated database.
        self._profanity_list = {
            'fuck', 'shit', 'damn', 'ass', 'bitch', 'bastard',
            'crap', 'hell', 'piss', 'dick', 'cock', 'pussy'
        }
        # Words found by the most recent check; overwritten on every call.
        self._profanity_words: set = set()

    def detect_profanity(
        self,
        text: str,
        context: "ContentCategory",
        strict_mode: bool = False
    ) -> bool:
        """
        Detect profanity in text with context awareness.

        Args:
            text: Input text to check
            context: Category of the content (entity name, user input, etc.)
            strict_mode: Whether to apply stricter rules

        Returns:
            bool: True if profanity detected, False otherwise
        """
        # Entity-like content outside strict mode takes the permissive path.
        # ``strict_mode`` is tested first so the (cheaper) flag check can
        # short-circuit the category membership test.
        if not strict_mode and context in (
            ContentCategory.ENTITY_NAME,
            ContentCategory.SONG_TITLE,
            ContentCategory.BRAND_NAME,
        ):
            return self._check_with_context(text, context)
        return self._check_standard(text)

    def _scan(self, text: str) -> bool:
        """Tokenize *text*, record any profane words, and report a hit.

        Always overwrites ``_profanity_words`` — including with an empty set
        on a clean text — so results from an earlier call can never leak
        into :meth:`get_detected_words` (the previous implementation left
        stale words behind when no profanity was found).
        """
        words = self._WORD_PATTERN.findall(text.lower())
        self._profanity_words = {w for w in words if w in self._profanity_list}
        return bool(self._profanity_words)

    def _check_with_context(self, text: str, context: "ContentCategory") -> bool:
        """Context-aware checking - more permissive for entity names.

        Currently identical to the standard scan; ``context`` stays in the
        signature so category-specific policy can be added without touching
        callers.
        """
        return self._scan(text)

    def _check_standard(self, text: str) -> bool:
        """Standard profanity checking - stricter."""
        return self._scan(text)

    def get_detected_words(self) -> set:
        """Return the profane words that were detected by the last check."""
        # Return a copy so callers cannot mutate internal detector state.
        return set(self._profanity_words)