import re
from enum import Enum
from typing import List, Optional, Set


class ContentCategory(Enum):
    """Category of the content that a piece of text comes from.

    The value of each member is its lower-case string identifier.
    """

    # Name of an entity.
    ENTITY_NAME = "entity_name"
    # Title of a song.
    SONG_TITLE = "song_title"
    # Name of a brand.
    BRAND_NAME = "brand_name"
    # Text entered by a user.
    USER_INPUT = "user_input"


class ProfanityDetector:
    """Word-list based profanity detector.

    Text is lower-cased and split into word tokens; a token counts as
    profanity only when it matches an entry of the word list exactly, so
    substrings inside longer words (e.g. "class") do not match.

    The matches of the most recent check are stored on the instance and can
    be read with :meth:`get_detected_words`.
    """

    # Tokenizer shared by every check; compiled once for the whole class.
    _WORD_PATTERN = re.compile(r'\b\w+\b')

    def __init__(self):
        """Create a detector with the built-in word list and no results."""
        # Despite the name this is a set, which gives O(1) membership tests.
        self._profanity_list = {
            'fuck', 'shit', 'damn', 'ass', 'bitch', 'bastard',
            'crap', 'hell', 'piss', 'dick', 'cock', 'pussy'
        }
        # Matches found by the most recent check (empty if it was clean).
        self._profanity_words: Set[str] = set()

    def detect_profanity(
        self,
        text: str,
        context: "ContentCategory",
        strict_mode: bool = False
    ) -> bool:
        """Detect profanity in text with context awareness.

        Args:
            text: Input text to check.
            context: Category of the content (entity name, user input, etc.).
            strict_mode: When True, entity names, song titles and brand names
                are checked with the standard rules instead of the
                context-aware ones.

        Returns:
            True if profanity was detected, False otherwise. The detected
            words are available from :meth:`get_detected_words` afterwards.
        """
        context_aware = context in (
            ContentCategory.ENTITY_NAME,
            ContentCategory.SONG_TITLE,
            ContentCategory.BRAND_NAME,
        )
        if context_aware and not strict_mode:
            return self._check_with_context(text, context)
        return self._check_standard(text)

    def _check_with_context(self, text: str, context: "ContentCategory") -> bool:
        """Check text that belongs to a name-like category.

        NOTE: no category-specific relaxation is implemented yet. ``context``
        is accepted as the hook for such rules, but the result is currently
        the same as :meth:`_check_standard`.
        """
        return self._scan(text)

    def _check_standard(self, text: str) -> bool:
        """Check text against the full word list."""
        return self._scan(text)

    def _scan(self, text: str) -> bool:
        """Tokenize ``text``, record the profane tokens and report any hit."""
        tokens = self._WORD_PATTERN.findall(text.lower())
        # Always rebind the result set, even when nothing matched; otherwise a
        # clean text would leave the previous text's matches in place.
        self._profanity_words = {
            token for token in tokens if token in self._profanity_list
        }
        return bool(self._profanity_words)

    def get_detected_words(self) -> Set[str]:
        """Return the profane words found by the most recent check.

        A copy is returned so callers cannot modify the detector's state.
        """
        return set(self._profanity_words)