# prof-demo/src/core/detector.py
# (uploaded by sbicy, "Upload 17 files", commit deff797 verified)
from enum import Enum
from typing import List, Optional
import re
class ContentCategory(Enum):
    """Category of the text being checked.

    The detector is more permissive for the three entity-like categories
    (ENTITY_NAME, SONG_TITLE, BRAND_NAME) than for raw USER_INPUT when
    strict mode is off — see ProfanityDetector.detect_profanity.
    """

    ENTITY_NAME = "entity_name"
    SONG_TITLE = "song_title"
    BRAND_NAME = "brand_name"
    USER_INPUT = "user_input"
class ProfanityDetector:
def __init__(self):
# Initialize with basic profanity list
# In production, this would be loaded from a curated database
self._profanity_list = {
'fuck', 'shit', 'damn', 'ass', 'bitch', 'bastard',
'crap', 'hell', 'piss', 'dick', 'cock', 'pussy'
}
self._profanity_words = set()
def detect_profanity(
self,
text: str,
context: ContentCategory,
strict_mode: bool = False
) -> bool:
"""
Detect profanity in text with context awareness.
Args:
text: Input text to check
context: Category of the content (entity name, user input, etc.)
strict_mode: Whether to apply stricter rules
Returns:
bool: True if profanity detected, False otherwise
"""
# If it's an entity name and not in strict mode, we're more permissive
if context in [ContentCategory.ENTITY_NAME, ContentCategory.SONG_TITLE, ContentCategory.BRAND_NAME] and not strict_mode:
return self._check_with_context(text, context)
return self._check_standard(text)
def _check_with_context(self, text: str, context: ContentCategory) -> bool:
"""Context-aware checking - more permissive for entity names."""
# For entity names, we detect but don't block
words = re.findall(r'\b\w+\b', text.lower())
found = [word for word in words if word in self._profanity_list]
if found:
self._profanity_words = set(found)
return True
return False
def _check_standard(self, text: str) -> bool:
"""Standard profanity checking - stricter."""
words = re.findall(r'\b\w+\b', text.lower())
found = [word for word in words if word in self._profanity_list]
if found:
self._profanity_words = set(found)
return True
return False
def get_detected_words(self) -> set:
"""Return the profane words that were detected."""
return self._profanity_words