# prof-demo/src/core/detector.py
# (uploaded by sbicy, "Upload 17 files", commit deff797 verified)
from enum import Enum
from typing import List, Optional
import re
class ContentCategory(Enum):
    """Category of the text being checked.

    The detector is more permissive for the three entity-like categories
    (ENTITY_NAME, SONG_TITLE, BRAND_NAME) than for raw USER_INPUT when
    strict mode is off — see ProfanityDetector.detect_profanity.
    """

    ENTITY_NAME = "entity_name"
    SONG_TITLE = "song_title"
    BRAND_NAME = "brand_name"
    USER_INPUT = "user_input"
class ProfanityDetector:
def __init__(self):
# Initialize with basic profanity list
# In production, this would be loaded from a curated database
self._profanity_list = {
'fuck', 'shit', 'damn', 'ass', 'bitch', 'bastard',
'crap', 'hell', 'piss', 'dick', 'cock', 'pussy'
}
self._profanity_words = set()
def detect_profanity(
self,
text: str,
context: ContentCategory,
strict_mode: bool = False
) -> bool:
"""
Detect profanity in text with context awareness.
Args:
text: Input text to check
context: Category of the content (entity name, user input, etc.)
strict_mode: Whether to apply stricter rules
Returns:
bool: True if profanity detected, False otherwise
"""
# If it's an entity name and not in strict mode, we're more permissive
if context in [ContentCategory.ENTITY_NAME, ContentCategory.SONG_TITLE, ContentCategory.BRAND_NAME] and not strict_mode:
return self._check_with_context(text, context)
return self._check_standard(text)
def _check_with_context(self, text: str, context: ContentCategory) -> bool:
"""Context-aware checking - more permissive for entity names."""
# For entity names, we detect but don't block
words = re.findall(r'\b\w+\b', text.lower())
found = [word for word in words if word in self._profanity_list]
if found:
self._profanity_words = set(found)
return True
return False
def _check_standard(self, text: str) -> bool:
"""Standard profanity checking - stricter."""
words = re.findall(r'\b\w+\b', text.lower())
found = [word for word in words if word in self._profanity_list]
if found:
self._profanity_words = set(found)
return True
return False
def get_detected_words(self) -> set:
"""Return the profane words that were detected."""
return self._profanity_words