# prof-demo/src/core/classifier.py
from enum import Enum
from typing import List
import re
class ToxicityLevel(Enum):
    """Severity tiers for classified text, ordered from harmless to most severe."""

    SAFE = "safe"          # nothing flagged
    MILD = "mild"          # light profanity (e.g. "damn")
    EXPLICIT = "explicit"  # strong profanity
    SLUR = "slur"          # slur vocabulary
    THREAT = "threat"      # threatening language — highest priority
class ContextClassifier:
    """Lexicon-based toxicity classifier.

    Tokenizes input into whole words and matches them against small
    severity-ranked word sets. Checks are ordered by severity so the
    most serious category present wins.
    """

    def __init__(self):
        # Basic categorization of profanity by severity.
        self._mild = {'damn', 'crap', 'hell', 'ass'}
        self._explicit = {'fuck', 'shit', 'bitch', 'piss', 'dick', 'cock', 'pussy'}
        self._slurs = {'bastard'}  # Simplified - real implementation would be more comprehensive
        # Stored as a set (was a list) so threat matching uses the same
        # O(1) whole-word membership tests as the other tiers.
        self._threat_keywords = {'kill', 'die', 'death', 'hurt', 'harm'}

    def classify_context(self, text: str) -> ToxicityLevel:
        """
        Classify the toxicity level of text with context awareness.

        Args:
            text: Input text to classify

        Returns:
            ToxicityLevel: The classified toxicity level
        """
        text_lower = text.lower()
        words = set(re.findall(r'\b\w+\b', text_lower))

        # Check for threats first (highest priority). Whole-word matching
        # fixes the substring false positives of the previous
        # `keyword in text_lower` check (e.g. "skill" -> "kill",
        # "harmony" -> "harm", "diet" -> "die").
        if words & self._threat_keywords:
            return ToxicityLevel.THREAT
        # Check for slurs
        if words & self._slurs:
            return ToxicityLevel.SLUR
        # Check for explicit language
        if words & self._explicit:
            return ToxicityLevel.EXPLICIT
        # Check for mild profanity
        if words & self._mild:
            return ToxicityLevel.MILD
        return ToxicityLevel.SAFE