Spaces:

sbicy
/

prof-demo

Sleeping

App Files Files Community

prof-demo / src /core /delexicalizer.py

sbicy

Upload 17 files

deff797 verified 4 months ago

raw

history blame contribute delete

2.16 kB

	from typing import Dict, List, Optional
	import re

	class Delexicalizer:
	    """Swap profane words for numbered placeholders and restore them later.

	    ``delexicalize`` replaces each whole-word, case-insensitive occurrence of
	    a listed word with ``<PROFANITY_n>`` and remembers the exact spelling that
	    was removed; ``relexicalize`` puts the remembered spellings back.

	    The placeholder table and counter accumulate across calls until
	    ``reset`` is called.
	    """

	    def __init__(self) -> None:
	        # placeholder -> the matched word exactly as it appeared in the input
	        self._placeholder_map: Dict[str, str] = {}
	        # lower-cased word -> most recently issued placeholder for that word
	        # (written by _create_placeholder; not read anywhere in this class)
	        self._reverse_map: Dict[str, str] = {}
	        # Number of placeholders issued since construction / the last reset
	        self._counter = 0
	        # Basic profanity list for detection
	        self._profanity_patterns = [
	            'fuck', 'shit', 'damn', 'ass', 'bitch', 'bastard',
	            'crap', 'hell', 'piss', 'dick', 'cock', 'pussy'
	        ]

	    def delexicalize(self, text: str) -> str:
	        """
	        Replace profane words with placeholders.

	        Only whole words are matched (``\\b`` on both sides), ignoring case,
	        so words that merely contain a listed word are left alone. Every
	        occurrence gets its own placeholder, numbered in the order the
	        occurrences appear in ``text``.

	        Args:
	            text: Input text containing potential profanity

	        Returns:
	            str: Text with profanity replaced by placeholders
	        """
	        # An empty alternation would match at every word boundary, so bail
	        # out early when there is nothing to look for.
	        if not self._profanity_patterns:
	            return text

	        # Build one alternation from the current word list. Escaping keeps
	        # entries literal even if one ever contains a regex metacharacter.
	        # The list is read on every call so changes to it are honoured; the
	        # re module caches compiled patterns, so this stays cheap.
	        alternation = '|'.join(re.escape(word) for word in self._profanity_patterns)
	        matcher = re.compile(r'\b(?:' + alternation + r')\b', re.IGNORECASE)

	        # Single left-to-right pass: the callback registers each match and
	        # returns the placeholder that takes its place.
	        return matcher.sub(
	            lambda match: self._create_placeholder(match.group()),
	            text,
	        )

	    def relexicalize(self, text: str) -> str:
	        """
	        Restore original words from placeholders.

	        Placeholders that were never issued by this instance (or were
	        discarded by ``reset``) are left in the text unchanged.

	        Args:
	            text: Text with placeholders

	        Returns:
	            str: Original text with placeholders replaced
	        """
	        if not self._placeholder_map:
	            return text

	        known = self._placeholder_map
	        # One pass over the text with a dictionary lookup per placeholder,
	        # instead of one full-string replace per placeholder ever issued.
	        return re.sub(
	            r'<PROFANITY_[0-9]+>',
	            lambda match: known.get(match.group(), match.group()),
	            text,
	        )

	    def _create_placeholder(self, word: str) -> str:
	        """Create a unique placeholder for a word and record the mapping."""
	        self._counter += 1
	        placeholder = f"<PROFANITY_{self._counter}>"
	        self._placeholder_map[placeholder] = word
	        self._reverse_map[word.lower()] = placeholder
	        return placeholder

	    def reset(self) -> None:
	        """Reset the delexicalizer state."""
	        self._placeholder_map.clear()
	        self._reverse_map.clear()
	        self._counter = 0