Spaces: Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,14 +11,24 @@ class AIChatbot:
|
|
| 11 |
self.database_url = database_url
|
| 12 |
self.conversation_history = []
|
| 13 |
|
| 14 |
-
# Profanity filter - list of bad words to filter
|
| 15 |
self.bad_words = {
|
|
|
|
| 16 |
'damn', 'hell', 'crap', 'suck', 'idiot', 'fool', 'jerk', 'loser', 'dumb', 'trash',
|
| 17 |
'butt', 'freak', 'nut', 'moron', 'dummy', 'bozo', 'twit', 'dope', 'dumbass',
|
| 18 |
'poophead', 'jerkoff', 'bugger', 'wanker', 'tosser', 'bastard', 'scum', 'slime',
|
| 19 |
'creep', 'brat', 'dweeb', 'goon', 'booby', 'puke', 'vomit', 'dung', 'sap',
|
| 20 |
'clutz', 'knob', 'prick', 'ass', 'shit', 'fuck', 'cock', 'tits', 'pussy',
|
| 21 |
-
'cunt', 'slut', 'bitch', 'whore', 'skank'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
}
|
| 23 |
|
| 24 |
# Simple conversation patterns
|
|
@@ -77,7 +87,17 @@ class AIChatbot:
|
|
| 77 |
# Normalize message: convert to lowercase
|
| 78 |
message_lower = message.lower()
|
| 79 |
|
| 80 |
-
# First, normalize common obfuscation characters
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
# Replace common character substitutions (numbers/symbols) with letters
|
| 82 |
obfuscation_map = {
|
| 83 |
'0': 'o', '1': 'i', '3': 'e', '4': 'a', '5': 's',
|
|
|
|
| 11 |
self.database_url = database_url
|
| 12 |
self.conversation_history = []
|
| 13 |
|
| 14 |
+
# Profanity filter - list of bad words to filter (English and Tagalog)
|
| 15 |
self.bad_words = {
|
| 16 |
+
# English bad words
|
| 17 |
'damn', 'hell', 'crap', 'suck', 'idiot', 'fool', 'jerk', 'loser', 'dumb', 'trash',
|
| 18 |
'butt', 'freak', 'nut', 'moron', 'dummy', 'bozo', 'twit', 'dope', 'dumbass',
|
| 19 |
'poophead', 'jerkoff', 'bugger', 'wanker', 'tosser', 'bastard', 'scum', 'slime',
|
| 20 |
'creep', 'brat', 'dweeb', 'goon', 'booby', 'puke', 'vomit', 'dung', 'sap',
|
| 21 |
'clutz', 'knob', 'prick', 'ass', 'shit', 'fuck', 'cock', 'tits', 'pussy',
|
| 22 |
+
'cunt', 'slut', 'bitch', 'whore', 'skank', 'stupid',
|
| 23 |
+
# Tagalog bad words
|
| 24 |
+
'gago', 'putangina', 'hayop', 'lintik', 'walang', 'hiya', 'bobo', 'leche',
|
| 25 |
+
'punyeta', 'sira', 'ulo', 'bwisit', 'pakshet', 'tarantado', 'ulol', 'buwisit',
|
| 26 |
+
'hudas', 'kupal', 'shet', 'tae', 'tanga', 'tangina'
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
# Tagalog phrases (multi-word profanity)
|
| 30 |
+
self.bad_phrases = {
|
| 31 |
+
'walang hiya', 'sira ulo'
|
| 32 |
}
|
| 33 |
|
| 34 |
# Simple conversation patterns
|
|
|
|
| 87 |
# Normalize message: convert to lowercase
|
| 88 |
message_lower = message.lower()
|
| 89 |
|
| 90 |
+
# First, check for bad phrases (multi-word profanity like "walang hiya", "sira ulo")
|
| 91 |
+
for phrase in self.bad_phrases:
|
| 92 |
+
# Remove punctuation and check if phrase exists in message
|
| 93 |
+
phrase_clean = re.sub(r'[^\w\s]', '', phrase)
|
| 94 |
+
message_clean_phrase = re.sub(r'[^\w\s]', '', message_lower)
|
| 95 |
+
# Check if phrase appears in message (with flexible spacing)
|
| 96 |
+
phrase_pattern = r'\b' + r'\s+'.join(re.escape(word) for word in phrase_clean.split()) + r'\b'
|
| 97 |
+
if re.search(phrase_pattern, message_clean_phrase, re.IGNORECASE):
|
| 98 |
+
return True
|
| 99 |
+
|
| 100 |
+
# Normalize common obfuscation characters
|
| 101 |
# Replace common character substitutions (numbers/symbols) with letters
|
| 102 |
obfuscation_map = {
|
| 103 |
'0': 'o', '1': 'i', '3': 'e', '4': 'a', '5': 's',
|