markobinario committed on
Commit
0ec3d53
·
verified ·
1 Parent(s): cefbb61

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -3
app.py CHANGED
@@ -11,14 +11,24 @@ class AIChatbot:
11
  self.database_url = database_url
12
  self.conversation_history = []
13
 
14
- # Profanity filter - list of bad words to filter
15
  self.bad_words = {
 
16
  'damn', 'hell', 'crap', 'suck', 'idiot', 'fool', 'jerk', 'loser', 'dumb', 'trash',
17
  'butt', 'freak', 'nut', 'moron', 'dummy', 'bozo', 'twit', 'dope', 'dumbass',
18
  'poophead', 'jerkoff', 'bugger', 'wanker', 'tosser', 'bastard', 'scum', 'slime',
19
  'creep', 'brat', 'dweeb', 'goon', 'booby', 'puke', 'vomit', 'dung', 'sap',
20
  'clutz', 'knob', 'prick', 'ass', 'shit', 'fuck', 'cock', 'tits', 'pussy',
21
- 'cunt', 'slut', 'bitch', 'whore', 'skank'
 
 
 
 
 
 
 
 
 
22
  }
23
 
24
  # Simple conversation patterns
@@ -77,7 +87,17 @@ class AIChatbot:
77
  # Normalize message: convert to lowercase
78
  message_lower = message.lower()
79
 
80
- # First, normalize common obfuscation characters
 
 
 
 
 
 
 
 
 
 
81
  # Replace common character substitutions (numbers/symbols) with letters
82
  obfuscation_map = {
83
  '0': 'o', '1': 'i', '3': 'e', '4': 'a', '5': 's',
 
11
  self.database_url = database_url
12
  self.conversation_history = []
13
 
14
+ # Profanity filter - list of bad words to filter (English and Tagalog)
15
  self.bad_words = {
16
+ # English bad words
17
  'damn', 'hell', 'crap', 'suck', 'idiot', 'fool', 'jerk', 'loser', 'dumb', 'trash',
18
  'butt', 'freak', 'nut', 'moron', 'dummy', 'bozo', 'twit', 'dope', 'dumbass',
19
  'poophead', 'jerkoff', 'bugger', 'wanker', 'tosser', 'bastard', 'scum', 'slime',
20
  'creep', 'brat', 'dweeb', 'goon', 'booby', 'puke', 'vomit', 'dung', 'sap',
21
  'clutz', 'knob', 'prick', 'ass', 'shit', 'fuck', 'cock', 'tits', 'pussy',
22
+ 'cunt', 'slut', 'bitch', 'whore', 'skank', 'stupid',
23
+ # Tagalog bad words
24
+ 'gago', 'putangina', 'hayop', 'lintik', 'walang', 'hiya', 'bobo', 'leche',
25
+ 'punyeta', 'sira', 'ulo', 'bwisit', 'pakshet', 'tarantado', 'ulol', 'buwisit',
26
+ 'hudas', 'kupal', 'shet', 'tae', 'tanga', 'tangina'
27
+ }
28
+
29
+ # Tagalog phrases (multi-word profanity)
30
+ self.bad_phrases = {
31
+ 'walang hiya', 'sira ulo'
32
  }
33
 
34
  # Simple conversation patterns
 
87
  # Normalize message: convert to lowercase
88
  message_lower = message.lower()
89
 
90
+ # First, check for bad phrases (multi-word profanity like "walang hiya", "sira ulo")
91
+ for phrase in self.bad_phrases:
92
+ # Remove punctuation and check if phrase exists in message
93
+ phrase_clean = re.sub(r'[^\w\s]', '', phrase)
94
+ message_clean_phrase = re.sub(r'[^\w\s]', '', message_lower)
95
+ # Check if phrase appears in message (with flexible spacing)
96
+ phrase_pattern = r'\b' + r'\s+'.join(re.escape(word) for word in phrase_clean.split()) + r'\b'
97
+ if re.search(phrase_pattern, message_clean_phrase, re.IGNORECASE):
98
+ return True
99
+
100
+ # Normalize common obfuscation characters
101
  # Replace common character substitutions (numbers/symbols) with letters
102
  obfuscation_map = {
103
  '0': 'o', '1': 'i', '3': 'e', '4': 'a', '5': 's',