flaskbot2

Sleeping

App Files Files Community

markobinario committed on Nov 10, 2025

Commit

1c8e0e8

verified ·

1 Parent(s): 228d990

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -11

app.py CHANGED Viewed

@@ -20,15 +20,65 @@ class AIChatbot:
             'creep', 'brat', 'dweeb', 'goon', 'booby', 'puke', 'vomit', 'dung', 'sap',
             'clutz', 'knob', 'prick', 'ass', 'shit', 'fuck', 'cock', 'tits', 'pussy',
             'cunt', 'slut', 'bitch', 'whore', 'skank', 'stupid',
             # Tagalog bad words
-            'gago', 'putangina', 'hayop', 'lintik', 'walang', 'hiya', 'bobo', 'leche',
             'punyeta', 'sira', 'ulo', 'bwisit', 'pakshet', 'tarantado', 'ulol', 'buwisit',
-            'hudas', 'kupal', 'shet', 'tae', 'tanga', 'tangina'
         }
-        # Tagalog phrases (multi-word profanity)
         self.bad_phrases = {
-            'walang hiya', 'sira ulo', 'walang kwenta', 'walang silbe'
         }
         # Simple conversation patterns
@@ -87,15 +137,28 @@ class AIChatbot:
         # Normalize message: convert to lowercase
         message_lower = message.lower()
-        # First, check for bad phrases (multi-word profanity like "walang hiya", "sira ulo")
         for phrase in self.bad_phrases:
-            # Remove punctuation and check if phrase exists in message
-            phrase_clean = re.sub(r'[^\w\s]', '', phrase)
-            message_clean_phrase = re.sub(r'[^\w\s]', '', message_lower)
             # Check if phrase appears in message (with flexible spacing)
-            phrase_pattern = r'\b' + r'\s+'.join(re.escape(word) for word in phrase_clean.split()) + r'\b'
-            if re.search(phrase_pattern, message_clean_phrase, re.IGNORECASE):
-                return True
         # Normalize common obfuscation characters
         # Replace common character substitutions (numbers/symbols) with letters
@@ -109,8 +172,13 @@ class AIChatbot:
         for char, replacement in obfuscation_map.items():
             normalized = normalized.replace(char, replacement)
         # Remove all non-word characters (except spaces) for word boundary checking
         message_clean = re.sub(r'[^\w\s]', '', normalized)
         words = message_clean.split()
         # Check for exact word matches in cleaned message

             'creep', 'brat', 'dweeb', 'goon', 'booby', 'puke', 'vomit', 'dung', 'sap',
             'clutz', 'knob', 'prick', 'ass', 'shit', 'fuck', 'cock', 'tits', 'pussy',
             'cunt', 'slut', 'bitch', 'whore', 'skank', 'stupid',
+            'asshole', 'dick', 'douche', 'scumbag', 'slimeball', 'douchebag', 'knobhead',
+            'numskull', 'halfwit', 'nincompoop', 'blockhead', 'dimwit', 'nitwit', 'simpleton',
+            'dunce', 'buffoon', 'doofus', 'clod', 'goober', 'jerkface', 'schmuck', 'scoundrel',
+            'miscreant', 'rat', 'git', 'wazzock', 'pillock', 'prat', 'plonker', 'div', 'bellend',
+            'tosserhead', 'twitbrain', 'sapbrain', 'knucklehead', 'dopey', 'boob', 'dingbat', 'oaf',
+            'ninnyhammer', 'chucklehead', 'saphead', 'pukehead', 'fuckface', 'assface', 'dickhead',
+            'cockhead', 'shithead', 'twatface', 'doucheface', 'bastardface', 'motherfucker', 'shitbag',
+            'cocksucker', 'jackass', 'wankerface', 'tosserface', 'arsehole', 'shitstain', 'assholeface',
+            'prickface', 'dumbfuck', 'fucknut', 'twatwaffle', 'shitbagger', 'dickweed', 'cumdump',
+            'asswipe', 'cockwomble', 'bollocks', 'twat', 'dick', 'fucking',
             # Tagalog bad words
+            'gago', 'putangina', 'putang', 'hayop', 'lintik', 'walang', 'hiya', 'bobo', 'leche',
             'punyeta', 'sira', 'ulo', 'bwisit', 'pakshet', 'tarantado', 'ulol', 'buwisit',
+            'hudas', 'kupal', 'shet', 'tae', 'tanga', 'tangina', 'bastos', 'maldita', 'loko',
+            'asar', 'pekpek', 'burat', 'kantot', 'puke', 'kantotin', 'tarantadoin', 'ulolan',
+            'bading', 'bakla', 'unggoy', 'asarin', 'bastusin', 'malditahin', 'buratin', 'pekpekin',
+            'pukein', 'tangain', 'gagoan', 'tarantadohin', 'ina'
         }
+        # Bad phrases (multi-word profanity - English and Tagalog)
         self.bad_phrases = {
+            # English phrases
+            'fuck you', 'shit you', 'damn you', 'hell you',
+            'you bastard', 'you bitch', 'you dick', 'you prick', 'you cunt', 'you slut', 'you whore',
+            'you jerk', 'you idiot', 'you fool', 'you moron', 'you dumbass', 'you douche', 'you twat',
+            'you bugger', 'you wanker', 'you tosser', 'you poophead', 'you scumbag', 'you slimeball',
+            'you douchebag', 'you knobhead', 'you bozo', 'you twit', 'you dope', 'you numskull',
+            'you halfwit', 'you nincompoop', 'you blockhead', 'you dimwit', 'you nitwit', 'you simpleton',
+            'you dunce', 'you buffoon', 'you doofus', 'you clod', 'you goober', 'you jerkface',
+            'you schmuck', 'you scoundrel', 'you miscreant', 'you rat', 'you puke', 'you vomit',
+            'you dung', 'you ass', 'you tits', 'you pussy', 'you cock', 'you fuckface', 'you assface',
+            'you dickhead', 'you cockhead', 'you shithead', 'you twatface', 'you knobhead', 'you doucheface',
+            'you loser', 'you bastardface', 'you motherfucker', 'you shitbag', 'you cocksucker',
+            'you jackass', 'you wankerface', 'you tosserface', 'you arsehole', 'you asshole', 'you freak', 'you nut',
+            'you scum', 'you creep', 'you brat', 'you dweeb', 'you goon', 'you pukehead', 'you shitstain',
+            'you assholeface', 'you prickface', 'you dumbfuck', 'you fucknut', 'you twatwaffle',
+            'you shitbagger', 'you dickweed', 'you cumdump', 'you asswipe', 'you cockwomble',
+            'you bollocks', 'you wazzock', 'you pillock', 'you plonker', 'you div', 'you bellend',
+            'you twitbrain', 'you motherfucking idiot', 'fuckig stupid',
+            # Tagalog phrases
+            'walang hiya', 'sira ulo', 'walang kwenta', 'walang silbe',
+            'putang ina', 'putang ina ka', 'putang ina mo',
+            'gago ka', 'gago mo', 'gago-gago', 'gago-gago ka', 'gago-gago mo', 'gagoan ka', 'gagoan mo',
+            'tanga ka', 'tanga mo', 'tanga-tanga', 'tanga-tanga ka', 'tanga-tanga mo', 'tangain ka', 'tangain mo', 'tanga-in ka', 'tanga-in mo',
+            'bobo ka', 'bobo mo', 'bobo-bobo', 'bobo-bobo ka', 'bobo-bobo mo', 'bobo-in ka', 'bobo-in mo',
+            'ulol ka', 'ulol mo', 'ulol-ulol', 'ulol-ulol ka', 'ulol-ulol mo', 'ulolan ka', 'ulolan mo', 'ulol-in ka', 'ulol-in mo',
+            'tarantado ka', 'tarantado mo', 'tarantado-tarantado', 'tarantado-tarantado ka', 'tarantado-tarantado mo',
+            'tarantadoin ka', 'tarantadoin mo', 'tarantado-in ka', 'tarantado-in mo', 'tarantadohin ka', 'tarantadohin mo',
+            'bastos ka', 'bastos mo', 'bastusin ka', 'bastusin mo',
+            'maldita ka', 'maldita mo', 'malditahin ka', 'malditahin mo',
+            'loko ka', 'loko mo', 'loko-loko', 'loko-loko ka', 'loko-loko mo',
+            'asar ka', 'asar mo', 'asarin ka', 'asarin mo',
+            'pekpek ka', 'pekpek mo', 'pekpekin ka', 'pekpekin mo',
+            'burat ka', 'burat mo', 'buratin ka', 'buratin mo',
+            'kantot ka', 'kantot mo', 'kantotin ka', 'kantotin mo',
+            'puke ka', 'puke mo', 'pukein ka', 'pukein mo',
+            'bading ka', 'bading mo',
+            'bakla ka', 'bakla mo',
+            'unggoy ka', 'unggoy mo'
         }
         # Simple conversation patterns
         # Normalize message: convert to lowercase
         message_lower = message.lower()
+        # First, check for bad phrases (multi-word profanity like "walang hiya", "sira ulo", "gago-gago")
         for phrase in self.bad_phrases:
+            # Replace hyphens with spaces for better matching (handles "gago-gago" as "gago gago")
+            phrase_normalized = phrase.replace('-', ' ')
+            # Remove punctuation but keep spaces, normalize whitespace
+            phrase_clean = re.sub(r'[^\w\s]', '', phrase_normalized)
+            phrase_clean = re.sub(r'\s+', ' ', phrase_clean).strip()
+            # Normalize message similarly - replace hyphens with spaces
+            message_normalized = message_lower.replace('-', ' ')
+            message_clean_phrase = re.sub(r'[^\w\s]', '', message_normalized)
+            message_clean_phrase = re.sub(r'\s+', ' ', message_clean_phrase).strip()
             # Check if phrase appears in message (with flexible spacing)
+            # Split phrase into words and create pattern that matches with any whitespace
+            phrase_words = phrase_clean.split()
+            if len(phrase_words) > 0:
+                # Create pattern that matches words with one or more spaces between them
+                # Using word boundaries to ensure whole words are matched
+                phrase_pattern = r'\b' + r'\s+'.join(re.escape(word) for word in phrase_words) + r'\b'
+                if re.search(phrase_pattern, message_clean_phrase, re.IGNORECASE):
+                    return True
         # Normalize common obfuscation characters
         # Replace common character substitutions (numbers/symbols) with letters
         for char, replacement in obfuscation_map.items():
             normalized = normalized.replace(char, replacement)
+        # Replace hyphens with spaces to handle hyphenated words like "gago-gago"
+        normalized = normalized.replace('-', ' ')
         # Remove all non-word characters (except spaces) for word boundary checking
         message_clean = re.sub(r'[^\w\s]', '', normalized)
+        # Normalize multiple spaces to single space
+        message_clean = re.sub(r'\s+', ' ', message_clean).strip()
         words = message_clean.split()
         # Check for exact word matches in cleaned message