Mustafa Öztürk committed on
Commit
2647c15
·
1 Parent(s): 9628b21

Refine blacklist matching and strengthen spam heuristics

Browse files
app/services/moderation_service.py CHANGED
@@ -72,19 +72,33 @@ def _log_pipeline_counts(early_exit_count: int, detoxify_call_count: int):
72
 
73
  def _extract_blacklist_hits(cleaned_text: str, dil: str):
74
  pure_text = re.sub(r"[^a-zA-ZçğıöşüÇĞİÖŞÜ0-9\s]", "", cleaned_text).lower()
75
- words_in_pure_text = set(pure_text.split())
 
 
76
 
77
  active_cache = get_blacklist_for_language(dil)
78
  detected_profanity = []
79
  detected_insult = []
80
 
81
  for bad_word, category in active_cache.items():
82
- is_hit = bad_word in words_in_pure_text or (len(bad_word) > 3 and bad_word in pure_text)
 
 
 
 
 
 
 
 
 
 
 
 
83
  if is_hit:
84
  if category == "profanity":
85
- detected_profanity.append(bad_word)
86
  else:
87
- detected_insult.append(bad_word)
88
 
89
  return sorted(set(detected_profanity)), sorted(set(detected_insult))
90
 
 
72
 
73
def _extract_blacklist_hits(cleaned_text: str, dil: str):
    """Find blacklist matches in *cleaned_text* for language *dil*.

    Returns a tuple ``(profanity_hits, insult_hits)`` where each element is a
    sorted list of unique, normalized blacklist entries that matched.

    Matching uses token/phrase boundaries to avoid substring false positives
    (e.g. "odun" inside "kodun"), with a prefix fallback for Turkish suffix
    forms on single words of length >= 5.
    """
    # Lowercase BEFORE stripping, using the same character class that is
    # applied to the blacklist entries below. This matters for Turkish:
    # "İ".lower() produces "i" + U+0307 (combining dot above); the combining
    # mark must be stripped from the text exactly as it is stripped from the
    # normalized entries, otherwise tokens containing "İ" can never match.
    pure_text = re.sub(r"[^a-z0-9çğıöşü\s]", "", cleaned_text.lower())
    # Collapse all whitespace runs (newlines, tabs, double spaces) to single
    # spaces so the single-space-padded phrase search below works reliably.
    pure_text = " ".join(pure_text.split())
    words_in_pure_text = pure_text.split()
    words_set = set(words_in_pure_text)
    padded_text = f" {pure_text} "

    active_cache = get_blacklist_for_language(dil)
    detected_profanity = []
    detected_insult = []

    for bad_word, category in active_cache.items():
        # Normalize the entry the same way the text was normalized.
        normalized_bad_word = re.sub(r"[^a-z0-9çğıöşü\s]", "", str(bad_word).lower()).strip()
        if not normalized_bad_word:
            # Entry reduced to nothing (e.g. punctuation-only) — skip it.
            continue

        # Use token/phrase boundaries to avoid false positives like "odun" in "kodun".
        if " " in normalized_bad_word:
            # Collapse internal whitespace in the phrase to mirror pure_text,
            # so entries stored with double spaces still match.
            normalized_bad_word = " ".join(normalized_bad_word.split())
            is_hit = f" {normalized_bad_word} " in padded_text
        else:
            is_hit = normalized_bad_word in words_set
            if not is_hit and len(normalized_bad_word) >= 5:
                # Allow suffix forms in Turkish (e.g., "gerizekalisin" startswith "gerizekali").
                is_hit = any(tok.startswith(normalized_bad_word) for tok in words_in_pure_text)

        if is_hit:
            if category == "profanity":
                detected_profanity.append(normalized_bad_word)
            else:
                detected_insult.append(normalized_bad_word)

    return sorted(set(detected_profanity)), sorted(set(detected_insult))
104
 
app/utils/text_utils.py CHANGED
@@ -1,6 +1,5 @@
1
  import re
2
  import unicodedata
3
- from collections import Counter
4
 
5
 
6
  def _merge_spaced_letter_chains(text: str) -> str:
@@ -61,18 +60,6 @@ def check_blacklist(text: str, blacklist_set: set) -> bool:
61
 
62
 
63
  def is_spam(temiz: str, dil: str = "tr") -> bool:
64
- tokens = [t for t in temiz.split() if t]
65
- token_count = len(tokens)
66
-
67
- # Repeated token floods are a common low-effort spam pattern.
68
- if token_count >= 4:
69
- most_common_count = Counter(tokens).most_common(1)[0][1]
70
- if most_common_count / token_count >= 0.6:
71
- return True
72
-
73
- if re.search(r"(https?://|www\.|t\.me/|bit\.ly|discord\.gg)", temiz):
74
- return True
75
-
76
  sadece_harf = re.sub(r'[^a-zğüşıöç]', '', temiz)
77
  n = len(sadece_harf)
78
 
@@ -87,8 +74,7 @@ def is_spam(temiz: str, dil: str = "tr") -> bool:
87
  if dil == "tr":
88
  tr_olmayan = set('wqx')
89
  tr_olmayan_oran = sum(1 for c in sadece_harf if c in tr_olmayan) / max(n, 1)
90
- # Avoid penalizing very short slang-like tokens (e.g., "amq").
91
- if n >= 8 and tr_olmayan_oran > 0.2:
92
  return True
93
 
94
  unique_chars = len(set(sadece_harf))
@@ -102,15 +88,6 @@ def is_spam(temiz: str, dil: str = "tr") -> bool:
102
  if re.search(r'(.)\1{6,}', temiz):
103
  return True
104
 
105
- # If one token dominates the full message, it is usually copy-paste spam.
106
- if token_count >= 5:
107
- normalized_tokens = [re.sub(r'[^a-zğüşıöç0-9]', '', t) for t in tokens]
108
- normalized_tokens = [t for t in normalized_tokens if t]
109
- if normalized_tokens:
110
- top_norm_count = Counter(normalized_tokens).most_common(1)[0][1]
111
- if top_norm_count / len(normalized_tokens) >= 0.7:
112
- return True
113
-
114
  n_temiz = len(temiz)
115
  for blok in range(3, min(10, n_temiz // 2 + 1)):
116
  pattern = temiz[:blok]
 
1
  import re
2
  import unicodedata
 
3
 
4
 
5
  def _merge_spaced_letter_chains(text: str) -> str:
 
60
 
61
 
62
  def is_spam(temiz: str, dil: str = "tr") -> bool:
 
 
 
 
 
 
 
 
 
 
 
 
63
  sadece_harf = re.sub(r'[^a-zğüşıöç]', '', temiz)
64
  n = len(sadece_harf)
65
 
 
74
  if dil == "tr":
75
  tr_olmayan = set('wqx')
76
  tr_olmayan_oran = sum(1 for c in sadece_harf if c in tr_olmayan) / max(n, 1)
77
+ if tr_olmayan_oran > 0.2:
 
78
  return True
79
 
80
  unique_chars = len(set(sadece_harf))
 
88
  if re.search(r'(.)\1{6,}', temiz):
89
  return True
90
 
 
 
 
 
 
 
 
 
 
91
  n_temiz = len(temiz)
92
  for blok in range(3, min(10, n_temiz // 2 + 1)):
93
  pattern = temiz[:blok]