Mustafa Öztürk committed on
Commit
2647c15
·
1 Parent(s): 9628b21

Refine blacklist matching and strengthen spam heuristics

Browse files
app/services/moderation_service.py CHANGED
@@ -72,19 +72,33 @@ def _log_pipeline_counts(early_exit_count: int, detoxify_call_count: int):
72
 
73
  def _extract_blacklist_hits(cleaned_text: str, dil: str):
74
  pure_text = re.sub(r"[^a-zA-ZçğıöşüÇĞİÖŞÜ0-9\s]", "", cleaned_text).lower()
75
- words_in_pure_text = set(pure_text.split())
 
 
76
 
77
  active_cache = get_blacklist_for_language(dil)
78
  detected_profanity = []
79
  detected_insult = []
80
 
81
  for bad_word, category in active_cache.items():
82
- is_hit = bad_word in words_in_pure_text or (len(bad_word) > 3 and bad_word in pure_text)
 
 
 
 
 
 
 
 
 
 
 
 
83
  if is_hit:
84
  if category == "profanity":
85
- detected_profanity.append(bad_word)
86
  else:
87
- detected_insult.append(bad_word)
88
 
89
  return sorted(set(detected_profanity)), sorted(set(detected_insult))
90
 
 
72
 
73
def _extract_blacklist_hits(cleaned_text: str, dil: str):
    """Find blacklist matches in *cleaned_text* for language *dil*.

    Returns a tuple ``(profanity_hits, insult_hits)`` where each element is a
    sorted list of unique, normalized blacklist entries that matched.

    Matching uses token/phrase boundaries to avoid substring false positives
    (e.g. "odun" inside "kodun"), with a prefix fallback for Turkish suffix
    forms on single words of length >= 5.
    """
    # Lowercase BEFORE stripping, using the same character class that is
    # applied to the blacklist entries below. This matters for Turkish:
    # "İ".lower() produces "i" + U+0307 (combining dot above); the combining
    # mark must be stripped from the text exactly as it is stripped from the
    # normalized entries, otherwise tokens containing "İ" can never match.
    pure_text = re.sub(r"[^a-z0-9çğıöşü\s]", "", cleaned_text.lower())
    # Collapse all whitespace runs (newlines, tabs, double spaces) to single
    # spaces so the single-space-padded phrase search below works reliably.
    pure_text = " ".join(pure_text.split())
    words_in_pure_text = pure_text.split()
    words_set = set(words_in_pure_text)
    padded_text = f" {pure_text} "

    active_cache = get_blacklist_for_language(dil)
    detected_profanity = []
    detected_insult = []

    for bad_word, category in active_cache.items():
        # Normalize the entry the same way the text was normalized.
        normalized_bad_word = re.sub(r"[^a-z0-9çğıöşü\s]", "", str(bad_word).lower()).strip()
        if not normalized_bad_word:
            # Entry reduced to nothing (e.g. punctuation-only) — skip it.
            continue

        # Use token/phrase boundaries to avoid false positives like "odun" in "kodun".
        if " " in normalized_bad_word:
            # Collapse internal whitespace in the phrase to mirror pure_text,
            # so entries stored with double spaces still match.
            normalized_bad_word = " ".join(normalized_bad_word.split())
            is_hit = f" {normalized_bad_word} " in padded_text
        else:
            is_hit = normalized_bad_word in words_set
            if not is_hit and len(normalized_bad_word) >= 5:
                # Allow suffix forms in Turkish (e.g., "gerizekalisin" startswith "gerizekali").
                is_hit = any(tok.startswith(normalized_bad_word) for tok in words_in_pure_text)

        if is_hit:
            if category == "profanity":
                detected_profanity.append(normalized_bad_word)
            else:
                detected_insult.append(normalized_bad_word)

    return sorted(set(detected_profanity)), sorted(set(detected_insult))
104
 
app/utils/text_utils.py CHANGED
@@ -1,6 +1,5 @@
1
  import re
2
  import unicodedata
3
- from collections import Counter
4
 
5
 
6
  def _merge_spaced_letter_chains(text: str) -> str:
@@ -61,18 +60,6 @@ def check_blacklist(text: str, blacklist_set: set) -> bool:
61
 
62
 
63
  def is_spam(temiz: str, dil: str = "tr") -> bool:
64
- tokens = [t for t in temiz.split() if t]
65
- token_count = len(tokens)
66
-
67
- # Repeated token floods are a common low-effort spam pattern.
68
- if token_count >= 4:
69
- most_common_count = Counter(tokens).most_common(1)[0][1]
70
- if most_common_count / token_count >= 0.6:
71
- return True
72
-
73
- if re.search(r"(https?://|www\.|t\.me/|bit\.ly|discord\.gg)", temiz):
74
- return True
75
-
76
  sadece_harf = re.sub(r'[^a-zğüşıöç]', '', temiz)
77
  n = len(sadece_harf)
78
 
@@ -87,8 +74,7 @@ def is_spam(temiz: str, dil: str = "tr") -> bool:
87
  if dil == "tr":
88
  tr_olmayan = set('wqx')
89
  tr_olmayan_oran = sum(1 for c in sadece_harf if c in tr_olmayan) / max(n, 1)
90
- # Avoid penalizing very short slang-like tokens (e.g., "amq").
91
- if n >= 8 and tr_olmayan_oran > 0.2:
92
  return True
93
 
94
  unique_chars = len(set(sadece_harf))
@@ -102,15 +88,6 @@ def is_spam(temiz: str, dil: str = "tr") -> bool:
102
  if re.search(r'(.)\1{6,}', temiz):
103
  return True
104
 
105
- # If one token dominates the full message, it is usually copy-paste spam.
106
- if token_count >= 5:
107
- normalized_tokens = [re.sub(r'[^a-zğüşıöç0-9]', '', t) for t in tokens]
108
- normalized_tokens = [t for t in normalized_tokens if t]
109
- if normalized_tokens:
110
- top_norm_count = Counter(normalized_tokens).most_common(1)[0][1]
111
- if top_norm_count / len(normalized_tokens) >= 0.7:
112
- return True
113
-
114
  n_temiz = len(temiz)
115
  for blok in range(3, min(10, n_temiz // 2 + 1)):
116
  pattern = temiz[:blok]
 
1
  import re
2
  import unicodedata
 
3
 
4
 
5
  def _merge_spaced_letter_chains(text: str) -> str:
 
60
 
61
 
62
  def is_spam(temiz: str, dil: str = "tr") -> bool:
 
 
 
 
 
 
 
 
 
 
 
 
63
  sadece_harf = re.sub(r'[^a-zğüşıöç]', '', temiz)
64
  n = len(sadece_harf)
65
 
 
74
  if dil == "tr":
75
  tr_olmayan = set('wqx')
76
  tr_olmayan_oran = sum(1 for c in sadece_harf if c in tr_olmayan) / max(n, 1)
77
+ if tr_olmayan_oran > 0.2:
 
78
  return True
79
 
80
  unique_chars = len(set(sadece_harf))
 
88
  if re.search(r'(.)\1{6,}', temiz):
89
  return True
90
 
 
 
 
 
 
 
 
 
 
91
  n_temiz = len(temiz)
92
  for blok in range(3, min(10, n_temiz // 2 + 1)):
93
  pattern = temiz[:blok]