Mustafa Öztürk committed on
Commit
7a29d91
·
1 Parent(s): 398cb92

Add int8 quantization and batch moderation endpoint

Browse files
app/api/endpoints.py CHANGED
@@ -13,7 +13,7 @@ except ImportError:
13
  psutil = None
14
 
15
  from app.services.cache_manager import get_cache_counts, load_blacklist_to_ram
16
- from app.services.moderation_service import run_moderation
17
 
18
  router = APIRouter()
19
 
@@ -91,6 +91,12 @@ class ModerationInput(BaseModel):
91
  platform_dil: Optional[str] = "tr"
92
 
93
 
 
 
 
 
 
 
94
  @router.get("/vram-status")
95
  def get_vram_status():
96
  if not torch.cuda.is_available():
@@ -151,3 +157,47 @@ async def analyze(input_data: ModerationInput):
151
  "latency_ms": latency_ms,
152
  "performance": performance,
153
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  psutil = None
14
 
15
  from app.services.cache_manager import get_cache_counts, load_blacklist_to_ram
16
+ from app.services.moderation_service import run_moderation, run_moderation_batch
17
 
18
  router = APIRouter()
19
 
 
91
  platform_dil: Optional[str] = "tr"
92
 
93
 
94
class ModerationBatchInput(BaseModel):
    """Request body for the /analyze-batch endpoint."""

    # Texts to moderate in one request; the endpoint rejects empty lists.
    texts: list[str]
    # Platform language hint; any value other than "en" is treated as "tr" downstream.
    platform_dil: Optional[str] = "tr"
    # Model inference micro-batch size; the endpoint clamps this to >= 1.
    batch_size: Optional[int] = 8
98
+
99
+
100
  @router.get("/vram-status")
101
  def get_vram_status():
102
  if not torch.cuda.is_available():
 
157
  "latency_ms": latency_ms,
158
  "performance": performance,
159
  }
160
+
161
+
162
@router.post("/analyze-batch")
async def analyze_batch(input_data: ModerationBatchInput):
    """Moderate several texts in one request, batching model inference.

    Rejects empty or all-blank input with HTTP 400; otherwise returns per-text
    verdicts plus overall latency and process metrics.
    """
    if not input_data.texts:
        raise HTTPException(status_code=400, detail="texts alanı boş olamaz")

    # Keep only non-blank strings before running the pipeline.
    valid_texts = [t for t in input_data.texts if isinstance(t, str) and t.strip()]
    if not valid_texts:
        raise HTTPException(status_code=400, detail="Geçerli metin bulunamadı")

    effective_batch = max(1, int(input_data.batch_size or 8))
    t0 = time.time()
    outcomes = run_moderation_batch(
        valid_texts,
        input_data.platform_dil or "tr",
        batch_size=effective_batch,
    )
    elapsed_ms = round((time.time() - t0) * 1000, 2)
    metrics = capture_process_metrics()
    metrics["latency_ms"] = elapsed_ms

    entries = [
        {
            "text": source_text,
            "cleaned_text": cleaned,
            "decision": decision,
            "reason": reason,
            "risk_level": risk,
            "language": lang,
            "details": details,
        }
        for source_text, (decision, reason, risk, lang, cleaned, details) in zip(valid_texts, outcomes)
    ]

    return {
        "count": len(entries),
        "batch_size": effective_batch,
        "latency_ms": elapsed_ms,
        "performance": metrics,
        "results": entries,
    }
app/ml/model_loader.py CHANGED
@@ -25,6 +25,17 @@ def load_system():
25
  model_o = AutoModelForSequenceClassification.from_pretrained(TR_OFF_MODEL_PATH).to(torch_device)
26
  model_o.eval()
27
 
 
 
 
 
 
 
 
 
 
 
 
28
  try:
29
  gibberish = pipeline(
30
  "text-classification",
@@ -37,6 +48,24 @@ def load_system():
37
  detox_en = Detoxify("original")
38
  detox_multi = Detoxify("multilingual")
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  _STATE.update(
41
  {
42
  "T_O": tokenizer_o,
 
25
  model_o = AutoModelForSequenceClassification.from_pretrained(TR_OFF_MODEL_PATH).to(torch_device)
26
  model_o.eval()
27
 
28
# On CPU-only hosts, dynamically quantize the TR offensive model's Linear
# layers to int8 to reduce memory footprint and speed up inference; CUDA
# execution is left untouched. Best-effort: on failure the fp32 model is kept.
# NOTE(review): this run of statements lives inside load_system() in the file.
if torch_device.type == "cpu":
    try:
        model_o = torch.quantization.quantize_dynamic(
            model_o,
            {torch.nn.Linear},
            dtype=torch.qint8,
        )
        model_o.eval()
    except Exception:
        # Swallowing every error keeps startup robust but hides real
        # quantization bugs — consider logging the exception.
        pass
38
+
39
  try:
40
  gibberish = pipeline(
41
  "text-classification",
 
48
  detox_en = Detoxify("original")
49
  detox_multi = Detoxify("multilingual")
50
 
51
# Apply the same int8 dynamic quantization to both Detoxify models on CPU
# hosts. Each model is wrapped in its own try block so a failure on one does
# not prevent quantizing the other; failures silently fall back to fp32.
# NOTE(review): this run of statements lives inside load_system() in the file.
if torch_device.type == "cpu":
    try:
        detox_en.model = torch.quantization.quantize_dynamic(
            detox_en.model,
            {torch.nn.Linear},
            dtype=torch.qint8,
        )
    except Exception:
        pass
    try:
        detox_multi.model = torch.quantization.quantize_dynamic(
            detox_multi.model,
            {torch.nn.Linear},
            dtype=torch.qint8,
        )
    except Exception:
        pass
68
+
69
  _STATE.update(
70
  {
71
  "T_O": tokenizer_o,
app/services/moderation_service.py CHANGED
@@ -55,13 +55,117 @@ def calculate_verdict(profanity_hits, insult_hits, ai_scores):
55
  }
56
 
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  def run_moderation(text: str, platform_dil: str = "tr"):
59
  state = _ensure_runtime_ready()
60
 
61
  temiz = clean_text_nfkc(text)
62
  dil = "en" if platform_dil == "en" else "tr"
63
- pure_text = re.sub(r"[^a-zA-ZçğıöşüÇĞİÖŞÜ0-9\s]", "", temiz).lower()
64
- words_in_pure_text = set(pure_text.split())
65
 
66
  if is_spam(temiz, dil):
67
  return (
@@ -73,45 +177,11 @@ def run_moderation(text: str, platform_dil: str = "tr"):
73
  {"action": "MONITOR", "detox": {}},
74
  )
75
 
76
- active_cache = get_blacklist_for_language(dil)
77
- detected_profanity = []
78
- detected_insult = []
79
-
80
- for bad_word, category in active_cache.items():
81
- is_hit = bad_word in words_in_pure_text or (len(bad_word) > 3 and bad_word in pure_text)
82
- if is_hit:
83
- if category == "profanity":
84
- detected_profanity.append(bad_word)
85
- else:
86
- detected_insult.append(bad_word)
87
-
88
- profanity_hits = sorted(set(detected_profanity))
89
- insult_hits = sorted(set(detected_insult))
90
 
91
  # Fast path: if blacklist catches profanity/insult, skip all ML inference.
92
  if profanity_hits or insult_hits:
93
- verdict = calculate_verdict(
94
- profanity_hits,
95
- insult_hits,
96
- {
97
- "off_score": 0.0,
98
- "detox_toxicity": 0.0,
99
- },
100
- )
101
- action_map = {
102
- "CRITICAL": "CENSOR",
103
- "HIGH": "WARN",
104
- "MEDIUM": "MONITOR",
105
- "LOW": "MONITOR",
106
- "NONE": "ALLOW",
107
- }
108
- detail = {
109
- "hits": profanity_hits,
110
- "insult_hits": insult_hits,
111
- "action": action_map.get(verdict["risk_level"], "MONITOR"),
112
- "fast_path": "blacklist_early_exit",
113
- }
114
- return verdict["decision"], verdict["reason"], verdict["risk_level"], dil, temiz, detail
115
 
116
  if dil == "en":
117
  if state["GB_PIPE"] is not None:
@@ -152,61 +222,51 @@ def run_moderation(text: str, platform_dil: str = "tr"):
152
  "detox_toxicity": tox_score,
153
  },
154
  )
155
- action_map = {
156
- "CRITICAL": "CENSOR",
157
- "HIGH": "WARN",
158
- "MEDIUM": "MONITOR",
159
- "LOW": "MONITOR",
160
- "NONE": "ALLOW",
161
- }
162
- detail.update({"action": action_map.get(verdict["risk_level"], "MONITOR")})
163
  return verdict["decision"], verdict["reason"], verdict["risk_level"], dil, temiz, detail
164
 
165
- in_o = state["T_O"](temiz, return_tensors="pt", truncation=True, padding=True, max_length=128)
166
- in_o = {k: v.to(state["TORCH_DEVICE"]) for k, v in in_o.items()}
167
- with torch.no_grad():
168
- out_o = state["M_O"](**in_o)
169
- p_o = torch.softmax(out_o.logits, dim=1)[0]
170
- off_score = float(p_o[1].item()) if p_o.numel() > 1 else float(p_o.max().item())
171
 
172
- # Only run Detoxify on uncertain content to reduce inference cost.
173
- if off_score < 0.60:
174
- raw_threat_res = state["D_MULTI"].predict(temiz)
175
- else:
176
- raw_threat_res = {
177
- "toxicity": off_score,
178
- "identity_attack": 0.0,
179
- "threat": 0.0,
180
- "insult": 0.0,
181
- }
182
- threat_res = {k: float(v) for k, v in raw_threat_res.items()}
183
- threat = float(threat_res.get("threat", 0.0))
184
- tox_score = float(threat_res.get("toxicity", 0.0))
185
- ins_score = float(threat_res.get("insult", 0.0))
186
 
187
- detail = {
188
- "off_score": off_score,
189
- "toxicity": tox_score,
190
- "insult": ins_score,
191
- "threat": threat,
192
- "detox": threat_res,
193
- "hits": profanity_hits,
194
- "insult_hits": insult_hits,
195
- }
196
- verdict = calculate_verdict(
197
- profanity_hits,
198
- insult_hits,
199
- {
200
- "off_score": off_score,
201
- "detox_toxicity": tox_score,
202
- },
203
- )
204
- action_map = {
205
- "CRITICAL": "CENSOR",
206
- "HIGH": "WARN",
207
- "MEDIUM": "MONITOR",
208
- "LOW": "MONITOR",
209
- "NONE": "ALLOW",
210
- }
211
- detail.update({"action": action_map.get(verdict["risk_level"], "MONITOR")})
212
- return verdict["decision"], verdict["reason"], verdict["risk_level"], dil, temiz, detail
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  }
56
 
57
 
58
# Maps a verdict risk level to the moderation action exposed in response
# details. Lookup sites fall back to "MONITOR" for unknown levels.
ACTION_MAP = {
    "CRITICAL": "CENSOR",
    "HIGH": "WARN",
    "MEDIUM": "MONITOR",
    "LOW": "MONITOR",
    "NONE": "ALLOW",
}
65
+
66
+
67
def _extract_blacklist_hits(cleaned_text: str, dil: str):
    """Scan *cleaned_text* against the blacklist for language *dil*.

    Returns two sorted, de-duplicated lists: (profanity hits, insult hits).
    """
    # Strip everything except Latin/Turkish letters, digits and whitespace,
    # then lowercase, so blacklist terms match regardless of punctuation.
    pure_text = re.sub(r"[^a-zA-ZçğıöşüÇĞİÖŞÜ0-9\s]", "", cleaned_text).lower()
    token_set = set(pure_text.split())

    profanity, insults = set(), set()
    for term, category in get_blacklist_for_language(dil).items():
        # Exact token match, or substring match for terms longer than 3 chars.
        matched = term in token_set or (len(term) > 3 and term in pure_text)
        if not matched:
            continue
        (profanity if category == "profanity" else insults).add(term)

    return sorted(profanity), sorted(insults)
84
+
85
+
86
def _blacklist_early_result(profanity_hits, insult_hits, dil, cleaned_text):
    """Build the fast-path response used when the blacklist already matched.

    ML scores are zeroed because the verdict rests entirely on the hits.
    """
    verdict = calculate_verdict(
        profanity_hits,
        insult_hits,
        {"off_score": 0.0, "detox_toxicity": 0.0},
    )
    detail = {
        "hits": profanity_hits,
        "insult_hits": insult_hits,
        "action": ACTION_MAP.get(verdict["risk_level"], "MONITOR"),
        "fast_path": "blacklist_early_exit",
    }
    return verdict["decision"], verdict["reason"], verdict["risk_level"], dil, cleaned_text, detail
102
+
103
+
104
def _tr_off_scores_batched(text_list, state, batch_size: int = 8):
    """Run the Turkish offensive-language model over *text_list* in mini-batches.

    Returns one offensive-probability float per input text, in input order.
    """
    if not text_list:
        return []

    step = max(1, int(batch_size))
    tokenizer = state["T_O"]
    model = state["M_O"]
    device = state["TORCH_DEVICE"]

    scores = []
    start = 0
    while start < len(text_list):
        batch = text_list[start : start + step]
        start += step
        encoded = tokenizer(batch, return_tensors="pt", truncation=True, padding=True, max_length=128)
        encoded = {name: tensor.to(device) for name, tensor in encoded.items()}
        with torch.no_grad():
            logits = model(**encoded).logits
        probs = torch.softmax(logits, dim=1)
        # Two-class head: take the probability of class 1; with a single
        # output column, fall back to the row maximum.
        column = probs[:, 1] if probs.shape[1] > 1 else probs.max(dim=1).values
        scores.extend(float(v) for v in column.detach().cpu().tolist())

    return scores
124
+
125
+
126
def _tr_result_with_off_score(cleaned_text: str, profanity_hits, insult_hits, off_score: float, state, dil: str):
    """Finish the Turkish moderation path once the offensive score is known."""
    # Detoxify is expensive: consult it only when the TR model is not already
    # confident (off_score >= 0.60 is treated as decisive on its own).
    if off_score < 0.60:
        raw = state["D_MULTI"].predict(cleaned_text)
    else:
        raw = {
            "toxicity": off_score,
            "identity_attack": 0.0,
            "threat": 0.0,
            "insult": 0.0,
        }

    detox = {key: float(value) for key, value in raw.items()}
    threat = float(detox.get("threat", 0.0))
    tox_score = float(detox.get("toxicity", 0.0))
    ins_score = float(detox.get("insult", 0.0))

    verdict = calculate_verdict(
        profanity_hits,
        insult_hits,
        {"off_score": off_score, "detox_toxicity": tox_score},
    )
    detail = {
        "off_score": off_score,
        "toxicity": tox_score,
        "insult": ins_score,
        "threat": threat,
        "detox": detox,
        "hits": profanity_hits,
        "insult_hits": insult_hits,
        "action": ACTION_MAP.get(verdict["risk_level"], "MONITOR"),
    }
    return verdict["decision"], verdict["reason"], verdict["risk_level"], dil, cleaned_text, detail
162
+
163
+
164
  def run_moderation(text: str, platform_dil: str = "tr"):
165
  state = _ensure_runtime_ready()
166
 
167
  temiz = clean_text_nfkc(text)
168
  dil = "en" if platform_dil == "en" else "tr"
 
 
169
 
170
  if is_spam(temiz, dil):
171
  return (
 
177
  {"action": "MONITOR", "detox": {}},
178
  )
179
 
180
+ profanity_hits, insult_hits = _extract_blacklist_hits(temiz, dil)
 
 
 
 
 
 
 
 
 
 
 
 
 
181
 
182
  # Fast path: if blacklist catches profanity/insult, skip all ML inference.
183
  if profanity_hits or insult_hits:
184
+ return _blacklist_early_result(profanity_hits, insult_hits, dil, temiz)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
  if dil == "en":
187
  if state["GB_PIPE"] is not None:
 
222
  "detox_toxicity": tox_score,
223
  },
224
  )
225
+ detail.update({"action": ACTION_MAP.get(verdict["risk_level"], "MONITOR")})
 
 
 
 
 
 
 
226
  return verdict["decision"], verdict["reason"], verdict["risk_level"], dil, temiz, detail
227
 
228
+ off_score = _tr_off_scores_batched([temiz], state, batch_size=1)[0]
229
+ return _tr_result_with_off_score(temiz, profanity_hits, insult_hits, off_score, state, dil)
 
 
 
 
230
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
 
232
def run_moderation_batch(texts, platform_dil: str = "tr", batch_size: int = 8):
    """Moderate *texts* together, batching the Turkish model inference.

    Spam, blacklist hits and English inputs are resolved one by one; only the
    remaining Turkish texts go through the model as batched passes. Returns one
    (decision, reason, risk, lang, cleaned, detail) tuple per input, in order.
    """
    state = _ensure_runtime_ready()
    dil = "en" if platform_dil == "en" else "tr"

    results = [None] * len(texts)
    # (index, cleaned text, profanity hits, insult hits) awaiting TR scoring.
    deferred = []

    for position, raw_text in enumerate(texts):
        temiz = clean_text_nfkc(raw_text)

        if is_spam(temiz, dil):
            results[position] = (
                "🗑️ SPAM/GİBBERİSH",
                "Anlamsız veya tekrarlı içerik.",
                "LOW",
                dil,
                temiz,
                {"action": "MONITOR", "detox": {}},
            )
            continue

        profanity_hits, insult_hits = _extract_blacklist_hits(temiz, dil)
        if profanity_hits or insult_hits:
            results[position] = _blacklist_early_result(profanity_hits, insult_hits, dil, temiz)
            continue

        if dil == "en":
            # English texts use the single-item pipeline unchanged.
            results[position] = run_moderation(raw_text, platform_dil="en")
            continue

        deferred.append((position, temiz, profanity_hits, insult_hits))

    if deferred:
        scores = _tr_off_scores_batched([entry[1] for entry in deferred], state, batch_size=batch_size)
        for (position, temiz, profanity_hits, insult_hits), off_score in zip(deferred, scores):
            results[position] = _tr_result_with_off_score(
                temiz, profanity_hits, insult_hits, off_score, state, dil
            )

    return results