Mustafa Öztürk committed on
Commit
7a29d91
·
1 Parent(s): 398cb92

Add int8 quantization and batch moderation endpoint

Browse files
app/api/endpoints.py CHANGED
@@ -13,7 +13,7 @@ except ImportError:
13
  psutil = None
14
 
15
  from app.services.cache_manager import get_cache_counts, load_blacklist_to_ram
16
- from app.services.moderation_service import run_moderation
17
 
18
  router = APIRouter()
19
 
@@ -91,6 +91,12 @@ class ModerationInput(BaseModel):
91
  platform_dil: Optional[str] = "tr"
92
 
93
 
 
 
 
 
 
 
94
  @router.get("/vram-status")
95
  def get_vram_status():
96
  if not torch.cuda.is_available():
@@ -151,3 +157,47 @@ async def analyze(input_data: ModerationInput):
151
  "latency_ms": latency_ms,
152
  "performance": performance,
153
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  psutil = None
14
 
15
  from app.services.cache_manager import get_cache_counts, load_blacklist_to_ram
16
+ from app.services.moderation_service import run_moderation, run_moderation_batch
17
 
18
  router = APIRouter()
19
 
 
91
  platform_dil: Optional[str] = "tr"
92
 
93
 
94
class ModerationBatchInput(BaseModel):
    """Request body for the /analyze-batch endpoint."""

    # Texts to moderate in one request; the endpoint rejects empty lists.
    texts: list[str]
    # Platform language hint; any value other than "en" is treated as "tr" downstream.
    platform_dil: Optional[str] = "tr"
    # Model inference micro-batch size; the endpoint clamps this to >= 1.
    batch_size: Optional[int] = 8
98
+
99
+
100
  @router.get("/vram-status")
101
  def get_vram_status():
102
  if not torch.cuda.is_available():
 
157
  "latency_ms": latency_ms,
158
  "performance": performance,
159
  }
160
+
161
+
162
@router.post("/analyze-batch")
async def analyze_batch(input_data: ModerationBatchInput):
    """Moderate several texts in one request, batching model inference.

    Rejects empty or all-blank input with HTTP 400; otherwise returns per-text
    verdicts plus overall latency and process metrics.
    """
    if not input_data.texts:
        raise HTTPException(status_code=400, detail="texts alanı boş olamaz")

    # Keep only non-blank strings before running the pipeline.
    valid_texts = [t for t in input_data.texts if isinstance(t, str) and t.strip()]
    if not valid_texts:
        raise HTTPException(status_code=400, detail="Geçerli metin bulunamadı")

    effective_batch = max(1, int(input_data.batch_size or 8))
    t0 = time.time()
    outcomes = run_moderation_batch(
        valid_texts,
        input_data.platform_dil or "tr",
        batch_size=effective_batch,
    )
    elapsed_ms = round((time.time() - t0) * 1000, 2)
    metrics = capture_process_metrics()
    metrics["latency_ms"] = elapsed_ms

    entries = [
        {
            "text": source_text,
            "cleaned_text": cleaned,
            "decision": decision,
            "reason": reason,
            "risk_level": risk,
            "language": lang,
            "details": details,
        }
        for source_text, (decision, reason, risk, lang, cleaned, details) in zip(valid_texts, outcomes)
    ]

    return {
        "count": len(entries),
        "batch_size": effective_batch,
        "latency_ms": elapsed_ms,
        "performance": metrics,
        "results": entries,
    }
app/ml/model_loader.py CHANGED
@@ -25,6 +25,17 @@ def load_system():
25
  model_o = AutoModelForSequenceClassification.from_pretrained(TR_OFF_MODEL_PATH).to(torch_device)
26
  model_o.eval()
27
 
 
 
 
 
 
 
 
 
 
 
 
28
  try:
29
  gibberish = pipeline(
30
  "text-classification",
@@ -37,6 +48,24 @@ def load_system():
37
  detox_en = Detoxify("original")
38
  detox_multi = Detoxify("multilingual")
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  _STATE.update(
41
  {
42
  "T_O": tokenizer_o,
 
25
  model_o = AutoModelForSequenceClassification.from_pretrained(TR_OFF_MODEL_PATH).to(torch_device)
26
  model_o.eval()
27
 
28
# On CPU-only hosts, dynamically quantize the TR offensive model's Linear
# layers to int8 to reduce memory footprint and speed up inference; CUDA
# execution is left untouched. Best-effort: on failure the fp32 model is kept.
# NOTE(review): this run of statements lives inside load_system() in the file.
if torch_device.type == "cpu":
    try:
        model_o = torch.quantization.quantize_dynamic(
            model_o,
            {torch.nn.Linear},
            dtype=torch.qint8,
        )
        model_o.eval()
    except Exception:
        # Swallowing every error keeps startup robust but hides real
        # quantization bugs — consider logging the exception.
        pass
38
+
39
  try:
40
  gibberish = pipeline(
41
  "text-classification",
 
48
  detox_en = Detoxify("original")
49
  detox_multi = Detoxify("multilingual")
50
 
51
# Apply the same int8 dynamic quantization to both Detoxify models on CPU
# hosts. Each model is wrapped in its own try block so a failure on one does
# not prevent quantizing the other; failures silently fall back to fp32.
# NOTE(review): this run of statements lives inside load_system() in the file.
if torch_device.type == "cpu":
    try:
        detox_en.model = torch.quantization.quantize_dynamic(
            detox_en.model,
            {torch.nn.Linear},
            dtype=torch.qint8,
        )
    except Exception:
        pass
    try:
        detox_multi.model = torch.quantization.quantize_dynamic(
            detox_multi.model,
            {torch.nn.Linear},
            dtype=torch.qint8,
        )
    except Exception:
        pass
68
+
69
  _STATE.update(
70
  {
71
  "T_O": tokenizer_o,
app/services/moderation_service.py CHANGED
@@ -55,13 +55,117 @@ def calculate_verdict(profanity_hits, insult_hits, ai_scores):
55
  }
56
 
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  def run_moderation(text: str, platform_dil: str = "tr"):
59
  state = _ensure_runtime_ready()
60
 
61
  temiz = clean_text_nfkc(text)
62
  dil = "en" if platform_dil == "en" else "tr"
63
- pure_text = re.sub(r"[^a-zA-ZçğıöşüÇĞİÖŞÜ0-9\s]", "", temiz).lower()
64
- words_in_pure_text = set(pure_text.split())
65
 
66
  if is_spam(temiz, dil):
67
  return (
@@ -73,45 +177,11 @@ def run_moderation(text: str, platform_dil: str = "tr"):
73
  {"action": "MONITOR", "detox": {}},
74
  )
75
 
76
- active_cache = get_blacklist_for_language(dil)
77
- detected_profanity = []
78
- detected_insult = []
79
-
80
- for bad_word, category in active_cache.items():
81
- is_hit = bad_word in words_in_pure_text or (len(bad_word) > 3 and bad_word in pure_text)
82
- if is_hit:
83
- if category == "profanity":
84
- detected_profanity.append(bad_word)
85
- else:
86
- detected_insult.append(bad_word)
87
-
88
- profanity_hits = sorted(set(detected_profanity))
89
- insult_hits = sorted(set(detected_insult))
90
 
91
  # Fast path: if blacklist catches profanity/insult, skip all ML inference.
92
  if profanity_hits or insult_hits:
93
- verdict = calculate_verdict(
94
- profanity_hits,
95
- insult_hits,
96
- {
97
- "off_score": 0.0,
98
- "detox_toxicity": 0.0,
99
- },
100
- )
101
- action_map = {
102
- "CRITICAL": "CENSOR",
103
- "HIGH": "WARN",
104
- "MEDIUM": "MONITOR",
105
- "LOW": "MONITOR",
106
- "NONE": "ALLOW",
107
- }
108
- detail = {
109
- "hits": profanity_hits,
110
- "insult_hits": insult_hits,
111
- "action": action_map.get(verdict["risk_level"], "MONITOR"),
112
- "fast_path": "blacklist_early_exit",
113
- }
114
- return verdict["decision"], verdict["reason"], verdict["risk_level"], dil, temiz, detail
115
 
116
  if dil == "en":
117
  if state["GB_PIPE"] is not None:
@@ -152,61 +222,51 @@ def run_moderation(text: str, platform_dil: str = "tr"):
152
  "detox_toxicity": tox_score,
153
  },
154
  )
155
- action_map = {
156
- "CRITICAL": "CENSOR",
157
- "HIGH": "WARN",
158
- "MEDIUM": "MONITOR",
159
- "LOW": "MONITOR",
160
- "NONE": "ALLOW",
161
- }
162
- detail.update({"action": action_map.get(verdict["risk_level"], "MONITOR")})
163
  return verdict["decision"], verdict["reason"], verdict["risk_level"], dil, temiz, detail
164
 
165
- in_o = state["T_O"](temiz, return_tensors="pt", truncation=True, padding=True, max_length=128)
166
- in_o = {k: v.to(state["TORCH_DEVICE"]) for k, v in in_o.items()}
167
- with torch.no_grad():
168
- out_o = state["M_O"](**in_o)
169
- p_o = torch.softmax(out_o.logits, dim=1)[0]
170
- off_score = float(p_o[1].item()) if p_o.numel() > 1 else float(p_o.max().item())
171
 
172
- # Only run Detoxify on uncertain content to reduce inference cost.
173
- if off_score < 0.60:
174
- raw_threat_res = state["D_MULTI"].predict(temiz)
175
- else:
176
- raw_threat_res = {
177
- "toxicity": off_score,
178
- "identity_attack": 0.0,
179
- "threat": 0.0,
180
- "insult": 0.0,
181
- }
182
- threat_res = {k: float(v) for k, v in raw_threat_res.items()}
183
- threat = float(threat_res.get("threat", 0.0))
184
- tox_score = float(threat_res.get("toxicity", 0.0))
185
- ins_score = float(threat_res.get("insult", 0.0))
186
 
187
- detail = {
188
- "off_score": off_score,
189
- "toxicity": tox_score,
190
- "insult": ins_score,
191
- "threat": threat,
192
- "detox": threat_res,
193
- "hits": profanity_hits,
194
- "insult_hits": insult_hits,
195
- }
196
- verdict = calculate_verdict(
197
- profanity_hits,
198
- insult_hits,
199
- {
200
- "off_score": off_score,
201
- "detox_toxicity": tox_score,
202
- },
203
- )
204
- action_map = {
205
- "CRITICAL": "CENSOR",
206
- "HIGH": "WARN",
207
- "MEDIUM": "MONITOR",
208
- "LOW": "MONITOR",
209
- "NONE": "ALLOW",
210
- }
211
- detail.update({"action": action_map.get(verdict["risk_level"], "MONITOR")})
212
- return verdict["decision"], verdict["reason"], verdict["risk_level"], dil, temiz, detail
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  }
56
 
57
 
58
# Maps a verdict risk level to the moderation action exposed in response
# details. Lookup sites fall back to "MONITOR" for unknown levels.
ACTION_MAP = {
    "CRITICAL": "CENSOR",
    "HIGH": "WARN",
    "MEDIUM": "MONITOR",
    "LOW": "MONITOR",
    "NONE": "ALLOW",
}
65
+
66
+
67
def _extract_blacklist_hits(cleaned_text: str, dil: str):
    """Scan *cleaned_text* against the blacklist for language *dil*.

    Returns two sorted, de-duplicated lists: (profanity hits, insult hits).
    """
    # Strip everything except Latin/Turkish letters, digits and whitespace,
    # then lowercase, so blacklist terms match regardless of punctuation.
    pure_text = re.sub(r"[^a-zA-ZçğıöşüÇĞİÖŞÜ0-9\s]", "", cleaned_text).lower()
    token_set = set(pure_text.split())

    profanity, insults = set(), set()
    for term, category in get_blacklist_for_language(dil).items():
        # Exact token match, or substring match for terms longer than 3 chars.
        matched = term in token_set or (len(term) > 3 and term in pure_text)
        if not matched:
            continue
        (profanity if category == "profanity" else insults).add(term)

    return sorted(profanity), sorted(insults)
84
+
85
+
86
def _blacklist_early_result(profanity_hits, insult_hits, dil, cleaned_text):
    """Build the fast-path response used when the blacklist already matched.

    ML scores are zeroed because the verdict rests entirely on the hits.
    """
    verdict = calculate_verdict(
        profanity_hits,
        insult_hits,
        {"off_score": 0.0, "detox_toxicity": 0.0},
    )
    detail = {
        "hits": profanity_hits,
        "insult_hits": insult_hits,
        "action": ACTION_MAP.get(verdict["risk_level"], "MONITOR"),
        "fast_path": "blacklist_early_exit",
    }
    return verdict["decision"], verdict["reason"], verdict["risk_level"], dil, cleaned_text, detail
102
+
103
+
104
def _tr_off_scores_batched(text_list, state, batch_size: int = 8):
    """Run the Turkish offensive-language model over *text_list* in mini-batches.

    Returns one offensive-probability float per input text, in input order.
    """
    if not text_list:
        return []

    step = max(1, int(batch_size))
    tokenizer = state["T_O"]
    model = state["M_O"]
    device = state["TORCH_DEVICE"]

    scores = []
    start = 0
    while start < len(text_list):
        batch = text_list[start : start + step]
        start += step
        encoded = tokenizer(batch, return_tensors="pt", truncation=True, padding=True, max_length=128)
        encoded = {name: tensor.to(device) for name, tensor in encoded.items()}
        with torch.no_grad():
            logits = model(**encoded).logits
        probs = torch.softmax(logits, dim=1)
        # Two-class head: take the probability of class 1; with a single
        # output column, fall back to the row maximum.
        column = probs[:, 1] if probs.shape[1] > 1 else probs.max(dim=1).values
        scores.extend(float(v) for v in column.detach().cpu().tolist())

    return scores
124
+
125
+
126
def _tr_result_with_off_score(cleaned_text: str, profanity_hits, insult_hits, off_score: float, state, dil: str):
    """Finish the Turkish moderation path once the offensive score is known."""
    # Detoxify is expensive: consult it only when the TR model is not already
    # confident (off_score >= 0.60 is treated as decisive on its own).
    if off_score < 0.60:
        raw = state["D_MULTI"].predict(cleaned_text)
    else:
        raw = {
            "toxicity": off_score,
            "identity_attack": 0.0,
            "threat": 0.0,
            "insult": 0.0,
        }

    detox = {key: float(value) for key, value in raw.items()}
    threat = float(detox.get("threat", 0.0))
    tox_score = float(detox.get("toxicity", 0.0))
    ins_score = float(detox.get("insult", 0.0))

    verdict = calculate_verdict(
        profanity_hits,
        insult_hits,
        {"off_score": off_score, "detox_toxicity": tox_score},
    )
    detail = {
        "off_score": off_score,
        "toxicity": tox_score,
        "insult": ins_score,
        "threat": threat,
        "detox": detox,
        "hits": profanity_hits,
        "insult_hits": insult_hits,
        "action": ACTION_MAP.get(verdict["risk_level"], "MONITOR"),
    }
    return verdict["decision"], verdict["reason"], verdict["risk_level"], dil, cleaned_text, detail
162
+
163
+
164
  def run_moderation(text: str, platform_dil: str = "tr"):
165
  state = _ensure_runtime_ready()
166
 
167
  temiz = clean_text_nfkc(text)
168
  dil = "en" if platform_dil == "en" else "tr"
 
 
169
 
170
  if is_spam(temiz, dil):
171
  return (
 
177
  {"action": "MONITOR", "detox": {}},
178
  )
179
 
180
+ profanity_hits, insult_hits = _extract_blacklist_hits(temiz, dil)
 
 
 
 
 
 
 
 
 
 
 
 
 
181
 
182
  # Fast path: if blacklist catches profanity/insult, skip all ML inference.
183
  if profanity_hits or insult_hits:
184
+ return _blacklist_early_result(profanity_hits, insult_hits, dil, temiz)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
  if dil == "en":
187
  if state["GB_PIPE"] is not None:
 
222
  "detox_toxicity": tox_score,
223
  },
224
  )
225
+ detail.update({"action": ACTION_MAP.get(verdict["risk_level"], "MONITOR")})
 
 
 
 
 
 
 
226
  return verdict["decision"], verdict["reason"], verdict["risk_level"], dil, temiz, detail
227
 
228
+ off_score = _tr_off_scores_batched([temiz], state, batch_size=1)[0]
229
+ return _tr_result_with_off_score(temiz, profanity_hits, insult_hits, off_score, state, dil)
 
 
 
 
230
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
 
232
def run_moderation_batch(texts, platform_dil: str = "tr", batch_size: int = 8):
    """Moderate *texts* together, batching the Turkish model inference.

    Spam, blacklist hits and English inputs are resolved one by one; only the
    remaining Turkish texts go through the model as batched passes. Returns one
    (decision, reason, risk, lang, cleaned, detail) tuple per input, in order.
    """
    state = _ensure_runtime_ready()
    dil = "en" if platform_dil == "en" else "tr"

    results = [None] * len(texts)
    # (index, cleaned text, profanity hits, insult hits) awaiting TR scoring.
    deferred = []

    for position, raw_text in enumerate(texts):
        temiz = clean_text_nfkc(raw_text)

        if is_spam(temiz, dil):
            results[position] = (
                "🗑️ SPAM/GİBBERİSH",
                "Anlamsız veya tekrarlı içerik.",
                "LOW",
                dil,
                temiz,
                {"action": "MONITOR", "detox": {}},
            )
            continue

        profanity_hits, insult_hits = _extract_blacklist_hits(temiz, dil)
        if profanity_hits or insult_hits:
            results[position] = _blacklist_early_result(profanity_hits, insult_hits, dil, temiz)
            continue

        if dil == "en":
            # English texts use the single-item pipeline unchanged.
            results[position] = run_moderation(raw_text, platform_dil="en")
            continue

        deferred.append((position, temiz, profanity_hits, insult_hits))

    if deferred:
        scores = _tr_off_scores_batched([entry[1] for entry in deferred], state, batch_size=batch_size)
        for (position, temiz, profanity_hits, insult_hits), off_score in zip(deferred, scores):
            results[position] = _tr_result_with_off_score(
                temiz, profanity_hits, insult_hits, off_score, state, dil
            )

    return results