Spaces:
Runtime error
Runtime error
Mustafa Öztürk committed on
Commit ·
857d4f5
1
Parent(s): 2eae299
Deploy Sentinel API to HF Space
Browse files- .dockerignore +13 -0
- .gitignore +52 -0
- Dockerfile +25 -0
- README.md +255 -8
- app.py +1017 -0
- app/__init__.py +0 -0
- app/api/__init__.py +0 -0
- app/api/endpoints.py +131 -0
- app/core/__init__.py +0 -0
- app/core/config.py +14 -0
- app/db/__init__.py +0 -0
- app/db/supabase_client.py +24 -0
- app/ml/__init__.py +0 -0
- app/ml/model_loader.py +55 -0
- app/services/__init__.py +0 -0
- app/services/cache_manager.py +73 -0
- app/services/moderation_service.py +169 -0
- app/utils/__init__.py +0 -0
- app/utils/text_utils.py +65 -0
- main.py +12 -0
- performance_test.py +58 -0
- requirements.txt +16 -0
- stress_test.py +63 -0
- utils.py +51 -0
- vram_check.py +32 -0
.dockerignore
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.py[cod]
|
| 3 |
+
*.pyo
|
| 4 |
+
*.pyd
|
| 5 |
+
.pytest_cache/
|
| 6 |
+
.mypy_cache/
|
| 7 |
+
.ruff_cache/
|
| 8 |
+
.venv/
|
| 9 |
+
venv/
|
| 10 |
+
.git/
|
| 11 |
+
.gitignore
|
| 12 |
+
.env
|
| 13 |
+
*.log
|
.gitignore
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python bytecode/cache
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# Virtual environments
|
| 7 |
+
venv/
|
| 8 |
+
.venv/
|
| 9 |
+
env/
|
| 10 |
+
ENV/
|
| 11 |
+
|
| 12 |
+
# Environment variables
|
| 13 |
+
.env
|
| 14 |
+
.env.*
|
| 15 |
+
!.env.example
|
| 16 |
+
|
| 17 |
+
# Build/distribution artifacts
|
| 18 |
+
build/
|
| 19 |
+
dist/
|
| 20 |
+
*.egg-info/
|
| 21 |
+
.eggs/
|
| 22 |
+
|
| 23 |
+
# Test/cache artifacts
|
| 24 |
+
.pytest_cache/
|
| 25 |
+
.mypy_cache/
|
| 26 |
+
.ruff_cache/
|
| 27 |
+
.coverage
|
| 28 |
+
coverage.xml
|
| 29 |
+
htmlcov/
|
| 30 |
+
|
| 31 |
+
# Jupyter
|
| 32 |
+
.ipynb_checkpoints/
|
| 33 |
+
|
| 34 |
+
# IDE/editor
|
| 35 |
+
.vscode/
|
| 36 |
+
.idea/
|
| 37 |
+
*.swp
|
| 38 |
+
*.swo
|
| 39 |
+
|
| 40 |
+
# OS files
|
| 41 |
+
.DS_Store
|
| 42 |
+
Thumbs.db
|
| 43 |
+
|
| 44 |
+
# Logs/runtime files
|
| 45 |
+
*.log
|
| 46 |
+
*.pid
|
| 47 |
+
|
| 48 |
+
# Local model caches / weights
|
| 49 |
+
models_cache/
|
| 50 |
+
|
| 51 |
+
# Streamlit local state
|
| 52 |
+
.streamlit/secrets.toml
|
Dockerfile
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim
|
| 2 |
+
|
| 3 |
+
ENV PYTHONDONTWRITEBYTECODE=1 \
|
| 4 |
+
PYTHONUNBUFFERED=1 \
|
| 5 |
+
PIP_NO_CACHE_DIR=1
|
| 6 |
+
|
| 7 |
+
WORKDIR /app
|
| 8 |
+
|
| 9 |
+
# Build tools are needed for some Python packages during install.
|
| 10 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 11 |
+
build-essential \
|
| 12 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 13 |
+
|
| 14 |
+
COPY requirements.txt .
|
| 15 |
+
|
| 16 |
+
# Hugging Face Spaces free tier is CPU-based; install CPU Torch explicitly.
|
| 17 |
+
RUN pip install --upgrade pip && \
|
| 18 |
+
pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu && \
|
| 19 |
+
pip install --no-cache-dir -r requirements.txt
|
| 20 |
+
|
| 21 |
+
COPY . .
|
| 22 |
+
|
| 23 |
+
EXPOSE 7860
|
| 24 |
+
|
| 25 |
+
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
|
@@ -1,10 +1,257 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
# Hibrit İçerik Moderasyon Sistemi
|
| 2 |
+
|
| 3 |
+
Türkçe ve İngilizce kullanıcı içeriklerini düşük gecikmeyle analiz edip zararlı içeriği otomatik sınıflandıran, kara liste + yapay zeka modellerini birlikte kullanan katmanlı bir moderasyon sistemidir.
|
| 4 |
+
|
| 5 |
+
## 1. Proje Amacı
|
| 6 |
+
Bu proje, gerçek zamanlı içerik moderasyonunda hem hızlı hem de yüksek doğruluklu karar üretmek için kural tabanlı filtreleri ve makine öğrenmesi modellerini tek bir hibrit akışta birleştirir.
|
| 7 |
+
|
| 8 |
+
## 2. Kullanılan Teknolojiler
|
| 9 |
+
- `Python 3.x`
|
| 10 |
+
- `FastAPI` (API servis katmanı)
|
| 11 |
+
- `Uvicorn` (ASGI sunucu)
|
| 12 |
+
- `Streamlit` (moderasyon paneli)
|
| 13 |
+
- `PyTorch` (inference altyapısı)
|
| 14 |
+
- `Transformers` (BERTurk tokenizer/model yükleme)
|
| 15 |
+
- `Detoxify` (toxicity, insult, threat, identity attack skorları)
|
| 16 |
+
- `Supabase` (canlı kara liste veritabanı)
|
| 17 |
+
- `Pandas` / `Openpyxl` (toplu veri analizi)
|
| 18 |
+
- `Scikit-learn` / `Matplotlib` (değerlendirme ve raporlama)
|
| 19 |
+
|
| 20 |
+
## 3. Proje Dizin Yapısı
|
| 21 |
+
```text
|
| 22 |
+
moderasyon/
|
| 23 |
+
├─ main.py
|
| 24 |
+
├─ app.py
|
| 25 |
+
├─ utils.py
|
| 26 |
+
├─ performance_test.py
|
| 27 |
+
├─ stress_test.py
|
| 28 |
+
├─ vram_check.py
|
| 29 |
+
├─ requirements.txt
|
| 30 |
+
├─ models_cache/
|
| 31 |
+
│ ├─ bertturk-offensive-42k/
|
| 32 |
+
│ └─ bertturk-hate-speech/ # v4 raporunda aktif akıştan çıkarıldı
|
| 33 |
+
└─ app/
|
| 34 |
+
├─ api/
|
| 35 |
+
│ └─ endpoints.py
|
| 36 |
+
├─ core/
|
| 37 |
+
│ └─ config.py
|
| 38 |
+
├─ db/
|
| 39 |
+
│ └─ supabase_client.py
|
| 40 |
+
├─ ml/
|
| 41 |
+
│ └─ model_loader.py
|
| 42 |
+
├─ services/
|
| 43 |
+
│ ├─ cache_manager.py
|
| 44 |
+
│ └─ moderation_service.py
|
| 45 |
+
└─ utils/
|
| 46 |
+
└─ text_utils.py
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
## 4. Sistem Nasıl Çalışır?
|
| 50 |
+
Sistem, gelen metni önce temizler, sonra spam/gibberish ve kara liste kontrollerini yapar, en son gerekli ise ML modellerini çalıştırıp tek bir karar motorunda sonucu üretir.
|
| 51 |
+
|
| 52 |
+
### 4.1 Genel Akış
|
| 53 |
+
```mermaid
|
| 54 |
+
flowchart TD
|
| 55 |
+
A[POST /analyze] --> B[clean_text_nfkc]
|
| 56 |
+
B --> C{is_spam?}
|
| 57 |
+
C -- Evet --> D[SPAM/GIBBERISH - Early Exit]
|
| 58 |
+
C -- Hayir --> E[Supabase Cache Kontrolu]
|
| 59 |
+
E --> F{Dil}
|
| 60 |
+
F -- TR --> G[BERTurk Offensive + Detoxify Multilingual]
|
| 61 |
+
F -- EN --> H[Gibberish Detector + Detoxify Original]
|
| 62 |
+
G --> I[calculate_verdict]
|
| 63 |
+
H --> I
|
| 64 |
+
I --> J[decision + risk + action + latency_ms]
|
| 65 |
+
```
|
| 66 |
+
|
| 67 |
+
### 4.2 TR Pipeline
|
| 68 |
+
- Metin normalize edilir (`NFKC`, obfuscation temizliği, leet dönüşümü).
|
| 69 |
+
- `is_spam()` hızlı kuralları çalışır; pozitifse model çağrılmaz.
|
| 70 |
+
- Supabase kara listesi RAM cache içinden taranır.
|
| 71 |
+
- `BERTurk Offensive 42K` ile `off_score` hesaplanır.
|
| 72 |
+
- `Detoxify Multilingual` ile toxicity/insult/threat/identity attack skorları alınır.
|
| 73 |
+
- `calculate_verdict()` son kararı üretir.
|
| 74 |
+
|
| 75 |
+
### 4.3 EN Pipeline
|
| 76 |
+
- Metin normalize edilir.
|
| 77 |
+
- `is_spam()` kontrolü yapılır.
|
| 78 |
+
- Supabase EN cache taranır.
|
| 79 |
+
- Gibberish detector `noise > 0.98` ise erken çıkış verir.
|
| 80 |
+
- `Detoxify Original` 6 etiketli skor üretir.
|
| 81 |
+
- `calculate_verdict()` ile nihai sınıflandırma yapılır.
|
| 82 |
+
|
| 83 |
+
## 5. Modeller, Veri Setleri ve Eğitim Süreci
|
| 84 |
+
|
| 85 |
+
### 5.1 Kullanılan Veri Setleri
|
| 86 |
+
- `Toygar/turkish-offensive-language-detection` (aktif TR modeli)
|
| 87 |
+
- Toplam: `53,005`
|
| 88 |
+
- Train: `42,398`, Validation: `1,756`, Test: `8,851`
|
| 89 |
+
- Etiketler: `0=temiz`, `1=offensive`
|
| 90 |
+
- `fawern/turkish-hate-speech` (referans)
|
| 91 |
+
- v4 notu: ayrı hate modeli aktif akıştan çıkarılmıştır.
|
| 92 |
+
|
| 93 |
+
### 5.2 Fine-Tuning Özeti (BERTurk Offensive 42K)
|
| 94 |
+
- `num_train_epochs=3`
|
| 95 |
+
- `batch_size=16`
|
| 96 |
+
- `learning_rate=2e-5`
|
| 97 |
+
- `weight_decay=0.01`
|
| 98 |
+
- `fp16=True`
|
| 99 |
+
- `load_best_model_at_end=True`
|
| 100 |
+
|
| 101 |
+
Eğitim gözlemi (özet): model ilk epoch'tan sonra hızla iyileşir, sonraki epoch'larda doğruluk artışı sınırlı kalsa da en iyi checkpoint otomatik seçilerek stabil performans korunur.
|
| 102 |
+
|
| 103 |
+
### 5.3 Neden Hibrit Mimari?
|
| 104 |
+
- Sadece blacklist: semantik saldırganlığı kaçırabilir.
|
| 105 |
+
- Sadece model: obfuscation ve açık küfürlerde gereksiz gecikme yaratabilir.
|
| 106 |
+
- Hibrit yaklaşım: erken çıkış + semantik model kombinasyonuyla hız/doğruluk dengesi sağlar.
|
| 107 |
+
|
| 108 |
+
## 6. Başarı Metrikleri (Rapor v4)
|
| 109 |
+
|
| 110 |
+
### 6.1 Model Kalitesi
|
| 111 |
+
- TR Accuracy: `%92`
|
| 112 |
+
- TR Macro F1: `%92`
|
| 113 |
+
- Offensive sınıfı F1: `%92`
|
| 114 |
+
|
| 115 |
+
### 6.2 Performans
|
| 116 |
+
- Tek istek gecikmesi:
|
| 117 |
+
- TR: `~90ms - 240ms`
|
| 118 |
+
- EN: `~54ms - 111ms`
|
| 119 |
+
- Hedef: `<300ms` (karşılanıyor)
|
| 120 |
+
|
| 121 |
+
### 6.3 Stress Test
|
| 122 |
+
- `50` istek, `5` eş zamanlı kullanıcı
|
| 123 |
+
- Ortalama gecikme: `319.69ms`
|
| 124 |
+
- Throughput: `15.01 req/sec`
|
| 125 |
+
- GPU: `RTX 3050 Ti Laptop GPU`
|
| 126 |
+
- VRAM: `687.36MB allocated / 750MB reserved`
|
| 127 |
+
|
| 128 |
+
### 6.4 Gerçek Veri Testi (500 Tweet)
|
| 129 |
+
- Toplam süre: `83 saniye` (`~166ms/satır`)
|
| 130 |
+
- Dağılım:
|
| 131 |
+
- `TEMİZ`: `216` (`%43.2`)
|
| 132 |
+
- `KÜFÜR/PROFANITY`: `169` (`%33.8`)
|
| 133 |
+
- `SALDIRGAN/TOXIC`: `87` (`%17.4`)
|
| 134 |
+
- `İNCELEME GEREKLİ`: `27` (`%5.4`)
|
| 135 |
+
- `SPAM/GİBBERİSH`: `1` (`%0.2`)
|
| 136 |
+
|
| 137 |
+
## 7. API Endpointleri
|
| 138 |
+
- `POST /analyze`
|
| 139 |
+
- Girdi: `{"text": "...", "platform_dil": "tr|en"}`
|
| 140 |
+
- Çıktı: `decision`, `risk_level`, `details`, `latency_ms`
|
| 141 |
+
- `GET /refresh-cache`
|
| 142 |
+
- Supabase kara listesini sistemi durdurmadan RAM'e yeniden yükler.
|
| 143 |
+
- `GET /vram-status`
|
| 144 |
+
- GPU bellek kullanımını döndürür.
|
| 145 |
+
|
| 146 |
+
## 8. Önemli Fonksiyonlar
|
| 147 |
+
|
| 148 |
+
### 8.1 `clean_text_nfkc()` ne yapar?
|
| 149 |
+
Dosya: `app/utils/text_utils.py`
|
| 150 |
+
- Mesajdaki karakterleri standartlaştırır.
|
| 151 |
+
- Gizlenmiş küfürleri görünür hale getirir.
|
| 152 |
+
- Örnek: `m.a.l` -> `mal`, `ger1zekalı` -> `gerizekalı`.
|
| 153 |
+
|
| 154 |
+
Kısaca: Kullanıcı metnini “makine için okunabilir ve karşılaştırılabilir” hale getirir.
|
| 155 |
+
|
| 156 |
+
### 8.2 `is_spam()` ne yapar?
|
| 157 |
+
Dosya: `app/utils/text_utils.py`
|
| 158 |
+
- Çok kısa, anlamsız, tekrar eden veya reklam kalıbı içeren metni işaretler.
|
| 159 |
+
- Eğer spam ise pahalı model çağrısını atlar.
|
| 160 |
+
|
| 161 |
+
Kısaca: Sistemin hem hızını artırır hem de gereksiz GPU kullanımını azaltır.
|
| 162 |
+
|
| 163 |
+
### 8.3 `load_blacklist_to_ram()` ne yapar?
|
| 164 |
+
Dosya: `app/services/cache_manager.py`
|
| 165 |
+
- Supabase'deki `blacklist` tablosunu sayfalı şekilde çeker.
|
| 166 |
+
- TR ve EN kelimeleri ayrı sözlüklerde RAM'e alır.
|
| 167 |
+
- `/refresh-cache` çağrısıyla canlı güncellenir.
|
| 168 |
+
|
| 169 |
+
Kısaca: Kara listeyi veritabanından her istekte tekrar okumadan çok hızlı kullanmayı sağlar.
|
| 170 |
+
|
| 171 |
+
### 8.4 `run_moderation()` ne yapar?
|
| 172 |
+
Dosya: `app/services/moderation_service.py`
|
| 173 |
+
- Tüm moderasyon adımlarını sırayla çalıştıran ana fonksiyondur.
|
| 174 |
+
- Temizleme -> spam -> kara liste -> model -> karar akışını yönetir.
|
| 175 |
+
- Sonuçta API'nin döndürdüğü tüm karar bilgilerini üretir.
|
| 176 |
+
|
| 177 |
+
Kısaca: Bu fonksiyon sistemin beyni gibi çalışır.
|
| 178 |
+
|
| 179 |
+
### 8.5 `calculate_verdict()` ne yapar?
|
| 180 |
+
Dosya: `app/services/moderation_service.py`
|
| 181 |
+
- Kara liste eşleşmeleri ve model skorlarını tek bir karara dönüştürür.
|
| 182 |
+
- Risk seviyesini (`CRITICAL`, `MEDIUM`, `LOW`, `NONE`) belirler.
|
| 183 |
+
- Karşılık gelen aksiyonu (`CENSOR`, `MONITOR`, `ALLOW`) tetikler.
|
| 184 |
+
|
| 185 |
+
Kısaca: Model skorlarını insanlar için anlaşılır moderasyon kararına çevirir.
|
| 186 |
+
|
| 187 |
+
### 8.6 `/analyze` endpoint'i ne yapar?
|
| 188 |
+
Dosya: `app/api/endpoints.py`
|
| 189 |
+
- Dış sistemin çağırdığı ana API kapısıdır.
|
| 190 |
+
- Metni alır, `run_moderation()` ile analiz eder.
|
| 191 |
+
- JSON formatında karar + gecikme bilgisi döndürür.
|
| 192 |
+
|
| 193 |
+
Kısaca: Platform ile moderasyon motoru arasındaki köprüdür.
|
| 194 |
+
|
| 195 |
+
## 9. Kurulum ve Çalıştırma
|
| 196 |
+
|
| 197 |
+
### 9.1 Gereksinimler
|
| 198 |
+
- Python 3.10+
|
| 199 |
+
- CUDA destekli GPU (önerilir, zorunlu değil)
|
| 200 |
+
- Supabase proje bilgileri (`SUPABASE_URL`, `SUPABASE_KEY`)
|
| 201 |
+
|
| 202 |
+
### 9.2 Kurulum
|
| 203 |
+
```bash
|
| 204 |
+
pip install -r requirements.txt
|
| 205 |
+
```
|
| 206 |
+
|
| 207 |
+
### 9.3 API'yi Başlatma
|
| 208 |
+
```bash
|
| 209 |
+
uvicorn main:app --reload
|
| 210 |
+
```
|
| 211 |
+
|
| 212 |
+
### 9.4 Streamlit Paneli Başlatma
|
| 213 |
+
```bash
|
| 214 |
+
streamlit run app.py
|
| 215 |
+
```
|
| 216 |
+
|
| 217 |
+
## 10. Notlar ve İyileştirme Önerileri
|
| 218 |
+
- Bilinen kenar vaka: aşırı tekrar + küfür kombinasyonlarında bazı metinler spam'e düşebilir.
|
| 219 |
+
- Öneri 1: tekrar harf sadeleştirmesini spam kontrolünden önce kesinleştir.
|
| 220 |
+
- Öneri 2: `İNCELEME GEREKLİ` eşiğini veri dağılımına göre yeniden kalibre et.
|
| 221 |
+
- Öneri 3: platformdan gelen etiketli gerçek verilerle periyodik yeniden eğitim yap.
|
| 222 |
+
|
| 223 |
---
|
| 224 |
+
Hazırlanan bu README, `Teknik Araştırma & Geliştirme Raporu v4` içeriğini proje kod yapısıyla birlikte tek dokümanda birleştirir.
|
| 225 |
+
|
| 226 |
+
## 11. Docker ve Hugging Face Spaces Deployment
|
| 227 |
+
|
| 228 |
+
Bu proje FastAPI servisini `main.py` içindeki `app` nesnesi ile başlatır. Docker imajı içinde doğru başlangıç komutu bu nedenle `uvicorn main:app ...` şeklindedir.
|
| 229 |
+
|
| 230 |
+
### 11.1 Yerelde Docker ile test
|
| 231 |
+
```bash
|
| 232 |
+
docker build -t sentinel-api .
|
| 233 |
+
docker run --rm -p 7860:7860 \
|
| 234 |
+
-e SUPABASE_URL="https://YOUR_PROJECT.supabase.co" \
|
| 235 |
+
-e SUPABASE_KEY="YOUR_SUPABASE_KEY" \
|
| 236 |
+
sentinel-api
|
| 237 |
+
```
|
| 238 |
+
|
| 239 |
+
Test isteği:
|
| 240 |
+
```bash
|
| 241 |
+
curl -X POST "http://127.0.0.1:7860/analyze" \
|
| 242 |
+
-H "Content-Type: application/json" \
|
| 243 |
+
-d '{"text":"örnek metin","platform_dil":"tr"}'
|
| 244 |
+
```
|
| 245 |
+
|
| 246 |
+
### 11.2 Hugging Face Spaces adımları
|
| 247 |
+
1. Hugging Face hesabında `New Space` oluştur.
|
| 248 |
+
2. `SDK` olarak `Docker` seç.
|
| 249 |
+
3. Bu repodaki dosyaları Space'e yükle (`Dockerfile`, `requirements.txt`, `app/`, `main.py`, `models_cache/` vb.).
|
| 250 |
+
4. Space ayarlarında `Settings -> Variables and secrets` bölümüne şu secret'ları ekle:
|
| 251 |
+
- `SUPABASE_URL`
|
| 252 |
+
- `SUPABASE_KEY`
|
| 253 |
+
5. Build tamamlandığında servis `7860` portunda ayağa kalkar.
|
| 254 |
|
| 255 |
+
Notlar:
|
| 256 |
+
- Bu Docker kurulumunda `torch` CPU wheel olarak yüklenir (HF Spaces free tier için uygun).
|
| 257 |
+
- `.dockerignore` dosyası, gereksiz yerel dosyaları imaja dahil etmeyerek build süresini ve imaj boyutunu azaltır.
|
app.py
ADDED
|
@@ -0,0 +1,1017 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
import subprocess
|
| 3 |
+
import time
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import requests
|
| 8 |
+
import streamlit as st
|
| 9 |
+
|
| 10 |
+
try:
|
| 11 |
+
import psutil
|
| 12 |
+
except ImportError:
|
| 13 |
+
psutil = None
|
| 14 |
+
|
| 15 |
+
st.set_page_config(
|
| 16 |
+
page_title="Sentinel — İçerik Moderasyon",
|
| 17 |
+
layout="wide",
|
| 18 |
+
initial_sidebar_state="expanded",
|
| 19 |
+
)
|
| 20 |
+
|
| 21 |
+
st.markdown(
|
| 22 |
+
"""
|
| 23 |
+
<style>
|
| 24 |
+
@import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;500;600&family=IBM+Plex+Sans:wght@300;400;500;600&display=swap');
|
| 25 |
+
|
| 26 |
+
html, body, [class*="css"] {
|
| 27 |
+
font-family: 'IBM Plex Sans', sans-serif;
|
| 28 |
+
background-color: #0a0e17;
|
| 29 |
+
color: #c9d1e0;
|
| 30 |
+
}
|
| 31 |
+
[data-testid="stSidebar"] {
|
| 32 |
+
background: #0d1220;
|
| 33 |
+
border-right: 1px solid #1e2d45;
|
| 34 |
+
min-width: 300px !important;
|
| 35 |
+
max-width: 300px !important;
|
| 36 |
+
width: 300px !important;
|
| 37 |
+
margin-left: 0 !important;
|
| 38 |
+
transform: translateX(0) !important;
|
| 39 |
+
flex-shrink: 0 !important;
|
| 40 |
+
}
|
| 41 |
+
[data-testid="stSidebar"][aria-expanded="false"] {
|
| 42 |
+
min-width: 300px !important;
|
| 43 |
+
max-width: 300px !important;
|
| 44 |
+
width: 300px !important;
|
| 45 |
+
margin-left: 0 !important;
|
| 46 |
+
transform: translateX(0) !important;
|
| 47 |
+
}
|
| 48 |
+
[data-testid="stSidebar"][aria-expanded="true"] {
|
| 49 |
+
min-width: 300px !important;
|
| 50 |
+
max-width: 300px !important;
|
| 51 |
+
width: 300px !important;
|
| 52 |
+
}
|
| 53 |
+
[data-testid="stSidebarContent"] {
|
| 54 |
+
display: block !important;
|
| 55 |
+
visibility: visible !important;
|
| 56 |
+
opacity: 1 !important;
|
| 57 |
+
}
|
| 58 |
+
[data-testid="stSidebar"] * { color: #8a9bc0 !important; }
|
| 59 |
+
[data-testid="stSidebar"] .stRadio label { color: #c9d1e0 !important; }
|
| 60 |
+
[data-testid="collapsedControl"],
|
| 61 |
+
[data-testid="stSidebarCollapseButton"],
|
| 62 |
+
button[title="Close sidebar"],
|
| 63 |
+
button[title="Open sidebar"] { display: none !important; }
|
| 64 |
+
#MainMenu, footer, header { visibility: hidden; }
|
| 65 |
+
.block-container { padding-top: 1.5rem; padding-bottom: 2rem; }
|
| 66 |
+
|
| 67 |
+
.sentinel-header {
|
| 68 |
+
display: flex; align-items: center; gap: 16px;
|
| 69 |
+
padding: 20px 0 28px 0;
|
| 70 |
+
border-bottom: 1px solid #1e2d45;
|
| 71 |
+
margin-bottom: 28px;
|
| 72 |
+
}
|
| 73 |
+
.sentinel-logo {
|
| 74 |
+
width: 44px; height: 44px;
|
| 75 |
+
background: linear-gradient(135deg, #1a6cf7, #0d3d8e);
|
| 76 |
+
border-radius: 10px;
|
| 77 |
+
display: flex; align-items: center; justify-content: center;
|
| 78 |
+
font-size: 22px;
|
| 79 |
+
}
|
| 80 |
+
.sentinel-title { font-family:'IBM Plex Mono',monospace; font-size:22px; font-weight:600; color:#e8eef8; }
|
| 81 |
+
.sentinel-sub { font-size:12px; color:#6f86ab; font-family:'IBM Plex Mono',monospace; letter-spacing:1px; text-transform:uppercase; }
|
| 82 |
+
.status-pill {
|
| 83 |
+
margin-left:auto; background:#0a1f0e; border:1px solid #1a5c28;
|
| 84 |
+
color:#3ddc5f; font-family:'IBM Plex Mono',monospace;
|
| 85 |
+
font-size:11px; padding:4px 12px; border-radius:20px;
|
| 86 |
+
}
|
| 87 |
+
.status-dot { display:inline-block; width:7px; height:7px; background:#3ddc5f; border-radius:50%; margin-right:6px; animation:pulse 2s infinite; }
|
| 88 |
+
@keyframes pulse { 0%,100%{opacity:1} 50%{opacity:0.3} }
|
| 89 |
+
|
| 90 |
+
.verdict-card { border-radius:12px; padding:24px 28px; margin-bottom:20px; border:1px solid; position:relative; overflow:hidden; }
|
| 91 |
+
.verdict-card::before { content:''; position:absolute; top:0; left:0; width:4px; height:100%; }
|
| 92 |
+
.verdict-TEMIZ { background:#050f07; border-color:#1a4d25; } .verdict-TEMIZ::before { background:#2ea84a; }
|
| 93 |
+
.verdict-KUFUR { background:#0f0c02; border-color:#4d3d08; } .verdict-KUFUR::before { background:#d4a017; }
|
| 94 |
+
.verdict-SALDIRGAN{ background:#0f0c02; border-color:#4d3d08; } .verdict-SALDIRGAN::before{ background:#d4a017; }
|
| 95 |
+
.verdict-TOXIC { background:#0f0c02; border-color:#4d3d08; } .verdict-TOXIC::before { background:#d4a017; }
|
| 96 |
+
.verdict-NEFRET { background:#120a02; border-color:#5c2e0a; } .verdict-NEFRET::before { background:#e07020; }
|
| 97 |
+
.verdict-INCELEME { background:#060a13; border-color:#1a2d5c; } .verdict-INCELEME::before { background:#3a7bd4; }
|
| 98 |
+
.verdict-SPAM { background:#080810; border-color:#2a1a4d; } .verdict-SPAM::before { background:#8030d4; }
|
| 99 |
+
|
| 100 |
+
.verdict-label { font-family:'IBM Plex Mono',monospace; font-size:26px; font-weight:600; margin-bottom:6px; }
|
| 101 |
+
.verdict-reason { font-size:14px; color:#6a7f9a; font-family:'IBM Plex Mono',monospace; }
|
| 102 |
+
|
| 103 |
+
.metric-row { display:flex; gap:12px; margin-bottom:20px; }
|
| 104 |
+
.metric-card { flex:1; background:#0d1220; border:1px solid #1e2d45; border-radius:10px; padding:16px 20px; }
|
| 105 |
+
.metric-label { font-family:'IBM Plex Mono',monospace; font-size:11px; color:#7690b8; text-transform:uppercase; letter-spacing:1px; margin-bottom:8px; }
|
| 106 |
+
.metric-value { font-family:'IBM Plex Mono',monospace; font-size:24px; font-weight:600; color:#e8eef8; }
|
| 107 |
+
.metric-value.low{color:#2ea84a} .metric-value.med{color:#d4a017} .metric-value.high{color:#e03030}
|
| 108 |
+
|
| 109 |
+
.score-row { margin-bottom:14px; }
|
| 110 |
+
.score-label { display:flex; justify-content:space-between; font-family:'IBM Plex Mono',monospace; font-size:12px; color:#8ea7cb; margin-bottom:5px; }
|
| 111 |
+
.score-track { height:5px; background:#1a2535; border-radius:3px; overflow:hidden; }
|
| 112 |
+
.score-fill { height:100%; border-radius:3px; }
|
| 113 |
+
|
| 114 |
+
.stTextArea textarea { background:#0d1220 !important; border:1px solid #1e2d45 !important; border-radius:10px !important; color:#c9d1e0 !important; font-family:'IBM Plex Sans',sans-serif !important; font-size:15px !important; padding:14px !important; }
|
| 115 |
+
.stTextArea textarea:focus { border-color:#1a6cf7 !important; }
|
| 116 |
+
.stButton button { background:#1a6cf7 !important; color:white !important; border:none !important; border-radius:8px !important; font-family:'IBM Plex Sans',sans-serif !important; font-weight:500 !important; font-size:14px !important; padding:10px 24px !important; }
|
| 117 |
+
.stButton button:hover { background:#1557cc !important; }
|
| 118 |
+
.stTabs [data-baseweb="tab-list"] { background:transparent !important; border-bottom:1px solid #1e2d45 !important; }
|
| 119 |
+
.stTabs [data-baseweb="tab"] { background:transparent !important; color:#4a6080 !important; font-family:'IBM Plex Mono',monospace !important; font-size:13px !important; padding:10px 20px !important; border-bottom:2px solid transparent !important; }
|
| 120 |
+
.stTabs [aria-selected="true"] { color:#1a6cf7 !important; border-bottom-color:#1a6cf7 !important; background:transparent !important; }
|
| 121 |
+
[data-testid="stFileUploader"] { background:#0d1220 !important; border:1px dashed #1e2d45 !important; border-radius:10px !important; }
|
| 122 |
+
.stRadio label { background:#111827 !important; border:1px solid #1e2d45 !important; border-radius:8px !important; padding:10px 14px !important; }
|
| 123 |
+
.stRadio label:has(input:checked) { border-color:#1a6cf7 !important; background:#0d1a33 !important; }
|
| 124 |
+
hr { border-color:#1e2d45 !important; }
|
| 125 |
+
.stTextInput input { background:#0d1220 !important; border:1px solid #1e2d45 !important; color:#c9d1e0 !important; border-radius:8px !important; font-family:'IBM Plex Mono',monospace !important; font-size:12px !important; }
|
| 126 |
+
[data-testid="stDataFrame"] { border:1px solid #1e2d45 !important; border-radius:10px !important; overflow:hidden !important; }
|
| 127 |
+
.stProgress > div > div { background:#1a6cf7 !important; }
|
| 128 |
+
|
| 129 |
+
.report-table { width:100%; border-collapse:collapse; font-family:'IBM Plex Mono',monospace; font-size:12px; }
|
| 130 |
+
.report-table th {
|
| 131 |
+
text-align:left; padding:10px 14px;
|
| 132 |
+
color:#4a6080; font-weight:600; font-size:10px;
|
| 133 |
+
letter-spacing:1.2px; text-transform:uppercase;
|
| 134 |
+
background:#0d1220; border-bottom:1px solid #1e2d45;
|
| 135 |
+
position:sticky; top:0; z-index:10;
|
| 136 |
+
}
|
| 137 |
+
.report-table td { padding:10px 14px; border-bottom:1px solid #0f1826; vertical-align:middle; }
|
| 138 |
+
.report-table tr:hover td { background:#0d1525; }
|
| 139 |
+
|
| 140 |
+
.risk-badge {
|
| 141 |
+
display:inline-block; padding:2px 10px; border-radius:12px;
|
| 142 |
+
font-size:10px; font-weight:600; letter-spacing:0.8px;
|
| 143 |
+
font-family:'IBM Plex Mono',monospace;
|
| 144 |
+
}
|
| 145 |
+
.badge-CRITICAL { background:#1f0c0c; color:#e03030; border:1px solid #5c1a1a; }
|
| 146 |
+
.badge-HIGH { background:#1a0e03; color:#e07020; border:1px solid #5c2e0a; }
|
| 147 |
+
.badge-MEDIUM { background:#141002; color:#d4a017; border:1px solid #4d3d08; }
|
| 148 |
+
.badge-LOW { background:#07091a; color:#3a7bd4; border:1px solid #1a2d5c; }
|
| 149 |
+
.badge-NONE { background:#050f07; color:#2ea84a; border:1px solid #1a4d25; }
|
| 150 |
+
|
| 151 |
+
.inline-bar {
|
| 152 |
+
display:inline-block; height:4px; border-radius:2px;
|
| 153 |
+
vertical-align:middle; margin-right:4px;
|
| 154 |
+
}
|
| 155 |
+
.hits-tag {
|
| 156 |
+
display:inline-block; background:#1f0e0e; border:1px solid #5c1a1a;
|
| 157 |
+
color:#e05050; font-size:10px; padding:1px 6px; border-radius:4px; margin:1px;
|
| 158 |
+
}
|
| 159 |
+
.karar-cell { font-weight:600; font-size:11px; }
|
| 160 |
+
.metin-cell { color:#8a9bc0; max-width:280px; overflow:hidden; text-overflow:ellipsis; white-space:nowrap; }
|
| 161 |
+
.skor-cell { color:#6a8cb0; font-size:11px; }
|
| 162 |
+
|
| 163 |
+
.summary-grid { display:grid; grid-template-columns:repeat(auto-fit, minmax(140px, 1fr)); gap:12px; margin-bottom:24px; }
|
| 164 |
+
.summary-card { background:#0d1220; border:1px solid #1e2d45; border-radius:10px; padding:16px; text-align:center; }
|
| 165 |
+
.summary-count { font-family:'IBM Plex Mono',monospace; font-size:36px; font-weight:700; margin-bottom:4px; }
|
| 166 |
+
.summary-label { font-family:'IBM Plex Mono',monospace; font-size:10px; color:#4a6080; text-transform:uppercase; letter-spacing:1px; }
|
| 167 |
+
|
| 168 |
+
.queue-card {
|
| 169 |
+
background:#060a13; border:1px solid #1a2d5c; border-radius:10px;
|
| 170 |
+
padding:16px; margin-bottom:10px;
|
| 171 |
+
display:flex; gap:16px; align-items:flex-start;
|
| 172 |
+
}
|
| 173 |
+
.queue-index { font-family:'IBM Plex Mono',monospace; font-size:11px; color:#2a3d55; min-width:28px; }
|
| 174 |
+
.queue-text { color:#c9d1e0; font-size:13px; line-height:1.5; flex:1; }
|
| 175 |
+
.queue-meta { font-family:'IBM Plex Mono',monospace; font-size:10px; color:#4a6080; margin-top:4px; }
|
| 176 |
+
</style>
|
| 177 |
+
""",
|
| 178 |
+
unsafe_allow_html=True,
|
| 179 |
+
)
|
| 180 |
+
|
| 181 |
+
# Local FastAPI moderation endpoint the dashboard posts to (overridable in the sidebar).
API_URL = "http://127.0.0.1:8000/analyze"

# Hex colors / glyphs keyed by the API's risk_level values; CRITICAL shares red with High.
VERDICT_COLORS = {
    "High": "#e03030",
    "Medium": "#d4a017",
    "Low": "#8030d4",
    "None": "#2ea84a",
    "CRITICAL": "#e03030",
}
VERDICT_ICONS = {"High": "●", "Medium": "◆", "Low": "▲", "None": "✓", "CRITICAL": "🚨"}

# Seed session state so sidebar widgets can read these keys before the first analysis runs.
if "last_latency_ms" not in st.session_state:
    st.session_state["last_latency_ms"] = None
if "last_metrics" not in st.session_state:
    st.session_state["last_metrics"] = None
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
def get_gpu_info():
    """Query the first NVIDIA GPU via nvidia-smi.

    Returns a dict with keys ``name``, ``load`` (%), ``temp`` (°C),
    ``vram_used`` / ``vram_total`` (MB, ints), or ``None`` when nvidia-smi
    is missing, fails, or its output cannot be parsed.
    """
    fields = "name,utilization.gpu,temperature.gpu,memory.used,memory.total"
    cmd = ["nvidia-smi", f"--query-gpu={fields}", "--format=csv,noheader,nounits"]
    try:
        raw = subprocess.check_output(cmd, encoding="utf-8", stderr=subprocess.STDOUT)
        # Only the first GPU line is reported; extra commas in the name are
        # avoided by splitting at most 4 times.
        first_line = raw.strip().splitlines()[0]
        parts = [piece.strip() for piece in first_line.split(",", maxsplit=4)]
        gpu_name = parts[0]
        load, temp, used, total = (int(float(v)) for v in parts[1:])
        return {
            "name": gpu_name,
            "load": load,
            "temp": temp,
            "vram_used": used,
            "vram_total": total,
        }
    except Exception:
        # No binary, no GPU, or malformed output: the caller treats None as "no GPU".
        return None
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
def capture_process_metrics():
    """Snapshot CPU/RAM/GPU usage for the sidebar's 'last operation' panel.

    GPU values are returned as strings ("0" when no GPU data is available);
    when the optional psutil module is absent, CPU and RAM report 0.0.
    """
    gpu = get_gpu_info()

    if psutil is None:
        cpu, ram = 0.0, 0.0
    else:
        cpu = psutil.cpu_percent(interval=0.1)
        ram = psutil.virtual_memory().percent

    return {
        "cpu": round(cpu, 1),
        "ram_pct": round(ram, 1),
        "vram_used": str(gpu["vram_used"]) if gpu else "0",
        "gpu_load": str(gpu["load"]) if gpu else "0",
        "timestamp": time.strftime("%H:%M:%S"),
    }
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
def verdict_css_class(decision):
    """Map a backend decision string (Turkish or English) to a verdict-* CSS suffix.

    Matching is substring-based on the uppercased decision; anything
    unrecognized falls back to "TEMIZ" (clean).
    """
    d = decision.upper()
    if "TEMIZ" in d or "CLEAR" in d:
        return "TEMIZ"
    if "NEFRET" in d or "IDENTITY" in d:
        return "NEFRET"
    if "KÜFÜR" in d or "KUFUR" in d or "PROFANITY" in d:
        return "KUFUR"
    if "SALDIRGAN" in d or "TOXIC" in d:
        return "SALDIRGAN"
    if "İNCELEME" in d or "INCELEME" in d or "REVIEW" in d:
        return "INCELEME"
    # BUG FIX: the original only tested "GİBBERİSH" (Turkish dotted İ).
    # str.upper() on ASCII input never yields İ, so a plain "GIBBERISH"
    # decision fell through to the TEMIZ default. Match the ASCII spelling
    # too, keeping the old spelling for backward compatibility.
    if "SPAM" in d or "GIBBERISH" in d or "GİBBERİSH" in d:
        return "SPAM"
    return "TEMIZ"
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
def risk_color(val):
    """Return the hex color for a 0..1 risk score (strict > thresholds)."""
    thresholds = (
        (0.7, "#e03030"),   # high → red
        (0.4, "#d4a017"),   # medium → amber
        (0.15, "#f0a020"),  # slight → orange
    )
    for floor, color in thresholds:
        if val > floor:
            return color
    return "#2ea84a"  # negligible → green
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
def score_bar(label, value, color="#1a6cf7"):
    """Render a labelled horizontal score bar (value in 0..1) as raw HTML."""
    # Clamp to [0, 100] so out-of-range model scores never break the layout.
    filled = max(0, min(value * 100, 100))
    header = f'<div class="score-label"><span>{label}</span><span style="color:{color};font-weight:600">%{filled:.1f}</span></div>'
    track = f'<div class="score-track"><div class="score-fill" style="width:{filled}%;background:{color}"></div></div>'
    return f"""<div class=\"score-row\">
{header}
{track}
</div>"""
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
def badge_html(risk):
    """Return a risk-level <span> badge; unknown levels fall back to the NONE style."""
    level = risk.upper()
    known = ("CRITICAL", "HIGH", "MEDIUM", "LOW", "NONE")
    # The CSS class name is simply "badge-" + the level, but only for known levels.
    css = f"badge-{level}" if level in known else "badge-NONE"
    return f'<span class="risk-badge {css}">{risk}</span>'
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
def inline_bar_html(value, color):
    """Tiny inline bar plus percent label; 1.0 maps to a 60px-wide bar."""
    width = max(0, min(value * 60, 60))  # clamp to [0, 60] px
    bar = f'<span class="inline-bar" style="width:{width}px;background:{color}"></span>'
    pct = f'<span style="color:{color};font-size:11px">%{value * 100:.0f}</span>'
    return bar + pct
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
def generate_docx_report(res_df, total_time, platform_dil):
    """Build a styled .docx moderation report from the batch-analysis DataFrame.

    Args:
        res_df: pandas DataFrame with columns "Metin", "Normalize", "Karar",
            "Risk", "Saldırganlık", "Nefret", "Tehdit", "Hits" (and "Gerekçe"
            for review-queue rows) — TODO confirm against the batch-analysis caller.
        total_time: total processing time in seconds, shown in the subtitle.
        platform_dil: pipeline language code ("tr"/"en"), shown in the subtitle.

    Returns:
        io.BytesIO positioned at 0 with the saved document, or None when
        python-docx is not installed.
    """
    try:
        from docx import Document
        from docx.enum.text import WD_ALIGN_PARAGRAPH
        from docx.oxml import OxmlElement
        from docx.oxml.ns import qn
        from docx.shared import Cm, Pt, RGBColor
    except ImportError:
        # python-docx is optional; callers must handle the None result.
        return None

    doc = Document()

    # Narrow margins so the 9-column detail table fits on the page.
    for section in doc.sections:
        section.top_margin = Cm(1.8)
        section.bottom_margin = Cm(1.8)
        section.left_margin = Cm(2.0)
        section.right_margin = Cm(2.0)

    def set_cell_bg(cell, hex_color):
        # python-docx has no cell-shading API; inject a w:shd element directly.
        tc = cell._tc
        tc_pr = tc.get_or_add_tcPr()
        shd = OxmlElement("w:shd")
        shd.set(qn("w:val"), "clear")
        shd.set(qn("w:color"), "auto")
        shd.set(qn("w:fill"), hex_color)
        tc_pr.append(shd)

    def add_run(para, text, bold=False, size=10, color="000000", italic=False):
        # Append a styled run; `color` is a 6-hex-digit string without '#'.
        run = para.add_run(text)
        run.bold = bold
        run.italic = italic
        run.font.size = Pt(size)
        run.font.color.rgb = RGBColor(int(color[0:2], 16), int(color[2:4], 16), int(color[4:6], 16))
        return run

    # --- Title and subtitle -------------------------------------------------
    title_para = doc.add_paragraph()
    title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    add_run(title_para, "SENTINEL AI - Moderasyon Analiz Raporu", bold=True, size=18, color="1F4E79")

    sub_para = doc.add_paragraph()
    sub_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    ts = datetime.now().strftime("%d.%m.%Y %H:%M")
    add_run(
        sub_para,
        f"Platform: {platform_dil.upper()} | Olusturulma: {ts} | {len(res_df)} kayit | {total_time:.1f}s",
        size=9,
        color="888888",
    )

    doc.add_paragraph()

    # --- Summary table: one column per distinct decision --------------------
    counts = res_df["Karar"].value_counts()
    sum_para = doc.add_paragraph()
    add_run(sum_para, "OZET", bold=True, size=11, color="1F4E79")

    sum_tbl = doc.add_table(rows=1, cols=len(counts) + 1)
    sum_tbl.style = "Table Grid"
    hdr = sum_tbl.rows[0].cells
    set_cell_bg(hdr[0], "1F4E79")
    p = hdr[0].paragraphs[0]
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    add_run(p, "Metrik", bold=True, size=9, color="FFFFFF")

    # Decision-name → accent color (substring-matched, so TR/EN variants share entries).
    karar_colors = {
        "TEMIZ": "2EA84A",
        "KÜFÜR": "D4A017",
        "KUFUR": "D4A017",
        "PROFANITY": "D4A017",
        "SALDIRGAN": "D4A017",
        "TOXIC": "D4A017",
        "NEFRET": "E07020",
        "INCELEME": "3A7BD4",
        "SPAM": "8030D4",
        "GIBBERISH": "8030D4",
    }
    for i, (karar, cnt) in enumerate(counts.items()):
        cell = hdr[i + 1]
        set_cell_bg(cell, "0D1220")
        p2 = cell.paragraphs[0]
        p2.alignment = WD_ALIGN_PARAGRAPH.CENTER
        # First karar_colors key appearing inside the decision wins; grey otherwise.
        c = next((v for k, v in karar_colors.items() if k in karar.upper()), "888888")
        add_run(p2, f"{cnt}", bold=True, size=14, color=c)
        p3 = cell.add_paragraph()
        p3.alignment = WD_ALIGN_PARAGRAPH.CENTER
        add_run(p3, karar[:16], size=7, color="888888")

    doc.add_paragraph()

    # --- Detail table: one row per analyzed text ----------------------------
    detail_para = doc.add_paragraph()
    add_run(detail_para, "DETAYLI ANALIZ SONUCLARI", bold=True, size=11, color="1F4E79")

    cols = ["#", "Metin", "Normalize", "Karar", "Risk", "Saldirganlik", "Nefret", "Tehdit", "Hits"]
    tbl = doc.add_table(rows=1, cols=len(cols))
    tbl.style = "Table Grid"

    for i, col_name in enumerate(cols):
        cell = tbl.rows[0].cells[i]
        set_cell_bg(cell, "1F4E79")
        p = cell.paragraphs[0]
        p.alignment = WD_ALIGN_PARAGRAPH.CENTER
        add_run(p, col_name, bold=True, size=8, color="FFFFFF")

    for idx, row in res_df.iterrows():
        tr = tbl.add_row()
        cells = tr.cells

        # Whole-row background tint follows the risk level.
        risk_str = str(row.get("Risk", "")).upper()
        row_colors = {
            "CRITICAL": "1F0C0C",
            "HIGH": "1A0E03",
            "MEDIUM": "141002",
            "LOW": "07091A",
            "NONE": "050F07",
        }
        row_fill = row_colors.get(risk_str, "0D1220")

        # Col 0: row number (NOTE(review): assumes a 0-based RangeIndex — confirm
        # the caller resets the index before passing res_df in).
        set_cell_bg(cells[0], row_fill)
        p = cells[0].paragraphs[0]
        p.alignment = WD_ALIGN_PARAGRAPH.CENTER
        add_run(p, str(idx + 1), size=8, color="4A6080")

        # Col 1: original text, truncated to 120 chars.
        set_cell_bg(cells[1], row_fill)
        p = cells[1].paragraphs[0]
        add_run(p, str(row.get("Metin", ""))[:120], size=8, color="C9D1E0")

        # Col 2: normalized text, truncated to 60 chars.
        set_cell_bg(cells[2], row_fill)
        p = cells[2].paragraphs[0]
        add_run(p, str(row.get("Normalize", ""))[:60], size=7, color="6A8CB0", italic=True)

        # Col 3: decision, colored by the same substring lookup as the summary.
        set_cell_bg(cells[3], row_fill)
        p = cells[3].paragraphs[0]
        p.alignment = WD_ALIGN_PARAGRAPH.CENTER
        karar = str(row.get("Karar", ""))
        c = next((v for k, v in karar_colors.items() if k in karar.upper()), "888888")
        add_run(p, karar[:20], bold=True, size=8, color=c)

        # Col 4: risk level text in its own accent color.
        set_cell_bg(cells[4], row_fill)
        p = cells[4].paragraphs[0]
        p.alignment = WD_ALIGN_PARAGRAPH.CENTER
        risk_colors = {
            "CRITICAL": "E03030",
            "HIGH": "E07020",
            "MEDIUM": "D4A017",
            "LOW": "3A7BD4",
            "NONE": "2EA84A",
        }
        rc = risk_colors.get(risk_str, "888888")
        add_run(p, risk_str, bold=True, size=8, color=rc)

        # Cols 5-7: per-signal percentages, colored via risk_color (strip the '#').
        for col_i, field in [(5, "Saldırganlık"), (6, "Nefret"), (7, "Tehdit")]:
            set_cell_bg(cells[col_i], row_fill)
            p = cells[col_i].paragraphs[0]
            p.alignment = WD_ALIGN_PARAGRAPH.CENTER
            score = float(row.get(field, 0.0))
            add_run(p, f"%{score * 100:.1f}", size=8, color=risk_color(score).replace("#", ""))

        # Col 8: blacklist hits; strip list-repr punctuation for display.
        set_cell_bg(cells[8], row_fill)
        p = cells[8].paragraphs[0]
        hits = str(row.get("Hits", "")).strip("[]'\"")
        add_run(p, hits if hits else "-", size=7, color="E05050" if hits else "2A3D55")

    # Fixed column widths (cm); applied per-row because Word ignores table-level widths.
    widths_cm = [0.7, 4.5, 3.0, 2.8, 1.5, 1.4, 1.4, 1.4, 2.0]
    for i, w in enumerate(widths_cm):
        for row in tbl.rows:
            row.cells[i].width = Cm(w)

    doc.add_paragraph()

    # --- Review queue: one boxed table per item needing human review ---------
    inceleme = res_df[res_df["Karar"].str.contains("İNCELEME|INCELEME|REVIEW", na=False)]
    if len(inceleme):
        q_para = doc.add_paragraph()
        add_run(q_para, f"INCELEME KUYRUGU - {len(inceleme)} Icerik", bold=True, size=11, color="3A7BD4")

        for _, row in inceleme.iterrows():
            q_tbl = doc.add_table(rows=1, cols=1)
            q_tbl.style = "Table Grid"
            cell = q_tbl.rows[0].cells[0]
            set_cell_bg(cell, "060A13")
            p = cell.paragraphs[0]
            add_run(p, str(row.get("Metin", ""))[:200], size=9, color="C9D1E0")
            p2 = cell.add_paragraph()
            add_run(
                p2,
                f"Risk: {row.get('Risk', '')} | Saldirganlik: %{float(row.get('Saldırganlık', 0)) * 100:.0f} | {row.get('Gerekçe', '')}",
                size=8,
                color="4A6080",
                italic=True,
            )

    # --- Footer and serialization --------------------------------------------
    doc.add_paragraph()
    footer_p = doc.add_paragraph()
    footer_p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    add_run(footer_p, "Sentinel AI - Dahili Kullanim - " + datetime.now().strftime("%Y"), size=8, color="2A3D55")

    buf = io.BytesIO()
    doc.save(buf)
    buf.seek(0)
    return buf
|
| 489 |
+
|
| 490 |
+
|
| 491 |
+
# Page header: hexagon logo, product title/subtitle, and the animated "ONLINE"
# status pill (all styled by the <style> block injected above).
st.markdown(
    """
<div class="sentinel-header">
<div class="sentinel-logo">⬡</div>
<div>
<div class="sentinel-title">Sentinel</div>
<div class="sentinel-sub">İçerik Moderasyon Sistemi</div>
</div>
<div class="status-pill"><span class="status-dot"></span>ONLINE</div>
</div>
""",
    unsafe_allow_html=True,
)
|
| 504 |
+
|
| 505 |
+
# Sidebar: configuration (pipeline language + API URL), pipeline cheat sheet,
# live system monitor, and the metrics captured during the last analysis.
with st.sidebar:
    st.markdown(
        """<div style="padding:8px 0 20px 0; border-bottom:1px solid #1e2d45; margin-bottom:20px;">
<div style="font-family:'IBM Plex Mono',monospace; font-size:11px; color:#4a6080; letter-spacing:1.5px; text-transform:uppercase; margin-bottom:16px;">Sistem Konfigürasyonu</div>
</div>""",
        unsafe_allow_html=True,
    )

    # Pipeline language selector; the value is sent to the API with each request.
    st.markdown(
        """<div style="font-family:'IBM Plex Mono',monospace; font-size:11px; color:#4a6080; text-transform:uppercase; letter-spacing:1px; margin-bottom:10px;">Platform Dili</div>""",
        unsafe_allow_html=True,
    )
    platform_dil = st.radio(
        "Platform dili",
        ["tr", "en"],
        format_func=lambda x: "Türkçe · TR Pipeline" if x == "tr" else "English · EN Pipeline",
        label_visibility="collapsed",
    )

    st.markdown("<br>", unsafe_allow_html=True)
    # Editable API endpoint, defaulting to the module-level API_URL constant.
    st.markdown(
        """<div style="font-family:'IBM Plex Mono',monospace; font-size:11px; color:#4a6080; text-transform:uppercase; letter-spacing:1px; margin-bottom:10px;">API Endpoint</div>""",
        unsafe_allow_html=True,
    )
    api_url = st.text_input("API", value=API_URL, label_visibility="collapsed")

    st.markdown("<br><br>", unsafe_allow_html=True)
    # Static cheat sheet describing the TR and EN moderation pipelines.
    st.markdown(
        """<div style="font-family:'IBM Plex Mono',monospace; font-size:11px; color:#2a3d55; line-height:1.8;">
TR PIPELINE<br><span style="color:#4a6289">──────────────</span><br>
<span style="color:#6f8fbf">▸</span> is_spam() evrensel filtre<br>
<span style="color:#6f8fbf">▸</span> Küfür listesi lookup<br>
<span style="color:#6f8fbf">▸</span> BERTurk offensive 42K<br>
<span style="color:#6f8fbf">▸</span> Detoxify multilingual<br><br>
EN PIPELINE<br><span style="color:#4a6289">──────────────</span><br>
<span style="color:#6f8fbf">▸</span> is_spam() evrensel filtre<br>
<span style="color:#6f8fbf">▸</span> Gibberish Detector<br>
<span style="color:#6f8fbf">▸</span> Detoxify original 6-label
</div>""",
        unsafe_allow_html=True,
    )

    st.markdown("---")
    st.markdown("### 🖥️ Sistem Monitörü")

    # Live host metrics; psutil is optional, GPU info best-effort via nvidia-smi.
    if psutil is None:
        st.warning("psutil yüklü değil. Kurulum: pip install psutil")
    else:
        cpu_load = psutil.cpu_percent(interval=0.2)
        ram = psutil.virtual_memory()
        ram_used_gb = ram.used / (1024**3)

        col1, col2 = st.columns(2)
        col1.metric("CPU Yükü", f"%{cpu_load:.0f}")
        col2.metric("RAM", f"{ram_used_gb:.1f} GB", f"%{ram.percent:.0f}", delta_color="inverse")

        gpu = get_gpu_info()
        if gpu:
            st.markdown(f"**GPU:** {gpu['name']}")
            col3, col4 = st.columns(2)
            col3.metric("GPU Yükü", f"%{gpu['load']}")
            col4.metric("GPU Isı", f"{gpu['temp']}°C")

            # Clamp the VRAM ratio so st.progress never receives a value outside [0, 1].
            vram_pct = 0.0
            if gpu["vram_total"] > 0:
                vram_pct = min(max(gpu["vram_used"] / gpu["vram_total"], 0.0), 1.0)
            st.write(f"VRAM: {gpu['vram_used']}MB / {gpu['vram_total']}MB")
            st.progress(vram_pct)
        else:
            st.warning("GPU bilgisi alınamadı (nvidia-smi erişimi yok).")

    st.markdown("---")
    # Latency of the most recent request (None until the first analysis runs).
    live_latency = st.session_state.get("last_latency_ms")
    if live_latency is None:
        st.info("🚀 **Model Latency:** N/A\n\n🛡️ **Sentinel v2.9 Active**")
    else:
        st.info(f"🚀 **Model Latency:** ~{live_latency:.0f}ms/req\n\n🛡️ **Sentinel v2.9 Active**")

    st.markdown("---")
    # Resource snapshot captured at the moment of the last analysis request.
    if st.session_state.get("last_metrics"):
        m = st.session_state["last_metrics"]
        st.markdown("### ⚡ Son İşlem Performansı")
        st.caption(f"Saat: {m['timestamp']} (İstek anındaki veriler)")

        col5, col6 = st.columns(2)
        col5.metric("İşlem CPU", f"%{m['cpu']}")
        col6.metric("İşlem RAM", f"%{m['ram_pct']}")

        col7, col8 = st.columns(2)
        col7.metric("GPU Yükü", f"%{m['gpu_load']}")
        col8.metric("VRAM", f"{m['vram_used']} MB")

        st.success("Analiz işlemi için performans verisi kaydedildi.")
    else:
        st.info("Performans verisi için analiz başlatın.")
|
| 600 |
+
|
| 601 |
+
|
| 602 |
+
tab1, tab2 = st.tabs([" Tek Metin Analizi ", " Toplu Analiz "])

# Tab 1: analyze a single text — POST it to the API, then render the verdict
# card, metric row, blacklist hits, signal bars, and per-model breakdown.
with tab1:
    st.markdown("<br>", unsafe_allow_html=True)
    user_input = st.text_area(
        "Analiz metni",
        height=120,
        placeholder="Analiz edilecek metni buraya yazın...",
        label_visibility="collapsed",
    )
    col_btn, col_info = st.columns([2, 5])
    with col_btn:
        analyze_btn = st.button("Analiz Et", use_container_width=True)
    with col_info:
        st.markdown(
            """<div style="padding:10px 0; font-family:'IBM Plex Mono',monospace; font-size:11px; color:#8ea7cb; line-height:1.8;">Spam → Dil → Küfür → Model → Karar</div>""",
            unsafe_allow_html=True,
        )

    if analyze_btn:
        if not user_input.strip():
            st.warning("Analiz için metin gerekli.")
        else:
            # Request phase: capture a local resource snapshot right after the call.
            with st.spinner(""):
                try:
                    t0 = time.time()
                    resp = requests.post(api_url, json={"text": user_input, "platform_dil": platform_dil}, timeout=30)
                    st.session_state["last_metrics"] = capture_process_metrics()
                    elapsed = (time.time() - t0) * 1000
                except requests.RequestException as e:
                    # st.stop() halts the script run here on connection failure.
                    st.error(f"API bağlantı hatası: {e}")
                    st.stop()

            if resp.status_code != 200:
                st.error(f"API {resp.status_code} döndü.")
                st.stop()

            # Unpack the API payload; latency falls back to the client-side measurement.
            r = resp.json()
            decision = r.get("decision", "—")
            reason = r.get("reason", "—")
            risk = r.get("risk_level", "None")
            lang = r.get("language", platform_dil).upper()
            cleaned = r.get("cleaned_text", "")
            details = r.get("details", {})
            latency = r.get("latency_ms", round(elapsed, 1))
            st.session_state["last_latency_ms"] = float(latency)
            # Backend-reported performance overrides the local snapshot when present.
            backend_perf = r.get("performance")
            if isinstance(backend_perf, dict):
                st.session_state["last_metrics"] = {
                    "cpu": backend_perf.get("cpu", 0),
                    "ram_pct": backend_perf.get("ram_pct", 0),
                    "vram_used": str(backend_perf.get("vram_used", 0)),
                    "gpu_load": str(backend_perf.get("gpu_load", 0)),
                    "timestamp": backend_perf.get("timestamp", time.strftime("%H:%M:%S")),
                }
            vcls = verdict_css_class(decision)
            vcolor = VERDICT_COLORS.get(risk, "#2ea84a")
            vicon = VERDICT_ICONS.get(risk, "✓")

            # Verdict card (color-coded by the verdict-* CSS classes).
            st.markdown(
                f"""<div class="verdict-card verdict-{vcls}">
<div class="verdict-label" style="color:{vcolor}">{vicon} {decision}
<span style="font-size:14px;color:#2a3d55;margin-left:12px;">[{lang}]</span>
</div>
<div class="verdict-reason">{reason}</div>
</div>""",
                unsafe_allow_html=True,
            )

            # Metric row: risk level, latency bucket, pipeline, normalized text.
            lat_class = "low" if latency < 200 else ("med" if latency < 500 else "high")
            risk_class = {"High": "high", "Medium": "med", "Low": "med", "None": "low", "CRITICAL": "high"}.get(risk, "low")
            st.markdown(
                f"""<div class="metric-row">
<div class="metric-card"><div class="metric-label">Risk Seviyesi</div><div class="metric-value {risk_class}">{risk}</div></div>
<div class="metric-card"><div class="metric-label">Gecikme</div><div class="metric-value {lat_class}">{latency:.0f} ms</div></div>
<div class="metric-card"><div class="metric-label">Pipeline</div><div class="metric-value" style="font-size:18px;">{lang}</div></div>
<div class="metric-card" style="flex:2"><div class="metric-label">Normalize Edilen Metin</div>
<div style="font-family:'IBM Plex Mono',monospace;font-size:13px;color:#6a8cb0;margin-top:6px;word-break:break-all;">{cleaned}</div>
</div>
</div>""",
                unsafe_allow_html=True,
            )

            # Blacklist matches: red tags for profanity hits, amber for insult hits.
            hits = details.get("hits", []) or []
            insult_hits = details.get("insult_hits", []) or []
            if hits or insult_hits:
                tags = "".join(f'<span class="hits-tag">⚡ {h}</span>' for h in hits)
                tags += "".join(
                    f'<span class="hits-tag" style="color:#d4a017;border-color:#5c3d08;background:#1a1002">⚠ {h}</span>'
                    for h in insult_hits
                )
                st.markdown(
                    f"""<div style="margin-bottom:16px;">
<div style="font-family:'IBM Plex Mono',monospace;font-size:11px;color:#4a6080;text-transform:uppercase;letter-spacing:1px;margin-bottom:8px;">Kara Liste Eşleşmeleri</div>
{tags}
</div>""",
                    unsafe_allow_html=True,
                )

            col_scores, col_models = st.columns([1, 1.2])
            with col_scores:
                st.markdown(
                    """<div style="font-family:'IBM Plex Mono',monospace;font-size:11px;color:#4a6080;text-transform:uppercase;letter-spacing:1px;margin-bottom:14px;">Sinyal Analizi</div>""",
                    unsafe_allow_html=True,
                )
                # TR pipeline exposes 3 aggregate signals; EN exposes 6 Detoxify labels.
                bars = ""
                if lang == "TR":
                    off = details.get("off_score", 0.0)
                    ia = details.get("detox", {}).get("identity_attack", 0.0)
                    thr = details.get("threat", 0.0)
                    bars += score_bar("Saldırganlık", off, risk_color(off))
                    bars += score_bar("Nefret (identity_attack)", ia, risk_color(ia))
                    bars += score_bar("Tehdit", thr, risk_color(thr))
                else:
                    dtx = details.get("detox", {})
                    for key, lbl in [
                        ("toxicity", "Toxicity"),
                        ("threat", "Threat"),
                        ("insult", "Insult"),
                        ("identity_attack", "Identity Attack"),
                        ("severe_toxicity", "Severe Toxicity"),
                        ("obscene", "Obscene"),
                    ]:
                        v = dtx.get(key, 0.0)
                        bars += score_bar(lbl, v, risk_color(v))
                st.markdown(bars, unsafe_allow_html=True)

            with col_models:
                st.markdown(
                    """<div style="font-family:'IBM Plex Mono',monospace;font-size:11px;color:#4a6080;text-transform:uppercase;letter-spacing:1px;margin-bottom:14px;">Model Kaynak Analizi (Source)</div>""",
                    unsafe_allow_html=True,
                )
                # (name, decision label, score) per contributing model, by pipeline.
                rows_html = ""
                if lang == "TR":
                    m_list = [
                        ("BERTurk Offensive", "N/A", details.get("off_score", 0.0)),
                        ("Detoxify (TR)", "Analyzed", details.get("detox", {}).get("toxicity", 0.0)),
                    ]
                else:
                    m_list = [
                        ("Detoxify (Original)", "Analyzed", details.get("detox", {}).get("toxicity", 0.0)),
                        (
                            "Gibberish Detector",
                            details.get("gibberish_label", "N/A"),
                            details.get("gibberish_score", 0.0) or 0.0,
                        ),
                    ]
                for m_name, m_dec, m_score in m_list:
                    # Scores may arrive as strings or None; coerce defensively.
                    try:
                        m_score = float(m_score)
                    except (TypeError, ValueError):
                        m_score = 0.0
                    c = risk_color(m_score)
                    rows_html += f"""<div style="background:#0d1220;border:1px solid #1e2d45;border-radius:8px;padding:10px;margin-bottom:8px;">
<div style="display:flex;justify-content:space-between;align-items:center;gap:10px;">
<span style="font-size:12px;font-weight:600;color:#e8eef8;">{m_name}</span>
<span style="font-size:10px;color:{c};background:{c}22;padding:2px 8px;border-radius:4px;border:1px solid {c}44;white-space:nowrap;">
{m_dec} (%{m_score * 100:.1f})
</span>
</div>
</div>"""
                st.markdown(rows_html, unsafe_allow_html=True)
|
| 764 |
+
|
| 765 |
+
with tab2:
|
| 766 |
+
st.markdown("<br>", unsafe_allow_html=True)
|
| 767 |
+
st.markdown(
|
| 768 |
+
"""<div style="font-family:'IBM Plex Mono',monospace;font-size:11px;color:#4a6080;text-transform:uppercase;letter-spacing:1px;margin-bottom:16px;">Veri Seti Yükle</div>""",
|
| 769 |
+
unsafe_allow_html=True,
|
| 770 |
+
)
|
| 771 |
+
|
| 772 |
+
uploaded = st.file_uploader("Dosya", type=["csv", "xlsx"], label_visibility="collapsed")
|
| 773 |
+
|
| 774 |
+
if uploaded:
|
| 775 |
+
df = pd.read_csv(uploaded) if uploaded.name.endswith(".csv") else pd.read_excel(uploaded)
|
| 776 |
+
if len(df) == 0:
|
| 777 |
+
st.warning("Dosya boş.")
|
| 778 |
+
st.stop()
|
| 779 |
+
|
| 780 |
+
st.markdown(
|
| 781 |
+
f"""<div style="font-family:'IBM Plex Mono',monospace;font-size:12px;color:#4a6080;margin-bottom:16px;">{len(df)} satır yüklendi</div>""",
|
| 782 |
+
unsafe_allow_html=True,
|
| 783 |
+
)
|
| 784 |
+
col_name = st.selectbox("Analiz sütunu:", df.columns)
|
| 785 |
+
|
| 786 |
+
if st.button("Toplu Analizi Başlat", use_container_width=False):
|
| 787 |
+
progress = st.progress(0)
|
| 788 |
+
status_text = st.empty()
|
| 789 |
+
results = []
|
| 790 |
+
t0 = time.time()
|
| 791 |
+
|
| 792 |
+
for i, text in enumerate(df[col_name]):
|
| 793 |
+
try:
|
| 794 |
+
resp = requests.post(api_url, json={"text": str(text), "platform_dil": platform_dil}, timeout=30)
|
| 795 |
+
r = resp.json() if resp.status_code == 200 else {}
|
| 796 |
+
except Exception:
|
| 797 |
+
r = {}
|
| 798 |
+
|
| 799 |
+
details = r.get("details", {})
|
| 800 |
+
hits_all = list(details.get("hits", []) or []) + list(details.get("insult_hits", []) or [])
|
| 801 |
+
results.append(
|
| 802 |
+
{
|
| 803 |
+
"Metin": str(text),
|
| 804 |
+
"Normalize": r.get("cleaned_text", ""),
|
| 805 |
+
"Dil": r.get("language", "—").upper(),
|
| 806 |
+
"Karar": r.get("decision", "—"),
|
| 807 |
+
"Risk": r.get("risk_level", "—"),
|
| 808 |
+
"Gerekçe": r.get("reason", "—"),
|
| 809 |
+
"Saldırganlık": round(float(details.get("off_score", 0.0)), 4),
|
| 810 |
+
"Nefret": round(float(details.get("detox", {}).get("identity_attack", 0.0)), 4),
|
| 811 |
+
"Tehdit": round(float(details.get("threat", details.get("detox", {}).get("threat", 0.0))), 4),
|
| 812 |
+
"Hits": ", ".join(hits_all) if hits_all else "",
|
| 813 |
+
}
|
| 814 |
+
)
|
| 815 |
+
progress.progress((i + 1) / len(df))
|
| 816 |
+
status_text.markdown(
|
| 817 |
+
f"""<span style="font-family:'IBM Plex Mono',monospace;font-size:12px;color:#4a6080;">{i + 1} / {len(df)} işlendi</span>""",
|
| 818 |
+
unsafe_allow_html=True,
|
| 819 |
+
)
|
| 820 |
+
|
| 821 |
+
elapsed = time.time() - t0
|
| 822 |
+
res_df = pd.DataFrame(results)
|
| 823 |
+
if len(df) > 0:
|
| 824 |
+
st.session_state["last_latency_ms"] = (elapsed * 1000.0) / len(df)
|
| 825 |
+
status_text.empty()
|
| 826 |
+
progress.empty()
|
| 827 |
+
|
| 828 |
+
st.markdown(
|
| 829 |
+
f"""<div style="font-family:'IBM Plex Mono',monospace;font-size:12px;color:#2ea84a;margin:12px 0;">
|
| 830 |
+
{len(df)} satır {elapsed:.1f}s içinde analiz edildi</div>""",
|
| 831 |
+
unsafe_allow_html=True,
|
| 832 |
+
)
|
| 833 |
+
|
| 834 |
+
counts = res_df["Karar"].value_counts()
|
| 835 |
+
karar_colors_ui = {
|
| 836 |
+
"TEMIZ": "#2ea84a",
|
| 837 |
+
"CLEAR": "#2ea84a",
|
| 838 |
+
"KÜFÜR": "#d4a017",
|
| 839 |
+
"KUFUR": "#d4a017",
|
| 840 |
+
"PROFANITY": "#d4a017",
|
| 841 |
+
"SALDIRGAN": "#d4a017",
|
| 842 |
+
"TOXIC": "#d4a017",
|
| 843 |
+
"NEFRET": "#e07020",
|
| 844 |
+
"IDENTITY": "#e07020",
|
| 845 |
+
"İNCELEME": "#3a7bd4",
|
| 846 |
+
"INCELEME": "#3a7bd4",
|
| 847 |
+
"REVIEW": "#3a7bd4",
|
| 848 |
+
"SPAM": "#8030d4",
|
| 849 |
+
"GİBBERİSH": "#8030d4",
|
| 850 |
+
}
|
| 851 |
+
cols_summary = st.columns(min(len(counts), 6))
|
| 852 |
+
for i, (karar, cnt) in enumerate(counts.items()):
|
| 853 |
+
if i < 6:
|
| 854 |
+
vc = next((v for k, v in karar_colors_ui.items() if k in karar.upper()), "#888888")
|
| 855 |
+
with cols_summary[i]:
|
| 856 |
+
st.markdown(
|
| 857 |
+
f"""<div class="metric-card" style="text-align:center;">
|
| 858 |
+
<div class="summary-count" style="color:{vc}">{cnt}</div>
|
| 859 |
+
<div class="summary-label">{karar[:18]}</div>
|
| 860 |
+
</div>""",
|
| 861 |
+
unsafe_allow_html=True,
|
| 862 |
+
)
|
| 863 |
+
|
| 864 |
+
st.markdown("<br>", unsafe_allow_html=True)
|
| 865 |
+
|
| 866 |
+
st.markdown(
|
| 867 |
+
"""<div style="font-family:'IBM Plex Mono',monospace;font-size:11px;color:#4a6080;text-transform:uppercase;letter-spacing:1px;margin-bottom:12px;">Detaylı Analiz Tablosu</div>""",
|
| 868 |
+
unsafe_allow_html=True,
|
| 869 |
+
)
|
| 870 |
+
|
| 871 |
+
table_rows = ""
|
| 872 |
+
for idx, row in res_df.iterrows():
|
| 873 |
+
risk_str = str(row.get("Risk", "")).upper()
|
| 874 |
+
row_bg = {
|
| 875 |
+
"CRITICAL": "#1f0c0c",
|
| 876 |
+
"HIGH": "#1a0e03",
|
| 877 |
+
"MEDIUM": "#141002",
|
| 878 |
+
"LOW": "#07091a",
|
| 879 |
+
"NONE": "#050f07",
|
| 880 |
+
}.get(risk_str, "#0d1220")
|
| 881 |
+
|
| 882 |
+
karar_str = str(row.get("Karar", ""))
|
| 883 |
+
kc = next((v for k, v in karar_colors_ui.items() if k in karar_str.upper()), "#888888")
|
| 884 |
+
|
| 885 |
+
sal = float(row.get("Saldırganlık", 0.0))
|
| 886 |
+
nef = float(row.get("Nefret", 0.0))
|
| 887 |
+
thr = float(row.get("Tehdit", 0.0))
|
| 888 |
+
|
| 889 |
+
hits_str = str(row.get("Hits", "")).strip()
|
| 890 |
+
hits_html = ""
|
| 891 |
+
if hits_str:
|
| 892 |
+
for h in hits_str.split(","):
|
| 893 |
+
h = h.strip()
|
| 894 |
+
if h:
|
| 895 |
+
hits_html += f'<span class="hits-tag">{h}</span>'
|
| 896 |
+
else:
|
| 897 |
+
hits_html = '<span style="color:#2a3d55;font-size:10px;">—</span>'
|
| 898 |
+
|
| 899 |
+
metin_full = str(row.get("Metin", ""))
|
| 900 |
+
metin_short = metin_full[:60] + "..." if len(metin_full) > 60 else metin_full
|
| 901 |
+
normalize = str(row.get("Normalize", ""))[:50]
|
| 902 |
+
|
| 903 |
+
table_rows += f"""
|
| 904 |
+
<tr style="background:{row_bg}">
|
| 905 |
+
<td style="color:#2a3d55;text-align:center;font-size:11px;">{idx + 1}</td>
|
| 906 |
+
<td class="metin-cell" title="{metin_full}">{metin_short}</td>
|
| 907 |
+
<td style="color:#4a6080;font-size:10px;font-style:italic;">{normalize}</td>
|
| 908 |
+
<td class="karar-cell" style="color:{kc}">{karar_str[:22]}</td>
|
| 909 |
+
<td>{badge_html(risk_str)}</td>
|
| 910 |
+
<td class="skor-cell">{inline_bar_html(sal, risk_color(sal))}</td>
|
| 911 |
+
<td class="skor-cell">{inline_bar_html(nef, risk_color(nef))}</td>
|
| 912 |
+
<td class="skor-cell">{inline_bar_html(thr, risk_color(thr))}</td>
|
| 913 |
+
<td>{hits_html}</td>
|
| 914 |
+
<td style="color:#4a6080;font-size:10px;max-width:180px;">{str(row.get("Gerekçe", ""))[:60]}</td>
|
| 915 |
+
</tr>"""
|
| 916 |
+
|
| 917 |
+
st.markdown(
|
| 918 |
+
f"""
|
| 919 |
+
<div style="overflow-x:auto;overflow-y:auto;max-height:520px;border:1px solid #1e2d45;border-radius:10px;">
|
| 920 |
+
<table class="report-table">
|
| 921 |
+
<thead>
|
| 922 |
+
<tr>
|
| 923 |
+
<th>#</th><th>Metin</th><th>Normalize</th><th>Karar</th>
|
| 924 |
+
<th>Risk</th><th>Saldırganlık</th><th>Nefret</th><th>Tehdit</th>
|
| 925 |
+
<th>Hits</th><th>Gerekçe</th>
|
| 926 |
+
</tr>
|
| 927 |
+
</thead>
|
| 928 |
+
<tbody>{table_rows}</tbody>
|
| 929 |
+
</table>
|
| 930 |
+
</div>""",
|
| 931 |
+
unsafe_allow_html=True,
|
| 932 |
+
)
|
| 933 |
+
|
| 934 |
+
st.markdown("<br>", unsafe_allow_html=True)
|
| 935 |
+
|
| 936 |
+
col_chart, col_stats = st.columns([1, 1])
|
| 937 |
+
with col_chart:
|
| 938 |
+
st.markdown(
|
| 939 |
+
"""<div style="font-family:'IBM Plex Mono',monospace;font-size:11px;color:#4a6080;text-transform:uppercase;letter-spacing:1px;margin-bottom:10px;">Dağılım</div>""",
|
| 940 |
+
unsafe_allow_html=True,
|
| 941 |
+
)
|
| 942 |
+
st.bar_chart(counts)
|
| 943 |
+
|
| 944 |
+
with col_stats:
|
| 945 |
+
st.markdown(
|
| 946 |
+
"""<div style="font-family:'IBM Plex Mono',monospace;font-size:11px;color:#4a6080;text-transform:uppercase;letter-spacing:1px;margin-bottom:10px;">İstatistikler</div>""",
|
| 947 |
+
unsafe_allow_html=True,
|
| 948 |
+
)
|
| 949 |
+
total = len(res_df)
|
| 950 |
+
zararli = total - len(res_df[res_df["Karar"].str.contains("TEMİZ|CLEAR", na=False)])
|
| 951 |
+
st.markdown(
|
| 952 |
+
f"""
|
| 953 |
+
<div style="font-family:'IBM Plex Mono',monospace;font-size:13px;line-height:2.2;color:#8a9bc0;">
|
| 954 |
+
<span style="color:#4a6080">Toplam kayıt </span> {total}<br>
|
| 955 |
+
<span style="color:#4a6080">Zararlı içerik</span> <span style="color:#e03030">{zararli}</span> (%{zararli / total * 100:.1f})<br>
|
| 956 |
+
<span style="color:#4a6080">Ortalama süre </span> {elapsed / total * 1000:.0f}ms / satır<br>
|
| 957 |
+
<span style="color:#4a6080">Hits bulundu </span> {len(res_df[res_df['Hits'].str.len() > 0])} kayıt<br>
|
| 958 |
+
<span style="color:#4a6080">İnceleme kuyruğu</span> {len(res_df[res_df['Karar'].str.contains('İNCELEME|INCELEME', na=False)])} içerik
|
| 959 |
+
</div>""",
|
| 960 |
+
unsafe_allow_html=True,
|
| 961 |
+
)
|
| 962 |
+
|
| 963 |
+
st.markdown("<br>", unsafe_allow_html=True)
|
| 964 |
+
|
| 965 |
+
inceleme = res_df[res_df["Karar"].str.contains("İNCELEME|INCELEME|REVIEW", na=False)]
|
| 966 |
+
if len(inceleme):
|
| 967 |
+
st.markdown(
|
| 968 |
+
f"""<div style="font-family:'IBM Plex Mono',monospace;font-size:11px;color:#3a7bd4;text-transform:uppercase;letter-spacing:1px;margin-bottom:12px;">İnceleme Kuyruğu — {len(inceleme)} İçerik</div>""",
|
| 969 |
+
unsafe_allow_html=True,
|
| 970 |
+
)
|
| 971 |
+
for i, (_, row) in enumerate(inceleme.iterrows()):
|
| 972 |
+
sal = float(row.get("Saldırganlık", 0.0))
|
| 973 |
+
st.markdown(
|
| 974 |
+
f"""<div class="queue-card">
|
| 975 |
+
<div class="queue-index">{i + 1:02d}</div>
|
| 976 |
+
<div>
|
| 977 |
+
<div class="queue-text">{str(row.get('Metin', ''))}</div>
|
| 978 |
+
<div class="queue-meta">
|
| 979 |
+
Risk: {row.get('Risk', '')} |
|
| 980 |
+
Saldırganlık: %{sal * 100:.0f} |
|
| 981 |
+
{row.get('Gerekçe', '')}
|
| 982 |
+
</div>
|
| 983 |
+
</div>
|
| 984 |
+
</div>""",
|
| 985 |
+
unsafe_allow_html=True,
|
| 986 |
+
)
|
| 987 |
+
|
| 988 |
+
st.markdown("<br>", unsafe_allow_html=True)
|
| 989 |
+
|
| 990 |
+
st.markdown(
|
| 991 |
+
"""<div style="font-family:'IBM Plex Mono',monospace;font-size:11px;color:#4a6080;text-transform:uppercase;letter-spacing:1px;margin-bottom:12px;">Raporu İndir</div>""",
|
| 992 |
+
unsafe_allow_html=True,
|
| 993 |
+
)
|
| 994 |
+
col_dl1, col_dl2, _ = st.columns([1, 1, 4])
|
| 995 |
+
|
| 996 |
+
with col_dl1:
|
| 997 |
+
csv_bytes = res_df.to_csv(index=False).encode("utf-8")
|
| 998 |
+
st.download_button(
|
| 999 |
+
"⬇ CSV",
|
| 1000 |
+
data=csv_bytes,
|
| 1001 |
+
file_name=f"sentinel_raporu_{datetime.now().strftime('%Y%m%d_%H%M')}.csv",
|
| 1002 |
+
mime="text/csv",
|
| 1003 |
+
use_container_width=True,
|
| 1004 |
+
)
|
| 1005 |
+
|
| 1006 |
+
with col_dl2:
|
| 1007 |
+
docx_buf = generate_docx_report(res_df, elapsed, platform_dil)
|
| 1008 |
+
if docx_buf:
|
| 1009 |
+
st.download_button(
|
| 1010 |
+
"⬇ DOCX",
|
| 1011 |
+
data=docx_buf,
|
| 1012 |
+
file_name=f"sentinel_raporu_{datetime.now().strftime('%Y%m%d_%H%M')}.docx",
|
| 1013 |
+
mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
| 1014 |
+
use_container_width=True,
|
| 1015 |
+
)
|
| 1016 |
+
else:
|
| 1017 |
+
st.warning("python-docx yüklü değil: pip install python-docx")
|
app/__init__.py
ADDED
|
File without changes
|
app/api/__init__.py
ADDED
|
File without changes
|
app/api/endpoints.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import subprocess
|
| 2 |
+
import time
|
| 3 |
+
from typing import Optional
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
from fastapi import APIRouter, HTTPException
|
| 7 |
+
from pydantic import BaseModel
|
| 8 |
+
|
| 9 |
+
try:
|
| 10 |
+
import psutil
|
| 11 |
+
except ImportError:
|
| 12 |
+
psutil = None
|
| 13 |
+
|
| 14 |
+
from app.services.cache_manager import get_cache_counts, load_blacklist_to_ram
|
| 15 |
+
from app.services.moderation_service import run_moderation
|
| 16 |
+
|
| 17 |
+
router = APIRouter()
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def get_gpu_info():
    """Best-effort snapshot of GPU utilization and VRAM.

    Tries ``nvidia-smi`` first (the only source that reports live
    utilization %). If the binary is missing, errors out, or hangs,
    falls back to torch's CUDA allocator statistics.

    Returns:
        dict with keys ``load`` (int % or None), ``vram_used`` (MiB),
        ``vram_total`` (MiB) — or None when no CUDA device is usable.
    """
    try:
        raw = subprocess.check_output(
            [
                "nvidia-smi",
                "--query-gpu=utilization.gpu,memory.used,memory.total",
                "--format=csv,noheader,nounits",
            ],
            encoding="utf-8",
            stderr=subprocess.STDOUT,
            # Fix: without a timeout a wedged nvidia-smi would stall every
            # request that captures metrics. TimeoutExpired is caught below.
            timeout=2,
        )
        # Only the first GPU line is used.
        util, mem_used, mem_total = [p.strip() for p in raw.strip().splitlines()[0].split(",", maxsplit=2)]
        return {
            "load": int(float(util)),
            "vram_used": int(float(mem_used)),
            "vram_total": int(float(mem_total)),
        }
    except Exception:
        # Fallback: torch allocator stats (no utilization % available).
        if not torch.cuda.is_available():
            return None
        allocated = torch.cuda.memory_allocated(0) / (1024 ** 2)
        total = torch.cuda.get_device_properties(0).total_memory / (1024 ** 2)
        return {
            "load": None,
            "vram_used": int(round(allocated)),
            "vram_total": int(round(total)),
        }
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def capture_process_metrics():
    """Collect host CPU/RAM usage (when psutil is installed) plus GPU stats.

    Returns a flat dict suitable for embedding in API responses; missing
    sensors are reported as None (percentages) or 0 (VRAM megabytes).
    """
    if psutil is not None:
        cpu_load = round(psutil.cpu_percent(interval=0.05), 1)
        ram_pct = round(psutil.virtual_memory().percent, 1)
    else:
        # psutil missing on this host — report "unknown" rather than failing.
        cpu_load = None
        ram_pct = None

    gpu_stats = get_gpu_info() or {}
    return {
        "cpu": cpu_load,
        "ram_pct": ram_pct,
        "gpu_load": gpu_stats.get("load"),
        "vram_used": gpu_stats.get("vram_used", 0),
        "vram_total": gpu_stats.get("vram_total", 0),
        "timestamp": time.strftime("%H:%M:%S"),
    }
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
class ModerationInput(BaseModel):
    # Request body schema for POST /analyze.
    text: str  # raw user content to moderate (must be non-blank; enforced in the endpoint)
    platform_dil: Optional[str] = "tr"  # platform language hint: "tr" (default) or "en"
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
@router.get("/vram-status")
def get_vram_status():
    """Report CUDA memory usage for GPU 0, or a friendly message on CPU-only hosts."""
    if not torch.cuda.is_available():
        return {
            "cuda_available": False,
            "message": "CUDA aktif değil, GPU belleği ölçülemedi.",
        }

    mib = 1024 ** 2  # bytes per mebibyte
    allocated_mb = torch.cuda.memory_allocated(0) / mib
    reserved_mb = torch.cuda.memory_reserved(0) / mib
    total_mb = torch.cuda.get_device_properties(0).total_memory / mib

    return {
        "cuda_available": True,
        "gpu_name": torch.cuda.get_device_name(0),
        "allocated_mb": round(allocated_mb, 2),
        "reserved_mb": round(reserved_mb, 2),
        "total_mb": round(total_mb, 2),
        # "Free" here means memory not yet reserved by the torch allocator.
        "free_estimate_mb": round(total_mb - reserved_mb, 2),
    }
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
@router.get("/refresh-cache")
def refresh_cache():
    """Re-pull the blacklist from Supabase into RAM and report the new sizes."""
    load_blacklist_to_ram()
    counts = get_cache_counts()
    return {
        "status": "success",
        "message": "Kara liste güncellendi.",
        "tr_count": counts[0],
        "en_count": counts[1],
    }
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
@router.post("/analyze")
async def analyze(input_data: ModerationInput):
    """Moderate a single text, returning the verdict plus latency/host metrics.

    Raises:
        HTTPException(400): when the ``text`` field is empty or whitespace-only.
    """
    text = input_data.text
    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="text alanı boş olamaz")

    started = time.time()
    decision, reason, risk, lang, cleaned, details = run_moderation(
        text,
        input_data.platform_dil or "tr",
    )
    latency_ms = round((time.time() - started) * 1000, 2)

    # Attach the moderation latency to the host-metrics snapshot.
    performance = capture_process_metrics()
    performance["latency_ms"] = latency_ms

    return {
        "text": text,
        "cleaned_text": cleaned,
        "decision": decision,
        "reason": reason,
        "risk_level": risk,
        "language": lang,
        "details": details,
        "latency_ms": latency_ms,
        "performance": performance,
    }
|
app/core/__init__.py
ADDED
|
File without changes
|
app/core/config.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
from dotenv import load_dotenv

# Populate os.environ from a local .env file (no-op when the file is absent).
load_dotenv()

# FastAPI application metadata.
APP_TITLE = "🛡️ Sentinel AI Moderasyon API"
APP_DESCRIPTION = "Supabase tabanlı, yüksek performanslı moderasyon motoru."
APP_VERSION = "2.5.0"

# Supabase credentials read from the environment; empty strings when unset
# (the client factory treats empty values as "not configured").
SUPABASE_URL = os.getenv("SUPABASE_URL", "")
SUPABASE_KEY = os.getenv("SUPABASE_KEY", "")

# Local paths of the pre-downloaded Turkish classifier checkpoints.
TR_HATE_MODEL_PATH = "./models_cache/bertturk-hate-speech"
TR_OFF_MODEL_PATH = "./models_cache/bertturk-offensive-42k"
|
app/db/__init__.py
ADDED
|
File without changes
|
app/db/supabase_client.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from supabase import create_client
|
| 2 |
+
|
| 3 |
+
from app.core.config import SUPABASE_KEY, SUPABASE_URL
|
| 4 |
+
|
| 5 |
+
_supabase = None
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def get_supabase_client():
    """Return a process-wide Supabase client, creating it lazily.

    Returns None (and logs a warning) when credentials are missing or the
    client cannot be constructed; a later call will retry.
    """
    global _supabase

    if _supabase is None:
        if SUPABASE_URL and SUPABASE_KEY:
            try:
                _supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
            except Exception as exc:
                print(f"⚠️ Supabase client oluşturulamadı: {exc}")
                _supabase = None
        else:
            # Credentials absent — nothing cached, so this warns on every call.
            print("⚠️ Supabase bilgileri .env içinde bulunamadı!")

    return _supabase
|
app/ml/__init__.py
ADDED
|
File without changes
|
app/ml/model_loader.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from detoxify import Detoxify
|
| 3 |
+
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
|
| 4 |
+
|
| 5 |
+
from app.core.config import TR_OFF_MODEL_PATH
|
| 6 |
+
|
| 7 |
+
_STATE = {
|
| 8 |
+
"T_O": None,
|
| 9 |
+
"M_O": None,
|
| 10 |
+
"GB_PIPE": None,
|
| 11 |
+
"D_EN": None,
|
| 12 |
+
"D_MULTI": None,
|
| 13 |
+
"TORCH_DEVICE": None,
|
| 14 |
+
}
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def load_system():
    """Load every ML component once and memoize the set in ``_STATE``.

    Components: the Turkish offensive-language classifier (tokenizer +
    model, moved to GPU when available), an optional gibberish-detection
    pipeline (None if it fails to load), and two Detoxify predictors.
    """
    # Already initialized — the tokenizer doubles as the "loaded" flag.
    if _STATE["T_O"] is not None:
        return _STATE

    use_cuda = torch.cuda.is_available()
    pipeline_device = 0 if use_cuda else -1  # transformers pipeline device id
    device = torch.device("cuda" if use_cuda else "cpu")

    tok = AutoTokenizer.from_pretrained(TR_OFF_MODEL_PATH)
    clf = AutoModelForSequenceClassification.from_pretrained(TR_OFF_MODEL_PATH).to(device)
    clf.eval()

    # Gibberish detection is best-effort: the service works without it.
    try:
        gb_pipe = pipeline(
            "text-classification",
            model="madhurjindal/autonlp-Gibberish-Detector-492513457",
            device=pipeline_device,
        )
    except Exception:
        gb_pipe = None

    detox_en = Detoxify("original")
    detox_multi = Detoxify("multilingual")

    _STATE.update(
        {
            "T_O": tok,
            "M_O": clf,
            "GB_PIPE": gb_pipe,
            "D_EN": detox_en,
            "D_MULTI": detox_multi,
            "TORCH_DEVICE": device,
        }
    )

    return _STATE
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def get_model_state():
    """Public accessor for the model registry; triggers lazy loading."""
    state = load_system()
    return state
|
app/services/__init__.py
ADDED
|
File without changes
|
app/services/cache_manager.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.db.supabase_client import get_supabase_client
|
| 2 |
+
|
| 3 |
+
CACHE_KUFUR_DICT_TR = {}
|
| 4 |
+
CACHE_KUFUR_DICT_EN = {}
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def load_blacklist_to_ram():
    """Supabase limitlerini (1000 satır) aşan sayfalama destekli loader."""
    global CACHE_KUFUR_DICT_TR, CACHE_KUFUR_DICT_EN

    tr_words = {}
    en_words = {}

    client = get_supabase_client()
    if client is None:
        print("⚠️ Supabase bağlantısı yok!")
        return

    try:
        print("🌐 Supabase'den tüm liste çekiliyor...")
        rows = []
        page_size = 1000
        offset = 0

        # Page through the table: Supabase caps a single select at 1000 rows.
        while True:
            response = (
                client.table("blacklist")
                .select("word, language, category")
                .range(offset, offset + page_size - 1)
                .execute()
            )
            page = response.data or []
            rows.extend(page)
            if len(page) < page_size:
                break  # short page ⇒ last page
            offset += page_size

        print(f"📊 Toplam çekilen satır: {len(rows)}")

        seen_langs = set()
        for entry in rows:
            lang = str(entry.get("language", "")).lower().strip()
            term = str(entry.get("word", "")).lower().strip()
            category = str(entry.get("category", "insult")).lower().strip() or "insult"
            if not term:
                continue
            seen_langs.add(lang)
            if lang == "tr":
                tr_words[term] = category
            elif lang == "en":
                en_words[term] = category

        # Swap the caches in atomically (build fully, then assign).
        CACHE_KUFUR_DICT_TR = tr_words
        CACHE_KUFUR_DICT_EN = en_words

        print(f"🔍 Veritabanındaki diller: {seen_langs}")
        print(f"✅ RAM Hazır: {len(tr_words)} TR, {len(en_words)} EN kelime.")
    except Exception as exc:
        print(f"❌ Cache Hatası: {exc}")
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def get_blacklist_for_language(language: str):
    """Return the in-RAM blacklist dict: TR cache for "tr", EN cache otherwise."""
    if language == "tr":
        return CACHE_KUFUR_DICT_TR
    return CACHE_KUFUR_DICT_EN
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def get_cache_counts():
    """Return the (tr_count, en_count) sizes of the cached blacklists."""
    tr_size = len(CACHE_KUFUR_DICT_TR)
    en_size = len(CACHE_KUFUR_DICT_EN)
    return tr_size, en_size
|
app/services/moderation_service.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
|
| 5 |
+
from app.ml.model_loader import get_model_state
|
| 6 |
+
from app.services.cache_manager import get_blacklist_for_language, get_cache_counts, load_blacklist_to_ram
|
| 7 |
+
from app.utils.text_utils import clean_text_nfkc, is_spam
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def _ensure_runtime_ready():
    """Ensure models are loaded and warm the blacklist cache if it is empty."""
    state = get_model_state()
    if get_cache_counts() == (0, 0):
        # Both caches empty ⇒ first request (or Supabase was unreachable).
        load_blacklist_to_ram()
    return state
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def calculate_verdict(hits, ai_scores):
    """Combine blacklist hits and AI scores into a moderation verdict.

    Args:
        hits: blacklist words found in the text; any hit is an instant CRITICAL.
        ai_scores: dict with optional "off_score" and "detox_toxicity" floats.

    Returns:
        dict with "decision", "risk_level", "reason".
    """
    if hits:
        return {
            "decision": "🚨 KÜFÜR / PROFANITY",
            "risk_level": "CRITICAL",
            "reason": f"Sözlük eşleşmesi: {', '.join(hits)}",
        }

    # The stricter of the two model scores drives the banding below.
    peak = max(ai_scores.get("off_score", 0.0), ai_scores.get("detox_toxicity", 0.0))

    if peak > 0.80:
        decision, risk, reason = (
            "🟡 SALDIRGAN / TOXIC",
            "MEDIUM",
            "Yapay zeka yüksek derecede saldırganlık/hakaret algıladı.",
        )
    elif peak > 0.55:
        # Grey zone (0.55, 0.80]: route to a human moderator.
        decision, risk, reason = (
            "🔵 İNCELEME GEREKLİ",
            "LOW",
            "Gri alan tespiti; manuel moderatör onayı önerilir.",
        )
    else:
        decision, risk, reason = ("✅ TEMİZ", "NONE", "İçerik güvenli.")

    return {"decision": decision, "risk_level": risk, "reason": reason}
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def run_moderation(text: str, platform_dil: str = "tr"):
    """Moderate *text* end-to-end.

    Pipeline: normalize → heuristic spam gate → blacklist match →
    language-specific model scoring → verdict banding.

    Args:
        text: raw user content.
        platform_dil: "en" for the English path; anything else is Turkish.

    Returns:
        (decision, reason, risk_level, lang, cleaned_text, details) tuple.
    """
    # Fix: this mapping was duplicated verbatim in both language branches;
    # one definition keeps the two paths in sync.
    action_map = {
        "CRITICAL": "CENSOR",
        "HIGH": "WARN",
        "MEDIUM": "MONITOR",
        "LOW": "MONITOR",
        "NONE": "ALLOW",
    }

    state = _ensure_runtime_ready()

    temiz = clean_text_nfkc(text)
    dil = "en" if platform_dil == "en" else "tr"
    # Letters/digits/whitespace only — used for blacklist matching.
    pure_text = re.sub(r"[^a-zA-ZçğıöşüÇĞİÖŞÜ0-9\s]", "", temiz).lower()
    words_in_pure_text = set(pure_text.split())

    # Cheap heuristic spam gate before any model inference.
    if is_spam(temiz, dil):
        return (
            "🗑️ SPAM/GİBBERİSH",
            "Anlamsız veya tekrarlı içerik.",
            "LOW",
            dil,
            temiz,
            {"action": "MONITOR", "detox": {}},
        )

    # Blacklist pass: whole-word hit always; substring hit only for words
    # longer than 3 chars (avoids false positives on short fragments).
    active_cache = get_blacklist_for_language(dil)
    detected_profanity = []
    detected_insult = []
    for bad_word, category in active_cache.items():
        is_hit = bad_word in words_in_pure_text or (len(bad_word) > 3 and bad_word in pure_text)
        if is_hit:
            if category == "profanity":
                detected_profanity.append(bad_word)
            else:
                detected_insult.append(bad_word)

    profanity_hits = sorted(set(detected_profanity))
    insult_hits = sorted(set(detected_insult))

    if dil == "en":
        gb = None
        if state["GB_PIPE"] is not None:
            gb_raw = state["GB_PIPE"](temiz)[0]
            gb = {
                "label": str(gb_raw.get("label", "")),
                "score": float(gb_raw.get("score", 0.0)),
            }
            # Very confident "noise" → short-circuit as gibberish.
            if gb["label"].lower() == "noise" and gb["score"] > 0.98:
                return (
                    "🗑️ GIBBERISH/SPAM",
                    "Metin anlamsız karakter dizileri içeriyor.",
                    "LOW",
                    "en",
                    temiz,
                    {"gibberish": gb, "action": "MONITOR", "detox": {}},
                )

        raw_res = state["D_EN"].predict(temiz)
        res = {k: float(v) for k, v in raw_res.items()}
        tox_score = res.get("toxicity", 0.0)
        detail = {
            "detox": res,
            "insult": res.get("insult", 0.0),
            "toxicity": tox_score,
            "identity_attack": res.get("identity_attack", 0.0),
            "hits": profanity_hits,
            "insult_hits": insult_hits,
            "gibberish": gb,
        }
        verdict = calculate_verdict(
            profanity_hits,
            {"off_score": 0.0, "detox_toxicity": tox_score},
        )
        detail["action"] = action_map.get(verdict["risk_level"], "MONITOR")
        return verdict["decision"], verdict["reason"], verdict["risk_level"], dil, temiz, detail

    # Turkish path: BERTurk offensive classifier + multilingual Detoxify.
    in_o = state["T_O"](temiz, return_tensors="pt", truncation=True, padding=True, max_length=128)
    in_o = {k: v.to(state["TORCH_DEVICE"]) for k, v in in_o.items()}
    with torch.no_grad():
        out_o = state["M_O"](**in_o)
    p_o = torch.softmax(out_o.logits, dim=1)[0]
    # Index 1 is taken as the "offensive" class when the head is binary;
    # otherwise fall back to the max probability. TODO confirm label order.
    off_score = float(p_o[1].item()) if p_o.numel() > 1 else float(p_o.max().item())

    raw_threat_res = state["D_MULTI"].predict(temiz)
    threat_res = {k: float(v) for k, v in raw_threat_res.items()}
    tox_score = threat_res.get("toxicity", 0.0)
    detail = {
        "off_score": off_score,
        "toxicity": tox_score,
        "insult": threat_res.get("insult", 0.0),
        "threat": threat_res.get("threat", 0.0),
        "detox": threat_res,
        "hits": profanity_hits,
        "insult_hits": insult_hits,
    }
    verdict = calculate_verdict(
        profanity_hits,
        {"off_score": off_score, "detox_toxicity": tox_score},
    )
    detail["action"] = action_map.get(verdict["risk_level"], "MONITOR")
    return verdict["decision"], verdict["reason"], verdict["risk_level"], dil, temiz, detail
|
app/utils/__init__.py
ADDED
|
File without changes
|
app/utils/text_utils.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import unicodedata
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def clean_text_nfkc(text: str) -> str:
    """Normalize raw text for profanity/spam matching.

    Pipeline: NFKC-normalize, Turkish-aware lowercase (İ->i, I->ı),
    strip in-word obfuscation separators ("s.a.l.a.k" -> "salak"),
    undo common leetspeak digits, collapse repeated characters, and
    squeeze whitespace runs to single spaces.
    """
    normalized = unicodedata.normalize('NFKC', str(text))
    # Map dotted/dotless I explicitly: a plain lower() would turn 'I' into 'i'.
    normalized = normalized.replace('İ', 'i').replace('I', 'ı').lower()
    # Remove . - _ * only when sandwiched between word characters.
    normalized = re.sub(r'(?<=[a-zğüşıöç0-9])[\.\-_\*]+(?=[a-zğüşıöç0-9])', '', normalized)
    # Leetspeak digits back to letters in a single C-level pass.
    normalized = normalized.translate(str.maketrans('0134578', 'oieastb'))
    # Any run of one character collapses to a single occurrence.
    normalized = re.sub(r'(.)\1+', r'\1', normalized)
    return " ".join(normalized.split())
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def check_blacklist(text: str, blacklist_set: set) -> bool:
    """Return True if any whitespace-separated token of *text* is blacklisted."""
    return any(token in blacklist_set for token in text.split())
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def is_spam(temiz: str, dil: str = "tr") -> bool:
    """Heuristically decide whether the normalized text *temiz* looks like spam.

    A series of cheap, independent checks; any hit returns True:
    near-empty letter content, abnormally low vowel ratio, too many
    non-Turkish letters (when dil == "tr"), low character diversity,
    long single-character runs, a short block repeated over most of the
    message, and known spam phrases / URL fragments.
    """
    letters = re.sub(r'[^a-zğüşıöç]', '', temiz)
    letter_count = len(letters)

    # Essentially no alphabetic content at all.
    if letter_count < 2:
        return True

    # Keyboard-mash detector: mid-length strings with almost no vowels.
    vowels = set('aeıioöuüeiou')
    vowel_ratio = sum(c in vowels for c in letters) / max(letter_count, 1)
    if 5 < letter_count < 100 and vowel_ratio < 0.15:
        return True

    # Turkish text should rarely contain w/q/x.
    if dil == "tr":
        foreign = set('wqx')
        foreign_ratio = sum(c in foreign for c in letters) / max(letter_count, 1)
        if foreign_ratio > 0.2:
            return True

    # Low character diversity; thresholds scale with message length.
    distinct = len(set(letters))
    if 10 < letter_count < 50 and distinct / letter_count < 0.25:
        return True
    if letter_count >= 50 and distinct < 8:
        return True

    # Seven or more identical characters in a row.
    if re.search(r'(.)\1{6,}', temiz):
        return True

    # A short leading block repeated over most of the text ("hahaha...").
    total_len = len(temiz)
    for width in range(3, min(10, total_len // 2 + 1)):
        repeats = temiz.count(temiz[:width])
        if repeats >= 4 and repeats * width >= total_len * 0.7:
            return True

    # Known spam phrases and URL fragments (TR + EN).
    spam_patterns = [
        r'http[s]?://', r'www\.', r'\.com', r'\.net', r'\.org',
        r'click\s*here', r'buy\s*cheap', r'free\s*follow',
        r'tıkla.*kazan', r'ücretsiz.*takipçi', r'satın\s*al',
        r'indirim.*%', r'subscribe.*channel',
    ]
    return any(re.search(p, temiz, re.IGNORECASE) for p in spam_patterns)
|
main.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""FastAPI application entry point for the Sentinel moderation API."""
from fastapi import FastAPI

from app.api.endpoints import router
from app.core.config import APP_DESCRIPTION, APP_TITLE, APP_VERSION

# Application metadata lives in app.core.config so it is defined once.
app = FastAPI(
    title=APP_TITLE,
    description=APP_DESCRIPTION,
    version=APP_VERSION,
)

# All routes are declared in app.api.endpoints.
app.include_router(router)
|
performance_test.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Latency benchmark for the Sentinel /analyze endpoint.

Sends a fixed set of TR/EN scenarios — short clean text, early-exit
profanity, and long "heavy" paragraphs — and prints per-request timings.
"""
import time

import requests

# Local development server; point elsewhere to benchmark a deployment.
API_URL = "http://127.0.0.1:8000/analyze"

# Long, clean Turkish text: passes all pre-filters and exercises the
# full deep-learning pipeline.
heavy_text_tr = """
Merhaba, şu an geliştirmekte olduğumuz Sentinel moderasyon sisteminin performans limitlerini test etmek amacıyla bu uzun paragrafı oluşturuyorum.
Yapay zeka modellerinin, özellikle BERTurk ve Detoxify gibi derin öğrenme mimarilerinin, metin uzunluğu arttıkça işlem süresini nasıl değiştirdiğini gözlemlemek bizim için kritik.
Bu metin, herhangi bir küfür veya spam emaresi taşımadığı için sistemin tüm ön filtrelerinden geçerek doğrudan doğal dil işleme katmanına ulaşacaktır.
Burada tokenization süreci, modelin çıkarım (inference) hızı ve donanım kaynaklarının kullanımı gibi metrikleri saniyeler bazında değil, milisaniyeler bazında ölçerek sistemin gerçek zamanlı
isteklere ne kadar hazırlıklı olduğunu kanıtlamış olacağız. Umarım sonuçlar, sistemin ölçeklenebilirliği hakkında bize net bir veri sağlar.
"""

# English counterpart of the heavy-load scenario.
heavy_text_en = """
Hello, this is a long paragraph designed to test the performance limits of the Sentinel moderation system in English.
We are specifically looking at how the Detoxify original model handles longer contexts and multiple toxicity labels simultaneously.
By sending this comprehensive text, we ensure that the system bypasses simple keyword filters and triggers the full deep learning pipeline.
This will give us a clear baseline for latency in a global production environment.
"""

# Scenario tuples: (label, request text, platform language).
test_scenarios = [
    ("TR - Kısa Temiz", "Merhaba, bugün hava çok güzel.", "tr"),
    ("TR - Early Exit (Küfür)", "Lan naber o.ç.", "tr"),
    ("TR - Ağır AI Yükü", heavy_text_tr, "tr"),
    ("EN - Kısa Temiz", "Hello, I hope you are having a wonderful day.", "en"),
    ("EN - Early Exit (Profanity)", "Shut the fuck up you bastard!", "en"),
    ("EN - Ağır AI Yükü", heavy_text_en, "en"),
]
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def run_performance_suite() -> None:
    """POST every scenario to the API and print a per-request latency table."""
    print(f"{'Senaryo Adı':<30} | {'Dil':<4} | {'API Latency':<12} | {'Toplam Süre':<12}")
    print("-" * 75)

    for label, text, lang in test_scenarios:
        t0 = time.time()
        try:
            resp = requests.post(
                API_URL,
                json={"text": text, "platform_dil": lang},
                timeout=60,
            )
            elapsed_ms = (time.time() - t0) * 1000
        except requests.RequestException as exc:
            print(f"{label:<30} | {lang:<4} | BAĞLANTI HATASI: {exc}")
            continue

        if resp.status_code != 200:
            print(f"{label:<30} | {lang:<4} | HATA: {resp.status_code}")
            continue

        body = resp.json()
        server_latency = float(body.get("latency_ms", 0))
        # ⚡ marks sub-50 ms responses; 🧠 anything slower.
        status_symbol = "⚡" if server_latency < 50 else "🧠"
        print(f"{label:<30} | {lang.upper():<4} | {server_latency:>8.2f} ms | {elapsed_ms:>8.2f} ms {status_symbol}")
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
if __name__ == "__main__":
    # Fire one warm-up request per language before timing the suite.
    requests.post(API_URL, json={"text": "warmup", "platform_dil": "tr"}, timeout=60)
    requests.post(API_URL, json={"text": "warmup", "platform_dil": "en"}, timeout=60)
    run_performance_suite()
|
requirements.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
transformers
|
| 2 |
+
torch
|
| 3 |
+
detoxify
|
| 4 |
+
streamlit
|
| 5 |
+
pandas
|
| 6 |
+
requests
|
| 7 |
+
openpyxl
|
| 8 |
+
sentencepiece
|
| 9 |
+
matplotlib
|
| 10 |
+
scikit-learn
|
| 11 |
+
fastapi
|
| 12 |
+
uvicorn
|
| 13 |
+
supabase
|
| 14 |
+
python-dotenv
|
| 15 |
+
psutil
|
| 16 |
+
python-docx
|
stress_test.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Ad-hoc stress test: fire concurrent /analyze requests and report latency/RPS.

Script-style module — the test runs at import time and expects the API
server to be listening on localhost:8000.
"""
import requests
import time
from concurrent.futures import ThreadPoolExecutor
import torch

API_URL = "http://127.0.0.1:8000/analyze"
VRAM_URL = "http://127.0.0.1:8000/vram-status"
TOTAL_REQUESTS = 50  # Total number of requests to send
CONCURRENT_USERS = 5  # Number of clients hitting the API simultaneously

# Every request reuses the same benign Turkish payload.
payload = {
    "text": "Bu bir performans testidir. Sistemimiz hem Türkçe hem İngilizce içerikleri başarıyla analiz edebiliyor.",
    "platform_dil": "tr",
}


def send_request(_):
    # Return the wall-clock latency of a single POST, in milliseconds.
    start = time.time()
    response = requests.post(API_URL, json=payload)
    response.raise_for_status()
    return (time.time() - start) * 1000


print(f"🔥 Stress Test Başlatılıyor: {TOTAL_REQUESTS} istek, {CONCURRENT_USERS} eşzamanlı kanal...")

with ThreadPoolExecutor(max_workers=CONCURRENT_USERS) as executor:
    # Send one warm-up request first and keep it out of the measured results.
    requests.post(API_URL, json=payload)

    start_time = time.time()
    latencies = list(executor.map(send_request, range(TOTAL_REQUESTS)))
    total_duration = time.time() - start_time

avg_latency = sum(latencies) / len(latencies)
rps = TOTAL_REQUESTS / total_duration

# Report whether this client process sees a CUDA device.
if torch.cuda.is_available():
    runtime_label = f"GPU - {torch.cuda.get_device_name(0)} Üzerinde"
else:
    runtime_label = "CPU Üzerinde"

print("\n" + "=" * 40)
print(f"📊 SONUÇLAR ({runtime_label})")
print("-" * 40)
print(f"⏱️ Ortalama Gecikme: {avg_latency:.2f} ms")
print(f"🚀 Saniyedeki İstek (RPS): {rps:.2f} req/sec")
print(f"⌛ Toplam Süre: {total_duration:.2f} saniye")
print("=" * 40)

# Ask the server for a VRAM snapshot after the load finishes.
print("\n🔎 VRAM Snapshot (/vram-status)")
try:
    vram_resp = requests.get(VRAM_URL, timeout=10)
    vram_resp.raise_for_status()
    vram = vram_resp.json()
    if vram.get("cuda_available"):
        print(f"📟 GPU: {vram.get('gpu_name', 'Bilinmiyor')}")
        print(f"🔥 Allocated: {vram.get('allocated_mb', 0)} MB")
        print(f"🛡️ Reserved: {vram.get('reserved_mb', 0)} MB")
        print(f"🆓 Free (Tahmini): {vram.get('free_estimate_mb', 0)} MB")
    else:
        print(f"ℹ️ {vram.get('message', 'CUDA aktif değil.')}")
except requests.RequestException as exc:
    print(f"⚠️ VRAM endpoint erişilemedi: {exc}")
|
utils.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import unicodedata
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
def clean_text_nfkc(text: str) -> str:
    """Normalize *text* for profanity/spam matching.

    Steps: NFKC-normalize, Turkish-aware lowercasing (İ->i, I->ı),
    remove . - _ * separators placed inside words, map common leetspeak
    digits back to letters, collapse repeated characters, and squeeze
    whitespace runs to single spaces.
    """
    text = unicodedata.normalize('NFKC', str(text))
    # Map dotted/dotless I before lower(); plain lower() would turn 'I' into 'i'.
    text = text.replace('İ', 'i').replace('I', 'ı').lower()
    # Strip obfuscation separators between word characters ("s.a.l.a.k" -> "salak").
    text = re.sub(r'(?<=[a-zğüşıöç0-9])[\.\-_\*]+(?=[a-zğüşıöç0-9])', '', text)
    # Common leetspeak digit substitutions.
    leet_map = {'0':'o', '1':'i', '3':'e', '4':'a', '5':'s', '7':'t', '8':'b'}
    for key, value in leet_map.items():
        text = text.replace(key, value)
    # Collapse runs of the same character ("coook" -> "cok").
    text = re.sub(r'(.)\1+', r'\1', text)
    return " ".join(text.split())
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def check_blacklist(text: str, blacklist_set: set) -> bool:
    """Return True if any whitespace-separated token of *text* appears in *blacklist_set*."""
    return bool(set(text.split()) & blacklist_set)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def is_spam(temiz: str, dil: str = "tr") -> bool:
    """Heuristically flag the normalized text *temiz* as spam.

    Any hit returns True: near-empty letter content, very low vowel
    ratio, a high share of non-Turkish letters (w/q/x) when
    ``dil == "tr"``, long single-character runs, a short block repeated
    over most of the message, low character diversity, and well-known
    spam phrases / URL fragments.
    """
    sadece_harf = re.sub(r'[^a-zğüşıöç]', '', temiz)
    if len(sadece_harf) < 2:
        # Practically no alphabetic content at all.
        return True
    # Keyboard-mash detector: strings of letters with almost no vowels.
    sesli = set('aeıioöuüeiou')
    sesli_oran = sum(1 for c in sadece_harf if c in sesli) / max(len(sadece_harf), 1)
    if len(sadece_harf) > 5 and sesli_oran < 0.15:
        return True
    # Turkish text should rarely contain w/q/x.
    if dil == "tr":
        tr_olmayan = set('wqx')
        tr_olmayan_oran = sum(1 for c in sadece_harf if c in tr_olmayan) / max(len(sadece_harf), 1)
        if tr_olmayan_oran > 0.2:
            return True
    # Five or more identical characters in a row.
    if re.search(r'(.)\1{4,}', temiz):
        return True
    # A short leading block repeated over most of the text ("hahahahaha").
    # NOTE: O(n^2) worst case for long inputs; acceptable for chat-sized text.
    n = len(temiz)
    for blok in range(2, n // 2 + 1):
        pattern = temiz[:blok]
        # str.count counts non-overlapping literal occurrences — identical to
        # the previous re.findall(re.escape(pattern), ...) but without
        # building a fresh regex on every iteration.
        tekrar = temiz.count(pattern)
        if tekrar >= 3 and tekrar * blok >= n * 0.6:
            return True
    # Low character diversity.
    if len(sadece_harf) > 10 and len(set(sadece_harf)) / len(sadece_harf) < 0.25:
        return True
    # Known spam phrases and URL fragments (TR + EN).
    spam_patterns = [
        r'http[s]?://', r'www\.', r'\.com', r'\.net', r'\.org',
        r'click\s*here', r'buy\s*cheap', r'free\s*follow',
        r'tıkla.*kazan', r'ücretsiz.*takipçi', r'satın\s*al',
        r'indirim.*%', r'subscribe.*channel',
    ]
    for pattern in spam_patterns:
        if re.search(pattern, temiz, re.IGNORECASE):
            return True
    return False
|
vram_check.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
|
| 3 |
+
# Reserved-VRAM threshold (MB) above which the report prints a warning.
CRITICAL_RESERVED_MB = 3500
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def check_vram_usage() -> None:
    """Print a one-shot VRAM report for GPU 0 and flag the critical threshold."""
    if not torch.cuda.is_available():
        print("❌ CUDA aktif değil, VRAM ölçülemez.")
        return

    mib = 1024 ** 2
    alloc_mb = torch.cuda.memory_allocated(0) / mib
    reserved_mb = torch.cuda.memory_reserved(0) / mib
    total_mb = torch.cuda.get_device_properties(0).total_memory / mib
    # "Free" is estimated from the reserved pool, not from other processes.
    free_mb = total_mb - reserved_mb

    report = [
        "=" * 40,
        f"📟 GPU: {torch.cuda.get_device_name(0)}",
        f"📊 Toplam VRAM: {total_mb:.2f} MB",
        f"🔥 Şu An Ayrılan (Allocated): {alloc_mb:.2f} MB",
        f"🛡️ Rezerve Edilen (Reserved): {reserved_mb:.2f} MB",
        f"🆓 Boş Alan (Tahmini): {free_mb:.2f} MB",
    ]
    if reserved_mb >= CRITICAL_RESERVED_MB:
        report.append(f"⚠️ Kritik Eşik Aşıldı: Reserved >= {CRITICAL_RESERVED_MB} MB")
    else:
        report.append(f"✅ Güvenli Bölge: Reserved < {CRITICAL_RESERVED_MB} MB")
    report.append("=" * 40)
    print("\n".join(report))
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
if __name__ == "__main__":
    # Run as a standalone CLI diagnostic.
    check_vram_usage()
|