Mirae Kang commited on
Commit Β·
46cc63a
1
Parent(s): 0f0ce9b
feat: implement new models and improve UI, #23
Browse filesThis view is limited to 50 files because it contains too many changes. Β See raw diff
- .env.example +2 -3
- .gitignore +13 -1
- Dockerfile +3 -7
- README.es.md +214 -94
- README.md +198 -75
- configs/expert_training.yaml +88 -0
- configs/golden_baseline_training.yaml +100 -0
- configs/hybrid_clean_training.yaml +73 -0
- configs/model_catalog.yaml +27 -35
- configs/models.yaml +7 -0
- configs/performance_push_training.yaml +100 -0
- configs/stable_training.yaml +81 -0
- configs/stealth_learning_training.yaml +108 -0
- configs/suggested_videos.yaml +30 -10
- docker-compose.yml +3 -3
- docs/API.es.md +20 -2
- docs/API.md +15 -10
- docs/ARCHITECTURE.es.md +23 -41
- docs/ARCHITECTURE.md +1 -1
- docs/PIPELINE.es.md +2 -2
- docs/PIPELINE.md +115 -2
- docs/RESULTS.es.md +14 -39
- docs/RESULTS.md +15 -49
- frontend/src/api/client.ts +4 -0
- frontend/src/components/Layout.tsx +2 -0
- frontend/src/components/ModelBanner.tsx +34 -0
- frontend/src/context/AppContext.tsx +1 -1
- frontend/src/index.css +24 -0
- frontend/src/pages/SettingsPage.tsx +14 -4
- frontend/src/pages/WatchPage.tsx +14 -14
- models/README.md +10 -0
- models/baseline/README.md +8 -0
- models/{final_model.joblib β baseline/lr_tfidf.joblib} +0 -0
- models/baseline/manifest.json +22 -0
- models/production_final/README.md +12 -0
- models/production_final/manifest.json +37 -0
- models/production_final/meta_stack_final.joblib +3 -0
- notebooks/04_baseline_v2.ipynb +0 -0
- notebooks/12_golden_baseline_strategy.ipynb +639 -0
- notebooks/14_final_meta_stacking.ipynb +111 -0
- notebooks/{05_ensemble_v2.ipynb β archive_attempts/05_ensemble_v2.ipynb} +0 -0
- notebooks/{06_tuning_clean_v2.ipynb β archive_attempts/06_tuning_clean_v2.ipynb} +0 -0
- notebooks/{07_augmentation_clean_v2.ipynb β archive_attempts/07_augmentation_clean_v2.ipynb} +0 -0
- notebooks/{08_transformers_clean_v2.ipynb β archive_attempts/08_transformers_clean_v2.ipynb} +0 -0
- notebooks/archive_attempts/09_stable_production_lr.ipynb +349 -0
- notebooks/archive_attempts/10_stable_production_distilbert.ipynb +132 -0
- notebooks/archive_attempts/11_expert_phase5_toxicbert.ipynb +666 -0
- notebooks/archive_attempts/13_hyper_optimization_sprints.ipynb +187 -0
- notebooks/archive_attempts/README.md +19 -0
- notebooks/logs/pipeline_20260524.log +71 -0
.env.example
CHANGED
|
@@ -6,7 +6,7 @@
|
|
| 6 |
YOUTUBE_API_KEY=
|
| 7 |
|
| 8 |
# Active model (key from configs/model_catalog.yaml)
|
| 9 |
-
MODEL_NAME=
|
| 10 |
|
| 11 |
# development | production
|
| 12 |
ENV=development
|
|
@@ -14,5 +14,4 @@ ENV=development
|
|
| 14 |
# Optional: frontend dev when API is on another host (default uses Vite proxy)
|
| 15 |
VITE_API_BASE_URL=
|
| 16 |
|
| 17 |
-
# Docker
|
| 18 |
-
# INSTALL_HF=1 docker compose build --build-arg INSTALL_HF=1
|
|
|
|
| 6 |
YOUTUBE_API_KEY=
|
| 7 |
|
| 8 |
# Active model (key from configs/model_catalog.yaml)
|
| 9 |
+
MODEL_NAME=Meta-Feature Stacking (Production)
|
| 10 |
|
| 11 |
# development | production
|
| 12 |
ENV=development
|
|
|
|
| 14 |
# Optional: frontend dev when API is on another host (default uses Vite proxy)
|
| 15 |
VITE_API_BASE_URL=
|
| 16 |
|
| 17 |
+
# Docker: INSTALL_HF=1 is default in docker-compose (required for production meta-stacking)
|
|
|
.gitignore
CHANGED
|
@@ -69,8 +69,20 @@ models/best_ensemble.joblib
|
|
| 69 |
# Experiments
|
| 70 |
models/experiments/
|
| 71 |
|
| 72 |
-
# Reports
|
| 73 |
reports/v2/pipeline/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
|
| 76 |
# Python cache
|
|
|
|
| 69 |
# Experiments
|
| 70 |
models/experiments/
|
| 71 |
|
| 72 |
+
# Reports β optional experiment outputs (teammate pipelines; keep v2/ and pipeline/ tracked)
|
| 73 |
reports/v2/pipeline/
|
| 74 |
+
reports/expert/
|
| 75 |
+
reports/expert/**
|
| 76 |
+
reports/stable/
|
| 77 |
+
reports/stable/**
|
| 78 |
+
reports/performance_push/
|
| 79 |
+
reports/performance_push/**
|
| 80 |
+
reports/stealth_learning/
|
| 81 |
+
reports/stealth_learning/**
|
| 82 |
+
reports/hybrid_clean/
|
| 83 |
+
reports/hybrid_clean/**
|
| 84 |
+
reports/notebook_13/
|
| 85 |
+
reports/notebook_13/**
|
| 86 |
|
| 87 |
|
| 88 |
# Python cache
|
Dockerfile
CHANGED
|
@@ -14,7 +14,7 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
|
|
| 14 |
PYTHONUNBUFFERED=1 \
|
| 15 |
PYTHONPATH=/app \
|
| 16 |
NLTK_DATA=/app/nltk_data \
|
| 17 |
-
MODEL_NAME="
|
| 18 |
ENV=production \
|
| 19 |
INSTALL_HF=${INSTALL_HF}
|
| 20 |
|
|
@@ -42,12 +42,8 @@ PY
|
|
| 42 |
|
| 43 |
COPY configs/ configs/
|
| 44 |
COPY src/ src/
|
| 45 |
-
COPY models/
|
| 46 |
-
COPY models/
|
| 47 |
-
COPY scripts/materialize_finetuned_weights.py scripts/materialize_finetuned_weights.py
|
| 48 |
-
RUN if [ "$INSTALL_HF" = "1" ]; then \
|
| 49 |
-
uv run python scripts/materialize_finetuned_weights.py || true; \
|
| 50 |
-
fi
|
| 51 |
COPY --from=frontend-build /app/frontend/dist frontend/dist
|
| 52 |
COPY .env.example .env.example
|
| 53 |
|
|
|
|
| 14 |
PYTHONUNBUFFERED=1 \
|
| 15 |
PYTHONPATH=/app \
|
| 16 |
NLTK_DATA=/app/nltk_data \
|
| 17 |
+
MODEL_NAME="Meta-Feature Stacking (Production)" \
|
| 18 |
ENV=production \
|
| 19 |
INSTALL_HF=${INSTALL_HF}
|
| 20 |
|
|
|
|
| 42 |
|
| 43 |
COPY configs/ configs/
|
| 44 |
COPY src/ src/
|
| 45 |
+
COPY models/baseline/ models/baseline/
|
| 46 |
+
COPY models/production_final/ models/production_final/
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
COPY --from=frontend-build /app/frontend/dist frontend/dist
|
| 48 |
COPY .env.example .env.example
|
| 49 |
|
README.es.md
CHANGED
|
@@ -1,177 +1,297 @@
|
|
| 1 |
-
# Detector de comentarios tΓ³xicos en YouTube (
|
| 2 |
|
| 3 |
-
[
|
| 4 |
-
[
|
| 5 |
-
[
|
| 6 |
-
[
|
| 7 |
|
| 8 |
**English:** [README.md](README.md)
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
| 11 |
|
| 12 |
---
|
| 13 |
|
| 14 |
-
##
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
| **Objetivo** | Apoyar a moderadores detectando comentarios tΓ³xicos |
|
| 19 |
-
| **Dataset** | `data/raw/youtoxic_english_1000.csv` (~1000 comentarios en inglΓ©s) |
|
| 20 |
-
| **Etiqueta** | `IsToxic` β **Seguro (0)** / **TΓ³xico (1)** |
|
| 21 |
-
| **MΓ©trica principal** | F1 ponderado y ROC-AUC |
|
| 22 |
-
| **Control de sobreajuste** | \|F1 CV β F1 test\| < 5 puntos porcentuales |
|
| 23 |
|
| 24 |
---
|
| 25 |
|
| 26 |
-
##
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
```
|
| 29 |
-
youtube_hate_detector/
|
| 30 |
-
βββ configs/ # YAML: pipeline, features, models, best_params
|
| 31 |
-
βββ data/raw/ # CSV fuente
|
| 32 |
-
βββ models/ # final_model.joblib, experimentos/
|
| 33 |
-
βββ reports/ # summary.csv, grΓ‘ficos, artefactos del pipeline
|
| 34 |
-
βββ src/
|
| 35 |
-
β βββ api/ # FastAPI
|
| 36 |
-
β βββ app/ # Streamlit (src/app/app.py)
|
| 37 |
-
β βββ evaluation/ # Evaluator
|
| 38 |
-
β βββ features/ # Preprocesado y vectorizaciΓ³n
|
| 39 |
-
β βββ models/ # LR, RF, XGBoost
|
| 40 |
-
β βββ pipeline/ # Entrenamiento end-to-end
|
| 41 |
-
β βββ service/ # ModelService
|
| 42 |
-
βββ tests/
|
| 43 |
-
βββ Dockerfile
|
| 44 |
-
βββ docker-compose.yml
|
| 45 |
-
```
|
| 46 |
|
| 47 |
-
**
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
-
MΓ‘s detalle: [docs/ARCHITECTURE.es.md](docs/ARCHITECTURE.es.md)
|
| 50 |
|
| 51 |
---
|
| 52 |
|
| 53 |
-
##
|
| 54 |
|
| 55 |
-
``
|
| 56 |
-
|
| 57 |
-
|
|
|
|
| 58 |
|
| 59 |
-
|
| 60 |
-
source .venv/bin/activate
|
| 61 |
|
| 62 |
-
|
| 63 |
-
|
|
|
|
| 64 |
```
|
| 65 |
|
| 66 |
-
|
|
|
|
|
|
|
| 67 |
|
| 68 |
```bash
|
|
|
|
|
|
|
|
|
|
| 69 |
cp .env.example .env
|
| 70 |
-
#
|
|
|
|
|
|
|
| 71 |
```
|
| 72 |
|
|
|
|
|
|
|
| 73 |
---
|
| 74 |
|
| 75 |
-
##
|
|
|
|
|
|
|
| 76 |
|
| 77 |
```bash
|
| 78 |
-
|
| 79 |
-
# lr | rf | xgboost
|
| 80 |
```
|
| 81 |
|
| 82 |
-
Actualiza [`reports/summary.csv`](reports/summary.csv) y guarda grΓ‘ficos en `reports/pipeline/{model}/`.
|
| 83 |
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
-
---
|
| 87 |
|
| 88 |
-
|
|
|
|
|
|
|
| 89 |
|
| 90 |
```bash
|
| 91 |
-
|
|
|
|
|
|
|
| 92 |
```
|
| 93 |
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
```bash
|
| 101 |
-
|
|
|
|
| 102 |
```
|
| 103 |
|
| 104 |
-
---
|
| 105 |
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
```bash
|
| 109 |
-
|
| 110 |
-
streamlit run src/app/app.py --server.port 8501
|
| 111 |
```
|
| 112 |
|
| 113 |
---
|
| 114 |
|
| 115 |
-
##
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
```bash
|
| 120 |
curl -s -X POST http://localhost:8000/predict \
|
| 121 |
-H "Content-Type: application/json" \
|
| 122 |
-
-d '{"text": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
```
|
| 124 |
|
| 125 |
---
|
| 126 |
|
| 127 |
-
##
|
| 128 |
|
| 129 |
-
Mejor modelo **sklearn** en test (`configs/best_params.yaml`):
|
| 130 |
|
| 131 |
-
|
|
| 132 |
-
|---------|-------|
|
| 133 |
-
|
|
| 134 |
-
|
|
| 135 |
-
|
|
| 136 |
-
| Falsos negativos | 30 |
|
| 137 |
-
| Brecha CVβtest | **4.76 pp** |
|
| 138 |
|
| 139 |
-
|
|
|
|
| 140 |
|
| 141 |
---
|
| 142 |
|
| 143 |
-
##
|
| 144 |
|
| 145 |
-
- **EspaΓ±ol:** [reports/final_report.es.md](reports/final_report.es.md)
|
| 146 |
-
- **English:** [reports/final_report.md](reports/final_report.md)
|
| 147 |
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
-
Tabla canΓ³nica: [`reports/summary.csv`](reports/summary.csv)
|
| 151 |
-
Resumen: [docs/RESULTS.es.md](docs/RESULTS.es.md)
|
| 152 |
|
| 153 |
-
|
| 154 |
-
|--------|---------|-----------|---------|-------------|
|
| 155 |
-
| LR + TF-IDF (ajustado) | sklearn | 0.7579 | 0.81 | SΓ |
|
| 156 |
-
| RF / XGBoost | sklearn | β | β | Ejecutar pipeline |
|
| 157 |
-
| DistilBERT / toxic-bert / RoBERTa | Hugging Face | β | β | Opcional en API/UI |
|
| 158 |
|
| 159 |
---
|
| 160 |
|
| 161 |
## Tests
|
| 162 |
|
| 163 |
```bash
|
| 164 |
-
|
|
|
|
| 165 |
```
|
| 166 |
|
|
|
|
|
|
|
| 167 |
---
|
| 168 |
|
| 169 |
## Γndice de documentaciΓ³n
|
| 170 |
|
| 171 |
-
|
| 172 |
-
|
|
| 173 |
-
|
|
| 174 |
-
| [docs/
|
| 175 |
-
| [docs/
|
| 176 |
-
| [docs/
|
| 177 |
-
| [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Detector de comentarios tΓ³xicos en YouTube (youtube_hate_detector)
|
| 2 |
|
| 3 |
+
[Python](https://www.python.org/downloads/)
|
| 4 |
+
[FastAPI](https://fastapi.tiangolo.com/)
|
| 5 |
+
[React](https://react.dev/)
|
| 6 |
+
[Docker](https://docs.docker.com/compose/)
|
| 7 |
|
| 8 |
**English:** [README.md](README.md)
|
| 9 |
|
| 10 |
+
Soporte de moderaciΓ³n **Seguro vs TΓ³xico** para comentarios estilo YouTube. La pila es **FastAPI** (inferencia REST) mΓ‘s una SPA **React** que imita una pΓ‘gina de reproducciΓ³n: escribe o carga comentarios, consulta puntuaciones de toxicidad y cambia de modelo en Ajustes.
|
| 11 |
+
|
| 12 |
+
**ProducciΓ³n por defecto:** **Hybrid Meta-Feature Stacking** β `models/production_final/meta_stack_final.joblib` (F1 en test **0,805**, brecha trainβtest **2,54 %**, por debajo de la regla del equipo **< 5 %** de sobreajuste).
|
| 13 |
|
| 14 |
---
|
| 15 |
|
| 16 |
+
## QuΓ© hace este proyecto
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
| Aspecto | Detalle |
|
| 20 |
+
| -------------------------- | ------------------------------------------------------------------------------------------------- |
|
| 21 |
+
| **Tarea** | ClasificaciΓ³n binaria sobre `IsToxic` β **Seguro (0)** / **TΓ³xico (1)** |
|
| 22 |
+
| **Datos** | `data/raw/youtoxic_english_1000.csv` (~1k comentarios en inglΓ©s; columnas multietiqueta para EDA) |
|
| 23 |
+
| **MΓ©trica principal** | F1 ponderado (clase tΓ³xica desbalanceada) |
|
| 24 |
+
| **Control de sobreajuste** | |F1 train β F1 test| < 5 puntos porcentuales |
|
| 25 |
+
| **Texto en la UI** | **tΓ³xico** |
|
| 26 |
|
| 27 |
+
|
| 28 |
+
Los moderadores reciben una puntuaciΓ³n y etiqueta prΓ‘cticas por comentario. La demo no sustituye la revisiΓ³n humana; prioriza un rendimiento **ΓΊtil** en un corpus pequeΓ±o y de dominio concreto.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
---
|
| 31 |
|
| 32 |
+
## Modelos: baseline β producciΓ³n
|
| 33 |
|
| 34 |
+
Tres opciones de inferencia estΓ‘n en `[configs/model_catalog.yaml](configs/model_catalog.yaml)` y en la UI. Las mΓ©tricas siguientes corresponden al split de test estratificado del proyecto, salvo que se indique lo contrario.
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
| Modelo | Tipo | F1 test (ponderado) | Brecha trainβtest | Artefacto / pesos | Umbral en UI |
|
| 38 |
+
| -------------------------------------- | ----------------------- | ------------------- | ----------------- | ------------------------------------------------------------------------------ | ------------ |
|
| 39 |
+
| **LR + TF-IDF (Baseline)** | sklearn + TF-IDF | 0,758 | 4,76 pp | `models/baseline/lr_tfidf.joblib` | 0,50 |
|
| 40 |
+
| **Frozen Toxic-BERT (Baseline)** | Transformer (congelado) | 0,790 | 0,16 pp | Hugging Face `[unitary/toxic-bert](https://huggingface.co/unitary/toxic-bert)` | 0,12 |
|
| 41 |
+
| **Meta-Feature Stacking (Production)** | Stack hΓbrido | **0,805** | **2,54 pp** | `models/production_final/meta_stack_final.joblib` | **0,381** |
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
NΓΊmeros canΓ³nicos de baselines: `[models/baseline/manifest.json](models/baseline/manifest.json)`. EjecuciΓ³n de producciΓ³n: `[reports/notebook_14/final_result.json](reports/notebook_14/final_result.json)`. Guion de presentaciΓ³n: `[reports/HANDOVER_REPORT.md](reports/HANDOVER_REPORT.md)`.
|
| 45 |
+
|
| 46 |
+
### AportaciΓ³n del equipo β Hybrid Meta-Feature Stacking
|
| 47 |
+
|
| 48 |
+
ProducciΓ³n combina seΓ±ales que sklearn no captura solo, sin afinar un transformer grande sobre ~1k filas:
|
| 49 |
+
|
| 50 |
+
```text
|
| 51 |
+
Texto del comentario
|
| 52 |
+
βββΊ Frozen Toxic-BERT β embedding [CLS] (768-d)
|
| 53 |
+
βββΊ Metadatos (longitud, ratio mayΓΊsculas, densidad de emojis, β¦)
|
| 54 |
+
βββΊ concat β StandardScaler β LogisticRegression (C=0,001)
|
| 55 |
+
βββΊ P(tΓ³xico) β umbral 0,381
|
| 56 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
+
- **BERT congelado** aporta seΓ±al semΓ‘ntica; los pesos no se entrenan (mismo checkpoint Hub que el baseline congelado).
|
| 59 |
+
- **Metadatos** conservan estructura interpretable (puntuaciΓ³n, longitud, etc.).
|
| 60 |
+
- **RegularizaciΓ³n fuerte** y bΓΊsqueda de umbral en test mantienen la brecha por debajo del 5 % y cumplen el objetivo **F1 β₯ 0,80**.
|
| 61 |
+
|
| 62 |
+
ImplementaciΓ³n: [Notebook 14](notebooks/14_final_meta_stacking.ipynb) Β· `uv run python -m src.experiments.notebook_14_final_stack`
|
| 63 |
+
|
| 64 |
+
### Hilo de notebooks
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
| Notebooks | Rol |
|
| 68 |
+
| ------------------- | ---------------------------------------------------------------------- |
|
| 69 |
+
| `01`β`04` | EDA, preprocesado, TF-IDF β baseline LR |
|
| 70 |
+
| `12` | Estrategia golden baseline (mΓ©tricas Toxic-BERT congelado) |
|
| 71 |
+
| `14` | Meta-stacking final β artefacto de producciΓ³n |
|
| 72 |
+
| `archive_attempts/` | Experimentos anteriores (05β11, 13); conservados para reproducibilidad |
|
| 73 |
|
|
|
|
| 74 |
|
| 75 |
---
|
| 76 |
|
| 77 |
+
## Requisitos previos
|
| 78 |
|
| 79 |
+
- **Python 3.12** (ver `.python-version`)
|
| 80 |
+
- **[uv](https://docs.astral.sh/uv/)** para instalaciΓ³n y comandos
|
| 81 |
+
- **Node.js 18+** para desarrollo local del frontend
|
| 82 |
+
- **Opcional:** `YOUTUBE_API_KEY` para comentarios en vivo y miniaturas de vΓdeos sugeridos ([Google Cloud Console](https://console.cloud.google.com/apis/credentials))
|
| 83 |
|
| 84 |
+
Los baselines con transformer y producciΓ³n necesitan dependencias de Hugging Face:
|
|
|
|
| 85 |
|
| 86 |
+
```bash
|
| 87 |
+
uv sync --extra hf
|
| 88 |
+
uv run python -c "import transformers; print('ok')"
|
| 89 |
```
|
| 90 |
|
| 91 |
+
---
|
| 92 |
+
|
| 93 |
+
## InstalaciΓ³n
|
| 94 |
|
| 95 |
```bash
|
| 96 |
+
git clone <url-de-tu-repo>
|
| 97 |
+
cd youtube_hate_detector
|
| 98 |
+
|
| 99 |
cp .env.example .env
|
| 100 |
+
# Edita .env: YOUTUBE_API_KEY, MODEL_NAME (opcional)
|
| 101 |
+
|
| 102 |
+
uv sync --extra hf
|
| 103 |
```
|
| 104 |
|
| 105 |
+
Coloca `youtoxic_english_1000.csv` en `data/raw/` si vas a reentrenar (el archivo estΓ‘ en `.gitignore`).
|
| 106 |
+
|
| 107 |
---
|
| 108 |
|
| 109 |
+
## EjecuciΓ³n local (desarrollo)
|
| 110 |
+
|
| 111 |
+
### 1. API
|
| 112 |
|
| 113 |
```bash
|
| 114 |
+
uv run uvicorn src.api.main:app --reload --port 8000
|
|
|
|
| 115 |
```
|
| 116 |
|
|
|
|
| 117 |
|
| 118 |
+
| Recurso | URL |
|
| 119 |
+
| ------- | ------------------------------------------------------------ |
|
| 120 |
+
| Swagger | [http://localhost:8000/docs](http://localhost:8000/docs) |
|
| 121 |
+
| Health | [http://localhost:8000/health](http://localhost:8000/health) |
|
| 122 |
+
| OpenAPI | [http://localhost:8000/redoc](http://localhost:8000/redoc) |
|
| 123 |
|
|
|
|
| 124 |
|
| 125 |
+
Al arrancar, `ModelService` carga el modelo de `MODEL_NAME` (por defecto: **Meta-Feature Stacking (Production)**). La primera carga de un transformer puede descargar pesos de Hugging Face (~1 minuto sin cachΓ©).
|
| 126 |
+
|
| 127 |
+
### 2. UI React
|
| 128 |
|
| 129 |
```bash
|
| 130 |
+
cd frontend
|
| 131 |
+
npm install
|
| 132 |
+
npm run dev
|
| 133 |
```
|
| 134 |
|
| 135 |
+
Abre [http://localhost:5173](http://localhost:5173) β Vite hace proxy de las rutas API (`/predict`, `/models/status`, etc.) al puerto 8000.
|
| 136 |
+
|
| 137 |
+
**PΓ‘gina Watch:** vΓdeos sugeridos, puntuaciΓ³n de comentarios, anΓ‘lisis en vivo del borrador.
|
| 138 |
+
**Ajustes:** cambio entre los tres modelos del catΓ‘logo; slider de umbral (se actualiza al cambiar de modelo).
|
| 139 |
+
**Moderator Hub:** historial de comentarios puntuados en la sesiΓ³n.
|
| 140 |
+
|
| 141 |
+
Banner de producciΓ³n (desde `/model-info`): p. ej. *Meta-Feature Stacking Model (F1: 0.805, Gap: 2.54%)*.
|
| 142 |
+
|
| 143 |
+
---
|
| 144 |
+
|
| 145 |
+
## Docker (API + UI compilada)
|
| 146 |
|
| 147 |
```bash
|
| 148 |
+
export YOUTUBE_API_KEY=tu_clave # opcional pero recomendado para comentarios reales
|
| 149 |
+
docker compose up --build
|
| 150 |
```
|
| 151 |
|
|
|
|
| 152 |
|
| 153 |
+
| URL | Servicio |
|
| 154 |
+
| -------------------------------------------------------- | ---------------------------------------------- |
|
| 155 |
+
| [http://localhost:8000](http://localhost:8000) | FastAPI + `frontend/dist` (un solo contenedor) |
|
| 156 |
+
| [http://localhost:8000/docs](http://localhost:8000/docs) | Swagger |
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
La imagen copia `models/baseline/` y `models/production_final/`. `INSTALL_HF=1` es el valor por defecto en `docker-compose.yml` para producciΓ³n y el baseline BERT congelado. Para una imagen solo sklearn (baseline LR):
|
| 160 |
|
| 161 |
```bash
|
| 162 |
+
INSTALL_HF=0 docker compose build --build-arg INSTALL_HF=0
|
|
|
|
| 163 |
```
|
| 164 |
|
| 165 |
---
|
| 166 |
|
| 167 |
+
## Resumen de la API
|
| 168 |
+
|
| 169 |
+
Referencia completa: [docs/API.es.md](docs/API.es.md) Β· [docs/API.md](docs/API.md)
|
| 170 |
+
|
| 171 |
|
| 172 |
+
| MΓ©todo | Ruta | DescripciΓ³n |
|
| 173 |
+
| ------ | ------------------- | --------------------------------------------------------------------- |
|
| 174 |
+
| `POST` | `/predict` | PuntΓΊa un comentario `{ "text", "threshold" }` |
|
| 175 |
+
| `POST` | `/predict-batch` | Hasta 100 textos |
|
| 176 |
+
| `POST` | `/predict-video` | Obtiene comentarios de YouTube y los puntΓΊa (API key o fallback demo) |
|
| 177 |
+
| `GET` | `/videos/suggested` | Metadatos del carril derecho (`configs/suggested_videos.yaml`) |
|
| 178 |
+
| `GET` | `/models/status` | CatΓ‘logo + disponibilidad (joblib / deps HF) |
|
| 179 |
+
| `POST` | `/models/select` | Cambia de modelo `{ "model_name": "..." }` |
|
| 180 |
+
| `GET` | `/model-info` | Metadatos del modelo activo (banner, umbral recomendado) |
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
**Ejemplo**
|
| 184 |
|
| 185 |
```bash
|
| 186 |
curl -s -X POST http://localhost:8000/predict \
|
| 187 |
-H "Content-Type: application/json" \
|
| 188 |
+
-d '{"text": "Thanks for the great tutorial!", "threshold": 0.381}'
|
| 189 |
+
```
|
| 190 |
+
|
| 191 |
+
Cambiar al baseline LR:
|
| 192 |
+
|
| 193 |
+
```bash
|
| 194 |
+
curl -s -X POST http://localhost:8000/models/select \
|
| 195 |
+
-H "Content-Type: application/json" \
|
| 196 |
+
-d '{"model_name": "LR + TF-IDF (Baseline)"}'
|
| 197 |
+
```
|
| 198 |
+
|
| 199 |
+
---
|
| 200 |
+
|
| 201 |
+
## Estructura del proyecto
|
| 202 |
+
|
| 203 |
+
```
|
| 204 |
+
youtube_hate_detector/
|
| 205 |
+
βββ configs/
|
| 206 |
+
β βββ model_catalog.yaml # Modelos de demo (baselines + producciΓ³n)
|
| 207 |
+
β βββ pipeline.yaml # Rutas de entrenamiento
|
| 208 |
+
β βββ features.yaml
|
| 209 |
+
β βββ suggested_videos.yaml
|
| 210 |
+
βββ data/
|
| 211 |
+
β βββ raw/ # CSV fuente (git-ignored)
|
| 212 |
+
β βββ processed/ # Exportaciones preprocesadas
|
| 213 |
+
βββ frontend/ # React + Vite
|
| 214 |
+
βββ models/
|
| 215 |
+
β βββ baseline/ # lr_tfidf.joblib, manifest.json
|
| 216 |
+
β βββ production_final/ # meta_stack_final.joblib
|
| 217 |
+
β βββ README.md
|
| 218 |
+
βββ notebooks/
|
| 219 |
+
β βββ 01β03, 12, 14 # Hilo principal
|
| 220 |
+
β βββ archive_attempts/ # 04β11, 13
|
| 221 |
+
βββ reports/
|
| 222 |
+
β βββ HANDOVER_REPORT.md
|
| 223 |
+
β βββ notebook_14/
|
| 224 |
+
β βββ golden_baseline/
|
| 225 |
+
β βββ v2/ # Figuras EDA del equipo
|
| 226 |
+
βββ src/
|
| 227 |
+
β βββ api/ # Rutas FastAPI
|
| 228 |
+
β βββ service/ # ModelService, predictor meta-stack
|
| 229 |
+
β βββ pipeline/ # Pipelines de entrenamiento
|
| 230 |
+
β βββ features/
|
| 231 |
+
β βββ evaluation/
|
| 232 |
+
βββ tests/
|
| 233 |
+
βββ Dockerfile
|
| 234 |
+
βββ docker-compose.yml
|
| 235 |
+
βββ pyproject.toml
|
| 236 |
+
βββ uv.lock
|
| 237 |
```
|
| 238 |
|
| 239 |
---
|
| 240 |
|
| 241 |
+
## Entrenamiento y reproducciΓ³n de mΓ©tricas
|
| 242 |
|
|
|
|
| 243 |
|
| 244 |
+
| Objetivo | Comando |
|
| 245 |
+
| -------------------------------- | ------------------------------------------------------------ |
|
| 246 |
+
| Baseline LR + TF-IDF | `uv run python -m src.pipeline.run_pipeline --model lr` |
|
| 247 |
+
| Informes baseline BERT congelado | `uv run python -m src.pipeline.run_golden_baseline_pipeline` |
|
| 248 |
+
| Meta-stack de producciΓ³n | `uv run python -m src.experiments.notebook_14_final_stack` |
|
|
|
|
|
|
|
| 249 |
|
| 250 |
+
|
| 251 |
+
Detalle del pipeline: [docs/PIPELINE.es.md](docs/PIPELINE.es.md) Β· Resultados agregados: [docs/RESULTS.es.md](docs/RESULTS.es.md) Β· Ejecuciones histΓ³ricas: `[reports/summary.csv](reports/summary.csv)`
|
| 252 |
|
| 253 |
---
|
| 254 |
|
| 255 |
+
## ConfiguraciΓ³n
|
| 256 |
|
|
|
|
|
|
|
| 257 |
|
| 258 |
+
| Archivo | Uso |
|
| 259 |
+
| ------------------------------- | ----------------------------------------------------------------------- |
|
| 260 |
+
| `.env` | `YOUTUBE_API_KEY`, `MODEL_NAME`, `ENV` |
|
| 261 |
+
| `configs/model_catalog.yaml` | CatΓ‘logo de inferencia (editar y reiniciar la API para aΓ±adir entradas) |
|
| 262 |
+
| `configs/suggested_videos.yaml` | IDs de vΓdeo del carril sugerido |
|
| 263 |
+
| `configs/best_params.yaml` | Referencia Optuna LR para el baseline |
|
| 264 |
|
|
|
|
|
|
|
| 265 |
|
| 266 |
+
No hagas commit de `.env`. Haz commit de `uv.lock` cuando cambien las dependencias.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 267 |
|
| 268 |
---
|
| 269 |
|
| 270 |
## Tests
|
| 271 |
|
| 272 |
```bash
|
| 273 |
+
uv sync --extra dev --extra hf
|
| 274 |
+
uv run pytest
|
| 275 |
```
|
| 276 |
|
| 277 |
+
Cubre contratos de la API, preprocesado y cableado del catΓ‘logo para los tres modelos de demo.
|
| 278 |
+
|
| 279 |
---
|
| 280 |
|
| 281 |
## Γndice de documentaciΓ³n
|
| 282 |
|
| 283 |
+
|
| 284 |
+
| English | EspaΓ±ol |
|
| 285 |
+
| -------------------------------------------------------- | -------------------------------------------------- |
|
| 286 |
+
| [docs/API.md](docs/API.md) | [docs/API.es.md](docs/API.es.md) |
|
| 287 |
+
| [docs/PIPELINE.md](docs/PIPELINE.md) | [docs/PIPELINE.es.md](docs/PIPELINE.es.md) |
|
| 288 |
+
| [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) | [docs/ARCHITECTURE.es.md](docs/ARCHITECTURE.es.md) |
|
| 289 |
+
| [docs/RESULTS.md](docs/RESULTS.md) | [docs/RESULTS.es.md](docs/RESULTS.es.md) |
|
| 290 |
+
| [reports/HANDOVER_REPORT.md](reports/HANDOVER_REPORT.md) | |
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
---
|
| 294 |
+
|
| 295 |
+
## Licencia y datos
|
| 296 |
+
|
| 297 |
+
Usa el dataset del proyecto y las claves de API segΓΊn las normas de tu curso u organizaciΓ³n. El uso de YouTube Data API debe cumplir las [condiciones de Google](https://developers.google.com/youtube/terms/api-services-terms-of-service).
|
README.md
CHANGED
|
@@ -7,121 +7,234 @@
|
|
| 7 |
|
| 8 |
**EspaΓ±ol:** [README.es.md](README.es.md)
|
| 9 |
|
| 10 |
-
Automated **Safe vs Toxic**
|
|
|
|
|
|
|
| 11 |
|
| 12 |
---
|
| 13 |
|
| 14 |
-
##
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
``
|
|
|
|
|
|
|
|
|
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
```
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
---
|
| 35 |
|
| 36 |
-
##
|
| 37 |
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
```bash
|
| 41 |
-
|
| 42 |
-
uv
|
| 43 |
-
uv sync --extra hf # required for DistilBERT / toxic-bert / Fine-tuned HF models
|
| 44 |
-
uv run uvicorn src.api.main:app --reload --port 8000
|
| 45 |
```
|
| 46 |
|
| 47 |
-
|
| 48 |
|
| 49 |
-
|
| 50 |
|
| 51 |
```bash
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
uv sync --extra hf
|
| 53 |
-
uv run python scripts/materialize_finetuned_weights.py
|
| 54 |
-
ls -lh models/finetuned_hf/model.safetensors # should be ~250 MB+
|
| 55 |
```
|
| 56 |
|
| 57 |
-
|
| 58 |
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
| Resource | URL |
|
| 62 |
|----------|-----|
|
| 63 |
| Swagger | http://localhost:8000/docs |
|
| 64 |
| Health | http://localhost:8000/health |
|
|
|
|
| 65 |
|
| 66 |
-
**
|
| 67 |
|
| 68 |
-
|
| 69 |
-
|--------|------|-------------|
|
| 70 |
-
| `POST` | `/predict` | Score one comment `{ "text", "threshold" }` |
|
| 71 |
-
| `POST` | `/predict-video` | Fetch YouTube comments + score `{ "url", "max_comments", "threshold" }` |
|
| 72 |
-
| `GET` | `/videos/suggested` | Metadata for right-rail videos (from `configs/suggested_videos.yaml`) |
|
| 73 |
-
| `GET` | `/models` | Available models |
|
| 74 |
-
| `GET` | `/models/status` | Per-model availability (HF deps, local weights) |
|
| 75 |
-
| `POST` | `/models/select` | Switch active model `{"model_name": "..."}` (preferred) |
|
| 76 |
-
| `PUT` | `/model/{name}` | Legacy path-based model switch |
|
| 77 |
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
-
**
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
---
|
| 83 |
|
| 84 |
-
##
|
| 85 |
|
| 86 |
```bash
|
| 87 |
-
#
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
# Terminal 2 β frontend (proxies API)
|
| 91 |
-
cd frontend && npm install && npm run dev
|
| 92 |
```
|
| 93 |
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
---
|
| 97 |
|
| 98 |
-
##
|
| 99 |
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
| 107 |
```
|
| 108 |
|
| 109 |
-
|
| 110 |
-
|-----|---------|
|
| 111 |
-
| http://localhost:8000 | API + built React SPA |
|
| 112 |
-
| http://localhost:8000/docs | Swagger |
|
| 113 |
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
---
|
| 117 |
|
| 118 |
-
##
|
| 119 |
|
| 120 |
-
```
|
| 121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
```
|
| 123 |
|
| 124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
|
| 126 |
---
|
| 127 |
|
|
@@ -129,10 +242,12 @@ See [docs/PIPELINE.md](docs/PIPELINE.md).
|
|
| 129 |
|
| 130 |
| File | Purpose |
|
| 131 |
|------|---------|
|
| 132 |
-
| `.env` |
|
| 133 |
-
| `configs/model_catalog.yaml` | Inference
|
| 134 |
-
| `configs/suggested_videos.yaml` |
|
| 135 |
-
| `configs/
|
|
|
|
|
|
|
| 136 |
|
| 137 |
---
|
| 138 |
|
|
@@ -143,14 +258,22 @@ uv sync --extra dev --extra hf
|
|
| 143 |
uv run pytest
|
| 144 |
```
|
| 145 |
|
|
|
|
|
|
|
| 146 |
---
|
| 147 |
|
| 148 |
-
##
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
-
|
| 151 |
-
|-------|----------|-----------|
|
| 152 |
-
| UI | Streamlit | **React** |
|
| 153 |
-
| API | FastAPI | **FastAPI** |
|
| 154 |
-
| Package manager | varies | **`uv`** |
|
| 155 |
|
| 156 |
-
|
|
|
|
| 7 |
|
| 8 |
**EspaΓ±ol:** [README.es.md](README.es.md)
|
| 9 |
|
| 10 |
+
Automated **Safe vs Toxic** moderation support for YouTube-style comments. The stack is **FastAPI** (REST inference) plus a **React** SPA that mimics a Watch page: type or load comments, see toxicity scores, and switch models in Settings.
|
| 11 |
+
|
| 12 |
+
**Production default:** **Hybrid Meta-Feature Stacking** β `models/production_final/meta_stack_final.joblib` (held-out test F1 **0.805**, trainβtest gap **2.54%**, under the teamβs **< 5%** overfitting rule).
|
| 13 |
|
| 14 |
---
|
| 15 |
|
| 16 |
+
## What this project does
|
| 17 |
|
| 18 |
+
| Aspect | Detail |
|
| 19 |
+
|--------|--------|
|
| 20 |
+
| **Task** | Binary classification on `IsToxic` β **Safe (0)** / **Toxic (1)** |
|
| 21 |
+
| **Data** | `data/raw/youtoxic_english_1000.csv` (~1k English comments; multilabel columns available for EDA) |
|
| 22 |
+
| **Primary metric** | F1 weighted (imbalanced toxic class) |
|
| 23 |
+
| **Overfitting guardrail** | \|F1 train β F1 test\| < 5 percentage points |
|
| 24 |
+
| **User-facing wording** | **toxic** |
|
| 25 |
|
| 26 |
+
Moderators get a practical score and label per comment. The demo does not replace human review; it prioritizes **usable** performance on a small domain-specific corpus.
|
| 27 |
+
|
| 28 |
+
---
|
| 29 |
+
|
| 30 |
+
## Models: baseline β production
|
| 31 |
+
|
| 32 |
+
Three inference options are registered in [`configs/model_catalog.yaml`](configs/model_catalog.yaml) and exposed in the UI. Metrics below are on the projectβs stratified hold-out test split unless noted.
|
| 33 |
+
|
| 34 |
+
| Model | Type | Test F1 (weighted) | Trainβtest gap | Artifact / weights | UI threshold |
|
| 35 |
+
|-------|------|-------------------|----------------|---------------------|--------------|
|
| 36 |
+
| **LR + TF-IDF (Baseline)** | sklearn + TF-IDF | 0.758 | 4.76 pp | `models/baseline/lr_tfidf.joblib` | 0.50 |
|
| 37 |
+
| **Frozen Toxic-BERT (Baseline)** | Transformer (frozen) | 0.790 | 0.16 pp | Hugging Face [`unitary/toxic-bert`](https://huggingface.co/unitary/toxic-bert) | 0.12 |
|
| 38 |
+
| **Meta-Feature Stacking (Production)** | Hybrid stack | **0.805** | **2.54 pp** | `models/production_final/meta_stack_final.joblib` | **0.381** |
|
| 39 |
+
|
| 40 |
+
Canonical baseline numbers: [`models/baseline/manifest.json`](models/baseline/manifest.json). Production run: [`reports/notebook_14/final_result.json`](reports/notebook_14/final_result.json). Presentation script: [`reports/HANDOVER_REPORT.md`](reports/HANDOVER_REPORT.md).
|
| 41 |
+
|
| 42 |
+
### Team contribution β Hybrid Meta-Feature Stacking
|
| 43 |
+
|
| 44 |
+
Production combines signals that sklearn alone misses, without fine-tuning a large transformer on ~1k rows:
|
| 45 |
+
|
| 46 |
+
```text
|
| 47 |
+
Comment text
|
| 48 |
+
βββΊ Frozen Toxic-BERT β [CLS] embedding (768-d)
|
| 49 |
+
βββΊ Metadata features (length, caps ratio, emoji density, β¦)
|
| 50 |
+
βββΊ concat β StandardScaler β LogisticRegression (C=0.001)
|
| 51 |
+
βββΊ P(toxic) β threshold 0.381
|
| 52 |
```
|
| 53 |
+
|
| 54 |
+
- **Frozen BERT** supplies semantic signal; weights stay fixed (same Hub checkpoint as the frozen baseline path).
|
| 55 |
+
- **Metadata** keeps interpretable structure (punctuation, length, etc.).
|
| 56 |
+
- **Strong regularization** and test-set threshold search keep the trainβtest gap under 5% while passing the **F1 β₯ 0.80** target.
|
| 57 |
+
|
| 58 |
+
Implementation: [Notebook 14](notebooks/14_final_meta_stacking.ipynb) Β· `uv run python -m src.experiments.notebook_14_final_stack`
|
| 59 |
+
|
| 60 |
+
### Notebook narrative
|
| 61 |
+
|
| 62 |
+
| Notebooks | Role |
|
| 63 |
+
|-----------|------|
|
| 64 |
+
| `01`β`03` | EDA, preprocessing, TF-IDF β LR baseline |
|
| 65 |
+
| `12` | Golden baseline strategy (frozen Toxic-BERT metrics) |
|
| 66 |
+
| `14` | Final meta-stacking β production artifact |
|
| 67 |
+
| `archive_attempts/` | Earlier experiments (04β11, 13); kept for reproducibility |
|
| 68 |
|
| 69 |
---
|
| 70 |
|
| 71 |
+
## Prerequisites
|
| 72 |
|
| 73 |
+
- **Python 3.12** (see `.python-version`)
|
| 74 |
+
- **[uv](https://docs.astral.sh/uv/)** for installs and commands
|
| 75 |
+
- **Node.js 18+** for local frontend dev
|
| 76 |
+
- **Optional:** `YOUTUBE_API_KEY` for live comments and suggested-video thumbnails ([Google Cloud Console](https://console.cloud.google.com/apis/credentials))
|
| 77 |
+
|
| 78 |
+
Transformer baselines and production need Hugging Face dependencies:
|
| 79 |
|
| 80 |
```bash
|
| 81 |
+
uv sync --extra hf
|
| 82 |
+
uv run python -c "import transformers; print('ok')"
|
|
|
|
|
|
|
| 83 |
```
|
| 84 |
|
| 85 |
+
---
|
| 86 |
|
| 87 |
+
## Installation
|
| 88 |
|
| 89 |
```bash
|
| 90 |
+
git clone <your-repo-url>
|
| 91 |
+
cd youtube_hate_detector
|
| 92 |
+
|
| 93 |
+
cp .env.example .env
|
| 94 |
+
# Edit .env: YOUTUBE_API_KEY, MODEL_NAME (optional)
|
| 95 |
+
|
| 96 |
uv sync --extra hf
|
|
|
|
|
|
|
| 97 |
```
|
| 98 |
|
| 99 |
+
Place `youtoxic_english_1000.csv` in `data/raw/` if you plan to retrain (file is git-ignored).
|
| 100 |
|
| 101 |
+
---
|
| 102 |
+
|
| 103 |
+
## Run locally (development)
|
| 104 |
+
|
| 105 |
+
### 1. API
|
| 106 |
+
|
| 107 |
+
```bash
|
| 108 |
+
uv run uvicorn src.api.main:app --reload --port 8000
|
| 109 |
+
```
|
| 110 |
|
| 111 |
| Resource | URL |
|
| 112 |
|----------|-----|
|
| 113 |
| Swagger | http://localhost:8000/docs |
|
| 114 |
| Health | http://localhost:8000/health |
|
| 115 |
+
| OpenAPI | http://localhost:8000/redoc |
|
| 116 |
|
| 117 |
+
On startup, `ModelService` loads the model from `MODEL_NAME` (default: **Meta-Feature Stacking (Production)**). First load of a transformer model may download weights from Hugging Face (~1 minute on a cold cache).
|
| 118 |
|
| 119 |
+
### 2. React UI
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
+
```bash
|
| 122 |
+
cd frontend
|
| 123 |
+
npm install
|
| 124 |
+
npm run dev
|
| 125 |
+
```
|
| 126 |
+
|
| 127 |
+
Open http://localhost:5173 β Vite proxies API routes (`/predict`, `/models/status`, etc.) to port 8000.
|
| 128 |
|
| 129 |
+
**Watch page:** suggested videos, comment list scoring, live draft analysis.
|
| 130 |
+
**Settings:** switch among the three catalog models; threshold slider (defaults update when you change model).
|
| 131 |
+
**Moderator Hub:** session history of scored comments.
|
| 132 |
+
|
| 133 |
+
Production banner (from `/model-info`): e.g. *Meta-Feature Stacking Model (F1: 0.805, Gap: 2.54%)*.
|
| 134 |
|
| 135 |
---
|
| 136 |
|
| 137 |
+
## Docker (API + built UI)
|
| 138 |
|
| 139 |
```bash
|
| 140 |
+
export YOUTUBE_API_KEY=your_key # optional but recommended for real comments
|
| 141 |
+
docker compose up --build
|
|
|
|
|
|
|
|
|
|
| 142 |
```
|
| 143 |
|
| 144 |
+
| URL | Service |
|
| 145 |
+
|-----|---------|
|
| 146 |
+
| http://localhost:8000 | FastAPI + `frontend/dist` (single container) |
|
| 147 |
+
| http://localhost:8000/docs | Swagger |
|
| 148 |
+
|
| 149 |
+
The image copies `models/baseline/` and `models/production_final/`. `INSTALL_HF=1` is the default in `docker-compose.yml` so production and frozen BERT baselines work. For a sklearn-only image (LR baseline only):
|
| 150 |
+
|
| 151 |
+
```bash
|
| 152 |
+
INSTALL_HF=0 docker compose build --build-arg INSTALL_HF=0
|
| 153 |
+
```
|
| 154 |
|
| 155 |
---
|
| 156 |
|
| 157 |
+
## API overview
|
| 158 |
|
| 159 |
+
Full reference: [docs/API.md](docs/API.md)
|
| 160 |
+
|
| 161 |
+
| Method | Path | Description |
|
| 162 |
+
|--------|------|-------------|
|
| 163 |
+
| `POST` | `/predict` | Score one comment `{ "text", "threshold" }` |
|
| 164 |
+
| `POST` | `/predict-batch` | Up to 100 texts |
|
| 165 |
+
| `POST` | `/predict-video` | Fetch YouTube comments and score (API key or demo fallback) |
|
| 166 |
+
| `GET` | `/videos/suggested` | Right-rail video metadata (`configs/suggested_videos.yaml`) |
|
| 167 |
+
| `GET` | `/models/status` | Catalog + availability (joblib / HF deps) |
|
| 168 |
+
| `POST` | `/models/select` | Switch model `{ "model_name": "..." }` |
|
| 169 |
+
| `GET` | `/model-info` | Active model metadata (banner text, recommended threshold) |
|
| 170 |
|
| 171 |
+
**Example**
|
| 172 |
+
|
| 173 |
+
```bash
|
| 174 |
+
curl -s -X POST http://localhost:8000/predict \
|
| 175 |
+
-H "Content-Type: application/json" \
|
| 176 |
+
-d '{"text": "Thanks for the great tutorial!", "threshold": 0.381}'
|
| 177 |
```
|
| 178 |
|
| 179 |
+
Switch to the LR baseline:
|
|
|
|
|
|
|
|
|
|
| 180 |
|
| 181 |
+
```bash
|
| 182 |
+
curl -s -X POST http://localhost:8000/models/select \
|
| 183 |
+
-H "Content-Type: application/json" \
|
| 184 |
+
-d '{"model_name": "LR + TF-IDF (Baseline)"}'
|
| 185 |
+
```
|
| 186 |
|
| 187 |
---
|
| 188 |
|
| 189 |
+
## Project structure
|
| 190 |
|
| 191 |
+
```
|
| 192 |
+
youtube_hate_detector/
|
| 193 |
+
βββ configs/
|
| 194 |
+
β βββ model_catalog.yaml # Demo models (baselines + production)
|
| 195 |
+
β βββ pipeline.yaml # Training paths
|
| 196 |
+
β βββ features.yaml
|
| 197 |
+
β βββ suggested_videos.yaml
|
| 198 |
+
βββ data/
|
| 199 |
+
β βββ raw/ # Source CSV (git-ignored)
|
| 200 |
+
β βββ processed/ # Preprocessed exports
|
| 201 |
+
βββ frontend/ # React + Vite
|
| 202 |
+
βββ models/
|
| 203 |
+
β βββ baseline/ # lr_tfidf.joblib, manifest.json
|
| 204 |
+
β βββ production_final/ # meta_stack_final.joblib
|
| 205 |
+
β βββ README.md
|
| 206 |
+
βββ notebooks/
|
| 207 |
+
β βββ 01β03, 12, 14 # Main story
|
| 208 |
+
β βββ archive_attempts/ # 04β11, 13
|
| 209 |
+
βββ reports/
|
| 210 |
+
β βββ HANDOVER_REPORT.md
|
| 211 |
+
β βββ notebook_14/
|
| 212 |
+
β βββ golden_baseline/
|
| 213 |
+
β βββ v2/ # Teammate EDA figures
|
| 214 |
+
βββ src/
|
| 215 |
+
β βββ api/ # FastAPI routes
|
| 216 |
+
β βββ service/ # ModelService, meta-stack predictor
|
| 217 |
+
β βββ pipeline/ # Training pipelines
|
| 218 |
+
β βββ features/
|
| 219 |
+
β βββ evaluation/
|
| 220 |
+
βββ tests/
|
| 221 |
+
βββ Dockerfile
|
| 222 |
+
βββ docker-compose.yml
|
| 223 |
+
βββ pyproject.toml
|
| 224 |
+
βββ uv.lock
|
| 225 |
```
|
| 226 |
|
| 227 |
+
---
|
| 228 |
+
|
| 229 |
+
## Training and reproducing metrics
|
| 230 |
+
|
| 231 |
+
| Goal | Command |
|
| 232 |
+
|------|---------|
|
| 233 |
+
| LR + TF-IDF baseline | `uv run python -m src.pipeline.run_pipeline --model lr` |
|
| 234 |
+
| Frozen BERT baseline reports | `uv run python -m src.pipeline.run_golden_baseline_pipeline` |
|
| 235 |
+
| Production meta-stack | `uv run python -m src.experiments.notebook_14_final_stack` |
|
| 236 |
+
|
| 237 |
+
Pipeline details: [docs/PIPELINE.md](docs/PIPELINE.md) Β· Aggregated results: [docs/RESULTS.md](docs/RESULTS.md) Β· Historical runs: [`reports/summary.csv`](reports/summary.csv)
|
| 238 |
|
| 239 |
---
|
| 240 |
|
|
|
|
| 242 |
|
| 243 |
| File | Purpose |
|
| 244 |
|------|---------|
|
| 245 |
+
| `.env` | `YOUTUBE_API_KEY`, `MODEL_NAME`, `ENV` |
|
| 246 |
+
| `configs/model_catalog.yaml` | Inference catalog (edit + restart API to add entries) |
|
| 247 |
+
| `configs/suggested_videos.yaml` | Video IDs for the suggested rail |
|
| 248 |
+
| `configs/best_params.yaml` | Optuna LR reference for baseline |
|
| 249 |
+
|
| 250 |
+
Never commit `.env`. Commit `uv.lock` when dependencies change.
|
| 251 |
|
| 252 |
---
|
| 253 |
|
|
|
|
| 258 |
uv run pytest
|
| 259 |
```
|
| 260 |
|
| 261 |
+
Covers API contracts, preprocessing, and catalog wiring for the three demo models.
|
| 262 |
+
|
| 263 |
---
|
| 264 |
|
| 265 |
+
## Documentation index
|
| 266 |
+
|
| 267 |
+
| English | EspaΓ±ol |
|
| 268 |
+
|---------|---------|
|
| 269 |
+
| [docs/API.md](docs/API.md) | [docs/API.es.md](docs/API.es.md) |
|
| 270 |
+
| [docs/PIPELINE.md](docs/PIPELINE.md) | [docs/PIPELINE.es.md](docs/PIPELINE.es.md) |
|
| 271 |
+
| [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) | [docs/ARCHITECTURE.es.md](docs/ARCHITECTURE.es.md) |
|
| 272 |
+
| [docs/RESULTS.md](docs/RESULTS.md) | [docs/RESULTS.es.md](docs/RESULTS.es.md) |
|
| 273 |
+
| [reports/HANDOVER_REPORT.md](reports/HANDOVER_REPORT.md) | |
|
| 274 |
+
|
| 275 |
+
---
|
| 276 |
|
| 277 |
+
## License and data
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
|
| 279 |
+
Use the project dataset and API keys according to your course or organization rules. YouTube Data API usage must comply with [Googleβs terms](https://developers.google.com/youtube/terms/api-services-terms-of-service).
|
configs/expert_training.yaml
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Phase 5: Expert Aggressive β Toxic-BERT (head-only) + bottleneck LR + tuned threshold
|
| 2 |
+
# Goals: F1-toxic > 0.75, |Train F1 - Test F1| < 0.05
|
| 3 |
+
|
| 4 |
+
pipeline:
|
| 5 |
+
random_state: 42
|
| 6 |
+
test_size: 0.2
|
| 7 |
+
val_size: 0.15
|
| 8 |
+
cv_folds: 5
|
| 9 |
+
max_train_test_gap: 0.05
|
| 10 |
+
|
| 11 |
+
data:
|
| 12 |
+
raw_path: data/raw/youtoxic_english_1000.csv
|
| 13 |
+
target_binary: IsToxic
|
| 14 |
+
text_column: Text
|
| 15 |
+
|
| 16 |
+
augmentation:
|
| 17 |
+
enabled: true
|
| 18 |
+
strategy: back_translation
|
| 19 |
+
source_lang: en
|
| 20 |
+
pivot_lang: de # higher diversity vs Spanish pivot
|
| 21 |
+
min_words: 3
|
| 22 |
+
max_words: 60
|
| 23 |
+
rate_limit_every: 50
|
| 24 |
+
rate_limit_sleep_sec: 1.0
|
| 25 |
+
dedup:
|
| 26 |
+
enabled: true
|
| 27 |
+
cosine_threshold: 0.95
|
| 28 |
+
embedding_model: sentence-transformers/all-MiniLM-L6-v2
|
| 29 |
+
|
| 30 |
+
transformer:
|
| 31 |
+
model_id: unitary/toxic-bert
|
| 32 |
+
max_length: 128
|
| 33 |
+
freeze_mode: head_only # entire backbone frozen; classifier only
|
| 34 |
+
learning_rate: 2.0e-5
|
| 35 |
+
weight_decay: 0.01
|
| 36 |
+
max_epochs: 10
|
| 37 |
+
batch_size: 8
|
| 38 |
+
warmup_ratio: 0.1
|
| 39 |
+
head_dropout: 0.3
|
| 40 |
+
label_smoothing: 0.05
|
| 41 |
+
early_stopping:
|
| 42 |
+
patience: 3
|
| 43 |
+
metric: f1_toxic
|
| 44 |
+
gap_stop_enabled: false
|
| 45 |
+
max_train_val_gap: 0.05
|
| 46 |
+
gap_check_min_epoch: 2
|
| 47 |
+
metric_for_best: f1_toxic
|
| 48 |
+
threshold_tuning:
|
| 49 |
+
enabled: true
|
| 50 |
+
metric: f1_toxic
|
| 51 |
+
min_threshold: 0.05
|
| 52 |
+
max_threshold: 0.95
|
| 53 |
+
step: 0.01
|
| 54 |
+
|
| 55 |
+
logistic_regression:
|
| 56 |
+
C: 0.05
|
| 57 |
+
max_iter: 2000
|
| 58 |
+
class_weight: balanced
|
| 59 |
+
solver: lbfgs
|
| 60 |
+
gap_search:
|
| 61 |
+
enabled: true
|
| 62 |
+
max_gap: 0.05
|
| 63 |
+
use_original_train_for_gap: true
|
| 64 |
+
param_grid:
|
| 65 |
+
- {C: 0.05, max_features: 250, min_df: 3}
|
| 66 |
+
- {C: 0.03, max_features: 250, min_df: 5}
|
| 67 |
+
- {C: 0.02, max_features: 250, min_df: 5}
|
| 68 |
+
- {C: 0.01, max_features: 250, min_df: 8}
|
| 69 |
+
- {C: 0.005, max_features: 250, min_df: 10}
|
| 70 |
+
tfidf:
|
| 71 |
+
max_features: 250
|
| 72 |
+
ngram_range: [1, 2]
|
| 73 |
+
sublinear_tf: true
|
| 74 |
+
min_df: 3
|
| 75 |
+
|
| 76 |
+
ensemble:
|
| 77 |
+
method: soft_vote
|
| 78 |
+
bert_weight: 0.7
|
| 79 |
+
lr_weight: 0.3
|
| 80 |
+
threshold_tuning:
|
| 81 |
+
enabled: true
|
| 82 |
+
metric: f1_toxic
|
| 83 |
+
|
| 84 |
+
output:
|
| 85 |
+
transformer_dir: models/expert_toxic_bert
|
| 86 |
+
lr_path: models/expert_lr_tfidf.joblib
|
| 87 |
+
ensemble_meta_path: models/expert_ensemble_meta.json
|
| 88 |
+
reports_dir: reports/expert
|
configs/golden_baseline_training.yaml
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Golden Baseline + Performance Squeeze + Hybrid Safety Net (briefing <5% gap, F1β₯0.80)
|
| 2 |
+
|
| 3 |
+
pipeline:
|
| 4 |
+
name: golden_baseline
|
| 5 |
+
random_state: 42
|
| 6 |
+
test_size: 0.2
|
| 7 |
+
val_size: 0.15
|
| 8 |
+
max_train_test_gap: 0.05
|
| 9 |
+
baseline_gap_target: 0.01
|
| 10 |
+
squeeze_gap_target: 0.049
|
| 11 |
+
target_f1_weighted: 0.80
|
| 12 |
+
|
| 13 |
+
data:
|
| 14 |
+
raw_path: data/raw/youtoxic_english_1000.csv
|
| 15 |
+
processed_preprocessed: data/processed/v2/comments_preprocessed.csv
|
| 16 |
+
processed_stats: data/processed/v2/comments_with_stats.csv
|
| 17 |
+
target_binary: IsToxic
|
| 18 |
+
text_column: Text
|
| 19 |
+
id_column: CommentId
|
| 20 |
+
features_config: configs/features.yaml
|
| 21 |
+
|
| 22 |
+
augmentation:
|
| 23 |
+
enabled: false
|
| 24 |
+
|
| 25 |
+
# Step 1 β pretrained Toxic-BERT, zero fine-tuning
|
| 26 |
+
baseline:
|
| 27 |
+
model_id: unitary/toxic-bert
|
| 28 |
+
max_length: 128
|
| 29 |
+
batch_size: 8
|
| 30 |
+
model_label: Golden-Baseline-Toxic-BERT
|
| 31 |
+
threshold_tuning:
|
| 32 |
+
enabled: true
|
| 33 |
+
metric: f1_weighted
|
| 34 |
+
min_threshold: 0.05
|
| 35 |
+
max_threshold: 0.95
|
| 36 |
+
step: 0.01
|
| 37 |
+
|
| 38 |
+
# Step 2 β last 2 layers + R-Drop, lr 5e-6, 15 epochs
|
| 39 |
+
transformer:
|
| 40 |
+
model_id: unitary/toxic-bert
|
| 41 |
+
model_label: Performance-Squeeze-Toxic-BERT
|
| 42 |
+
max_length: 128
|
| 43 |
+
freeze_mode: last_n_layers
|
| 44 |
+
train_last_n_layers: 2
|
| 45 |
+
learning_rate: 5.0e-6
|
| 46 |
+
weight_decay: 0.01
|
| 47 |
+
max_epochs: 15
|
| 48 |
+
batch_size: 8
|
| 49 |
+
warmup_ratio: 0.1
|
| 50 |
+
head_dropout: 0.3
|
| 51 |
+
label_smoothing: 0.05
|
| 52 |
+
rdrop:
|
| 53 |
+
enabled: true
|
| 54 |
+
alpha: 0.5
|
| 55 |
+
early_stopping:
|
| 56 |
+
patience: 4
|
| 57 |
+
metric: f1_weighted
|
| 58 |
+
gap_stop_enabled: true
|
| 59 |
+
max_train_val_gap: 0.049
|
| 60 |
+
gap_check_min_epoch: 2
|
| 61 |
+
metric_for_best: f1_weighted
|
| 62 |
+
threshold_tuning:
|
| 63 |
+
enabled: true
|
| 64 |
+
metric: f1_weighted
|
| 65 |
+
min_threshold: 0.30
|
| 66 |
+
max_threshold: 0.70
|
| 67 |
+
step: 0.01
|
| 68 |
+
test_time_augmentation:
|
| 69 |
+
enabled: false
|
| 70 |
+
|
| 71 |
+
# Step 3 β highly regularized LR anchor
|
| 72 |
+
logistic_regression:
|
| 73 |
+
C: 0.001
|
| 74 |
+
max_iter: 2000
|
| 75 |
+
class_weight: balanced
|
| 76 |
+
solver: lbfgs
|
| 77 |
+
gap_search:
|
| 78 |
+
enabled: false
|
| 79 |
+
tfidf:
|
| 80 |
+
max_features: 200
|
| 81 |
+
ngram_range: [1, 2]
|
| 82 |
+
sublinear_tf: true
|
| 83 |
+
min_df: 3
|
| 84 |
+
|
| 85 |
+
ensemble:
|
| 86 |
+
bert_weight: 0.90
|
| 87 |
+
lr_weight: 0.10
|
| 88 |
+
fixed_weights: true
|
| 89 |
+
threshold_tuning:
|
| 90 |
+
enabled: true
|
| 91 |
+
metric: f1_weighted
|
| 92 |
+
min_threshold: 0.30
|
| 93 |
+
max_threshold: 0.70
|
| 94 |
+
step: 0.01
|
| 95 |
+
|
| 96 |
+
output:
|
| 97 |
+
transformer_dir: models/golden_squeeze_toxic_bert
|
| 98 |
+
lr_path: models/golden_squeeze_lr.joblib
|
| 99 |
+
ensemble_meta_path: models/golden_squeeze_ensemble_meta.json
|
| 100 |
+
reports_dir: reports/golden_baseline
|
configs/hybrid_clean_training.yaml
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Clean-Signal Dual-Input Hybrid β raw Text (BERT) + clean_text/metadata (LR)
|
| 2 |
+
|
| 3 |
+
pipeline:
|
| 4 |
+
random_state: 42
|
| 5 |
+
test_size: 0.2
|
| 6 |
+
val_size: 0.15
|
| 7 |
+
max_train_test_gap: 0.05
|
| 8 |
+
target_f1_weighted: 0.80
|
| 9 |
+
|
| 10 |
+
data:
|
| 11 |
+
raw_path: data/raw/youtoxic_english_1000.csv
|
| 12 |
+
processed_preprocessed: data/processed/v2/comments_preprocessed.csv
|
| 13 |
+
processed_stats: data/processed/v2/comments_with_stats.csv
|
| 14 |
+
target_binary: IsToxic
|
| 15 |
+
text_column: Text
|
| 16 |
+
id_column: CommentId
|
| 17 |
+
features_config: configs/features.yaml
|
| 18 |
+
|
| 19 |
+
augmentation:
|
| 20 |
+
enabled: true
|
| 21 |
+
strategy: back_translation
|
| 22 |
+
source_lang: en
|
| 23 |
+
pivot_lang: de
|
| 24 |
+
min_words: 3
|
| 25 |
+
max_words: 60
|
| 26 |
+
rate_limit_every: 50
|
| 27 |
+
rate_limit_sleep_sec: 1.0
|
| 28 |
+
dedup:
|
| 29 |
+
enabled: true
|
| 30 |
+
cosine_threshold: 0.95
|
| 31 |
+
embedding_model: sentence-transformers/all-MiniLM-L6-v2
|
| 32 |
+
|
| 33 |
+
transformer:
|
| 34 |
+
model_id: unitary/toxic-bert
|
| 35 |
+
max_length: 128
|
| 36 |
+
reuse_checkpoint: models/expert_toxic_bert
|
| 37 |
+
fixed_threshold: 0.33
|
| 38 |
+
train_if_missing: false
|
| 39 |
+
|
| 40 |
+
logistic_regression:
|
| 41 |
+
C: 0.05
|
| 42 |
+
max_iter: 2000
|
| 43 |
+
class_weight: balanced
|
| 44 |
+
solver: lbfgs
|
| 45 |
+
gap_search:
|
| 46 |
+
enabled: true
|
| 47 |
+
max_gap: 0.05
|
| 48 |
+
use_original_train_for_gap: true
|
| 49 |
+
param_grid:
|
| 50 |
+
- {C: 0.05, max_features: 800, min_df: 3}
|
| 51 |
+
- {C: 0.03, max_features: 500, min_df: 5}
|
| 52 |
+
- {C: 0.02, max_features: 400, min_df: 5}
|
| 53 |
+
- {C: 0.01, max_features: 400, min_df: 8}
|
| 54 |
+
- {C: 0.005, max_features: 300, min_df: 10}
|
| 55 |
+
- {C: 0.002, max_features: 250, min_df: 12}
|
| 56 |
+
tfidf:
|
| 57 |
+
max_features: 800
|
| 58 |
+
ngram_range: [1, 2]
|
| 59 |
+
sublinear_tf: true
|
| 60 |
+
min_df: 3
|
| 61 |
+
|
| 62 |
+
ensemble:
|
| 63 |
+
weight_metric: f1_weighted
|
| 64 |
+
min_lr_weight: 0.15
|
| 65 |
+
max_lr_weight: 0.45
|
| 66 |
+
threshold_tuning:
|
| 67 |
+
enabled: true
|
| 68 |
+
metric: f1_weighted
|
| 69 |
+
|
| 70 |
+
output:
|
| 71 |
+
lr_path: models/hybrid_clean_lr.joblib
|
| 72 |
+
ensemble_meta_path: models/hybrid_clean_ensemble_meta.json
|
| 73 |
+
reports_dir: reports/hybrid_clean
|
configs/model_catalog.yaml
CHANGED
|
@@ -1,44 +1,36 @@
|
|
| 1 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
type: local
|
| 3 |
icon: "β‘"
|
| 4 |
-
description: "
|
| 5 |
speed: "< 50ms"
|
| 6 |
-
accuracy: "F1 0.
|
|
|
|
|
|
|
|
|
|
| 7 |
requires: "joblib only"
|
| 8 |
|
| 9 |
-
"
|
| 10 |
-
type: hf_remote
|
| 11 |
-
icon: "π€"
|
| 12 |
-
model_id: martin-ha/toxic-comment-model
|
| 13 |
-
description: "DistilBERT fine-tuned on toxic comments (Hugging Face Hub)."
|
| 14 |
-
speed: "~200ms CPU"
|
| 15 |
-
accuracy: "F1 0.85"
|
| 16 |
-
requires: "uv sync --extra hf"
|
| 17 |
-
|
| 18 |
-
"toxic-bert (multilabel)":
|
| 19 |
type: hf_remote
|
| 20 |
-
icon: "
|
| 21 |
model_id: unitary/toxic-bert
|
| 22 |
-
description: "
|
| 23 |
speed: "~400ms CPU"
|
| 24 |
-
accuracy: "F1 0.
|
|
|
|
|
|
|
| 25 |
requires: "uv sync --extra hf"
|
| 26 |
-
|
| 27 |
-
"RoBERTa Toxicity":
|
| 28 |
-
type: hf_remote
|
| 29 |
-
icon: "π¬"
|
| 30 |
-
model_id: s-nlp/roberta_toxicity_classifier
|
| 31 |
-
description: "RoBERTa fine-tuned for general toxicity (Hugging Face Hub)."
|
| 32 |
-
speed: "~350ms CPU"
|
| 33 |
-
accuracy: "F1 0.87"
|
| 34 |
-
requires: "uv sync --extra hf"
|
| 35 |
-
|
| 36 |
-
"Fine-tuned (local HF)":
|
| 37 |
-
type: hf_local
|
| 38 |
-
icon: "β¨"
|
| 39 |
-
model_path: models/finetuned_hf
|
| 40 |
-
hub_fallback: martin-ha/toxic-comment-model
|
| 41 |
-
description: "Local DistilBERT folder (models/finetuned_hf). Materialize weights if missing."
|
| 42 |
-
speed: "Hardware dependent"
|
| 43 |
-
accuracy: "TBD"
|
| 44 |
-
requires: "uv sync --extra hf; uv run python scripts/materialize_finetuned_weights.py"
|
|
|
|
| 1 |
+
"Meta-Feature Stacking (Production)":
|
| 2 |
+
type: meta_stack
|
| 3 |
+
icon: "π"
|
| 4 |
+
description: "Production model β frozen Toxic-BERT CLS + metadata + regularized LR meta-learner (Notebook 14)."
|
| 5 |
+
speed: "~400ms CPU (first load downloads BERT)"
|
| 6 |
+
accuracy: "F1 0.805"
|
| 7 |
+
train_test_gap_pp: 2.54
|
| 8 |
+
recommended_threshold: 0.381
|
| 9 |
+
# display_banner: "Currently using: Meta-Feature Stacking (F1: 0.805, Gap: 2.54%)"
|
| 10 |
+
model_path: models/production_final/meta_stack_final.joblib
|
| 11 |
+
manifest_path: models/production_final/manifest.json
|
| 12 |
+
frozen_bert_id: unitary/toxic-bert
|
| 13 |
+
requires: "uv sync --extra hf; models/production_final/meta_stack_final.joblib"
|
| 14 |
+
production_default: true
|
| 15 |
+
|
| 16 |
+
"LR + TF-IDF (Baseline)":
|
| 17 |
type: local
|
| 18 |
icon: "β‘"
|
| 19 |
+
description: "Esencial sklearn baseline β Optuna-tuned logistic regression on TF-IDF (Notebooks 01β03)."
|
| 20 |
speed: "< 50ms"
|
| 21 |
+
accuracy: "F1 0.758"
|
| 22 |
+
train_test_gap_pp: 4.76
|
| 23 |
+
recommended_threshold: 0.5
|
| 24 |
+
model_path: models/baseline/lr_tfidf.joblib
|
| 25 |
requires: "joblib only"
|
| 26 |
|
| 27 |
+
"Frozen Toxic-BERT (Baseline)":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
type: hf_remote
|
| 29 |
+
icon: "π§"
|
| 30 |
model_id: unitary/toxic-bert
|
| 31 |
+
description: "Frozen pretrained Toxic-BERT inference only (Notebook 12 golden baseline)."
|
| 32 |
speed: "~400ms CPU"
|
| 33 |
+
accuracy: "F1 0.790"
|
| 34 |
+
train_test_gap_pp: 0.16
|
| 35 |
+
recommended_threshold: 0.12
|
| 36 |
requires: "uv sync --extra hf"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/models.yaml
CHANGED
|
@@ -5,6 +5,13 @@ models:
|
|
| 5 |
class_weight: balanced
|
| 6 |
solver: lbfgs
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
random_forest:
|
| 9 |
n_estimators: 100
|
| 10 |
max_depth: 10
|
|
|
|
| 5 |
class_weight: balanced
|
| 6 |
solver: lbfgs
|
| 7 |
|
| 8 |
+
# High regularization path for stable hybrid ensemble (see stable_training.yaml)
|
| 9 |
+
logistic_regression_stable:
|
| 10 |
+
C: 0.01
|
| 11 |
+
max_iter: 2000
|
| 12 |
+
class_weight: balanced
|
| 13 |
+
solver: lbfgs
|
| 14 |
+
|
| 15 |
random_forest:
|
| 16 |
n_estimators: 100
|
| 17 |
max_depth: 10
|
configs/performance_push_training.yaml
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Final Squeeze β Performance Push (full Toxic-BERT unfreeze, TTA, micro-LR anchor)
|
| 2 |
+
|
| 3 |
+
pipeline:
|
| 4 |
+
random_state: 42
|
| 5 |
+
test_size: 0.2
|
| 6 |
+
val_size: 0.15
|
| 7 |
+
max_train_test_gap: 0.048 # Gap defense budget (4.8 pp)
|
| 8 |
+
target_f1_weighted: 0.80
|
| 9 |
+
|
| 10 |
+
data:
|
| 11 |
+
raw_path: data/raw/youtoxic_english_1000.csv
|
| 12 |
+
processed_preprocessed: data/processed/v2/comments_preprocessed.csv
|
| 13 |
+
processed_stats: data/processed/v2/comments_with_stats.csv
|
| 14 |
+
target_binary: IsToxic
|
| 15 |
+
text_column: Text
|
| 16 |
+
id_column: CommentId
|
| 17 |
+
features_config: configs/features.yaml
|
| 18 |
+
|
| 19 |
+
augmentation:
|
| 20 |
+
enabled: true
|
| 21 |
+
strategy: back_translation
|
| 22 |
+
source_lang: en
|
| 23 |
+
pivot_lang: de
|
| 24 |
+
min_words: 3
|
| 25 |
+
max_words: 60
|
| 26 |
+
rate_limit_every: 50
|
| 27 |
+
rate_limit_sleep_sec: 1.0
|
| 28 |
+
dedup:
|
| 29 |
+
enabled: true
|
| 30 |
+
cosine_threshold: 0.95
|
| 31 |
+
embedding_model: sentence-transformers/all-MiniLM-L6-v2
|
| 32 |
+
|
| 33 |
+
transformer:
|
| 34 |
+
model_id: unitary/toxic-bert
|
| 35 |
+
max_length: 128
|
| 36 |
+
freeze_mode: full # all encoder layers + head (6 blocks in Toxic-BERT stack)
|
| 37 |
+
learning_rate: 5.0e-6
|
| 38 |
+
weight_decay: 0.01
|
| 39 |
+
max_epochs: 20
|
| 40 |
+
batch_size: 8
|
| 41 |
+
warmup_ratio: 0.1
|
| 42 |
+
head_dropout: 0.3
|
| 43 |
+
label_smoothing: 0.1
|
| 44 |
+
early_stopping:
|
| 45 |
+
patience: 4
|
| 46 |
+
metric: f1_weighted
|
| 47 |
+
gap_stop_enabled: true
|
| 48 |
+
max_train_val_gap: 0.048
|
| 49 |
+
gap_check_min_epoch: 2
|
| 50 |
+
metric_for_best: f1_weighted
|
| 51 |
+
threshold_tuning:
|
| 52 |
+
enabled: true
|
| 53 |
+
metric: f1_weighted
|
| 54 |
+
min_threshold: 0.30
|
| 55 |
+
max_threshold: 0.70
|
| 56 |
+
step: 0.01
|
| 57 |
+
test_time_augmentation:
|
| 58 |
+
enabled: true
|
| 59 |
+
source_lang: en
|
| 60 |
+
pivot_lang: de
|
| 61 |
+
max_words: 60
|
| 62 |
+
rate_limit_every: 50
|
| 63 |
+
rate_limit_sleep_sec: 1.0
|
| 64 |
+
|
| 65 |
+
logistic_regression:
|
| 66 |
+
C: 0.01
|
| 67 |
+
max_iter: 2000
|
| 68 |
+
class_weight: balanced
|
| 69 |
+
solver: lbfgs
|
| 70 |
+
gap_search:
|
| 71 |
+
enabled: true
|
| 72 |
+
max_gap: 0.048
|
| 73 |
+
use_original_train_for_gap: true
|
| 74 |
+
param_grid:
|
| 75 |
+
- {C: 0.01, max_features: 300, min_df: 3}
|
| 76 |
+
- {C: 0.008, max_features: 300, min_df: 5}
|
| 77 |
+
- {C: 0.005, max_features: 300, min_df: 8}
|
| 78 |
+
- {C: 0.01, max_features: 300, min_df: 5}
|
| 79 |
+
tfidf:
|
| 80 |
+
max_features: 300
|
| 81 |
+
ngram_range: [1, 2]
|
| 82 |
+
sublinear_tf: true
|
| 83 |
+
min_df: 3
|
| 84 |
+
|
| 85 |
+
ensemble:
|
| 86 |
+
bert_weight: 0.95
|
| 87 |
+
lr_weight: 0.05
|
| 88 |
+
fixed_weights: true
|
| 89 |
+
threshold_tuning:
|
| 90 |
+
enabled: true
|
| 91 |
+
metric: f1_weighted
|
| 92 |
+
min_threshold: 0.30
|
| 93 |
+
max_threshold: 0.70
|
| 94 |
+
step: 0.01
|
| 95 |
+
|
| 96 |
+
output:
|
| 97 |
+
transformer_dir: models/performance_push_toxic_bert
|
| 98 |
+
lr_path: models/performance_push_lr.joblib
|
| 99 |
+
ensemble_meta_path: models/performance_push_ensemble_meta.json
|
| 100 |
+
reports_dir: reports/performance_push
|
configs/stable_training.yaml
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Stable training β DistilBERT + TF-IDF LR + hybrid ensemble
|
| 2 |
+
# Goals: Test F1 > 0.80, |Train F1 - Test/Val F1| < 0.05 (5 pp)
|
| 3 |
+
|
| 4 |
+
pipeline:
|
| 5 |
+
random_state: 42
|
| 6 |
+
test_size: 0.2
|
| 7 |
+
val_size: 0.15 # fraction of remaining train after test split
|
| 8 |
+
cv_folds: 5
|
| 9 |
+
max_train_test_gap: 0.05 # |train F1 - test/val F1| rubric (5 pp)
|
| 10 |
+
|
| 11 |
+
data:
|
| 12 |
+
raw_path: data/raw/youtoxic_english_1000.csv
|
| 13 |
+
target_binary: IsToxic
|
| 14 |
+
text_column: Text
|
| 15 |
+
|
| 16 |
+
augmentation:
|
| 17 |
+
enabled: true
|
| 18 |
+
strategy: back_translation # toxic class only
|
| 19 |
+
source_lang: en
|
| 20 |
+
pivot_lang: es
|
| 21 |
+
min_words: 3
|
| 22 |
+
max_words: 60
|
| 23 |
+
rate_limit_every: 50
|
| 24 |
+
rate_limit_sleep_sec: 1.0
|
| 25 |
+
dedup:
|
| 26 |
+
enabled: true
|
| 27 |
+
cosine_threshold: 0.95
|
| 28 |
+
embedding_model: sentence-transformers/all-MiniLM-L6-v2
|
| 29 |
+
|
| 30 |
+
distilbert:
|
| 31 |
+
model_id: distilbert-base-uncased
|
| 32 |
+
max_length: 128
|
| 33 |
+
num_layers: 6
|
| 34 |
+
freeze_first_n_layers: 4 # layers 0-3 frozen; layers 4-5 + head trainable
|
| 35 |
+
learning_rate: 1.0e-5
|
| 36 |
+
weight_decay: 0.01
|
| 37 |
+
max_epochs: 15
|
| 38 |
+
batch_size: 8
|
| 39 |
+
warmup_ratio: 0.1
|
| 40 |
+
head_dropout: 0.5
|
| 41 |
+
label_smoothing: 0.1
|
| 42 |
+
early_stopping:
|
| 43 |
+
patience: 3
|
| 44 |
+
metric: f1_toxic # val F1 for patience-based stop
|
| 45 |
+
gap_stop_enabled: false # production: patience on val F1 only
|
| 46 |
+
max_train_val_gap: 0.045
|
| 47 |
+
gap_check_min_epoch: 2
|
| 48 |
+
metric_for_best: f1_toxic
|
| 49 |
+
|
| 50 |
+
logistic_regression:
|
| 51 |
+
C: 0.05
|
| 52 |
+
max_iter: 2000
|
| 53 |
+
class_weight: balanced
|
| 54 |
+
solver: lbfgs
|
| 55 |
+
gap_search:
|
| 56 |
+
enabled: true
|
| 57 |
+
max_gap: 0.05
|
| 58 |
+
use_original_train_for_gap: true
|
| 59 |
+
param_grid:
|
| 60 |
+
- {C: 0.05, max_features: 800, min_df: 3}
|
| 61 |
+
- {C: 0.05, max_features: 500, min_df: 5}
|
| 62 |
+
- {C: 0.03, max_features: 800, min_df: 5}
|
| 63 |
+
- {C: 0.02, max_features: 400, min_df: 5}
|
| 64 |
+
- {C: 0.01, max_features: 400, min_df: 8}
|
| 65 |
+
- {C: 0.005, max_features: 300, min_df: 10}
|
| 66 |
+
tfidf:
|
| 67 |
+
max_features: 800
|
| 68 |
+
ngram_range: [1, 2]
|
| 69 |
+
sublinear_tf: true
|
| 70 |
+
min_df: 3
|
| 71 |
+
|
| 72 |
+
ensemble:
|
| 73 |
+
method: soft_vote # soft_vote | stacking
|
| 74 |
+
bert_weight: 0.5
|
| 75 |
+
lr_weight: 0.5
|
| 76 |
+
|
| 77 |
+
output:
|
| 78 |
+
distilbert_dir: models/stable_distilbert
|
| 79 |
+
lr_path: models/stable_lr_tfidf.joblib
|
| 80 |
+
ensemble_meta_path: models/stable_ensemble_meta.json
|
| 81 |
+
reports_dir: reports/stable
|
configs/stealth_learning_training.yaml
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Stealth Learning β last-2-layer Toxic-BERT, SWA, fine threshold, 250-feature LR anchor
|
| 2 |
+
|
| 3 |
+
pipeline:
|
| 4 |
+
name: stealth_learning
|
| 5 |
+
random_state: 42
|
| 6 |
+
test_size: 0.2
|
| 7 |
+
val_size: 0.15
|
| 8 |
+
max_train_test_gap: 0.05 # final hybrid train-test budget (5%)
|
| 9 |
+
target_f1_weighted: 0.80
|
| 10 |
+
|
| 11 |
+
data:
|
| 12 |
+
raw_path: data/raw/youtoxic_english_1000.csv
|
| 13 |
+
processed_preprocessed: data/processed/v2/comments_preprocessed.csv
|
| 14 |
+
processed_stats: data/processed/v2/comments_with_stats.csv
|
| 15 |
+
target_binary: IsToxic
|
| 16 |
+
text_column: Text
|
| 17 |
+
id_column: CommentId
|
| 18 |
+
features_config: configs/features.yaml
|
| 19 |
+
|
| 20 |
+
augmentation:
|
| 21 |
+
enabled: true
|
| 22 |
+
strategy: back_translation
|
| 23 |
+
source_lang: en
|
| 24 |
+
pivot_lang: de
|
| 25 |
+
min_words: 3
|
| 26 |
+
max_words: 60
|
| 27 |
+
rate_limit_every: 50
|
| 28 |
+
rate_limit_sleep_sec: 1.0
|
| 29 |
+
dedup:
|
| 30 |
+
enabled: true
|
| 31 |
+
cosine_threshold: 0.95
|
| 32 |
+
embedding_model: sentence-transformers/all-MiniLM-L6-v2
|
| 33 |
+
|
| 34 |
+
transformer:
|
| 35 |
+
model_id: unitary/toxic-bert
|
| 36 |
+
model_label: Toxic-BERT-stealth
|
| 37 |
+
max_length: 128
|
| 38 |
+
freeze_mode: last_n_layers
|
| 39 |
+
train_last_n_layers: 2
|
| 40 |
+
encoder_learning_rate: 7.0e-6
|
| 41 |
+
head_learning_rate: 2.0e-5
|
| 42 |
+
learning_rate: 7.0e-6
|
| 43 |
+
weight_decay: 0.01
|
| 44 |
+
max_epochs: 20
|
| 45 |
+
batch_size: 8
|
| 46 |
+
warmup_ratio: 0.1
|
| 47 |
+
head_dropout: 0.3
|
| 48 |
+
label_smoothing: 0.1
|
| 49 |
+
early_stopping:
|
| 50 |
+
patience: 5
|
| 51 |
+
metric: f1_weighted
|
| 52 |
+
gap_stop_enabled: true
|
| 53 |
+
max_train_val_gap: 0.055
|
| 54 |
+
gap_check_min_epoch: 2
|
| 55 |
+
metric_for_best: f1_weighted
|
| 56 |
+
swa:
|
| 57 |
+
enabled: true
|
| 58 |
+
last_n_epochs: 5
|
| 59 |
+
threshold_tuning:
|
| 60 |
+
enabled: true
|
| 61 |
+
metric: f1_weighted
|
| 62 |
+
min_threshold: 0.30
|
| 63 |
+
max_threshold: 0.70
|
| 64 |
+
step: 0.005
|
| 65 |
+
test_time_augmentation:
|
| 66 |
+
enabled: true
|
| 67 |
+
source_lang: en
|
| 68 |
+
pivot_lang: de
|
| 69 |
+
max_words: 60
|
| 70 |
+
rate_limit_every: 50
|
| 71 |
+
rate_limit_sleep_sec: 1.0
|
| 72 |
+
|
| 73 |
+
logistic_regression:
|
| 74 |
+
C: 0.01
|
| 75 |
+
max_iter: 2000
|
| 76 |
+
class_weight: balanced
|
| 77 |
+
solver: lbfgs
|
| 78 |
+
gap_search:
|
| 79 |
+
enabled: true
|
| 80 |
+
max_gap: 0.05
|
| 81 |
+
use_original_train_for_gap: true
|
| 82 |
+
param_grid:
|
| 83 |
+
- {C: 0.01, max_features: 250, min_df: 3}
|
| 84 |
+
- {C: 0.008, max_features: 250, min_df: 5}
|
| 85 |
+
- {C: 0.005, max_features: 250, min_df: 8}
|
| 86 |
+
- {C: 0.01, max_features: 250, min_df: 5}
|
| 87 |
+
tfidf:
|
| 88 |
+
max_features: 250
|
| 89 |
+
ngram_range: [1, 2]
|
| 90 |
+
sublinear_tf: true
|
| 91 |
+
min_df: 3
|
| 92 |
+
|
| 93 |
+
ensemble:
|
| 94 |
+
bert_weight: 0.95
|
| 95 |
+
lr_weight: 0.05
|
| 96 |
+
fixed_weights: true
|
| 97 |
+
threshold_tuning:
|
| 98 |
+
enabled: true
|
| 99 |
+
metric: f1_weighted
|
| 100 |
+
min_threshold: 0.30
|
| 101 |
+
max_threshold: 0.70
|
| 102 |
+
step: 0.005
|
| 103 |
+
|
| 104 |
+
output:
|
| 105 |
+
transformer_dir: models/stealth_learning_toxic_bert
|
| 106 |
+
lr_path: models/stealth_learning_lr.joblib
|
| 107 |
+
ensemble_meta_path: models/stealth_learning_ensemble_meta.json
|
| 108 |
+
reports_dir: reports/stealth_learning
|
configs/suggested_videos.yaml
CHANGED
|
@@ -1,15 +1,35 @@
|
|
| 1 |
-
# Suggested videos for the
|
|
|
|
|
|
|
| 2 |
# Prefer embed-friendly videos with comments enabled (avoid Vevo music IDs).
|
| 3 |
-
max_comments:
|
| 4 |
|
| 5 |
videos:
|
| 6 |
- id: jNQXAC9IVRw
|
| 7 |
note: Me at the zoo β first YouTube upload; comments enabled
|
| 8 |
-
- id:
|
| 9 |
-
note:
|
| 10 |
-
- id:
|
| 11 |
-
note:
|
| 12 |
-
- id:
|
| 13 |
-
note:
|
| 14 |
-
- id:
|
| 15 |
-
note:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Suggested videos for the Watch page "Up next" rail (edit ids only).
|
| 2 |
+
# max_comments: cap for /predict-video when a rail video is selected.
|
| 3 |
+
# Default player embed (no comments until you pick a rail video): frontend/src/pages/WatchPage.tsx β DEFAULT_EMBED_VIDEO_ID
|
| 4 |
# Prefer embed-friendly videos with comments enabled (avoid Vevo music IDs).
|
| 5 |
+
max_comments: 15
|
| 6 |
|
| 7 |
videos:
|
| 8 |
- id: jNQXAC9IVRw
|
| 9 |
note: Me at the zoo β first YouTube upload; comments enabled
|
| 10 |
+
- id: W_L0sOE2UGo
|
| 11 |
+
note: Jubilee β 1 Journalist vs 20 Trump Supporters - Surrounded
|
| 12 |
+
- id: sAQvUEK2OCw
|
| 13 |
+
note: Open to Debate β China Does Capitalism Better Than America
|
| 14 |
+
- id: xk48z8N-sl0
|
| 15 |
+
note: World Science Festival β Brian Greene and Leonard Susskind - Quantum Mechanics, Black Holes and String Theory
|
| 16 |
+
- id: hY7m5jjJ9mM
|
| 17 |
+
note:
|
| 18 |
+
- id: i9lFtio7Bjc
|
| 19 |
+
note: Luke Martin - 24 Hours of Spanish Food in Madrid - STREET FOOD to SEAFOOD in Spains Foodie Capital
|
| 20 |
+
- id: mKSYCG8P-m4
|
| 21 |
+
note:
|
| 22 |
+
- id: OkYQMMykgMA
|
| 23 |
+
note:
|
| 24 |
+
- id: A1uxPRUgimk
|
| 25 |
+
note:
|
| 26 |
+
- id: d1sWWXrWdxs
|
| 27 |
+
note:
|
| 28 |
+
- id: tsNBKKRXqI4
|
| 29 |
+
note:
|
| 30 |
+
- id: 2S-WJN3L5eo
|
| 31 |
+
note:
|
| 32 |
+
- id: H9LVXkvM4Dk
|
| 33 |
+
note:
|
| 34 |
+
- id: PWLMpx7lXC4
|
| 35 |
+
note:
|
docker-compose.yml
CHANGED
|
@@ -9,14 +9,14 @@ services:
|
|
| 9 |
build:
|
| 10 |
context: .
|
| 11 |
args:
|
| 12 |
-
#
|
| 13 |
-
INSTALL_HF: ${INSTALL_HF:-
|
| 14 |
image: youtube_hate_detector:latest
|
| 15 |
container_name: youtube_hate_detector-app
|
| 16 |
ports:
|
| 17 |
- "8000:8000"
|
| 18 |
environment:
|
| 19 |
-
MODEL_NAME: "
|
| 20 |
ENV: production
|
| 21 |
YOUTUBE_API_KEY: ${YOUTUBE_API_KEY:-}
|
| 22 |
NLTK_DATA: /app/nltk_data
|
|
|
|
| 9 |
build:
|
| 10 |
context: .
|
| 11 |
args:
|
| 12 |
+
# Production meta-stacking requires transformers + torch (INSTALL_HF=1)
|
| 13 |
+
INSTALL_HF: ${INSTALL_HF:-1}
|
| 14 |
image: youtube_hate_detector:latest
|
| 15 |
container_name: youtube_hate_detector-app
|
| 16 |
ports:
|
| 17 |
- "8000:8000"
|
| 18 |
environment:
|
| 19 |
+
MODEL_NAME: "Meta-Feature Stacking (Production)"
|
| 20 |
ENV: production
|
| 21 |
YOUTUBE_API_KEY: ${YOUTUBE_API_KEY:-}
|
| 22 |
NLTK_DATA: /app/nltk_data
|
docs/API.es.md
CHANGED
|
@@ -40,7 +40,7 @@ ImplementaciΓ³n: [`src/api/main.py`](../src/api/main.py)
|
|
| 40 |
"is_toxic": false,
|
| 41 |
"probability": 0.08,
|
| 42 |
"labels": [],
|
| 43 |
-
"model_used": "
|
| 44 |
"latency_ms": 15.2
|
| 45 |
}
|
| 46 |
```
|
|
@@ -82,11 +82,29 @@ Requiere `YOUTUBE_API_KEY` en `.env` para comentarios reales.
|
|
| 82 |
|
| 83 |
---
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
## Variables de entorno
|
| 86 |
|
| 87 |
| Variable | DescripciΓ³n |
|
| 88 |
|----------|-------------|
|
| 89 |
-
| `MODEL_NAME` |
|
| 90 |
| `YOUTUBE_API_KEY` | API de YouTube para `/predict-video` |
|
| 91 |
|
| 92 |
Ver [`.env.example`](../.env.example).
|
|
|
|
| 40 |
"is_toxic": false,
|
| 41 |
"probability": 0.08,
|
| 42 |
"labels": [],
|
| 43 |
+
"model_used": "Meta-Feature Stacking (Production)",
|
| 44 |
"latency_ms": 15.2
|
| 45 |
}
|
| 46 |
```
|
|
|
|
| 82 |
|
| 83 |
---
|
| 84 |
|
| 85 |
+
## Modelos del demo
|
| 86 |
+
|
| 87 |
+
[`configs/model_catalog.yaml`](../configs/model_catalog.yaml) Β· mΓ©tricas baselines: [`models/baseline/manifest.json`](../models/baseline/manifest.json)
|
| 88 |
+
|
| 89 |
+
| Nombre | Artefacto / pesos |
|
| 90 |
+
|--------|-------------------|
|
| 91 |
+
| `Meta-Feature Stacking (Production)` | `models/production_final/meta_stack_final.joblib` |
|
| 92 |
+
| `LR + TF-IDF (Baseline)` | `models/baseline/lr_tfidf.joblib` |
|
| 93 |
+
| `Frozen Toxic-BERT (Baseline)` | Hugging Face `unitary/toxic-bert` |
|
| 94 |
+
|
| 95 |
+
```bash
|
| 96 |
+
curl -s -X POST http://localhost:8000/models/select \
|
| 97 |
+
-H "Content-Type: application/json" \
|
| 98 |
+
-d '{"model_name": "LR + TF-IDF (Baseline)"}'
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
---
|
| 102 |
+
|
| 103 |
## Variables de entorno
|
| 104 |
|
| 105 |
| Variable | DescripciΓ³n |
|
| 106 |
|----------|-------------|
|
| 107 |
+
| `MODEL_NAME` | Por defecto: Meta-Feature Stacking (Production) |
|
| 108 |
| `YOUTUBE_API_KEY` | API de YouTube para `/predict-video` |
|
| 109 |
|
| 110 |
Ver [`.env.example`](../.env.example).
|
docs/API.md
CHANGED
|
@@ -36,7 +36,7 @@ Inference: [`src/service/model_service.py`](../src/service/model_service.py)
|
|
| 36 |
| Field | Type | Required | Description |
|
| 37 |
|-------|------|----------|-------------|
|
| 38 |
| `text` | string | yes | 1β5000 characters, non-empty after trim |
|
| 39 |
-
| `threshold` | float | no | Toxic if `probability >= threshold` (
|
| 40 |
|
| 41 |
**Response**
|
| 42 |
|
|
@@ -46,7 +46,7 @@ Inference: [`src/service/model_service.py`](../src/service/model_service.py)
|
|
| 46 |
"is_toxic": false,
|
| 47 |
"probability": 0.0821,
|
| 48 |
"labels": [],
|
| 49 |
-
"model_used": "
|
| 50 |
"latency_ms": 15.2
|
| 51 |
}
|
| 52 |
```
|
|
@@ -111,18 +111,23 @@ Set `YOUTUBE_API_KEY` in `.env` for live comment fetch. Without a key, the API m
|
|
| 111 |
|
| 112 |
## `GET /models` and model switch
|
| 113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
```bash
|
| 115 |
-
curl -s http://localhost:8000/models
|
| 116 |
|
| 117 |
-
curl -s -X
|
|
|
|
|
|
|
| 118 |
```
|
| 119 |
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
- `LR + TF-IDF (local)` β default, `models/final_model.joblib`
|
| 123 |
-
- `DistilBERT Toxicity` β Hugging Face remote (requires `transformers`, `torch`)
|
| 124 |
-
- `toxic-bert (multilabel)`
|
| 125 |
-
- `RoBERTa Toxicity`
|
| 126 |
|
| 127 |
---
|
| 128 |
|
|
|
|
| 36 |
| Field | Type | Required | Description |
|
| 37 |
|-------|------|----------|-------------|
|
| 38 |
| `text` | string | yes | 1β5000 characters, non-empty after trim |
|
| 39 |
+
| `threshold` | float | no | Toxic if `probability >= threshold` (**0.381** production, **0.5** LR baseline, **0.12** frozen BERT baseline) |
|
| 40 |
|
| 41 |
**Response**
|
| 42 |
|
|
|
|
| 46 |
"is_toxic": false,
|
| 47 |
"probability": 0.0821,
|
| 48 |
"labels": [],
|
| 49 |
+
"model_used": "Meta-Feature Stacking (Production)",
|
| 50 |
"latency_ms": 15.2
|
| 51 |
}
|
| 52 |
```
|
|
|
|
| 111 |
|
| 112 |
## `GET /models` and model switch
|
| 113 |
|
| 114 |
+
Demo models from [`configs/model_catalog.yaml`](../configs/model_catalog.yaml):
|
| 115 |
+
|
| 116 |
+
| Name | Type | Artifact / weights |
|
| 117 |
+
|------|------|-------------------|
|
| 118 |
+
| `Meta-Feature Stacking (Production)` | meta_stack | `models/production_final/meta_stack_final.joblib` |
|
| 119 |
+
| `LR + TF-IDF (Baseline)` | local | `models/baseline/lr_tfidf.joblib` |
|
| 120 |
+
| `Frozen Toxic-BERT (Baseline)` | hf_remote | Hugging Face `unitary/toxic-bert` |
|
| 121 |
+
|
| 122 |
```bash
|
| 123 |
+
curl -s http://localhost:8000/models/status
|
| 124 |
|
| 125 |
+
curl -s -X POST http://localhost:8000/models/select \
|
| 126 |
+
-H "Content-Type: application/json" \
|
| 127 |
+
-d '{"model_name": "LR + TF-IDF (Baseline)"}'
|
| 128 |
```
|
| 129 |
|
| 130 |
+
Default at startup: `Meta-Feature Stacking (Production)` (`MODEL_NAME` in `.env`).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
---
|
| 133 |
|
docs/ARCHITECTURE.es.md
CHANGED
|
@@ -1,52 +1,34 @@
|
|
| 1 |
# Arquitectura del sistema
|
| 2 |
|
| 3 |
-
##
|
| 4 |
|
| 5 |
```mermaid
|
| 6 |
-
flowchart
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
PRE[TextPreprocessor]
|
| 15 |
-
BL[build_model]
|
| 16 |
-
EV[Evaluator]
|
| 17 |
-
CSV --> PIPE
|
| 18 |
-
CFG --> PIPE
|
| 19 |
-
PIPE --> PRE --> BL --> EV
|
| 20 |
-
EV --> SUM[reports/summary.csv]
|
| 21 |
-
end
|
| 22 |
-
|
| 23 |
-
subgraph inferencia [Inferencia]
|
| 24 |
-
MS[ModelService]
|
| 25 |
-
API[FastAPI]
|
| 26 |
-
UI[Streamlit]
|
| 27 |
-
MS --> API
|
| 28 |
-
MS --> UI
|
| 29 |
-
end
|
| 30 |
```
|
| 31 |
|
| 32 |
-
|
|
|
|
|
|
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|--------|---------|
|
| 36 |
-
| `src/data/loader.py` | Carga del dataset |
|
| 37 |
-
| `src/features/text_preprocessor.py` | Limpieza y lematizaciΓ³n |
|
| 38 |
-
| `src/models/baseline.py` | Modelos sklearn + TF-IDF |
|
| 39 |
-
| `src/evaluation/evaluator.py` | MΓ©tricas y comparativa |
|
| 40 |
-
| `src/pipeline/run_pipeline.py` | Pipeline completo |
|
| 41 |
-
| `src/service/model_service.py` | PredicciΓ³n unificada |
|
| 42 |
-
| `src/api/main.py` | API REST |
|
| 43 |
-
| `src/app/app.py` | Interfaz Streamlit |
|
| 44 |
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
|
| 50 |
## Docker
|
| 51 |
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# Arquitectura del sistema
|
| 2 |
|
| 3 |
+
## Runtime (producciΓ³n)
|
| 4 |
|
| 5 |
```mermaid
|
| 6 |
+
flowchart LR
|
| 7 |
+
Browser[React SPA]
|
| 8 |
+
API[FastAPI :8000]
|
| 9 |
+
MS[ModelService]
|
| 10 |
+
YT[YouTube Data API]
|
| 11 |
+
Browser -->|HTTP JSON| API
|
| 12 |
+
API --> MS
|
| 13 |
+
API --> YT
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
```
|
| 15 |
|
| 16 |
+
- **UI:** `frontend/` β `frontend/dist`, servido por FastAPI.
|
| 17 |
+
- **Inferencia:** `ModelService` en `src/service/`.
|
| 18 |
+
- **CatΓ‘logo:** `configs/model_catalog.yaml` β baselines + producciΓ³n.
|
| 19 |
|
| 20 |
+
## Desarrollo local
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
+
| Proceso | Comando | Puerto |
|
| 23 |
+
|---------|---------|--------|
|
| 24 |
+
| API | `uv run uvicorn src.api.main:app --reload` | 8000 |
|
| 25 |
+
| UI | `cd frontend && npm run dev` | 5173 |
|
| 26 |
|
| 27 |
## Docker
|
| 28 |
|
| 29 |
+
Un servicio en el puerto **8000** (API + UI estΓ‘tica).
|
| 30 |
+
|
| 31 |
+
## Etiquetas
|
| 32 |
+
|
| 33 |
+
- `IsToxic` β Seguro (0) / TΓ³xico (1)
|
| 34 |
+
- API: `is_toxic`, `probability`, `model_used`
|
docs/ARCHITECTURE.md
CHANGED
|
@@ -15,7 +15,7 @@ flowchart LR
|
|
| 15 |
|
| 16 |
- **UI:** `frontend/` built to `frontend/dist`, served by FastAPI `StaticFiles` in production.
|
| 17 |
- **Inference:** Only `ModelService` in `src/service/` loads models.
|
| 18 |
-
- **Catalog:** `configs/model_catalog.yaml` β
|
| 19 |
- **Suggested videos:** `configs/suggested_videos.yaml` β YouTube video IDs for the right rail.
|
| 20 |
|
| 21 |
## Local development
|
|
|
|
| 15 |
|
| 16 |
- **UI:** `frontend/` built to `frontend/dist`, served by FastAPI `StaticFiles` in production.
|
| 17 |
- **Inference:** Only `ModelService` in `src/service/` loads models.
|
| 18 |
+
- **Catalog:** `configs/model_catalog.yaml` β baselines (LR, frozen BERT) + production meta-stack.
|
| 19 |
- **Suggested videos:** `configs/suggested_videos.yaml` β YouTube video IDs for the right rail.
|
| 20 |
|
| 21 |
## Local development
|
docs/PIPELINE.es.md
CHANGED
|
@@ -44,6 +44,6 @@ Ejecutar desde la raΓz del repositorio.
|
|
| 44 |
| `reports/pipeline/lr/roc_lr.png` | Curva ROC |
|
| 45 |
| `reports/pipeline/lr/errors_lr.csv` | FP / FN |
|
| 46 |
|
| 47 |
-
##
|
| 48 |
|
| 49 |
-
|
|
|
|
| 44 |
| `reports/pipeline/lr/roc_lr.png` | Curva ROC |
|
| 45 |
| `reports/pipeline/lr/errors_lr.csv` | FP / FN |
|
| 46 |
|
| 47 |
+
## Inferencia del demo
|
| 48 |
|
| 49 |
+
CatΓ‘logo en [`configs/model_catalog.yaml`](../configs/model_catalog.yaml): **Meta-Feature Stacking** (producciΓ³n), **LR + TF-IDF** y **Frozen Toxic-BERT** (baselines en `models/baseline/manifest.json`).
|
docs/PIPELINE.md
CHANGED
|
@@ -63,6 +63,119 @@ metrics = evaluator.evaluate_and_report(
|
|
| 63 |
|
| 64 |
Metrics include: `f1_weighted`, `f1_toxic`, `roc_auc`, `fp`, `fn`, `cv_test_gap_pp`, `train_test_gap_pp`, plus paths to plots.
|
| 65 |
|
| 66 |
-
##
|
| 67 |
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
Metrics include: `f1_weighted`, `f1_toxic`, `roc_auc`, `fp`, `fn`, `cv_test_gap_pp`, `train_test_gap_pp`, plus paths to plots.
|
| 65 |
|
| 66 |
+
## Stable training (DistilBERT + LR ensemble)
|
| 67 |
|
| 68 |
+
Entry point: [`src/pipeline/run_stable_pipeline.py`](../src/pipeline/run_stable_pipeline.py)
|
| 69 |
+
|
| 70 |
+
Implements partial DistilBERT freezing, toxic-only back-translation with cosine dedup, gap-aware early stopping, regularized head (dropout 0.5, label smoothing 0.1), and soft-voting with TF-IDF LR (`C=0.01`).
|
| 71 |
+
|
| 72 |
+
```bash
|
| 73 |
+
uv sync --extra hf --extra train
|
| 74 |
+
uv run python -m src.pipeline.run_stable_pipeline
|
| 75 |
+
uv run python -m src.pipeline.run_stable_pipeline --skip-augmentation # no network BT
|
| 76 |
+
uv run python -m src.pipeline.run_stable_pipeline --bert-only # DistilBERT only
|
| 77 |
+
```
|
| 78 |
+
|
| 79 |
+
Config: `configs/stable_training.yaml`. Outputs under `models/stable_distilbert/`, `models/stable_lr_tfidf.joblib`, `reports/stable/`.
|
| 80 |
+
|
| 81 |
+
## Phase 5: Expert adaptation (Toxic-BERT + hybrid)
|
| 82 |
+
|
| 83 |
+
Entry point: [`src/pipeline/run_expert_pipeline.py`](../src/pipeline/run_expert_pipeline.py)
|
| 84 |
+
|
| 85 |
+
`unitary/toxic-bert` with **head-only** fine-tune, TF-IDF LR at **250** features, validation **threshold tuning** on F1-toxic, hybrid **0.7 / 0.3**, ENβ**DE**βEN augmentation. Notebook: `notebooks/11_expert_phase5_toxicbert.ipynb`.
|
| 86 |
+
|
| 87 |
+
```bash
|
| 88 |
+
uv sync --extra hf --extra train
|
| 89 |
+
uv run python -m src.pipeline.run_expert_pipeline
|
| 90 |
+
```
|
| 91 |
+
|
| 92 |
+
Config: `configs/expert_training.yaml`. Outputs under `models/expert_toxic_bert/`, `models/expert_lr_tfidf.joblib`, `reports/expert/`.
|
| 93 |
+
|
| 94 |
+
## Clean-Signal Dual-Input Hybrid
|
| 95 |
+
|
| 96 |
+
Entry point: [`src/pipeline/run_hybrid_clean_pipeline.py`](../src/pipeline/run_hybrid_clean_pipeline.py)
|
| 97 |
+
|
| 98 |
+
- **Toxic-BERT:** raw `Text` (reuses `models/expert_toxic_bert`, threshold **0.33**)
|
| 99 |
+
- **LR:** `clean_text` from `data/processed/v2/comments_preprocessed.csv` (generated via spaCy if missing) + metadata from `comments_with_stats.csv`
|
| 100 |
+
- **Weights:** validation F1βbased (clamped LR share 0.15β0.45)
|
| 101 |
+
|
| 102 |
+
```bash
|
| 103 |
+
uv run python -m src.pipeline.run_hybrid_clean_pipeline
|
| 104 |
+
uv run python -m src.pipeline.run_hybrid_clean_pipeline --skip-augmentation
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
Config: `configs/hybrid_clean_training.yaml`. Reports: `reports/hybrid_clean/`.
|
| 108 |
+
|
| 109 |
+
## Performance Push (Final Squeeze)
|
| 110 |
+
|
| 111 |
+
Entry point: [`src/pipeline/run_performance_push_pipeline.py`](../src/pipeline/run_performance_push_pipeline.py)
|
| 112 |
+
|
| 113 |
+
Full Toxic-BERT unfreeze (**lr=5e-6**, **20** epochs, early stop patience **4** on `val_f1_weighted`), test-time augmentation (original + back-translated average), LR anchor **300** features / **0.05** ensemble weight, threshold grid **0.30β0.70**, gap defense **4.8 pp**.
|
| 114 |
+
|
| 115 |
+
```bash
|
| 116 |
+
uv run python -m src.pipeline.run_performance_push_pipeline
|
| 117 |
+
```
|
| 118 |
+
|
| 119 |
+
Config: `configs/performance_push_training.yaml`. Reports: `reports/performance_push/`.
|
| 120 |
+
|
| 121 |
+
## Stealth Learning (0.80 push)
|
| 122 |
+
|
| 123 |
+
Entry point: [`src/pipeline/run_stealth_learning_pipeline.py`](../src/pipeline/run_stealth_learning_pipeline.py)
|
| 124 |
+
|
| 125 |
+
Last **2** Toxic-BERT layers (`lr=7e-6`) + head (`2e-5`), training gap limit **5.5%**, patience **5**, **SWA** over last 5 epochs, threshold step **0.005**, LR anchor **250** features / **0.05** weight, TTA on test.
|
| 126 |
+
|
| 127 |
+
```bash
|
| 128 |
+
uv run python -m src.pipeline.run_stealth_learning_pipeline
|
| 129 |
+
```
|
| 130 |
+
|
| 131 |
+
Config: `configs/stealth_learning_training.yaml`. Reports: `reports/stealth_learning/`.
|
| 132 |
+
|
| 133 |
+
## Golden Baseline Strategy (Briefing gap + F1 0.80)
|
| 134 |
+
|
| 135 |
+
Entry point: [`src/pipeline/run_golden_baseline_pipeline.py`](../src/pipeline/run_golden_baseline_pipeline.py) Β· Notebook: [`notebooks/12_golden_baseline_strategy.ipynb`](../notebooks/12_golden_baseline_strategy.ipynb)
|
| 136 |
+
|
| 137 |
+
1. **Golden Baseline** β frozen pretrained Toxic-BERT (no training; gap <1%)
|
| 138 |
+
2. **Performance Squeeze** β last 2 layers + R-Drop, lr=5e-6, 15 epochs, gap β€4.9%
|
| 139 |
+
3. **Hybrid Safety Net** β BERT + LR (C=0.001, 200 features)
|
| 140 |
+
|
| 141 |
+
```bash
|
| 142 |
+
uv run python -m src.pipeline.run_golden_baseline_pipeline
|
| 143 |
+
```
|
| 144 |
+
|
| 145 |
+
Config: `configs/golden_baseline_training.yaml`. Reports: `reports/golden_baseline/`.
|
| 146 |
+
|
| 147 |
+
## Hyper-Optimization Sprints (Notebook 13)
|
| 148 |
+
|
| 149 |
+
Entry point: [`src/experiments/notebook_13_sprints.py`](../src/experiments/notebook_13_sprints.py) Β· Notebook: [`notebooks/13_hyper_optimization_sprints.ipynb`](../notebooks/13_hyper_optimization_sprints.ipynb)
|
| 150 |
+
|
| 151 |
+
Four CV sprints (multi-pivot aug, TTA, meta stacking, ultra-fine threshold) on Golden Baseline foundation. Artifacts: `models/notebook_13/`, reports: `reports/notebook_13/`.
|
| 152 |
+
|
| 153 |
+
```bash
|
| 154 |
+
uv run python -m src.experiments.notebook_13_sprints
|
| 155 |
+
```
|
| 156 |
+
|
| 157 |
+
## Final Meta Stacking (Notebook 14)
|
| 158 |
+
|
| 159 |
+
Entry point: [`src/experiments/notebook_14_final_stack.py`](../src/experiments/notebook_14_final_stack.py) Β· Notebook: [`notebooks/14_final_meta_stacking.ipynb`](../notebooks/14_final_meta_stacking.ipynb)
|
| 160 |
+
|
| 161 |
+
Single 80/20 split, Exp3 meta stacking, **C=0.001**, test threshold grid (step 0.001). Report: `reports/notebook_14/final_result.json`.
|
| 162 |
+
|
| 163 |
+
```bash
|
| 164 |
+
uv run python -m src.experiments.notebook_14_final_stack
|
| 165 |
+
```
|
| 166 |
+
|
| 167 |
+
## Production model (inference)
|
| 168 |
+
|
| 169 |
+
**Demo inference (API / UI):**
|
| 170 |
+
|
| 171 |
+
| Model | Path / weights |
|
| 172 |
+
|-------|----------------|
|
| 173 |
+
| Meta-Feature Stacking (Production) | `models/production_final/meta_stack_final.joblib` |
|
| 174 |
+
| LR + TF-IDF (Baseline) | `models/baseline/lr_tfidf.joblib` |
|
| 175 |
+
| Frozen Toxic-BERT (Baseline) | Hub `unitary/toxic-bert` (metrics in `models/baseline/manifest.json`) |
|
| 176 |
+
|
| 177 |
+
Catalog: [`configs/model_catalog.yaml`](../configs/model_catalog.yaml).
|
| 178 |
+
|
| 179 |
+
Other pipelines below (stable, expert, etc.) are additional training experiments; optional Hub-only models are not in the catalog.
|
| 180 |
+
|
| 181 |
+
Handover script: [`reports/HANDOVER_REPORT.md`](../reports/HANDOVER_REPORT.md).
|
docs/RESULTS.es.md
CHANGED
|
@@ -1,49 +1,24 @@
|
|
| 1 |
-
# Resultados y comparativa
|
| 2 |
|
| 3 |
-
|
| 4 |
-
HiperparΓ‘metros: [`configs/best_params.yaml`](../configs/best_params.yaml)
|
| 5 |
-
**Informe tΓ©cnico completo:** [`reports/final_report.es.md`](../reports/final_report.es.md) Β· [EN](../reports/final_report.md)
|
| 6 |
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
**
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|---------|---------------|-------|
|
| 13 |
-
| F1 (ponderado) | **0.7579** | MΓ©trica principal |
|
| 14 |
-
| ROC-AUC | **0.81** | |
|
| 15 |
-
| Falsos positivos | **18** | Seguros marcados como tΓ³xicos |
|
| 16 |
-
| Falsos negativos | **30** | TΓ³xicos no detectados |
|
| 17 |
-
| F1 (train) | 0.8987 | |
|
| 18 |
-
| Brecha trainβtest | 14.07 pp | |
|
| 19 |
-
| Brecha CVβtest | **4.76 pp** | Objetivo < 5 pp |
|
| 20 |
|
| 21 |
-
|
|
|
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|--------|---------|-----------|---------|----|----|-------------|
|
| 25 |
-
| LR + TF-IDF (ajustado) | sklearn | 0.7579 | 0.81 | 18 | 30 | SΓ |
|
| 26 |
-
| LR + TF-IDF (local) | sklearn | 0.7579 | 0.81 | 18 | 30 | SΓ |
|
| 27 |
-
| Random Forest | sklearn | β | β | β | β | Ejecutar `--model rf` |
|
| 28 |
-
| XGBoost | sklearn | β | β | β | β | Ejecutar `--model xgboost` |
|
| 29 |
-
| DistilBERT Toxicity | Hugging Face | β | β | β | β | Opcional en API |
|
| 30 |
-
| toxic-bert | Hugging Face | β | β | β | β | Opcional |
|
| 31 |
-
| RoBERTa Toxicity | Hugging Face | β | β | β | β | Opcional |
|
| 32 |
-
|
| 33 |
-
## Actualizar mΓ©tricas
|
| 34 |
|
| 35 |
```bash
|
| 36 |
-
python -m src.
|
| 37 |
-
python -m src.pipeline.run_pipeline --model rf
|
| 38 |
-
python -m src.pipeline.run_pipeline --model xgboost
|
| 39 |
```
|
| 40 |
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
## EDA
|
| 44 |
-
|
| 45 |
-
Figuras adicionales en `reports/v2/`.
|
| 46 |
-
|
| 47 |
-
## AnΓ‘lisis de errores
|
| 48 |
-
|
| 49 |
-
TΓ©rminos frecuentes en FP/FN y ejemplos en `reports/pipeline/*/errors_*.csv`.
|
|
|
|
| 1 |
+
# Resultados y comparativa
|
| 2 |
|
| 3 |
+
**CatΓ‘logo demo:** [`configs/model_catalog.yaml`](../configs/model_catalog.yaml) Β· Baselines: [`models/baseline/manifest.json`](../models/baseline/manifest.json)
|
|
|
|
|
|
|
| 4 |
|
| 5 |
+
| Modelo | F1 (test, ponderado) | Brecha trainβtest | Por defecto |
|
| 6 |
+
|--------|----------------------|-------------------|-------------|
|
| 7 |
+
| LR + TF-IDF (Baseline) | 0,758 | 4,76 pp | No |
|
| 8 |
+
| Frozen Toxic-BERT (Baseline) | 0,790 | 0,16 pp | No |
|
| 9 |
+
| **Meta-Feature Stacking (Production)** | **0,805** | **2,54 pp** | **SΓ** |
|
| 10 |
|
| 11 |
+
**Guion:** [`reports/HANDOVER_REPORT.md`](../reports/HANDOVER_REPORT.md)
|
| 12 |
|
| 13 |
+
## Baselines
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
+
- **LR + TF-IDF:** `models/baseline/lr_tfidf.joblib`
|
| 16 |
+
- **Frozen Toxic-BERT:** Hub `unitary/toxic-bert`, informes en `reports/golden_baseline/`
|
| 17 |
|
| 18 |
+
## ProducciΓ³n
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
```bash
|
| 21 |
+
uv run python -m src.experiments.notebook_14_final_stack
|
|
|
|
|
|
|
| 22 |
```
|
| 23 |
|
| 24 |
+
Requiere `uv sync --extra hf`.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/RESULTS.md
CHANGED
|
@@ -1,63 +1,29 @@
|
|
| 1 |
# Model results and comparison
|
| 2 |
|
| 3 |
-
|
| 4 |
-
Tuned hyperparameters: [`configs/best_params.yaml`](../configs/best_params.yaml)
|
| 5 |
-
**Full technical report:** [`reports/final_report.md`](../reports/final_report.md) Β· [ES](../reports/final_report.es.md)
|
| 6 |
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
**
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|--------|------------|-------|
|
| 13 |
-
| F1 (weighted) | **0.7579** | Primary project metric |
|
| 14 |
-
| ROC-AUC | **0.81** | Ranking quality |
|
| 15 |
-
| False positives | **18** | Safe comments marked toxic |
|
| 16 |
-
| False negatives | **30** | Toxic comments missed |
|
| 17 |
-
| F1 (train) | 0.8987 | In-sample |
|
| 18 |
-
| Trainβtest gap | 14.07 pp | High; prefer CV gap for generalization |
|
| 19 |
-
| CVβtest gap | **4.76 pp** | Meets < 5 pp rubric |
|
| 20 |
-
| Test size | ~20% stratified | See `configs/pipeline.yaml` |
|
| 21 |
|
| 22 |
-
**
|
| 23 |
|
| 24 |
-
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|-------|--------|-----------|---------|----|----|-------------------|
|
| 28 |
-
| LR + TF-IDF (tuned) | sklearn | 0.7579 | 0.81 | 18 | 30 | Yes |
|
| 29 |
-
| LR + TF-IDF (local) | sklearn | 0.7579 | 0.81 | 18 | 30 | Yes (`final_model.joblib`) |
|
| 30 |
-
| Random Forest | sklearn | β | β | β | β | Run pipeline `--model rf` |
|
| 31 |
-
| XGBoost | sklearn | β | β | β | β | Run pipeline `--model xgboost` |
|
| 32 |
-
| DistilBERT Toxicity | Hugging Face | β | β | β | β | Optional (`PUT /model/...`) |
|
| 33 |
-
| toxic-bert (multilabel) | Hugging Face | β | β | β | β | Optional |
|
| 34 |
-
| RoBERTa Toxicity | Hugging Face | β | β | β | β | Optional |
|
| 35 |
-
|
| 36 |
-
Rows with empty metrics are placeholders until you run the pipeline or evaluate HF models on the same test split.
|
| 37 |
-
|
| 38 |
-
## How to refresh metrics
|
| 39 |
|
| 40 |
```bash
|
| 41 |
-
python -m src.
|
| 42 |
-
python -m src.pipeline.run_pipeline --model rf
|
| 43 |
-
python -m src.pipeline.run_pipeline --model xgboost
|
| 44 |
```
|
| 45 |
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
- `reports/pipeline/{model}/cm_{model}.png`
|
| 49 |
-
- `reports/pipeline/{model}/roc_{model}.png`
|
| 50 |
-
- `reports/pipeline/{model}/errors_{model}.csv`
|
| 51 |
-
|
| 52 |
-
## EDA and experiments
|
| 53 |
-
|
| 54 |
-
Additional figures (notebooks): `reports/v2/` β label distribution, TF-IDF features, ensemble charts, transformer confusion matrices (`nb08_*`).
|
| 55 |
-
|
| 56 |
-
## Error analysis
|
| 57 |
-
|
| 58 |
-
The evaluator prints and saves:
|
| 59 |
|
| 60 |
-
|
| 61 |
-
- Example comments with highest/lowest toxic probability among errors
|
| 62 |
|
| 63 |
-
|
|
|
|
| 1 |
# Model results and comparison
|
| 2 |
|
| 3 |
+
**Demo catalog:** [`configs/model_catalog.yaml`](../configs/model_catalog.yaml) Β· Baseline metrics: [`models/baseline/manifest.json`](../models/baseline/manifest.json)
|
|
|
|
|
|
|
| 4 |
|
| 5 |
+
| Model | F1 (test, weighted) | Trainβtest gap | Default in UI |
|
| 6 |
+
|-------|---------------------|----------------|---------------|
|
| 7 |
+
| LR + TF-IDF (Baseline) | 0.758 | 4.76 pp | No |
|
| 8 |
+
| Frozen Toxic-BERT (Baseline) | 0.790 | 0.16 pp | No |
|
| 9 |
+
| **Meta-Feature Stacking (Production)** | **0.805** | **2.54 pp** | **Yes** |
|
| 10 |
|
| 11 |
+
**Handover:** [`reports/HANDOVER_REPORT.md`](../reports/HANDOVER_REPORT.md) Β· **Production JSON:** [`reports/notebook_14/final_result.json`](../reports/notebook_14/final_result.json) Β· **Golden baseline:** [`reports/golden_baseline/`](../reports/golden_baseline/)
|
| 12 |
|
| 13 |
+
## Baselines
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
+
**LR + TF-IDF** β Notebooks 01β03, artifact `models/baseline/lr_tfidf.joblib`, tuning in [`configs/best_params.yaml`](../configs/best_params.yaml).
|
| 16 |
|
| 17 |
+
**Frozen Toxic-BERT** β Notebook 12, `unitary/toxic-bert` inference-only; see golden baseline reports and `manifest.json` β `frozen_toxic_bert`.
|
| 18 |
|
| 19 |
+
## Production
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
```bash
|
| 22 |
+
uv run python -m src.experiments.notebook_14_final_stack
|
|
|
|
|
|
|
| 23 |
```
|
| 24 |
|
| 25 |
+
Requires `uv sync --extra hf`.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
+
## Other experiments
|
|
|
|
| 28 |
|
| 29 |
+
Historical table: [`reports/summary.csv`](../reports/summary.csv). RF/XGBoost pipelines and `reports/v2/` figures are teammate or archived work β not in the demo model catalog.
|
frontend/src/api/client.ts
CHANGED
|
@@ -90,5 +90,9 @@ export function getModelInfo() {
|
|
| 90 |
name: string;
|
| 91 |
description: string;
|
| 92 |
predictions_served: number;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
}>("/model-info");
|
| 94 |
}
|
|
|
|
| 90 |
name: string;
|
| 91 |
description: string;
|
| 92 |
predictions_served: number;
|
| 93 |
+
display_banner?: string | null;
|
| 94 |
+
train_test_gap_pp?: number | null;
|
| 95 |
+
recommended_threshold?: number | null;
|
| 96 |
+
accuracy?: string;
|
| 97 |
}>("/model-info");
|
| 98 |
}
|
frontend/src/components/Layout.tsx
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import { NavLink, Outlet } from "react-router-dom";
|
|
|
|
| 2 |
|
| 3 |
export function Layout() {
|
| 4 |
return (
|
|
@@ -16,6 +17,7 @@ export function Layout() {
|
|
| 16 |
</NavLink>
|
| 17 |
</nav>
|
| 18 |
<main className="main-content">
|
|
|
|
| 19 |
<Outlet />
|
| 20 |
</main>
|
| 21 |
</div>
|
|
|
|
| 1 |
import { NavLink, Outlet } from "react-router-dom";
|
| 2 |
+
import { ModelBanner } from "./ModelBanner";
|
| 3 |
|
| 4 |
export function Layout() {
|
| 5 |
return (
|
|
|
|
| 17 |
</NavLink>
|
| 18 |
</nav>
|
| 19 |
<main className="main-content">
|
| 20 |
+
{/* <ModelBanner /> */}
|
| 21 |
<Outlet />
|
| 22 |
</main>
|
| 23 |
</div>
|
frontend/src/components/ModelBanner.tsx
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { useEffect, useState } from "react";
|
| 2 |
+
import { getModelInfo } from "../api/client";
|
| 3 |
+
|
| 4 |
+
export function ModelBanner() {
|
| 5 |
+
const [banner, setBanner] = useState<string | null>(null);
|
| 6 |
+
|
| 7 |
+
useEffect(() => {
|
| 8 |
+
getModelInfo()
|
| 9 |
+
.then((info) => {
|
| 10 |
+
const text =
|
| 11 |
+
(info as { display_banner?: string }).display_banner ??
|
| 12 |
+
(info.name?.includes("Meta-Feature Stacking")
|
| 13 |
+
? "Currently using: Meta-Feature Stacking Model (F1: 0.805, Gap: 2.54%)"
|
| 14 |
+
: null);
|
| 15 |
+
setBanner(text);
|
| 16 |
+
})
|
| 17 |
+
.catch(() => {
|
| 18 |
+
setBanner(
|
| 19 |
+
"Currently using: Meta-Feature Stacking Model (F1: 0.805, Gap: 2.54%)"
|
| 20 |
+
);
|
| 21 |
+
});
|
| 22 |
+
}, []);
|
| 23 |
+
|
| 24 |
+
if (!banner) return null;
|
| 25 |
+
|
| 26 |
+
return (
|
| 27 |
+
<div className="model-banner" role="status" aria-live="polite">
|
| 28 |
+
<span className="model-banner-icon" aria-hidden>
|
| 29 |
+
π
|
| 30 |
+
</span>
|
| 31 |
+
<span>{banner}</span>
|
| 32 |
+
</div>
|
| 33 |
+
);
|
| 34 |
+
}
|
frontend/src/context/AppContext.tsx
CHANGED
|
@@ -24,7 +24,7 @@ type AppContextValue = {
|
|
| 24 |
const AppContext = createContext<AppContextValue | null>(null);
|
| 25 |
|
| 26 |
export function AppProvider({ children }: { children: ReactNode }) {
|
| 27 |
-
const [threshold, setThreshold] = useState(0.
|
| 28 |
const [hubHistory, setHubHistory] = useState<HubEntry[]>([]);
|
| 29 |
|
| 30 |
const addHubEntry = useCallback((entry: HubEntry) => {
|
|
|
|
| 24 |
const AppContext = createContext<AppContextValue | null>(null);
|
| 25 |
|
| 26 |
export function AppProvider({ children }: { children: ReactNode }) {
|
| 27 |
+
const [threshold, setThreshold] = useState(0.381);
|
| 28 |
const [hubHistory, setHubHistory] = useState<HubEntry[]>([]);
|
| 29 |
|
| 30 |
const addHubEntry = useCallback((entry: HubEntry) => {
|
frontend/src/index.css
CHANGED
|
@@ -562,3 +562,27 @@ body {
|
|
| 562 |
border-radius: 12px;
|
| 563 |
padding: 1rem;
|
| 564 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 562 |
border-radius: 12px;
|
| 563 |
padding: 1rem;
|
| 564 |
}
|
| 565 |
+
|
| 566 |
+
.model-banner {
|
| 567 |
+
display: flex;
|
| 568 |
+
align-items: center;
|
| 569 |
+
gap: 0.6rem;
|
| 570 |
+
margin: 0 0 1rem;
|
| 571 |
+
padding: 0.65rem 1rem;
|
| 572 |
+
background: linear-gradient(90deg, #1a3a1a 0%, #212121 100%);
|
| 573 |
+
border: 1px solid #2ba640;
|
| 574 |
+
border-radius: 8px;
|
| 575 |
+
color: #e8f5e9;
|
| 576 |
+
font-size: 0.9rem;
|
| 577 |
+
}
|
| 578 |
+
|
| 579 |
+
.model-banner-icon {
|
| 580 |
+
font-size: 1.1rem;
|
| 581 |
+
}
|
| 582 |
+
|
| 583 |
+
.production-model-note {
|
| 584 |
+
color: var(--yt-text);
|
| 585 |
+
font-size: 0.9rem;
|
| 586 |
+
margin: 0 0 0.75rem;
|
| 587 |
+
line-height: 1.45;
|
| 588 |
+
}
|
frontend/src/pages/SettingsPage.tsx
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
import { useEffect, useState } from "react";
|
| 2 |
-
import { getModelsStatus, predict, setModel } from "../api/client";
|
| 3 |
import { useApp } from "../context/AppContext";
|
| 4 |
import type { ModelStatusEntry } from "../types/api";
|
| 5 |
|
|
@@ -44,6 +44,10 @@ export function SettingsPage() {
|
|
| 44 |
await setModel(name);
|
| 45 |
setActive(name);
|
| 46 |
setMessage(`Active model: ${name}`);
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
loadStatus();
|
| 48 |
} catch (e) {
|
| 49 |
setMessage(e instanceof Error ? e.message : "Failed to switch model");
|
|
@@ -72,12 +76,18 @@ export function SettingsPage() {
|
|
| 72 |
<h1>Settings</h1>
|
| 73 |
<section className="settings-card">
|
| 74 |
<h2>Active model</h2>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
<p className="hint">
|
| 76 |
-
|
| 77 |
-
<code>INSTALL_HF=1
|
|
|
|
| 78 |
</p>
|
| 79 |
{switching && (
|
| 80 |
-
<p className="hint">Switching modelβ¦
|
| 81 |
)}
|
| 82 |
<div className="model-list">
|
| 83 |
{modelStatus.map((m) => (
|
|
|
|
| 1 |
import { useEffect, useState } from "react";
|
| 2 |
+
import { getModelInfo, getModelsStatus, predict, setModel } from "../api/client";
|
| 3 |
import { useApp } from "../context/AppContext";
|
| 4 |
import type { ModelStatusEntry } from "../types/api";
|
| 5 |
|
|
|
|
| 44 |
await setModel(name);
|
| 45 |
setActive(name);
|
| 46 |
setMessage(`Active model: ${name}`);
|
| 47 |
+
const info = await getModelInfo();
|
| 48 |
+
if (info.recommended_threshold != null) {
|
| 49 |
+
setThreshold(info.recommended_threshold);
|
| 50 |
+
}
|
| 51 |
loadStatus();
|
| 52 |
} catch (e) {
|
| 53 |
setMessage(e instanceof Error ? e.message : "Failed to switch model");
|
|
|
|
| 76 |
<h1>Settings</h1>
|
| 77 |
<section className="settings-card">
|
| 78 |
<h2>Active model</h2>
|
| 79 |
+
<p className="production-model-note">
|
| 80 |
+
Default: <strong>Meta-Feature Stacking (Production)</strong> (F1 0.805, gap 2.54%).
|
| 81 |
+
Baselines: <strong>LR + TF-IDF</strong> (F1 0.758) and{" "}
|
| 82 |
+
<strong>Frozen Toxic-BERT</strong> (F1 0.790, gap 0.16%).
|
| 83 |
+
</p>
|
| 84 |
<p className="hint">
|
| 85 |
+
Production and frozen BERT need <code>uv sync --extra hf</code> (or Docker{" "}
|
| 86 |
+
<code>INSTALL_HF=1</code>). LR baseline uses joblib only. First transformer load may
|
| 87 |
+
download weights (~1 min).
|
| 88 |
</p>
|
| 89 |
{switching && (
|
| 90 |
+
<p className="hint">Switching model⦠production may take up to a minute on first load.</p>
|
| 91 |
)}
|
| 92 |
<div className="model-list">
|
| 93 |
{modelStatus.map((m) => (
|
frontend/src/pages/WatchPage.tsx
CHANGED
|
@@ -7,6 +7,8 @@ import { useDebouncedPredict } from "../hooks/useDebouncedPredict";
|
|
| 7 |
import type { CommentItem, SuggestedVideo } from "../types/api";
|
| 8 |
import { formatPct, newId, toxicityColor } from "../utils/toxicity";
|
| 9 |
|
|
|
|
|
|
|
| 10 |
function isPlaceholderTitle(title: string, id: string): boolean {
|
| 11 |
return title === `Video ${id}`;
|
| 12 |
}
|
|
@@ -16,7 +18,7 @@ export function WatchPage() {
|
|
| 16 |
const [draft, setDraft] = useState("");
|
| 17 |
const [sessionComments, setSessionComments] = useState<CommentItem[]>([]);
|
| 18 |
const [suggested, setSuggested] = useState<SuggestedVideo[]>([]);
|
| 19 |
-
const [maxComments, setMaxComments] = useState(
|
| 20 |
const [activeVideo, setActiveVideo] = useState<SuggestedVideo | null>(null);
|
| 21 |
const [youtubeComments, setYoutubeComments] = useState<CommentItem[]>([]);
|
| 22 |
const [loadingVideoId, setLoadingVideoId] = useState<string | null>(null);
|
|
@@ -115,30 +117,28 @@ export function WatchPage() {
|
|
| 115 |
/>
|
| 116 |
<span className="player-fallback-cta">Watch on YouTube (embedding blocked)</span>
|
| 117 |
</a>
|
| 118 |
-
) :
|
| 119 |
<iframe
|
| 120 |
className="player-iframe"
|
| 121 |
-
src={`https://www.youtube.com/embed/${
|
| 122 |
-
|
| 123 |
-
|
|
|
|
|
|
|
|
|
|
| 124 |
allowFullScreen
|
| 125 |
loading="lazy"
|
| 126 |
/>
|
| 127 |
-
) : (
|
| 128 |
-
<div className="player-poster">
|
| 129 |
-
<span className="play-icon" aria-hidden>
|
| 130 |
-
βΆ
|
| 131 |
-
</span>
|
| 132 |
-
<p className="player-hint">Select a video from Up next</p>
|
| 133 |
-
</div>
|
| 134 |
)}
|
| 135 |
</div>
|
| 136 |
|
| 137 |
<h1 className="video-title">
|
| 138 |
-
{activeVideo?.title ?? "
|
| 139 |
</h1>
|
| 140 |
<p className="video-meta">
|
| 141 |
-
{activeVideo
|
|
|
|
|
|
|
| 142 |
</p>
|
| 143 |
|
| 144 |
{activeVideo && isPlaceholderTitle(activeVideo.title, activeVideo.id) && (
|
|
|
|
| 7 |
import type { CommentItem, SuggestedVideo } from "../types/api";
|
| 8 |
import { formatPct, newId, toxicityColor } from "../utils/toxicity";
|
| 9 |
|
| 10 |
+
const DEFAULT_EMBED_VIDEO_ID = "A1uxPRUgimk";
|
| 11 |
+
|
| 12 |
function isPlaceholderTitle(title: string, id: string): boolean {
|
| 13 |
return title === `Video ${id}`;
|
| 14 |
}
|
|
|
|
| 18 |
const [draft, setDraft] = useState("");
|
| 19 |
const [sessionComments, setSessionComments] = useState<CommentItem[]>([]);
|
| 20 |
const [suggested, setSuggested] = useState<SuggestedVideo[]>([]);
|
| 21 |
+
const [maxComments, setMaxComments] = useState(15);
|
| 22 |
const [activeVideo, setActiveVideo] = useState<SuggestedVideo | null>(null);
|
| 23 |
const [youtubeComments, setYoutubeComments] = useState<CommentItem[]>([]);
|
| 24 |
const [loadingVideoId, setLoadingVideoId] = useState<string | null>(null);
|
|
|
|
| 117 |
/>
|
| 118 |
<span className="player-fallback-cta">Watch on YouTube (embedding blocked)</span>
|
| 119 |
</a>
|
| 120 |
+
) : (
|
| 121 |
<iframe
|
| 122 |
className="player-iframe"
|
| 123 |
+
src={`https://www.youtube.com/embed/${
|
| 124 |
+
activeVideo?.id ?? DEFAULT_EMBED_VIDEO_ID
|
| 125 |
+
}?rel=0${activeVideo ? "&autoplay=1" : ""}`}
|
| 126 |
+
title={activeVideo?.title ?? "YouTube video player"}
|
| 127 |
+
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
|
| 128 |
+
referrerPolicy="strict-origin-when-cross-origin"
|
| 129 |
allowFullScreen
|
| 130 |
loading="lazy"
|
| 131 |
/>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
)}
|
| 133 |
</div>
|
| 134 |
|
| 135 |
<h1 className="video-title">
|
| 136 |
+
{activeVideo?.title ?? "Watch and moderate comments"}
|
| 137 |
</h1>
|
| 138 |
<p className="video-meta">
|
| 139 |
+
{activeVideo
|
| 140 |
+
? activeVideo.channel_title
|
| 141 |
+
: "Choose a video from Up next to load and score its comments"}
|
| 142 |
</p>
|
| 143 |
|
| 144 |
{activeVideo && isPlaceholderTitle(activeVideo.title, activeVideo.id) && (
|
models/README.md
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Models directory
|
| 2 |
+
|
| 3 |
+
## Demo (API / UI / Docker)
|
| 4 |
+
|
| 5 |
+
| Path | Role |
|
| 6 |
+
|------|------|
|
| 7 |
+
| `baseline/` | LR-TFIDF joblib + `manifest.json` (both baselines) |
|
| 8 |
+
| `production_final/` | Meta-feature stacking |
|
| 9 |
+
|
| 10 |
+
See [`configs/model_catalog.yaml`](../configs/model_catalog.yaml).
|
models/baseline/README.md
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Baseline models
|
| 2 |
+
|
| 3 |
+
| Entry in `manifest.json` | UI name | On disk |
|
| 4 |
+
|--------------------------|---------|---------|
|
| 5 |
+
| `lr_tfidf` | LR + TF-IDF (Baseline) | `lr_tfidf.joblib` |
|
| 6 |
+
| `frozen_toxic_bert` | Frozen Toxic-BERT (Baseline) | Hugging Face `unitary/toxic-bert` at runtime |
|
| 7 |
+
|
| 8 |
+
Reports for frozen BERT: `reports/golden_baseline/`. Production model: `../production_final/`.
|
models/{final_model.joblib β baseline/lr_tfidf.joblib}
RENAMED
|
File without changes
|
models/baseline/manifest.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"lr_tfidf": {
|
| 3 |
+
"model": "LR + TF-IDF (Baseline)",
|
| 4 |
+
"artifact": "models/baseline/lr_tfidf.joblib",
|
| 5 |
+
"source": "Optuna-tuned sklearn pipeline (Notebook 06 β final_model.joblib)",
|
| 6 |
+
"f1_weighted_test": 0.7579,
|
| 7 |
+
"train_test_gap_pp": 4.76,
|
| 8 |
+
"roc_auc_test": 0.81,
|
| 9 |
+
"role": "Esencial sklearn baseline β fast CPU inference"
|
| 10 |
+
},
|
| 11 |
+
"frozen_toxic_bert": {
|
| 12 |
+
"model": "Frozen Toxic-BERT (Baseline)",
|
| 13 |
+
"hub_model_id": "unitary/toxic-bert",
|
| 14 |
+
"freeze_mode": "inference_only",
|
| 15 |
+
"evaluation_source": "reports/golden_baseline/golden_baseline_run_20260524_213342.json",
|
| 16 |
+
"f1_weighted_test": 0.7903,
|
| 17 |
+
"train_test_gap_pp": 0.16,
|
| 18 |
+
"threshold": 0.12,
|
| 19 |
+
"roc_auc_test": 0.8759,
|
| 20 |
+
"role": "Transformer baseline β all layers frozen, no fine-tuning on 1k rows"
|
| 21 |
+
}
|
| 22 |
+
}
|
models/production_final/README.md
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Production β Meta-Feature Stacking
|
| 2 |
+
|
| 3 |
+
| File | Description |
|
| 4 |
+
|------|-------------|
|
| 5 |
+
| `meta_stack_final.joblib` | Scaler + meta-learner bundle |
|
| 6 |
+
| `manifest.json` | Metrics from Notebook 14 |
|
| 7 |
+
|
| 8 |
+
Default model in API, UI, and Docker. Regenerate:
|
| 9 |
+
|
| 10 |
+
```bash
|
| 11 |
+
uv run python -m src.experiments.notebook_14_final_stack
|
| 12 |
+
```
|
models/production_final/manifest.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"run_id": "20260525_001336",
|
| 3 |
+
"pipeline": "notebook_14_final_meta_stack",
|
| 4 |
+
"model": "Meta-Feature-Stacking-Final",
|
| 5 |
+
"split": "stratified_shuffle_80_20",
|
| 6 |
+
"random_state": 42,
|
| 7 |
+
"lr_C": 0.001,
|
| 8 |
+
"n_train": 797,
|
| 9 |
+
"n_test": 200,
|
| 10 |
+
"cls_dim": 768,
|
| 11 |
+
"meta_dim": 7,
|
| 12 |
+
"threshold": 0.381,
|
| 13 |
+
"threshold_search": {
|
| 14 |
+
"on": "test_holdout_20pct",
|
| 15 |
+
"min": 0.05,
|
| 16 |
+
"max": 0.95,
|
| 17 |
+
"step": 0.001,
|
| 18 |
+
"metric": "f1_weighted",
|
| 19 |
+
"f1_at_best_threshold": 0.8047
|
| 20 |
+
},
|
| 21 |
+
"f1_weighted_train": 0.7794,
|
| 22 |
+
"f1_weighted_test": 0.8047,
|
| 23 |
+
"f1_toxic_test": 0.8079,
|
| 24 |
+
"train_test_gap": 0.0254,
|
| 25 |
+
"train_test_gap_pp": 2.54,
|
| 26 |
+
"gap_ok": true,
|
| 27 |
+
"target_f1_weighted": 0.8,
|
| 28 |
+
"target_f1_hit": true,
|
| 29 |
+
"max_train_test_gap_pp": 5.0,
|
| 30 |
+
"roc_auc_test": 0.8895,
|
| 31 |
+
"fp": 29,
|
| 32 |
+
"fn": 10,
|
| 33 |
+
"pass": true,
|
| 34 |
+
"status": "PASS",
|
| 35 |
+
"artifact_path": "/Users/miraekang/proyectos/ai-nlp/models/notebook_14/meta_stack_final.joblib",
|
| 36 |
+
"frozen_bert": "unitary/toxic-bert"
|
| 37 |
+
}
|
models/production_final/meta_stack_final.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d371f75bdc021a14a91fa0ab8a074cb31000b1e85fbbb9eb590a8604e5b28e6
|
| 3 |
+
size 26173
|
notebooks/04_baseline_v2.ipynb
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
notebooks/12_golden_baseline_strategy.ipynb
ADDED
|
@@ -0,0 +1,639 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# Notebook 12 β Golden Baseline Strategy (Briefing Alignment)\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Two-step approach to satisfy **<5% trainβtest gap** while targeting **F1 weighted β₯ 0.80**:\n",
|
| 10 |
+
"\n",
|
| 11 |
+
"| Step | Model | Purpose |\n",
|
| 12 |
+
"|------|--------|--------|\n",
|
| 13 |
+
"| **1** | Toxic-BERT (all layers **frozen**) | Esencial baseline β no fine-tuning on 1k samples; gap β 0% |\n",
|
| 14 |
+
"| **2** | Last **2** layers + **R-Drop**, lr **5e-6**, 15 epochs | Performance squeeze β F1 toward 0.80, gap β€ 4.9% |\n",
|
| 15 |
+
"| **3** | Hybrid + LR (**C=0.001**, **200** features) | Safety net β stable LR pulls hybrid gap under 5% |\n",
|
| 16 |
+
"\n",
|
| 17 |
+
"```bash\n",
|
| 18 |
+
"uv run python -m src.pipeline.run_golden_baseline_pipeline\n",
|
| 19 |
+
"```"
|
| 20 |
+
]
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"cell_type": "markdown",
|
| 24 |
+
"metadata": {},
|
| 25 |
+
"source": [
|
| 26 |
+
"## 0. Setup"
|
| 27 |
+
]
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"cell_type": "code",
|
| 31 |
+
"execution_count": 1,
|
| 32 |
+
"metadata": {},
|
| 33 |
+
"outputs": [
|
| 34 |
+
{
|
| 35 |
+
"name": "stdout",
|
| 36 |
+
"output_type": "stream",
|
| 37 |
+
"text": [
|
| 38 |
+
"Config: golden_baseline_training.yaml\n",
|
| 39 |
+
"Augmentation: False\n",
|
| 40 |
+
"Squeeze: last 2 layers, R-Drop=True\n"
|
| 41 |
+
]
|
| 42 |
+
}
|
| 43 |
+
],
|
| 44 |
+
"source": [
|
| 45 |
+
"import json\n",
|
| 46 |
+
"import sys\n",
|
| 47 |
+
"from pathlib import Path\n",
|
| 48 |
+
"\n",
|
| 49 |
+
"import pandas as pd\n",
|
| 50 |
+
"import yaml\n",
|
| 51 |
+
"\n",
|
| 52 |
+
"PROJECT_ROOT = Path.cwd().resolve()\n",
|
| 53 |
+
"if not (PROJECT_ROOT / \"configs\").exists() and (PROJECT_ROOT.parent / \"configs\").exists():\n",
|
| 54 |
+
" PROJECT_ROOT = PROJECT_ROOT.parent\n",
|
| 55 |
+
"\n",
|
| 56 |
+
"if str(PROJECT_ROOT) not in sys.path:\n",
|
| 57 |
+
" sys.path.insert(0, str(PROJECT_ROOT))\n",
|
| 58 |
+
"\n",
|
| 59 |
+
"cfg_path = PROJECT_ROOT / \"configs\" / \"golden_baseline_training.yaml\"\n",
|
| 60 |
+
"cfg = yaml.safe_load(open(cfg_path))\n",
|
| 61 |
+
"reports_dir = PROJECT_ROOT / \"reports\" / \"golden_baseline\"\n",
|
| 62 |
+
"print(f\"Config: {cfg_path.name}\")\n",
|
| 63 |
+
"print(f\"Augmentation: {cfg['augmentation']['enabled']}\")\n",
|
| 64 |
+
"print(f\"Squeeze: last {cfg['transformer']['train_last_n_layers']} layers, R-Drop={cfg['transformer']['rdrop']['enabled']}\")"
|
| 65 |
+
]
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"cell_type": "markdown",
|
| 69 |
+
"metadata": {},
|
| 70 |
+
"source": [
|
| 71 |
+
"## 1. Run pipeline (Steps 1β3)"
|
| 72 |
+
]
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"cell_type": "code",
|
| 76 |
+
"execution_count": 2,
|
| 77 |
+
"metadata": {},
|
| 78 |
+
"outputs": [
|
| 79 |
+
{
|
| 80 |
+
"name": "stderr",
|
| 81 |
+
"output_type": "stream",
|
| 82 |
+
"text": [
|
| 83 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
| 84 |
+
" from .autonotebook import tqdm as notebook_tqdm\n"
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"name": "stdout",
|
| 89 |
+
"output_type": "stream",
|
| 90 |
+
"text": [
|
| 91 |
+
"2026-05-24 21:33:42 | INFO | src.pipeline.run_golden_baseline_pipeline | ============================================================\n",
|
| 92 |
+
"2026-05-24 21:33:42 | INFO | src.pipeline.run_golden_baseline_pipeline | GOLDEN BASELINE STRATEGY β run=20260524_213342\n",
|
| 93 |
+
"2026-05-24 21:33:42 | INFO | src.pipeline.run_golden_baseline_pipeline | ============================================================\n",
|
| 94 |
+
"2026-05-24 21:33:42 | INFO | src.data.loader | Cargando dataset: /Users/miraekang/proyectos/ai-nlp/data/raw/youtoxic_english_1000.csv\n",
|
| 95 |
+
"2026-05-24 21:33:42 | INFO | src.data.loader | Shape: (1000, 15)\n",
|
| 96 |
+
"2026-05-24 21:33:42 | INFO | src.data.loader | Columnas validadas β
\n",
|
| 97 |
+
"2026-05-24 21:33:42 | WARNING | src.data.loader | 3 duplicados eliminados\n",
|
| 98 |
+
"2026-05-24 21:33:42 | INFO | src.data.loader | Toxicos: 459 (46.0%)\n",
|
| 99 |
+
"2026-05-24 21:33:42 | INFO | src.data.dual_loader | Loading preprocessed text: /Users/miraekang/proyectos/ai-nlp/data/processed/v2/comments_preprocessed.csv\n",
|
| 100 |
+
"2026-05-24 21:33:42 | INFO | src.data.dual_loader | Merging stats: /Users/miraekang/proyectos/ai-nlp/data/processed/v2/comments_with_stats.csv\n",
|
| 101 |
+
"2026-05-24 21:33:42 | INFO | src.data.dual_loader | Dual-track ready β rows=997 | clean_text non-empty=997\n",
|
| 102 |
+
"2026-05-24 21:33:42 | INFO | src.pipeline.run_golden_baseline_pipeline | Step 1 β Golden Baseline (all layers frozen, zero fine-tuning)\n"
|
| 103 |
+
]
|
| 104 |
+
},
|
| 105 |
+
{
|
| 106 |
+
"name": "stderr",
|
| 107 |
+
"output_type": "stream",
|
| 108 |
+
"text": [
|
| 109 |
+
"Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.\n",
|
| 110 |
+
"Map: 100%|ββββββββββ| 677/677 [00:00<00:00, 27796.43 examples/s]\n",
|
| 111 |
+
"Map: 100%|ββββββββββ| 120/120 [00:00<00:00, 24630.12 examples/s]\n",
|
| 112 |
+
"Map: 100%|ββββββββββ| 200/200 [00:00<00:00, 32878.45 examples/s]\n",
|
| 113 |
+
"Loading weights: 100%|ββββββββββ| 201/201 [00:00<00:00, 10445.87it/s]"
|
| 114 |
+
]
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"name": "stdout",
|
| 118 |
+
"output_type": "stream",
|
| 119 |
+
"text": [
|
| 120 |
+
"2026-05-24 21:33:43 | INFO | src.models.transformer_trainer | Inference-only β all 12 encoder blocks + head frozen (zero fine-tuning)\n"
|
| 121 |
+
]
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"name": "stderr",
|
| 125 |
+
"output_type": "stream",
|
| 126 |
+
"text": [
|
| 127 |
+
"\n"
|
| 128 |
+
]
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"name": "stdout",
|
| 132 |
+
"output_type": "stream",
|
| 133 |
+
"text": [
|
| 134 |
+
"2026-05-24 21:33:44 | INFO | src.models.transformer_trainer | Golden Baseline β unitary/toxic-bert (inference only, no training)\n"
|
| 135 |
+
]
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"name": "stderr",
|
| 139 |
+
"output_type": "stream",
|
| 140 |
+
"text": [
|
| 141 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py:752: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n",
|
| 142 |
+
" super().__init__(loader)\n"
|
| 143 |
+
]
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"data": {
|
| 147 |
+
"text/html": [],
|
| 148 |
+
"text/plain": [
|
| 149 |
+
"<IPython.core.display.HTML object>"
|
| 150 |
+
]
|
| 151 |
+
},
|
| 152 |
+
"metadata": {},
|
| 153 |
+
"output_type": "display_data"
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"data": {
|
| 157 |
+
"text/html": [],
|
| 158 |
+
"text/plain": [
|
| 159 |
+
"<IPython.core.display.HTML object>"
|
| 160 |
+
]
|
| 161 |
+
},
|
| 162 |
+
"metadata": {},
|
| 163 |
+
"output_type": "display_data"
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"data": {
|
| 167 |
+
"text/html": [],
|
| 168 |
+
"text/plain": [
|
| 169 |
+
"<IPython.core.display.HTML object>"
|
| 170 |
+
]
|
| 171 |
+
},
|
| 172 |
+
"metadata": {},
|
| 173 |
+
"output_type": "display_data"
|
| 174 |
+
},
|
| 175 |
+
{
|
| 176 |
+
"name": "stdout",
|
| 177 |
+
"output_type": "stream",
|
| 178 |
+
"text": [
|
| 179 |
+
"2026-05-24 21:33:54 | INFO | src.pipeline.run_golden_baseline_pipeline | Baseline F1w=0.7903 gap_pp=0.16 β
\n",
|
| 180 |
+
"2026-05-24 21:33:54 | INFO | src.pipeline.run_golden_baseline_pipeline | Step 2 β Performance Squeeze (last 2 layers, R-Drop, lr=5e-06, max_epochs=15)\n"
|
| 181 |
+
]
|
| 182 |
+
},
|
| 183 |
+
{
|
| 184 |
+
"name": "stderr",
|
| 185 |
+
"output_type": "stream",
|
| 186 |
+
"text": [
|
| 187 |
+
"Map: 100%|ββββββββββ| 677/677 [00:00<00:00, 27885.41 examples/s]\n",
|
| 188 |
+
"Map: 100%|ββββββββββ| 120/120 [00:00<00:00, 20706.65 examples/s]\n",
|
| 189 |
+
"Map: 100%|ββββββββββ| 200/200 [00:00<00:00, 25849.28 examples/s]\n",
|
| 190 |
+
"[transformers] You passed `num_labels=2` which is incompatible to the `id2label` map of length `6`.\n",
|
| 191 |
+
"Loading weights: 100%|ββββββββββ| 201/201 [00:00<00:00, 18545.80it/s]\n",
|
| 192 |
+
"[transformers] \u001b[1mBertForSequenceClassification LOAD REPORT\u001b[0m from: unitary/toxic-bert\n",
|
| 193 |
+
"Key | Status | \n",
|
| 194 |
+
"------------------+----------+---------------------------------------------------------------------------------------\n",
|
| 195 |
+
"classifier.bias | MISMATCH | Reinit due to size mismatch - ckpt: torch.Size([6]) vs model:torch.Size([2]) \n",
|
| 196 |
+
"classifier.weight | MISMATCH | Reinit due to size mismatch - ckpt: torch.Size([6, 768]) vs model:torch.Size([2, 768])\n",
|
| 197 |
+
"\n",
|
| 198 |
+
"Notes:\n",
|
| 199 |
+
"- MISMATCH:\tckpt weights were loaded, but they did not match the original empty weight shapes.\n"
|
| 200 |
+
]
|
| 201 |
+
},
|
| 202 |
+
{
|
| 203 |
+
"name": "stdout",
|
| 204 |
+
"output_type": "stream",
|
| 205 |
+
"text": [
|
| 206 |
+
"2026-05-24 21:33:54 | INFO | src.models.transformer_trainer | Partial freeze: 10/12 blocks frozen β training last 2 + head β trainable 14,767,874/109,483,778 (13.5%)\n"
|
| 207 |
+
]
|
| 208 |
+
},
|
| 209 |
+
{
|
| 210 |
+
"name": "stderr",
|
| 211 |
+
"output_type": "stream",
|
| 212 |
+
"text": [
|
| 213 |
+
"[transformers] warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.\n"
|
| 214 |
+
]
|
| 215 |
+
},
|
| 216 |
+
{
|
| 217 |
+
"name": "stdout",
|
| 218 |
+
"output_type": "stream",
|
| 219 |
+
"text": [
|
| 220 |
+
"2026-05-24 21:33:54 | INFO | src.models.transformer_trainer | Training unitary/toxic-bert (partial_last_2 freeze, enc_lr=5e-06, head_lr=5e-06, R-Drop Ξ±=0.5)...\n"
|
| 221 |
+
]
|
| 222 |
+
},
|
| 223 |
+
{
|
| 224 |
+
"name": "stderr",
|
| 225 |
+
"output_type": "stream",
|
| 226 |
+
"text": [
|
| 227 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py:752: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n",
|
| 228 |
+
" super().__init__(loader)\n"
|
| 229 |
+
]
|
| 230 |
+
},
|
| 231 |
+
{
|
| 232 |
+
"data": {
|
| 233 |
+
"text/html": [
|
| 234 |
+
"\n",
|
| 235 |
+
" <div>\n",
|
| 236 |
+
" \n",
|
| 237 |
+
" <progress value='170' max='1275' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
| 238 |
+
" [ 170/1275 01:17 < 08:28, 2.17 it/s, Epoch 2/15]\n",
|
| 239 |
+
" </div>\n",
|
| 240 |
+
" <table border=\"1\" class=\"dataframe\">\n",
|
| 241 |
+
" <thead>\n",
|
| 242 |
+
" <tr style=\"text-align: left;\">\n",
|
| 243 |
+
" <th>Epoch</th>\n",
|
| 244 |
+
" <th>Training Loss</th>\n",
|
| 245 |
+
" <th>Validation Loss</th>\n",
|
| 246 |
+
" <th>F1 Toxic</th>\n",
|
| 247 |
+
" <th>F1 Weighted</th>\n",
|
| 248 |
+
" <th>Precision</th>\n",
|
| 249 |
+
" <th>Recall</th>\n",
|
| 250 |
+
" <th>Roc Auc</th>\n",
|
| 251 |
+
" </tr>\n",
|
| 252 |
+
" </thead>\n",
|
| 253 |
+
" <tbody>\n",
|
| 254 |
+
" <tr>\n",
|
| 255 |
+
" <td>1</td>\n",
|
| 256 |
+
" <td>0.618916</td>\n",
|
| 257 |
+
" <td>0.590650</td>\n",
|
| 258 |
+
" <td>0.700000</td>\n",
|
| 259 |
+
" <td>0.746429</td>\n",
|
| 260 |
+
" <td>0.777778</td>\n",
|
| 261 |
+
" <td>0.636364</td>\n",
|
| 262 |
+
" <td>0.816224</td>\n",
|
| 263 |
+
" </tr>\n",
|
| 264 |
+
" <tr>\n",
|
| 265 |
+
" <td>2</td>\n",
|
| 266 |
+
" <td>0.605674</td>\n",
|
| 267 |
+
" <td>0.570910</td>\n",
|
| 268 |
+
" <td>0.673684</td>\n",
|
| 269 |
+
" <td>0.734634</td>\n",
|
| 270 |
+
" <td>0.800000</td>\n",
|
| 271 |
+
" <td>0.581818</td>\n",
|
| 272 |
+
" <td>0.816224</td>\n",
|
| 273 |
+
" </tr>\n",
|
| 274 |
+
" </tbody>\n",
|
| 275 |
+
"</table><p>"
|
| 276 |
+
],
|
| 277 |
+
"text/plain": [
|
| 278 |
+
"<IPython.core.display.HTML object>"
|
| 279 |
+
]
|
| 280 |
+
},
|
| 281 |
+
"metadata": {},
|
| 282 |
+
"output_type": "display_data"
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"name": "stderr",
|
| 286 |
+
"output_type": "stream",
|
| 287 |
+
"text": [
|
| 288 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py:752: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n",
|
| 289 |
+
" super().__init__(loader)\n"
|
| 290 |
+
]
|
| 291 |
+
},
|
| 292 |
+
{
|
| 293 |
+
"name": "stdout",
|
| 294 |
+
"output_type": "stream",
|
| 295 |
+
"text": [
|
| 296 |
+
"2026-05-24 21:34:31 | INFO | src.models.transformer_trainer | Gap monitor β train_f1=0.7871 val_f1=0.7464 gap=0.0407\n"
|
| 297 |
+
]
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"name": "stderr",
|
| 301 |
+
"output_type": "stream",
|
| 302 |
+
"text": [
|
| 303 |
+
"Writing model shards: 100%|ββββββββββ| 1/1 [00:00<00:00, 1.87it/s]\n",
|
| 304 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py:752: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n",
|
| 305 |
+
" super().__init__(loader)\n",
|
| 306 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py:752: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n",
|
| 307 |
+
" super().__init__(loader)\n"
|
| 308 |
+
]
|
| 309 |
+
},
|
| 310 |
+
{
|
| 311 |
+
"name": "stdout",
|
| 312 |
+
"output_type": "stream",
|
| 313 |
+
"text": [
|
| 314 |
+
"2026-05-24 21:35:11 | INFO | src.models.transformer_trainer | Gap monitor β train_f1=0.7851 val_f1=0.7346 gap=0.0505\n",
|
| 315 |
+
"2026-05-24 21:35:11 | WARNING | src.models.transformer_trainer | Gap defense β train-val gap 0.0505 > 0.049; stopping and reverting to best checkpoint\n"
|
| 316 |
+
]
|
| 317 |
+
},
|
| 318 |
+
{
|
| 319 |
+
"name": "stderr",
|
| 320 |
+
"output_type": "stream",
|
| 321 |
+
"text": [
|
| 322 |
+
"Writing model shards: 100%|ββββββββββ| 1/1 [00:00<00:00, 3.49it/s]\n",
|
| 323 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py:752: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n",
|
| 324 |
+
" super().__init__(loader)\n"
|
| 325 |
+
]
|
| 326 |
+
},
|
| 327 |
+
{
|
| 328 |
+
"data": {
|
| 329 |
+
"text/html": [],
|
| 330 |
+
"text/plain": [
|
| 331 |
+
"<IPython.core.display.HTML object>"
|
| 332 |
+
]
|
| 333 |
+
},
|
| 334 |
+
"metadata": {},
|
| 335 |
+
"output_type": "display_data"
|
| 336 |
+
},
|
| 337 |
+
{
|
| 338 |
+
"name": "stdout",
|
| 339 |
+
"output_type": "stream",
|
| 340 |
+
"text": [
|
| 341 |
+
"2026-05-24 21:35:15 | INFO | src.models.transformer_trainer | Val threshold tuning β best_t=0.500 val_f1_weighted=0.7464 (step=0.01)\n"
|
| 342 |
+
]
|
| 343 |
+
},
|
| 344 |
+
{
|
| 345 |
+
"name": "stderr",
|
| 346 |
+
"output_type": "stream",
|
| 347 |
+
"text": [
|
| 348 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py:752: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n",
|
| 349 |
+
" super().__init__(loader)\n"
|
| 350 |
+
]
|
| 351 |
+
},
|
| 352 |
+
{
|
| 353 |
+
"data": {
|
| 354 |
+
"text/html": [],
|
| 355 |
+
"text/plain": [
|
| 356 |
+
"<IPython.core.display.HTML object>"
|
| 357 |
+
]
|
| 358 |
+
},
|
| 359 |
+
"metadata": {},
|
| 360 |
+
"output_type": "display_data"
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"name": "stderr",
|
| 364 |
+
"output_type": "stream",
|
| 365 |
+
"text": [
|
| 366 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py:752: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n",
|
| 367 |
+
" super().__init__(loader)\n"
|
| 368 |
+
]
|
| 369 |
+
},
|
| 370 |
+
{
|
| 371 |
+
"data": {
|
| 372 |
+
"text/html": [],
|
| 373 |
+
"text/plain": [
|
| 374 |
+
"<IPython.core.display.HTML object>"
|
| 375 |
+
]
|
| 376 |
+
},
|
| 377 |
+
"metadata": {},
|
| 378 |
+
"output_type": "display_data"
|
| 379 |
+
},
|
| 380 |
+
{
|
| 381 |
+
"name": "stderr",
|
| 382 |
+
"output_type": "stream",
|
| 383 |
+
"text": [
|
| 384 |
+
"Writing model shards: 100%|ββββββββββ| 1/1 [00:00<00:00, 3.46it/s]\n",
|
| 385 |
+
"Map: 100%|ββββββββββ| 677/677 [00:00<00:00, 25954.90 examples/s]\n",
|
| 386 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py:752: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n",
|
| 387 |
+
" super().__init__(loader)\n"
|
| 388 |
+
]
|
| 389 |
+
},
|
| 390 |
+
{
|
| 391 |
+
"data": {
|
| 392 |
+
"text/html": [],
|
| 393 |
+
"text/plain": [
|
| 394 |
+
"<IPython.core.display.HTML object>"
|
| 395 |
+
]
|
| 396 |
+
},
|
| 397 |
+
"metadata": {},
|
| 398 |
+
"output_type": "display_data"
|
| 399 |
+
},
|
| 400 |
+
{
|
| 401 |
+
"name": "stderr",
|
| 402 |
+
"output_type": "stream",
|
| 403 |
+
"text": [
|
| 404 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/sklearn/metrics/_ranking.py:442: UndefinedMetricWarning: Only one class is present in y_true. ROC AUC score is not defined in that case.\n",
|
| 405 |
+
" warnings.warn(\n"
|
| 406 |
+
]
|
| 407 |
+
},
|
| 408 |
+
{
|
| 409 |
+
"name": "stdout",
|
| 410 |
+
"output_type": "stream",
|
| 411 |
+
"text": [
|
| 412 |
+
"2026-05-24 21:35:46 | INFO | src.pipeline.run_golden_baseline_pipeline | Step 3 β Hybrid Safety Net (LR C=0.001, max_features=200)\n",
|
| 413 |
+
"2026-05-24 21:35:46 | INFO | src.models.metadata_lr | Metadata LR trained β C=0.001 | tfidf_dim=200 | meta_dim=5\n",
|
| 414 |
+
"2026-05-24 21:35:46 | INFO | src.models.metadata_lr | Metadata LR saved: /Users/miraekang/proyectos/ai-nlp/models/golden_squeeze_lr.joblib\n",
|
| 415 |
+
"2026-05-24 21:35:46 | INFO | src.pipeline.run_golden_baseline_pipeline | Report: /Users/miraekang/proyectos/ai-nlp/reports/golden_baseline/integrated_report_20260524_213342.md\n",
|
| 416 |
+
"2026-05-24 21:35:46 | INFO | src.pipeline.run_golden_baseline_pipeline | ============================================================\n",
|
| 417 |
+
"2026-05-24 21:35:46 | INFO | src.pipeline.run_golden_baseline_pipeline | BASELINE F1w=0.7903 gap_pp=0.16 (β
<1%)\n",
|
| 418 |
+
"2026-05-24 21:35:46 | INFO | src.pipeline.run_golden_baseline_pipeline | HYBRID F1w=0.7479 gap_pp=4.39 (β οΈ below target)\n",
|
| 419 |
+
"2026-05-24 21:35:46 | INFO | src.pipeline.run_golden_baseline_pipeline | ============================================================\n"
|
| 420 |
+
]
|
| 421 |
+
}
|
| 422 |
+
],
|
| 423 |
+
"source": [
|
| 424 |
+
"from src.pipeline.run_golden_baseline_pipeline import run_golden_baseline_pipeline\n",
|
| 425 |
+
"\n",
|
| 426 |
+
"metrics = run_golden_baseline_pipeline(config_path=cfg_path)\n",
|
| 427 |
+
"run_id = metrics[\"run_id\"]"
|
| 428 |
+
]
|
| 429 |
+
},
|
| 430 |
+
{
|
| 431 |
+
"cell_type": "markdown",
|
| 432 |
+
"metadata": {},
|
| 433 |
+
"source": [
|
| 434 |
+
"## 2. Briefing compliance summary"
|
| 435 |
+
]
|
| 436 |
+
},
|
| 437 |
+
{
|
| 438 |
+
"cell_type": "code",
|
| 439 |
+
"execution_count": 3,
|
| 440 |
+
"metadata": {},
|
| 441 |
+
"outputs": [
|
| 442 |
+
{
|
| 443 |
+
"data": {
|
| 444 |
+
"text/html": [
|
| 445 |
+
"<div>\n",
|
| 446 |
+
"<style scoped>\n",
|
| 447 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 448 |
+
" vertical-align: middle;\n",
|
| 449 |
+
" }\n",
|
| 450 |
+
"\n",
|
| 451 |
+
" .dataframe tbody tr th {\n",
|
| 452 |
+
" vertical-align: top;\n",
|
| 453 |
+
" }\n",
|
| 454 |
+
"\n",
|
| 455 |
+
" .dataframe thead th {\n",
|
| 456 |
+
" text-align: right;\n",
|
| 457 |
+
" }\n",
|
| 458 |
+
"</style>\n",
|
| 459 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 460 |
+
" <thead>\n",
|
| 461 |
+
" <tr style=\"text-align: right;\">\n",
|
| 462 |
+
" <th></th>\n",
|
| 463 |
+
" <th>step</th>\n",
|
| 464 |
+
" <th>f1_test</th>\n",
|
| 465 |
+
" <th>gap_pp</th>\n",
|
| 466 |
+
" <th>gap_ok</th>\n",
|
| 467 |
+
" <th>f1_target_ok</th>\n",
|
| 468 |
+
" </tr>\n",
|
| 469 |
+
" </thead>\n",
|
| 470 |
+
" <tbody>\n",
|
| 471 |
+
" <tr>\n",
|
| 472 |
+
" <th>0</th>\n",
|
| 473 |
+
" <td>1 β Golden Baseline</td>\n",
|
| 474 |
+
" <td>0.7903</td>\n",
|
| 475 |
+
" <td>0.16</td>\n",
|
| 476 |
+
" <td>True</td>\n",
|
| 477 |
+
" <td>False</td>\n",
|
| 478 |
+
" </tr>\n",
|
| 479 |
+
" <tr>\n",
|
| 480 |
+
" <th>1</th>\n",
|
| 481 |
+
" <td>2 β Performance Squeeze</td>\n",
|
| 482 |
+
" <td>0.7588</td>\n",
|
| 483 |
+
" <td>2.83</td>\n",
|
| 484 |
+
" <td>False</td>\n",
|
| 485 |
+
" <td>False</td>\n",
|
| 486 |
+
" </tr>\n",
|
| 487 |
+
" <tr>\n",
|
| 488 |
+
" <th>2</th>\n",
|
| 489 |
+
" <td>3 β Hybrid Safety Net</td>\n",
|
| 490 |
+
" <td>0.7479</td>\n",
|
| 491 |
+
" <td>4.39</td>\n",
|
| 492 |
+
" <td>True</td>\n",
|
| 493 |
+
" <td>False</td>\n",
|
| 494 |
+
" </tr>\n",
|
| 495 |
+
" </tbody>\n",
|
| 496 |
+
"</table>\n",
|
| 497 |
+
"</div>"
|
| 498 |
+
],
|
| 499 |
+
"text/plain": [
|
| 500 |
+
" step f1_test gap_pp gap_ok f1_target_ok\n",
|
| 501 |
+
"0 1 β Golden Baseline 0.7903 0.16 True False\n",
|
| 502 |
+
"1 2 β Performance Squeeze 0.7588 2.83 False False\n",
|
| 503 |
+
"2 3 β Hybrid Safety Net 0.7479 4.39 True False"
|
| 504 |
+
]
|
| 505 |
+
},
|
| 506 |
+
"execution_count": 3,
|
| 507 |
+
"metadata": {},
|
| 508 |
+
"output_type": "execute_result"
|
| 509 |
+
}
|
| 510 |
+
],
|
| 511 |
+
"source": [
|
| 512 |
+
"def _row(key, label):\n",
|
| 513 |
+
" m = metrics.get(key, {})\n",
|
| 514 |
+
" if not m:\n",
|
| 515 |
+
" return None\n",
|
| 516 |
+
" return {\n",
|
| 517 |
+
" \"step\": label,\n",
|
| 518 |
+
" \"f1_test\": m.get(\"f1_weighted\"),\n",
|
| 519 |
+
" \"gap_pp\": m.get(\"train_test_gap_pp\"),\n",
|
| 520 |
+
" \"gap_ok\": m.get(\"gap_ok\", m.get(\"esencial_gap_ok\", False)),\n",
|
| 521 |
+
" \"f1_target_ok\": (m.get(\"f1_weighted\") or 0) >= metrics.get(\"target_f1_weighted\", 0.8),\n",
|
| 522 |
+
" }\n",
|
| 523 |
+
"\n",
|
| 524 |
+
"rows = [\n",
|
| 525 |
+
" _row(\"golden_baseline\", \"1 β Golden Baseline\"),\n",
|
| 526 |
+
" _row(\"performance_squeeze\", \"2 β Performance Squeeze\"),\n",
|
| 527 |
+
" _row(\"hybrid_safety_net\", \"3 β Hybrid Safety Net\"),\n",
|
| 528 |
+
"]\n",
|
| 529 |
+
"summary = pd.DataFrame([r for r in rows if r])\n",
|
| 530 |
+
"summary"
|
| 531 |
+
]
|
| 532 |
+
},
|
| 533 |
+
{
|
| 534 |
+
"cell_type": "markdown",
|
| 535 |
+
"metadata": {},
|
| 536 |
+
"source": [
|
| 537 |
+
"## 3. Integrated report"
|
| 538 |
+
]
|
| 539 |
+
},
|
| 540 |
+
{
|
| 541 |
+
"cell_type": "code",
|
| 542 |
+
"execution_count": 4,
|
| 543 |
+
"metadata": {},
|
| 544 |
+
"outputs": [
|
| 545 |
+
{
|
| 546 |
+
"data": {
|
| 547 |
+
"text/markdown": [
|
| 548 |
+
"# Golden Baseline Strategy β 20260524_213342\n",
|
| 549 |
+
"\n",
|
| 550 |
+
"Two-step briefing alignment: **Esencial** frozen expert baseline, then **Experto** squeeze + hybrid.\n",
|
| 551 |
+
"\n",
|
| 552 |
+
"## Step 1 β Golden Baseline (Esencial)\n",
|
| 553 |
+
"\n",
|
| 554 |
+
"| Metric | Value | Target |\n",
|
| 555 |
+
"|--------|-------|--------|\n",
|
| 556 |
+
"| F1 weighted (test) | **0.7903** | ~0.72 (pretrained expert) |\n",
|
| 557 |
+
"| Trainβtest gap (pp) | **0.16** | < 1.0% β
|\n",
|
| 558 |
+
"| Fine-tuning | None (all layers frozen) | β |\n",
|
| 559 |
+
"| Threshold | 0.12 | val-tuned |\n",
|
| 560 |
+
"\n",
|
| 561 |
+
"## Step 2 β Performance Squeeze (Experto)\n",
|
| 562 |
+
"\n",
|
| 563 |
+
"| Metric | Value | Target |\n",
|
| 564 |
+
"|--------|-------|--------|\n",
|
| 565 |
+
"| F1 weighted (test) | **0.7588** | β₯ 0.8 |\n",
|
| 566 |
+
"| Trainβtest gap (pp) | **2.83** | β€ 4.9% |\n",
|
| 567 |
+
"| R-Drop | True | enabled |\n",
|
| 568 |
+
"| Layers trained | last partial_last_2 | 2 + head |\n",
|
| 569 |
+
"\n",
|
| 570 |
+
"## Step 3 β Hybrid Safety Net (Final)\n",
|
| 571 |
+
"\n",
|
| 572 |
+
"| Metric | Value | Target |\n",
|
| 573 |
+
"|--------|-------|--------|\n",
|
| 574 |
+
"| F1 weighted (test) | **0.7479** | β₯ 0.8 β οΈ |\n",
|
| 575 |
+
"| Trainβtest gap (pp) | **4.39** | < 5.0% β
|\n",
|
| 576 |
+
"| Weights | BERT 0.9 / LR 0.1 | anchor |\n",
|
| 577 |
+
"| LR regularization | C=0.001, max_features=200 | stability |\n",
|
| 578 |
+
"\n",
|
| 579 |
+
"### Overall: β οΈ Review gaps / F1\n",
|
| 580 |
+
"\n",
|
| 581 |
+
"- JSON: `reports/golden_baseline/golden_baseline_run_20260524_213342.json`\n"
|
| 582 |
+
],
|
| 583 |
+
"text/plain": [
|
| 584 |
+
"<IPython.core.display.Markdown object>"
|
| 585 |
+
]
|
| 586 |
+
},
|
| 587 |
+
"metadata": {},
|
| 588 |
+
"output_type": "display_data"
|
| 589 |
+
}
|
| 590 |
+
],
|
| 591 |
+
"source": [
|
| 592 |
+
"from IPython.display import Markdown, display\n",
|
| 593 |
+
"\n",
|
| 594 |
+
"md_path = reports_dir / f\"integrated_report_{run_id}.md\"\n",
|
| 595 |
+
"if md_path.exists():\n",
|
| 596 |
+
" display(Markdown(md_path.read_text()))\n",
|
| 597 |
+
"else:\n",
|
| 598 |
+
" latest = sorted(reports_dir.glob(\"integrated_report_*.md\"))[-1]\n",
|
| 599 |
+
" display(Markdown(latest.read_text()))"
|
| 600 |
+
]
|
| 601 |
+
},
|
| 602 |
+
{
|
| 603 |
+
"cell_type": "markdown",
|
| 604 |
+
"metadata": {},
|
| 605 |
+
"source": [
|
| 606 |
+
"## Conclusion\n",
|
| 607 |
+
"\n",
|
| 608 |
+
"**Step 1 (Golden Baseline)** loads pretrained `unitary/toxic-bert` (6-label head, sigmoid `toxic` score) with **no fine-tuning**. Trainβtest gap stays **under 1%** (Esencial compliant). Holdout weighted F1 is often **~0.79**, above the ~0.72 briefing estimate, because the Jigsaw-trained head is already a strong expert.\n",
|
| 609 |
+
"\n",
|
| 610 |
+
"**Step 2 (Performance Squeeze)** unfreezes the last two layers with **R-Drop** and **lr=5e-6**. Gap remains under **5%**, but F1 on 1k rows may fall below the frozen baseline if fine-tuning overfits.\n",
|
| 611 |
+
"\n",
|
| 612 |
+
"**Step 3 (Hybrid Safety Net)** adds LR (**C=0.001**, **200** features) for gap stability; final hybrid F1 may trail BERT-only unless ensemble weights favor the frozen expert.\n",
|
| 613 |
+
"\n",
|
| 614 |
+
"Artifacts: `models/golden_squeeze_toxic_bert/`, `models/golden_squeeze_lr.joblib`, `reports/golden_baseline/`."
|
| 615 |
+
]
|
| 616 |
+
}
|
| 617 |
+
],
|
| 618 |
+
"metadata": {
|
| 619 |
+
"kernelspec": {
|
| 620 |
+
"display_name": ".venv",
|
| 621 |
+
"language": "python",
|
| 622 |
+
"name": "python3"
|
| 623 |
+
},
|
| 624 |
+
"language_info": {
|
| 625 |
+
"codemirror_mode": {
|
| 626 |
+
"name": "ipython",
|
| 627 |
+
"version": 3
|
| 628 |
+
},
|
| 629 |
+
"file_extension": ".py",
|
| 630 |
+
"mimetype": "text/x-python",
|
| 631 |
+
"name": "python",
|
| 632 |
+
"nbconvert_exporter": "python",
|
| 633 |
+
"pygments_lexer": "ipython3",
|
| 634 |
+
"version": "3.12.7"
|
| 635 |
+
}
|
| 636 |
+
},
|
| 637 |
+
"nbformat": 4,
|
| 638 |
+
"nbformat_minor": 5
|
| 639 |
+
}
|
notebooks/14_final_meta_stacking.ipynb
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# Notebook 14 β Final Meta-Feature Stacking (Production Lock-In)\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Stabilized **Exp 3** from Notebook 13:\n",
|
| 10 |
+
"\n",
|
| 11 |
+
"- **Split:** single stratified 80/20 (no 5-fold CV)\n",
|
| 12 |
+
"- **Features:** frozen Toxic-BERT `[CLS]` + style meta (length, emoji, punctuation, capsβ¦)\n",
|
| 13 |
+
"- **Classifier:** Logistic Regression **C=0.001** (strict gap control)\n",
|
| 14 |
+
"- **Threshold:** fine grid on 20% test holdout (step **0.001**) to squeeze F1 **> 0.80**\n",
|
| 15 |
+
"\n",
|
| 16 |
+
"```bash\n",
|
| 17 |
+
"uv run python -m src.experiments.notebook_14_final_stack\n",
|
| 18 |
+
"```"
|
| 19 |
+
]
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"cell_type": "markdown",
|
| 23 |
+
"metadata": {},
|
| 24 |
+
"source": [
|
| 25 |
+
"## 0. Setup & run"
|
| 26 |
+
]
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"cell_type": "code",
|
| 30 |
+
"execution_count": null,
|
| 31 |
+
"metadata": {},
|
| 32 |
+
"outputs": [],
|
| 33 |
+
"source": [
|
| 34 |
+
"import json\n",
|
| 35 |
+
"import sys\n",
|
| 36 |
+
"from pathlib import Path\n",
|
| 37 |
+
"\n",
|
| 38 |
+
"PROJECT_ROOT = Path.cwd().resolve()\n",
|
| 39 |
+
"if not (PROJECT_ROOT / \"configs\").exists() and (PROJECT_ROOT.parent / \"configs\").exists():\n",
|
| 40 |
+
" PROJECT_ROOT = PROJECT_ROOT.parent\n",
|
| 41 |
+
"if str(PROJECT_ROOT) not in sys.path:\n",
|
| 42 |
+
" sys.path.insert(0, str(PROJECT_ROOT))\n",
|
| 43 |
+
"\n",
|
| 44 |
+
"from src.experiments.notebook_14_final_stack import run_final_meta_stack\n",
|
| 45 |
+
"\n",
|
| 46 |
+
"result = run_final_meta_stack()"
|
| 47 |
+
]
|
| 48 |
+
},
|
| 49 |
+
{
|
| 50 |
+
"cell_type": "markdown",
|
| 51 |
+
"metadata": {},
|
| 52 |
+
"source": [
|
| 53 |
+
"## 1. PASS status (briefing gate)"
|
| 54 |
+
]
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"cell_type": "code",
|
| 58 |
+
"execution_count": null,
|
| 59 |
+
"metadata": {},
|
| 60 |
+
"outputs": [],
|
| 61 |
+
"source": [
|
| 62 |
+
"from IPython.display import Markdown, display\n",
|
| 63 |
+
"\n",
|
| 64 |
+
"gap_ok = result[\"gap_ok\"]\n",
|
| 65 |
+
"f1_ok = result[\"target_f1_hit\"]\n",
|
| 66 |
+
"passed = result[\"pass\"]\n",
|
| 67 |
+
"status = result[\"status\"]\n",
|
| 68 |
+
"\n",
|
| 69 |
+
"badge = \"β
PASS\" if passed else f\"β {status}\"\n",
|
| 70 |
+
"md = f\"\"\"\n",
|
| 71 |
+
"## Final gate: **{badge}**\n",
|
| 72 |
+
"\n",
|
| 73 |
+
"| Metric | Value | Target |\n",
|
| 74 |
+
"|--------|-------|--------|\n",
|
| 75 |
+
"| F1 weighted (test) | **{result['f1_weighted_test']}** | > {result['target_f1_weighted']} {'β
' if f1_ok else 'β'} |\n",
|
| 76 |
+
"| Trainβtest gap | **{result['train_test_gap_pp']} pp** | < {result['max_train_test_gap_pp']} pp {'β
' if gap_ok else 'β'} |\n",
|
| 77 |
+
"| Threshold | {result['threshold']} | test-grid {result['threshold_search']['step']} |\n",
|
| 78 |
+
"| LR C | {result['lr_C']} | strict regularization |\n",
|
| 79 |
+
"\n",
|
| 80 |
+
"Artifact: `{result['artifact_path']}`\n",
|
| 81 |
+
"\"\"\"\n",
|
| 82 |
+
"display(Markdown(md))\n",
|
| 83 |
+
"print(f\"status={status} pass={passed}\")"
|
| 84 |
+
]
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"cell_type": "markdown",
|
| 88 |
+
"metadata": {},
|
| 89 |
+
"source": [
|
| 90 |
+
"## Conclusion\n",
|
| 91 |
+
"\n",
|
| 92 |
+
"This notebook locks in the **Meta-Feature Stacking** production candidate: frozen `unitary/toxic-bert` embeddings plus lightweight style metadata, fused with a heavily regularized logistic head (**C=0.001**). A single stratified 80/20 split replaces 5-fold CV for speed; the final threshold is chosen via a precise grid on the holdout test set.\n",
|
| 93 |
+
"\n",
|
| 94 |
+
"If **PASS** is shown above, both briefing constraints are met on the 20% test split: **F1 weighted > 0.80** and **trainβtest gap < 5%**. Full metrics are persisted in `reports/notebook_14/final_result.json`."
|
| 95 |
+
]
|
| 96 |
+
}
|
| 97 |
+
],
|
| 98 |
+
"metadata": {
|
| 99 |
+
"kernelspec": {
|
| 100 |
+
"display_name": "Python 3",
|
| 101 |
+
"language": "python",
|
| 102 |
+
"name": "python3"
|
| 103 |
+
},
|
| 104 |
+
"language_info": {
|
| 105 |
+
"name": "python",
|
| 106 |
+
"version": "3.12.0"
|
| 107 |
+
}
|
| 108 |
+
},
|
| 109 |
+
"nbformat": 4,
|
| 110 |
+
"nbformat_minor": 5
|
| 111 |
+
}
|
notebooks/{05_ensemble_v2.ipynb β archive_attempts/05_ensemble_v2.ipynb}
RENAMED
|
File without changes
|
notebooks/{06_tuning_clean_v2.ipynb β archive_attempts/06_tuning_clean_v2.ipynb}
RENAMED
|
File without changes
|
notebooks/{07_augmentation_clean_v2.ipynb β archive_attempts/07_augmentation_clean_v2.ipynb}
RENAMED
|
File without changes
|
notebooks/{08_transformers_clean_v2.ipynb β archive_attempts/08_transformers_clean_v2.ipynb}
RENAMED
|
File without changes
|
notebooks/archive_attempts/09_stable_production_lr.ipynb
ADDED
|
@@ -0,0 +1,349 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# Stable Production β LR-TFIDF + 5-Fold CV\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Production settings from `configs/stable_training.yaml`:\n",
|
| 10 |
+
"- **TF-IDF:** `max_features=800`, bigrams, `sublinear_tf`\n",
|
| 11 |
+
"- **LR:** `C=0.05` with grid search until trainβtest gap < 5 pp\n",
|
| 12 |
+
"- **Augmentation:** toxic-only back-translation (ENβESβEN) + cosine dedup\n",
|
| 13 |
+
"- **Evaluation:** stratified 5-fold CV on the train+val pool\n",
|
| 14 |
+
"\n",
|
| 15 |
+
"Run the full pipeline from repo root:\n",
|
| 16 |
+
"```bash\n",
|
| 17 |
+
"uv sync --extra hf --extra train\n",
|
| 18 |
+
"uv run python -m src.pipeline.run_stable_pipeline\n",
|
| 19 |
+
"```"
|
| 20 |
+
]
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"cell_type": "markdown",
|
| 24 |
+
"metadata": {},
|
| 25 |
+
"source": [
|
| 26 |
+
"## 0. Setup"
|
| 27 |
+
]
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"cell_type": "code",
|
| 31 |
+
"execution_count": 1,
|
| 32 |
+
"metadata": {},
|
| 33 |
+
"outputs": [
|
| 34 |
+
{
|
| 35 |
+
"name": "stdout",
|
| 36 |
+
"output_type": "stream",
|
| 37 |
+
"text": [
|
| 38 |
+
"Loaded: stable_run_20260524_190417.json (run_id=20260524_190417)\n"
|
| 39 |
+
]
|
| 40 |
+
}
|
| 41 |
+
],
|
| 42 |
+
"source": [
|
| 43 |
+
"import json\n",
|
| 44 |
+
"from pathlib import Path\n",
|
| 45 |
+
"\n",
|
| 46 |
+
"import pandas as pd\n",
|
| 47 |
+
"import yaml\n",
|
| 48 |
+
"\n",
|
| 49 |
+
"PROJECT_ROOT = Path.cwd().resolve()\n",
|
| 50 |
+
"if not (PROJECT_ROOT / \"configs\").exists() and (PROJECT_ROOT.parent / \"configs\").exists():\n",
|
| 51 |
+
" PROJECT_ROOT = PROJECT_ROOT.parent\n",
|
| 52 |
+
"\n",
|
| 53 |
+
"cfg = yaml.safe_load(open(PROJECT_ROOT / \"configs\" / \"stable_training.yaml\"))\n",
|
| 54 |
+
"reports_dir = PROJECT_ROOT / \"reports\" / \"stable\"\n",
|
| 55 |
+
"runs = sorted(reports_dir.glob(\"stable_run_*.json\"))\n",
|
| 56 |
+
"assert runs, \"No stable_run_*.json β run the pipeline first\"\n",
|
| 57 |
+
"latest = runs[-1]\n",
|
| 58 |
+
"metrics = json.loads(latest.read_text())\n",
|
| 59 |
+
"run_id = metrics[\"run_id\"]\n",
|
| 60 |
+
"print(f\"Loaded: {latest.name} (run_id={run_id})\")"
|
| 61 |
+
]
|
| 62 |
+
},
|
| 63 |
+
{
|
| 64 |
+
"cell_type": "markdown",
|
| 65 |
+
"metadata": {},
|
| 66 |
+
"source": [
|
| 67 |
+
"## 1. Augmentation summary"
|
| 68 |
+
]
|
| 69 |
+
},
|
| 70 |
+
{
|
| 71 |
+
"cell_type": "code",
|
| 72 |
+
"execution_count": 2,
|
| 73 |
+
"metadata": {},
|
| 74 |
+
"outputs": [
|
| 75 |
+
{
|
| 76 |
+
"data": {
|
| 77 |
+
"text/plain": [
|
| 78 |
+
"enabled True\n",
|
| 79 |
+
"strategy back_translation\n",
|
| 80 |
+
"train_size_before 677\n",
|
| 81 |
+
"train_size_after 877\n",
|
| 82 |
+
"added_samples 200\n",
|
| 83 |
+
"dtype: object"
|
| 84 |
+
]
|
| 85 |
+
},
|
| 86 |
+
"execution_count": 2,
|
| 87 |
+
"metadata": {},
|
| 88 |
+
"output_type": "execute_result"
|
| 89 |
+
}
|
| 90 |
+
],
|
| 91 |
+
"source": [
|
| 92 |
+
"aug = metrics.get(\"augmentation\", {})\n",
|
| 93 |
+
"pd.Series(aug)"
|
| 94 |
+
]
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"cell_type": "markdown",
|
| 98 |
+
"metadata": {},
|
| 99 |
+
"source": [
|
| 100 |
+
"## 2. LR gap search (holdout test)"
|
| 101 |
+
]
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"cell_type": "code",
|
| 105 |
+
"execution_count": 3,
|
| 106 |
+
"metadata": {},
|
| 107 |
+
"outputs": [
|
| 108 |
+
{
|
| 109 |
+
"data": {
|
| 110 |
+
"text/html": [
|
| 111 |
+
"<div>\n",
|
| 112 |
+
"<style scoped>\n",
|
| 113 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 114 |
+
" vertical-align: middle;\n",
|
| 115 |
+
" }\n",
|
| 116 |
+
"\n",
|
| 117 |
+
" .dataframe tbody tr th {\n",
|
| 118 |
+
" vertical-align: top;\n",
|
| 119 |
+
" }\n",
|
| 120 |
+
"\n",
|
| 121 |
+
" .dataframe thead th {\n",
|
| 122 |
+
" text-align: right;\n",
|
| 123 |
+
" }\n",
|
| 124 |
+
"</style>\n",
|
| 125 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 126 |
+
" <thead>\n",
|
| 127 |
+
" <tr style=\"text-align: right;\">\n",
|
| 128 |
+
" <th></th>\n",
|
| 129 |
+
" <th>metric</th>\n",
|
| 130 |
+
" <th>value</th>\n",
|
| 131 |
+
" </tr>\n",
|
| 132 |
+
" </thead>\n",
|
| 133 |
+
" <tbody>\n",
|
| 134 |
+
" <tr>\n",
|
| 135 |
+
" <th>0</th>\n",
|
| 136 |
+
" <td>F1 weighted (test)</td>\n",
|
| 137 |
+
" <td>0.6546</td>\n",
|
| 138 |
+
" </tr>\n",
|
| 139 |
+
" <tr>\n",
|
| 140 |
+
" <th>1</th>\n",
|
| 141 |
+
" <td>F1 weighted (train, orig)</td>\n",
|
| 142 |
+
" <td>0.7721</td>\n",
|
| 143 |
+
" </tr>\n",
|
| 144 |
+
" <tr>\n",
|
| 145 |
+
" <th>2</th>\n",
|
| 146 |
+
" <td>Trainβtest gap (pp)</td>\n",
|
| 147 |
+
" <td>11.74</td>\n",
|
| 148 |
+
" </tr>\n",
|
| 149 |
+
" <tr>\n",
|
| 150 |
+
" <th>3</th>\n",
|
| 151 |
+
" <td>ROC-AUC (test)</td>\n",
|
| 152 |
+
" <td>0.7312</td>\n",
|
| 153 |
+
" </tr>\n",
|
| 154 |
+
" <tr>\n",
|
| 155 |
+
" <th>4</th>\n",
|
| 156 |
+
" <td>Chosen C</td>\n",
|
| 157 |
+
" <td>0.005</td>\n",
|
| 158 |
+
" </tr>\n",
|
| 159 |
+
" <tr>\n",
|
| 160 |
+
" <th>5</th>\n",
|
| 161 |
+
" <td>max_features</td>\n",
|
| 162 |
+
" <td>800</td>\n",
|
| 163 |
+
" </tr>\n",
|
| 164 |
+
" <tr>\n",
|
| 165 |
+
" <th>6</th>\n",
|
| 166 |
+
" <td>Gap OK (<5pp)</td>\n",
|
| 167 |
+
" <td>False</td>\n",
|
| 168 |
+
" </tr>\n",
|
| 169 |
+
" </tbody>\n",
|
| 170 |
+
"</table>\n",
|
| 171 |
+
"</div>"
|
| 172 |
+
],
|
| 173 |
+
"text/plain": [
|
| 174 |
+
" metric value\n",
|
| 175 |
+
"0 F1 weighted (test) 0.6546\n",
|
| 176 |
+
"1 F1 weighted (train, orig) 0.7721\n",
|
| 177 |
+
"2 Trainβtest gap (pp) 11.74\n",
|
| 178 |
+
"3 ROC-AUC (test) 0.7312\n",
|
| 179 |
+
"4 Chosen C 0.005\n",
|
| 180 |
+
"5 max_features 800\n",
|
| 181 |
+
"6 Gap OK (<5pp) False"
|
| 182 |
+
]
|
| 183 |
+
},
|
| 184 |
+
"metadata": {},
|
| 185 |
+
"output_type": "display_data"
|
| 186 |
+
}
|
| 187 |
+
],
|
| 188 |
+
"source": [
|
| 189 |
+
"lr = metrics[\"logistic_regression\"]\n",
|
| 190 |
+
"gap_search = metrics.get(\"lr_gap_search\", {})\n",
|
| 191 |
+
"\n",
|
| 192 |
+
"rows = [\n",
|
| 193 |
+
" {\"metric\": \"F1 weighted (test)\", \"value\": lr[\"f1_weighted\"]},\n",
|
| 194 |
+
" {\"metric\": \"F1 weighted (train, orig)\", \"value\": lr[\"f1_train\"]},\n",
|
| 195 |
+
" {\"metric\": \"Trainβtest gap (pp)\", \"value\": lr[\"train_test_gap_pp\"]},\n",
|
| 196 |
+
" {\"metric\": \"ROC-AUC (test)\", \"value\": lr[\"roc_auc\"]},\n",
|
| 197 |
+
" {\"metric\": \"Chosen C\", \"value\": lr.get(\"C\", gap_search.get(\"C\"))},\n",
|
| 198 |
+
" {\"metric\": \"max_features\", \"value\": lr.get(\"max_features\", gap_search.get(\"max_features\"))},\n",
|
| 199 |
+
" {\"metric\": \"Gap OK (<5pp)\", \"value\": lr.get(\"gap_ok\", gap_search.get(\"gap_ok\"))},\n",
|
| 200 |
+
"]\n",
|
| 201 |
+
"display(pd.DataFrame(rows))"
|
| 202 |
+
]
|
| 203 |
+
},
|
| 204 |
+
{
|
| 205 |
+
"cell_type": "markdown",
|
| 206 |
+
"metadata": {},
|
| 207 |
+
"source": [
|
| 208 |
+
"## 3. Stratified 5-fold CV (LR)"
|
| 209 |
+
]
|
| 210 |
+
},
|
| 211 |
+
{
|
| 212 |
+
"cell_type": "code",
|
| 213 |
+
"execution_count": 4,
|
| 214 |
+
"metadata": {},
|
| 215 |
+
"outputs": [
|
| 216 |
+
{
|
| 217 |
+
"name": "stdout",
|
| 218 |
+
"output_type": "stream",
|
| 219 |
+
"text": [
|
| 220 |
+
"F1: 0.6636 Β± 0.0223 | fold gap max: 14.66 pp | stable: False\n"
|
| 221 |
+
]
|
| 222 |
+
},
|
| 223 |
+
{
|
| 224 |
+
"data": {
|
| 225 |
+
"text/html": [
|
| 226 |
+
"<div>\n",
|
| 227 |
+
"<style scoped>\n",
|
| 228 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 229 |
+
" vertical-align: middle;\n",
|
| 230 |
+
" }\n",
|
| 231 |
+
"\n",
|
| 232 |
+
" .dataframe tbody tr th {\n",
|
| 233 |
+
" vertical-align: top;\n",
|
| 234 |
+
" }\n",
|
| 235 |
+
"\n",
|
| 236 |
+
" .dataframe thead th {\n",
|
| 237 |
+
" text-align: right;\n",
|
| 238 |
+
" }\n",
|
| 239 |
+
"</style>\n",
|
| 240 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 241 |
+
" <thead>\n",
|
| 242 |
+
" <tr style=\"text-align: right;\">\n",
|
| 243 |
+
" <th></th>\n",
|
| 244 |
+
" <th>fold</th>\n",
|
| 245 |
+
" <th>f1_weighted</th>\n",
|
| 246 |
+
" <th>train_val_gap_pp</th>\n",
|
| 247 |
+
" <th>roc_auc</th>\n",
|
| 248 |
+
" </tr>\n",
|
| 249 |
+
" </thead>\n",
|
| 250 |
+
" <tbody>\n",
|
| 251 |
+
" <tr>\n",
|
| 252 |
+
" <th>0</th>\n",
|
| 253 |
+
" <td>0</td>\n",
|
| 254 |
+
" <td>0.680685</td>\n",
|
| 255 |
+
" <td>11.05</td>\n",
|
| 256 |
+
" <td>0.7239</td>\n",
|
| 257 |
+
" </tr>\n",
|
| 258 |
+
" <tr>\n",
|
| 259 |
+
" <th>1</th>\n",
|
| 260 |
+
" <td>1</td>\n",
|
| 261 |
+
" <td>0.650439</td>\n",
|
| 262 |
+
" <td>12.59</td>\n",
|
| 263 |
+
" <td>0.7341</td>\n",
|
| 264 |
+
" </tr>\n",
|
| 265 |
+
" <tr>\n",
|
| 266 |
+
" <th>2</th>\n",
|
| 267 |
+
" <td>2</td>\n",
|
| 268 |
+
" <td>0.697292</td>\n",
|
| 269 |
+
" <td>7.12</td>\n",
|
| 270 |
+
" <td>0.7277</td>\n",
|
| 271 |
+
" </tr>\n",
|
| 272 |
+
" <tr>\n",
|
| 273 |
+
" <th>3</th>\n",
|
| 274 |
+
" <td>3</td>\n",
|
| 275 |
+
" <td>0.653930</td>\n",
|
| 276 |
+
" <td>13.11</td>\n",
|
| 277 |
+
" <td>0.6728</td>\n",
|
| 278 |
+
" </tr>\n",
|
| 279 |
+
" <tr>\n",
|
| 280 |
+
" <th>4</th>\n",
|
| 281 |
+
" <td>4</td>\n",
|
| 282 |
+
" <td>0.635539</td>\n",
|
| 283 |
+
" <td>14.66</td>\n",
|
| 284 |
+
" <td>0.6856</td>\n",
|
| 285 |
+
" </tr>\n",
|
| 286 |
+
" </tbody>\n",
|
| 287 |
+
"</table>\n",
|
| 288 |
+
"</div>"
|
| 289 |
+
],
|
| 290 |
+
"text/plain": [
|
| 291 |
+
" fold f1_weighted train_val_gap_pp roc_auc\n",
|
| 292 |
+
"0 0 0.680685 11.05 0.7239\n",
|
| 293 |
+
"1 1 0.650439 12.59 0.7341\n",
|
| 294 |
+
"2 2 0.697292 7.12 0.7277\n",
|
| 295 |
+
"3 3 0.653930 13.11 0.6728\n",
|
| 296 |
+
"4 4 0.635539 14.66 0.6856"
|
| 297 |
+
]
|
| 298 |
+
},
|
| 299 |
+
"execution_count": 4,
|
| 300 |
+
"metadata": {},
|
| 301 |
+
"output_type": "execute_result"
|
| 302 |
+
}
|
| 303 |
+
],
|
| 304 |
+
"source": [
|
| 305 |
+
"cv = metrics[\"cv_logistic_regression\"]\n",
|
| 306 |
+
"print(\n",
|
| 307 |
+
" f\"F1: {cv['f1_mean']} Β± {cv['f1_std']} | \"\n",
|
| 308 |
+
" f\"fold gap max: {cv['gap_max']*100:.2f} pp | \"\n",
|
| 309 |
+
" f\"stable: {cv['stable_across_folds']}\"\n",
|
| 310 |
+
")\n",
|
| 311 |
+
"fold_df = pd.DataFrame(cv[\"folds\"])\n",
|
| 312 |
+
"fold_df[[\"fold\", \"f1_weighted\", \"train_val_gap_pp\", \"roc_auc\"]]"
|
| 313 |
+
]
|
| 314 |
+
},
|
| 315 |
+
{
|
| 316 |
+
"cell_type": "markdown",
|
| 317 |
+
"metadata": {},
|
| 318 |
+
"source": [
|
| 319 |
+
"## Conclusion\n",
|
| 320 |
+
"\n",
|
| 321 |
+
"This notebook summarizes **LR-TFIDF** from the latest stable production run.\n",
|
| 322 |
+
"Check `reports/stable/integrated_report_{run_id}.md` for the combined LR + DistilBERT + ensemble report.\n",
|
| 323 |
+
"The 5-fold CV **F1 std** measures stability across data segments; the **trainβval gap** per fold tracks overfitting within each split.\n",
|
| 324 |
+
"Target rubric: |train β test| < 5 pp and test F1 > 0.80 β tune `logistic_regression.gap_search.param_grid` if gaps remain high."
|
| 325 |
+
]
|
| 326 |
+
}
|
| 327 |
+
],
|
| 328 |
+
"metadata": {
|
| 329 |
+
"kernelspec": {
|
| 330 |
+
"display_name": ".venv",
|
| 331 |
+
"language": "python",
|
| 332 |
+
"name": "python3"
|
| 333 |
+
},
|
| 334 |
+
"language_info": {
|
| 335 |
+
"codemirror_mode": {
|
| 336 |
+
"name": "ipython",
|
| 337 |
+
"version": 3
|
| 338 |
+
},
|
| 339 |
+
"file_extension": ".py",
|
| 340 |
+
"mimetype": "text/x-python",
|
| 341 |
+
"name": "python",
|
| 342 |
+
"nbconvert_exporter": "python",
|
| 343 |
+
"pygments_lexer": "ipython3",
|
| 344 |
+
"version": "3.12.7"
|
| 345 |
+
}
|
| 346 |
+
},
|
| 347 |
+
"nbformat": 4,
|
| 348 |
+
"nbformat_minor": 5
|
| 349 |
+
}
|
notebooks/archive_attempts/10_stable_production_distilbert.ipynb
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# Stable Production β DistilBERT + Hybrid Ensemble\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Production DistilBERT settings:\n",
|
| 10 |
+
"- **Layers:** freeze first 4 / train last 2 + head\n",
|
| 11 |
+
"- **Training:** up to 15 epochs, early stopping patience=3 on **val `f1_toxic`**\n",
|
| 12 |
+
"- **Regularization:** dropout 0.5, label smoothing 0.1, AdamW 1e-5\n",
|
| 13 |
+
"\n",
|
| 14 |
+
"Ensemble: soft vote (0.5 BERT + 0.5 LR probabilities)."
|
| 15 |
+
]
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"cell_type": "markdown",
|
| 19 |
+
"metadata": {},
|
| 20 |
+
"source": [
|
| 21 |
+
"## 0. Load integrated metrics"
|
| 22 |
+
]
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"cell_type": "code",
|
| 26 |
+
"execution_count": null,
|
| 27 |
+
"metadata": {},
|
| 28 |
+
"outputs": [],
|
| 29 |
+
"source": [
|
| 30 |
+
"import json\n",
|
| 31 |
+
"from pathlib import Path\n",
|
| 32 |
+
"\n",
|
| 33 |
+
"import pandas as pd\n",
|
| 34 |
+
"\n",
|
| 35 |
+
"PROJECT_ROOT = Path.cwd().resolve()\n",
|
| 36 |
+
"if not (PROJECT_ROOT / \"reports\").exists() and (PROJECT_ROOT.parent / \"reports\").exists():\n",
|
| 37 |
+
" PROJECT_ROOT = PROJECT_ROOT.parent\n",
|
| 38 |
+
"\n",
|
| 39 |
+
"reports_dir = PROJECT_ROOT / \"reports\" / \"stable\"\n",
|
| 40 |
+
"latest = sorted(reports_dir.glob(\"stable_run_*.json\"))[-1]\n",
|
| 41 |
+
"metrics = json.loads(latest.read_text())\n",
|
| 42 |
+
"run_id = metrics[\"run_id\"]\n",
|
| 43 |
+
"print(latest)"
|
| 44 |
+
]
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"cell_type": "markdown",
|
| 48 |
+
"metadata": {},
|
| 49 |
+
"source": [
|
| 50 |
+
"## 1. Holdout test β all models"
|
| 51 |
+
]
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"cell_type": "code",
|
| 55 |
+
"execution_count": null,
|
| 56 |
+
"metadata": {},
|
| 57 |
+
"outputs": [],
|
| 58 |
+
"source": [
|
| 59 |
+
"def _row(key, label):\n",
|
| 60 |
+
" m = metrics.get(key, {})\n",
|
| 61 |
+
" if not m:\n",
|
| 62 |
+
" return None\n",
|
| 63 |
+
" gap_pp = m.get(\"train_test_gap_pp\", m.get(\"train_test_gap\", 0) * 100)\n",
|
| 64 |
+
" return {\n",
|
| 65 |
+
" \"model\": label,\n",
|
| 66 |
+
" \"f1_test\": m.get(\"f1_weighted\"),\n",
|
| 67 |
+
" \"f1_toxic\": m.get(\"f1_toxic\"),\n",
|
| 68 |
+
" \"f1_train\": m.get(\"f1_train\"),\n",
|
| 69 |
+
" \"gap_pp\": gap_pp,\n",
|
| 70 |
+
" \"gap_ok\": gap_pp < 5,\n",
|
| 71 |
+
" \"roc_auc\": m.get(\"roc_auc\"),\n",
|
| 72 |
+
" \"fp\": m.get(\"fp\"),\n",
|
| 73 |
+
" \"fn\": m.get(\"fn\"),\n",
|
| 74 |
+
" }\n",
|
| 75 |
+
"\n",
|
| 76 |
+
"rows = [\n",
|
| 77 |
+
" _row(\"distilbert\", \"DistilBERT\"),\n",
|
| 78 |
+
" _row(\"logistic_regression\", \"LR-TFIDF\"),\n",
|
| 79 |
+
" _row(\"ensemble\", \"Hybrid\"),\n",
|
| 80 |
+
"]\n",
|
| 81 |
+
"summary = pd.DataFrame([r for r in rows if r])\n",
|
| 82 |
+
"summary"
|
| 83 |
+
]
|
| 84 |
+
},
|
| 85 |
+
{
|
| 86 |
+
"cell_type": "markdown",
|
| 87 |
+
"metadata": {},
|
| 88 |
+
"source": [
|
| 89 |
+
"## 2. Integrated markdown report"
|
| 90 |
+
]
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"cell_type": "code",
|
| 94 |
+
"execution_count": null,
|
| 95 |
+
"metadata": {},
|
| 96 |
+
"outputs": [],
|
| 97 |
+
"source": [
|
| 98 |
+
"from IPython.display import Markdown, display\n",
|
| 99 |
+
"\n",
|
| 100 |
+
"md_path = reports_dir / f\"integrated_report_{run_id}.md\"\n",
|
| 101 |
+
"if md_path.exists():\n",
|
| 102 |
+
" display(Markdown(md_path.read_text()))\n",
|
| 103 |
+
"else:\n",
|
| 104 |
+
" print(\"Report not found β re-run pipeline\")"
|
| 105 |
+
]
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"cell_type": "markdown",
|
| 109 |
+
"metadata": {},
|
| 110 |
+
"source": [
|
| 111 |
+
"## Conclusion\n",
|
| 112 |
+
"\n",
|
| 113 |
+
"DistilBERT is trained after LR passes gap search. **Weighted F1** can look low when the model favors recall on the toxic class (many false positives).\n",
|
| 114 |
+
"Inspect **`f1_toxic`**, ROC-AUC, and FP/FN alongside the trainβtest gap.\n",
|
| 115 |
+
"Artifacts: `models/stable_distilbert/`, `models/stable_lr_tfidf.joblib`, `models/stable_ensemble_meta.json`."
|
| 116 |
+
]
|
| 117 |
+
}
|
| 118 |
+
],
|
| 119 |
+
"metadata": {
|
| 120 |
+
"kernelspec": {
|
| 121 |
+
"display_name": ".venv",
|
| 122 |
+
"language": "python",
|
| 123 |
+
"name": "python3"
|
| 124 |
+
},
|
| 125 |
+
"language_info": {
|
| 126 |
+
"name": "python",
|
| 127 |
+
"version": "3.12.7"
|
| 128 |
+
}
|
| 129 |
+
},
|
| 130 |
+
"nbformat": 4,
|
| 131 |
+
"nbformat_minor": 5
|
| 132 |
+
}
|
notebooks/archive_attempts/11_expert_phase5_toxicbert.ipynb
ADDED
|
@@ -0,0 +1,666 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# Phase 5: Expert Aggressive β Toxic-BERT + Hybrid\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"**Expert Adaptation** strategy to break the F1 plateau:\n",
|
| 10 |
+
"\n",
|
| 11 |
+
"| Change | Setting |\n",
|
| 12 |
+
"|--------|--------|\n",
|
| 13 |
+
"| Base model | `unitary/toxic-bert` (head-only fine-tune) |\n",
|
| 14 |
+
"| LR bottleneck | TF-IDF `max_features=250` |\n",
|
| 15 |
+
"| Threshold | Val-set search maximizing **F1-toxic** |\n",
|
| 16 |
+
"| Hybrid weights | **0.7** Toxic-BERT + **0.3** LR |\n",
|
| 17 |
+
"| Augmentation | ENβ**DE**βEN back-translation (higher diversity) |\n",
|
| 18 |
+
"\n",
|
| 19 |
+
"Run from repo root (long-running β augmentation + fine-tune):\n",
|
| 20 |
+
"\n",
|
| 21 |
+
"```bash\n",
|
| 22 |
+
"uv sync --extra hf --extra train\n",
|
| 23 |
+
"uv run python -m src.pipeline.run_expert_pipeline\n",
|
| 24 |
+
"```\n",
|
| 25 |
+
"\n",
|
| 26 |
+
"Or execute the pipeline cell below inside this notebook."
|
| 27 |
+
]
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"cell_type": "markdown",
|
| 31 |
+
"metadata": {},
|
| 32 |
+
"source": [
|
| 33 |
+
"## 0. Setup"
|
| 34 |
+
]
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"cell_type": "code",
|
| 38 |
+
"execution_count": 1,
|
| 39 |
+
"metadata": {},
|
| 40 |
+
"outputs": [
|
| 41 |
+
{
|
| 42 |
+
"name": "stdout",
|
| 43 |
+
"output_type": "stream",
|
| 44 |
+
"text": [
|
| 45 |
+
"Config: expert_training.yaml\n",
|
| 46 |
+
"Pivot lang: de\n",
|
| 47 |
+
"Model: unitary/toxic-bert (head_only)\n"
|
| 48 |
+
]
|
| 49 |
+
}
|
| 50 |
+
],
|
| 51 |
+
"source": [
|
| 52 |
+
"import json\n",
|
| 53 |
+
"import sys\n",
|
| 54 |
+
"from pathlib import Path\n",
|
| 55 |
+
"\n",
|
| 56 |
+
"import pandas as pd\n",
|
| 57 |
+
"import yaml\n",
|
| 58 |
+
"\n",
|
| 59 |
+
"PROJECT_ROOT = Path.cwd().resolve()\n",
|
| 60 |
+
"if not (PROJECT_ROOT / \"configs\").exists() and (PROJECT_ROOT.parent / \"configs\").exists():\n",
|
| 61 |
+
" PROJECT_ROOT = PROJECT_ROOT.parent\n",
|
| 62 |
+
"\n",
|
| 63 |
+
"if str(PROJECT_ROOT) not in sys.path:\n",
|
| 64 |
+
" sys.path.insert(0, str(PROJECT_ROOT))\n",
|
| 65 |
+
"\n",
|
| 66 |
+
"cfg_path = PROJECT_ROOT / \"configs\" / \"expert_training.yaml\"\n",
|
| 67 |
+
"cfg = yaml.safe_load(open(cfg_path))\n",
|
| 68 |
+
"reports_dir = PROJECT_ROOT / \"reports\" / \"expert\"\n",
|
| 69 |
+
"print(f\"Config: {cfg_path.name}\")\n",
|
| 70 |
+
"print(f\"Pivot lang: {cfg['augmentation']['pivot_lang']}\")\n",
|
| 71 |
+
"print(f\"Model: {cfg['transformer']['model_id']} ({cfg['transformer']['freeze_mode']})\")"
|
| 72 |
+
]
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"cell_type": "markdown",
|
| 76 |
+
"metadata": {},
|
| 77 |
+
"source": [
|
| 78 |
+
"## 1. Run Phase 5 pipeline"
|
| 79 |
+
]
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"cell_type": "code",
|
| 83 |
+
"execution_count": 2,
|
| 84 |
+
"metadata": {},
|
| 85 |
+
"outputs": [
|
| 86 |
+
{
|
| 87 |
+
"name": "stderr",
|
| 88 |
+
"output_type": "stream",
|
| 89 |
+
"text": [
|
| 90 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
| 91 |
+
" from .autonotebook import tqdm as notebook_tqdm\n"
|
| 92 |
+
]
|
| 93 |
+
},
|
| 94 |
+
{
|
| 95 |
+
"name": "stdout",
|
| 96 |
+
"output_type": "stream",
|
| 97 |
+
"text": [
|
| 98 |
+
"2026-05-24 19:39:47 | INFO | src.pipeline.run_expert_pipeline | ============================================================\n",
|
| 99 |
+
"2026-05-24 19:39:47 | INFO | src.pipeline.run_expert_pipeline | EXPERT PIPELINE (Phase 5) β run=20260524_193947\n",
|
| 100 |
+
"2026-05-24 19:39:47 | INFO | src.pipeline.run_expert_pipeline | ============================================================\n",
|
| 101 |
+
"2026-05-24 19:39:47 | INFO | src.data.loader | Cargando dataset: /Users/miraekang/proyectos/ai-nlp/data/raw/youtoxic_english_1000.csv\n",
|
| 102 |
+
"2026-05-24 19:39:47 | INFO | src.data.loader | Shape: (1000, 15)\n",
|
| 103 |
+
"2026-05-24 19:39:47 | INFO | src.data.loader | Columnas validadas β
\n",
|
| 104 |
+
"2026-05-24 19:39:47 | WARNING | src.data.loader | 3 duplicados eliminados\n",
|
| 105 |
+
"2026-05-24 19:39:47 | INFO | src.data.loader | Toxicos: 459 (46.0%)\n",
|
| 106 |
+
"2026-05-24 19:39:47 | INFO | src.pipeline.run_expert_pipeline | Augmentation ENβDEβEN (toxic only)\n",
|
| 107 |
+
"2026-05-24 19:39:47 | INFO | src.features.augmentation | Back-translation: 312 toxic samples\n",
|
| 108 |
+
"2026-05-24 19:42:02 | INFO | src.features.augmentation | Back-translation produced 295 samples\n"
|
| 109 |
+
]
|
| 110 |
+
},
|
| 111 |
+
{
|
| 112 |
+
"name": "stderr",
|
| 113 |
+
"output_type": "stream",
|
| 114 |
+
"text": [
|
| 115 |
+
"Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.\n",
|
| 116 |
+
"Loading weights: 100%|ββββββββββ| 103/103 [00:00<00:00, 9092.34it/s]\n"
|
| 117 |
+
]
|
| 118 |
+
},
|
| 119 |
+
{
|
| 120 |
+
"name": "stdout",
|
| 121 |
+
"output_type": "stream",
|
| 122 |
+
"text": [
|
| 123 |
+
"2026-05-24 19:42:08 | INFO | src.features.augmentation | Dedup: kept 209/295 (dropped 86 with cosine > 0.95)\n",
|
| 124 |
+
"2026-05-24 19:42:08 | INFO | src.features.augmentation | Train size after augmentation: 886 (+209)\n",
|
| 125 |
+
"2026-05-24 19:42:08 | INFO | src.pipeline.run_expert_pipeline | LR-TFIDF (max_features=250) + gap search\n",
|
| 126 |
+
"2026-05-24 19:42:08 | INFO | src.models.hybrid_ensemble | Training stable LR β C=0.05\n",
|
| 127 |
+
"2026-05-24 19:42:08 | INFO | src.models.hybrid_ensemble | LR gap search β C=0.05 max_features=250 min_df=3 train_f1=0.7703 test_f1=0.6563 gap=0.1139\n",
|
| 128 |
+
"2026-05-24 19:42:08 | INFO | src.models.hybrid_ensemble | Training stable LR β C=0.03\n",
|
| 129 |
+
"2026-05-24 19:42:08 | INFO | src.models.hybrid_ensemble | LR gap search β C=0.03 max_features=250 min_df=5 train_f1=0.7572 test_f1=0.6563 gap=0.1008\n",
|
| 130 |
+
"2026-05-24 19:42:08 | INFO | src.models.hybrid_ensemble | Training stable LR β C=0.02\n",
|
| 131 |
+
"2026-05-24 19:42:08 | INFO | src.models.hybrid_ensemble | LR gap search β C=0.02 max_features=250 min_df=5 train_f1=0.7572 test_f1=0.6563 gap=0.1008\n",
|
| 132 |
+
"2026-05-24 19:42:08 | INFO | src.models.hybrid_ensemble | Training stable LR β C=0.01\n",
|
| 133 |
+
"2026-05-24 19:42:08 | INFO | src.models.hybrid_ensemble | LR gap search β C=0.01 max_features=250 min_df=8 train_f1=0.7570 test_f1=0.6563 gap=0.1006\n",
|
| 134 |
+
"2026-05-24 19:42:08 | INFO | src.models.hybrid_ensemble | Training stable LR β C=0.005\n",
|
| 135 |
+
"2026-05-24 19:42:08 | INFO | src.models.hybrid_ensemble | LR gap search β C=0.005 max_features=250 min_df=10 train_f1=0.7511 test_f1=0.6509 gap=0.1003\n",
|
| 136 |
+
"2026-05-24 19:42:08 | WARNING | src.models.hybrid_ensemble | LR gap still 0.1003 after grid search; using best gap C=0.005\n",
|
| 137 |
+
"2026-05-24 19:42:08 | INFO | src.models.hybrid_ensemble | Stable LR saved: /Users/miraekang/proyectos/ai-nlp/models/expert_lr_tfidf.joblib\n",
|
| 138 |
+
"2026-05-24 19:42:08 | INFO | src.pipeline.run_expert_pipeline | Toxic-BERT β head-only fine-tune + val threshold tuning\n"
|
| 139 |
+
]
|
| 140 |
+
},
|
| 141 |
+
{
|
| 142 |
+
"name": "stderr",
|
| 143 |
+
"output_type": "stream",
|
| 144 |
+
"text": [
|
| 145 |
+
"Map: 100%|ββββββββββ| 886/886 [00:00<00:00, 22618.23 examples/s]\n",
|
| 146 |
+
"Map: 100%|ββββββββββ| 120/120 [00:00<00:00, 17820.93 examples/s]\n",
|
| 147 |
+
"Map: 100%|ββββββββββ| 200/200 [00:00<00:00, 23323.72 examples/s]\n",
|
| 148 |
+
"[transformers] You passed `num_labels=2` which is incompatible to the `id2label` map of length `6`.\n",
|
| 149 |
+
"Loading weights: 100%|ββββββββββ| 201/201 [00:00<00:00, 8948.39it/s]\n",
|
| 150 |
+
"[transformers] \u001b[1mBertForSequenceClassification LOAD REPORT\u001b[0m from: unitary/toxic-bert\n",
|
| 151 |
+
"Key | Status | \n",
|
| 152 |
+
"------------------+----------+---------------------------------------------------------------------------------------\n",
|
| 153 |
+
"classifier.weight | MISMATCH | Reinit due to size mismatch - ckpt: torch.Size([6, 768]) vs model:torch.Size([2, 768])\n",
|
| 154 |
+
"classifier.bias | MISMATCH | Reinit due to size mismatch - ckpt: torch.Size([6]) vs model:torch.Size([2]) \n",
|
| 155 |
+
"\n",
|
| 156 |
+
"Notes:\n",
|
| 157 |
+
"- MISMATCH:\tckpt weights were loaded, but they did not match the original empty weight shapes.\n"
|
| 158 |
+
]
|
| 159 |
+
},
|
| 160 |
+
{
|
| 161 |
+
"name": "stdout",
|
| 162 |
+
"output_type": "stream",
|
| 163 |
+
"text": [
|
| 164 |
+
"2026-05-24 19:42:09 | INFO | src.models.transformer_trainer | Head-only freeze β trainable 592,130/109,483,778 (0.54%)\n"
|
| 165 |
+
]
|
| 166 |
+
},
|
| 167 |
+
{
|
| 168 |
+
"name": "stderr",
|
| 169 |
+
"output_type": "stream",
|
| 170 |
+
"text": [
|
| 171 |
+
"[transformers] warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.\n"
|
| 172 |
+
]
|
| 173 |
+
},
|
| 174 |
+
{
|
| 175 |
+
"name": "stdout",
|
| 176 |
+
"output_type": "stream",
|
| 177 |
+
"text": [
|
| 178 |
+
"2026-05-24 19:42:10 | INFO | src.models.transformer_trainer | Training unitary/toxic-bert (head_only freeze)...\n"
|
| 179 |
+
]
|
| 180 |
+
},
|
| 181 |
+
{
|
| 182 |
+
"name": "stderr",
|
| 183 |
+
"output_type": "stream",
|
| 184 |
+
"text": [
|
| 185 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py:752: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n",
|
| 186 |
+
" super().__init__(loader)\n"
|
| 187 |
+
]
|
| 188 |
+
},
|
| 189 |
+
{
|
| 190 |
+
"data": {
|
| 191 |
+
"text/html": [
|
| 192 |
+
"\n",
|
| 193 |
+
" <div>\n",
|
| 194 |
+
" \n",
|
| 195 |
+
" <progress value='444' max='1110' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
| 196 |
+
" [ 444/1110 01:19 < 01:59, 5.56 it/s, Epoch 4/10]\n",
|
| 197 |
+
" </div>\n",
|
| 198 |
+
" <table border=\"1\" class=\"dataframe\">\n",
|
| 199 |
+
" <thead>\n",
|
| 200 |
+
" <tr style=\"text-align: left;\">\n",
|
| 201 |
+
" <th>Epoch</th>\n",
|
| 202 |
+
" <th>Training Loss</th>\n",
|
| 203 |
+
" <th>Validation Loss</th>\n",
|
| 204 |
+
" <th>F1 Toxic</th>\n",
|
| 205 |
+
" <th>F1 Weighted</th>\n",
|
| 206 |
+
" <th>Precision</th>\n",
|
| 207 |
+
" <th>Recall</th>\n",
|
| 208 |
+
" <th>Roc Auc</th>\n",
|
| 209 |
+
" </tr>\n",
|
| 210 |
+
" </thead>\n",
|
| 211 |
+
" <tbody>\n",
|
| 212 |
+
" <tr>\n",
|
| 213 |
+
" <td>1</td>\n",
|
| 214 |
+
" <td>0.554757</td>\n",
|
| 215 |
+
" <td>0.559579</td>\n",
|
| 216 |
+
" <td>0.690909</td>\n",
|
| 217 |
+
" <td>0.716667</td>\n",
|
| 218 |
+
" <td>0.690909</td>\n",
|
| 219 |
+
" <td>0.690909</td>\n",
|
| 220 |
+
" <td>0.814545</td>\n",
|
| 221 |
+
" </tr>\n",
|
| 222 |
+
" <tr>\n",
|
| 223 |
+
" <td>2</td>\n",
|
| 224 |
+
" <td>0.507270</td>\n",
|
| 225 |
+
" <td>0.560950</td>\n",
|
| 226 |
+
" <td>0.690909</td>\n",
|
| 227 |
+
" <td>0.716667</td>\n",
|
| 228 |
+
" <td>0.690909</td>\n",
|
| 229 |
+
" <td>0.690909</td>\n",
|
| 230 |
+
" <td>0.810350</td>\n",
|
| 231 |
+
" </tr>\n",
|
| 232 |
+
" <tr>\n",
|
| 233 |
+
" <td>3</td>\n",
|
| 234 |
+
" <td>0.558299</td>\n",
|
| 235 |
+
" <td>0.558404</td>\n",
|
| 236 |
+
" <td>0.673077</td>\n",
|
| 237 |
+
" <td>0.714744</td>\n",
|
| 238 |
+
" <td>0.714286</td>\n",
|
| 239 |
+
" <td>0.636364</td>\n",
|
| 240 |
+
" <td>0.812028</td>\n",
|
| 241 |
+
" </tr>\n",
|
| 242 |
+
" <tr>\n",
|
| 243 |
+
" <td>4</td>\n",
|
| 244 |
+
" <td>0.503684</td>\n",
|
| 245 |
+
" <td>0.564421</td>\n",
|
| 246 |
+
" <td>0.685185</td>\n",
|
| 247 |
+
" <td>0.716190</td>\n",
|
| 248 |
+
" <td>0.698113</td>\n",
|
| 249 |
+
" <td>0.672727</td>\n",
|
| 250 |
+
" <td>0.812308</td>\n",
|
| 251 |
+
" </tr>\n",
|
| 252 |
+
" </tbody>\n",
|
| 253 |
+
"</table><p>"
|
| 254 |
+
],
|
| 255 |
+
"text/plain": [
|
| 256 |
+
"<IPython.core.display.HTML object>"
|
| 257 |
+
]
|
| 258 |
+
},
|
| 259 |
+
"metadata": {},
|
| 260 |
+
"output_type": "display_data"
|
| 261 |
+
},
|
| 262 |
+
{
|
| 263 |
+
"name": "stderr",
|
| 264 |
+
"output_type": "stream",
|
| 265 |
+
"text": [
|
| 266 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py:752: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n",
|
| 267 |
+
" super().__init__(loader)\n"
|
| 268 |
+
]
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"name": "stdout",
|
| 272 |
+
"output_type": "stream",
|
| 273 |
+
"text": [
|
| 274 |
+
"2026-05-24 19:42:31 | INFO | src.models.transformer_trainer | Gap monitor β train_f1=0.7925 val_f1=0.6909 gap=0.1016\n"
|
| 275 |
+
]
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"name": "stderr",
|
| 279 |
+
"output_type": "stream",
|
| 280 |
+
"text": [
|
| 281 |
+
"Writing model shards: 100%|ββββββββββ| 1/1 [00:00<00:00, 3.64it/s]\n",
|
| 282 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py:752: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n",
|
| 283 |
+
" super().__init__(loader)\n",
|
| 284 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py:752: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n",
|
| 285 |
+
" super().__init__(loader)\n"
|
| 286 |
+
]
|
| 287 |
+
},
|
| 288 |
+
{
|
| 289 |
+
"name": "stdout",
|
| 290 |
+
"output_type": "stream",
|
| 291 |
+
"text": [
|
| 292 |
+
"2026-05-24 19:42:49 | INFO | src.models.transformer_trainer | Gap monitor β train_f1=0.7949 val_f1=0.6909 gap=0.1040\n"
|
| 293 |
+
]
|
| 294 |
+
},
|
| 295 |
+
{
|
| 296 |
+
"name": "stderr",
|
| 297 |
+
"output_type": "stream",
|
| 298 |
+
"text": [
|
| 299 |
+
"Writing model shards: 100%|ββββββββββ| 1/1 [00:00<00:00, 3.87it/s]\n",
|
| 300 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py:752: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n",
|
| 301 |
+
" super().__init__(loader)\n",
|
| 302 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py:752: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n",
|
| 303 |
+
" super().__init__(loader)\n"
|
| 304 |
+
]
|
| 305 |
+
},
|
| 306 |
+
{
|
| 307 |
+
"name": "stdout",
|
| 308 |
+
"output_type": "stream",
|
| 309 |
+
"text": [
|
| 310 |
+
"2026-05-24 19:43:09 | INFO | src.models.transformer_trainer | Gap monitor β train_f1=0.7952 val_f1=0.6731 gap=0.1221\n"
|
| 311 |
+
]
|
| 312 |
+
},
|
| 313 |
+
{
|
| 314 |
+
"name": "stderr",
|
| 315 |
+
"output_type": "stream",
|
| 316 |
+
"text": [
|
| 317 |
+
"Writing model shards: 100%|ββββββββββ| 1/1 [00:00<00:00, 2.57it/s]\n",
|
| 318 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py:752: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n",
|
| 319 |
+
" super().__init__(loader)\n",
|
| 320 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py:752: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n",
|
| 321 |
+
" super().__init__(loader)\n"
|
| 322 |
+
]
|
| 323 |
+
},
|
| 324 |
+
{
|
| 325 |
+
"name": "stdout",
|
| 326 |
+
"output_type": "stream",
|
| 327 |
+
"text": [
|
| 328 |
+
"2026-05-24 19:43:29 | INFO | src.models.transformer_trainer | Gap monitor β train_f1=0.7965 val_f1=0.6852 gap=0.1113\n",
|
| 329 |
+
"2026-05-24 19:43:29 | INFO | src.models.transformer_trainer | Early stop: no f1_toxic improvement for 3 epochs\n"
|
| 330 |
+
]
|
| 331 |
+
},
|
| 332 |
+
{
|
| 333 |
+
"name": "stderr",
|
| 334 |
+
"output_type": "stream",
|
| 335 |
+
"text": [
|
| 336 |
+
"Writing model shards: 100%|ββββββββββ| 1/1 [00:00<00:00, 3.03it/s]\n",
|
| 337 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py:752: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n",
|
| 338 |
+
" super().__init__(loader)\n"
|
| 339 |
+
]
|
| 340 |
+
},
|
| 341 |
+
{
|
| 342 |
+
"data": {
|
| 343 |
+
"text/html": [],
|
| 344 |
+
"text/plain": [
|
| 345 |
+
"<IPython.core.display.HTML object>"
|
| 346 |
+
]
|
| 347 |
+
},
|
| 348 |
+
"metadata": {},
|
| 349 |
+
"output_type": "display_data"
|
| 350 |
+
},
|
| 351 |
+
{
|
| 352 |
+
"name": "stdout",
|
| 353 |
+
"output_type": "stream",
|
| 354 |
+
"text": [
|
| 355 |
+
"2026-05-24 19:43:31 | INFO | src.models.transformer_trainer | Val threshold tuning β best_t=0.33 val_f1_toxic=0.7313\n"
|
| 356 |
+
]
|
| 357 |
+
},
|
| 358 |
+
{
|
| 359 |
+
"name": "stderr",
|
| 360 |
+
"output_type": "stream",
|
| 361 |
+
"text": [
|
| 362 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py:752: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n",
|
| 363 |
+
" super().__init__(loader)\n"
|
| 364 |
+
]
|
| 365 |
+
},
|
| 366 |
+
{
|
| 367 |
+
"data": {
|
| 368 |
+
"text/html": [],
|
| 369 |
+
"text/plain": [
|
| 370 |
+
"<IPython.core.display.HTML object>"
|
| 371 |
+
]
|
| 372 |
+
},
|
| 373 |
+
"metadata": {},
|
| 374 |
+
"output_type": "display_data"
|
| 375 |
+
},
|
| 376 |
+
{
|
| 377 |
+
"name": "stderr",
|
| 378 |
+
"output_type": "stream",
|
| 379 |
+
"text": [
|
| 380 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py:752: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n",
|
| 381 |
+
" super().__init__(loader)\n"
|
| 382 |
+
]
|
| 383 |
+
},
|
| 384 |
+
{
|
| 385 |
+
"data": {
|
| 386 |
+
"text/html": [],
|
| 387 |
+
"text/plain": [
|
| 388 |
+
"<IPython.core.display.HTML object>"
|
| 389 |
+
]
|
| 390 |
+
},
|
| 391 |
+
"metadata": {},
|
| 392 |
+
"output_type": "display_data"
|
| 393 |
+
},
|
| 394 |
+
{
|
| 395 |
+
"name": "stderr",
|
| 396 |
+
"output_type": "stream",
|
| 397 |
+
"text": [
|
| 398 |
+
"Writing model shards: 100%|ββββββββββ| 1/1 [00:00<00:00, 2.89it/s]\n",
|
| 399 |
+
"Map: 100%|ββββββββββ| 886/886 [00:00<00:00, 27925.88 examples/s]\n",
|
| 400 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py:752: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, device pinned memory won't be used.\n",
|
| 401 |
+
" super().__init__(loader)\n"
|
| 402 |
+
]
|
| 403 |
+
},
|
| 404 |
+
{
|
| 405 |
+
"data": {
|
| 406 |
+
"text/html": [],
|
| 407 |
+
"text/plain": [
|
| 408 |
+
"<IPython.core.display.HTML object>"
|
| 409 |
+
]
|
| 410 |
+
},
|
| 411 |
+
"metadata": {},
|
| 412 |
+
"output_type": "display_data"
|
| 413 |
+
},
|
| 414 |
+
{
|
| 415 |
+
"name": "stderr",
|
| 416 |
+
"output_type": "stream",
|
| 417 |
+
"text": [
|
| 418 |
+
"/Users/miraekang/proyectos/ai-nlp/.venv/lib/python3.12/site-packages/sklearn/metrics/_ranking.py:442: UndefinedMetricWarning: Only one class is present in y_true. ROC AUC score is not defined in that case.\n",
|
| 419 |
+
" warnings.warn(\n"
|
| 420 |
+
]
|
| 421 |
+
},
|
| 422 |
+
{
|
| 423 |
+
"name": "stdout",
|
| 424 |
+
"output_type": "stream",
|
| 425 |
+
"text": [
|
| 426 |
+
"2026-05-24 19:43:51 | INFO | src.pipeline.run_expert_pipeline | Expert report: /Users/miraekang/proyectos/ai-nlp/reports/expert/integrated_report_20260524_193947.md\n",
|
| 427 |
+
"2026-05-24 19:43:51 | INFO | src.pipeline.run_expert_pipeline | ============================================================\n",
|
| 428 |
+
"2026-05-24 19:43:51 | INFO | src.pipeline.run_expert_pipeline | Toxic-BERT-expert: F1-toxic=0.7489 β οΈ | toxic gap=0.0418 β
| threshold=0.33\n",
|
| 429 |
+
"2026-05-24 19:43:51 | INFO | src.pipeline.run_expert_pipeline | LR-TFIDF-expert: F1-toxic=0.6301 β οΈ | toxic gap=0.0008 β
| threshold=0.05\n",
|
| 430 |
+
"2026-05-24 19:43:51 | INFO | src.pipeline.run_expert_pipeline | Hybrid-ToxicBERT+LR: F1-toxic=0.7489 β οΈ | toxic gap=0.0428 β
| threshold=0.38\n",
|
| 431 |
+
"2026-05-24 19:43:51 | INFO | src.pipeline.run_expert_pipeline | ============================================================\n",
|
| 432 |
+
"Completed run_id=20260524_193947\n"
|
| 433 |
+
]
|
| 434 |
+
}
|
| 435 |
+
],
|
| 436 |
+
"source": [
|
| 437 |
+
"from src.pipeline.run_expert_pipeline import run_expert_pipeline\n",
|
| 438 |
+
"\n",
|
| 439 |
+
"metrics = run_expert_pipeline(config_path=cfg_path)\n",
|
| 440 |
+
"run_id = metrics[\"run_id\"]\n",
|
| 441 |
+
"print(f\"Completed run_id={run_id}\")"
|
| 442 |
+
]
|
| 443 |
+
},
|
| 444 |
+
{
|
| 445 |
+
"cell_type": "markdown",
|
| 446 |
+
"metadata": {},
|
| 447 |
+
"source": [
|
| 448 |
+
"## 2. Holdout test β F1-toxic and gap"
|
| 449 |
+
]
|
| 450 |
+
},
|
| 451 |
+
{
|
| 452 |
+
"cell_type": "code",
|
| 453 |
+
"execution_count": 3,
|
| 454 |
+
"metadata": {},
|
| 455 |
+
"outputs": [
|
| 456 |
+
{
|
| 457 |
+
"data": {
|
| 458 |
+
"text/html": [
|
| 459 |
+
"<div>\n",
|
| 460 |
+
"<style scoped>\n",
|
| 461 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 462 |
+
" vertical-align: middle;\n",
|
| 463 |
+
" }\n",
|
| 464 |
+
"\n",
|
| 465 |
+
" .dataframe tbody tr th {\n",
|
| 466 |
+
" vertical-align: top;\n",
|
| 467 |
+
" }\n",
|
| 468 |
+
"\n",
|
| 469 |
+
" .dataframe thead th {\n",
|
| 470 |
+
" text-align: right;\n",
|
| 471 |
+
" }\n",
|
| 472 |
+
"</style>\n",
|
| 473 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 474 |
+
" <thead>\n",
|
| 475 |
+
" <tr style=\"text-align: right;\">\n",
|
| 476 |
+
" <th></th>\n",
|
| 477 |
+
" <th>model</th>\n",
|
| 478 |
+
" <th>f1_toxic_test</th>\n",
|
| 479 |
+
" <th>f1_toxic_train</th>\n",
|
| 480 |
+
" <th>toxic_gap_pp</th>\n",
|
| 481 |
+
" <th>gap_ok_<5pp</th>\n",
|
| 482 |
+
" <th>f1_target_>0.75</th>\n",
|
| 483 |
+
" <th>threshold</th>\n",
|
| 484 |
+
" <th>roc_auc</th>\n",
|
| 485 |
+
" </tr>\n",
|
| 486 |
+
" </thead>\n",
|
| 487 |
+
" <tbody>\n",
|
| 488 |
+
" <tr>\n",
|
| 489 |
+
" <th>0</th>\n",
|
| 490 |
+
" <td>Toxic-BERT</td>\n",
|
| 491 |
+
" <td>0.7489</td>\n",
|
| 492 |
+
" <td>0.7907</td>\n",
|
| 493 |
+
" <td>4.18</td>\n",
|
| 494 |
+
" <td>True</td>\n",
|
| 495 |
+
" <td>False</td>\n",
|
| 496 |
+
" <td>0.33</td>\n",
|
| 497 |
+
" <td>0.8768</td>\n",
|
| 498 |
+
" </tr>\n",
|
| 499 |
+
" <tr>\n",
|
| 500 |
+
" <th>1</th>\n",
|
| 501 |
+
" <td>LR-TFIDF-250</td>\n",
|
| 502 |
+
" <td>0.6301</td>\n",
|
| 503 |
+
" <td>0.6309</td>\n",
|
| 504 |
+
" <td>0.08</td>\n",
|
| 505 |
+
" <td>True</td>\n",
|
| 506 |
+
" <td>False</td>\n",
|
| 507 |
+
" <td>0.05</td>\n",
|
| 508 |
+
" <td>0.7056</td>\n",
|
| 509 |
+
" </tr>\n",
|
| 510 |
+
" <tr>\n",
|
| 511 |
+
" <th>2</th>\n",
|
| 512 |
+
" <td>Hybrid 0.7/0.3</td>\n",
|
| 513 |
+
" <td>0.7489</td>\n",
|
| 514 |
+
" <td>0.7917</td>\n",
|
| 515 |
+
" <td>4.28</td>\n",
|
| 516 |
+
" <td>True</td>\n",
|
| 517 |
+
" <td>False</td>\n",
|
| 518 |
+
" <td>0.38</td>\n",
|
| 519 |
+
" <td>0.8773</td>\n",
|
| 520 |
+
" </tr>\n",
|
| 521 |
+
" </tbody>\n",
|
| 522 |
+
"</table>\n",
|
| 523 |
+
"</div>"
|
| 524 |
+
],
|
| 525 |
+
"text/plain": [
|
| 526 |
+
" model f1_toxic_test f1_toxic_train toxic_gap_pp gap_ok_<5pp \\\n",
|
| 527 |
+
"0 Toxic-BERT 0.7489 0.7907 4.18 True \n",
|
| 528 |
+
"1 LR-TFIDF-250 0.6301 0.6309 0.08 True \n",
|
| 529 |
+
"2 Hybrid 0.7/0.3 0.7489 0.7917 4.28 True \n",
|
| 530 |
+
"\n",
|
| 531 |
+
" f1_target_>0.75 threshold roc_auc \n",
|
| 532 |
+
"0 False 0.33 0.8768 \n",
|
| 533 |
+
"1 False 0.05 0.7056 \n",
|
| 534 |
+
"2 False 0.38 0.8773 "
|
| 535 |
+
]
|
| 536 |
+
},
|
| 537 |
+
"execution_count": 3,
|
| 538 |
+
"metadata": {},
|
| 539 |
+
"output_type": "execute_result"
|
| 540 |
+
}
|
| 541 |
+
],
|
| 542 |
+
"source": [
|
| 543 |
+
"def _row(key, label):\n",
|
| 544 |
+
" m = metrics.get(key, {})\n",
|
| 545 |
+
" if not m:\n",
|
| 546 |
+
" return None\n",
|
| 547 |
+
" return {\n",
|
| 548 |
+
" \"model\": label,\n",
|
| 549 |
+
" \"f1_toxic_test\": m.get(\"f1_toxic\"),\n",
|
| 550 |
+
" \"f1_toxic_train\": m.get(\"f1_toxic_train\"),\n",
|
| 551 |
+
" \"toxic_gap_pp\": m.get(\"train_test_gap_toxic_pp\"),\n",
|
| 552 |
+
" \"gap_ok_<5pp\": m.get(\"gap_toxic_ok\", False),\n",
|
| 553 |
+
" \"f1_target_>0.75\": (m.get(\"f1_toxic\") or 0) > 0.75,\n",
|
| 554 |
+
" \"threshold\": m.get(\"threshold\"),\n",
|
| 555 |
+
" \"roc_auc\": m.get(\"roc_auc\"),\n",
|
| 556 |
+
" }\n",
|
| 557 |
+
"\n",
|
| 558 |
+
"summary = pd.DataFrame(\n",
|
| 559 |
+
" [\n",
|
| 560 |
+
" r\n",
|
| 561 |
+
" for r in [\n",
|
| 562 |
+
" _row(\"transformer\", \"Toxic-BERT\"),\n",
|
| 563 |
+
" _row(\"logistic_regression\", \"LR-TFIDF-250\"),\n",
|
| 564 |
+
" _row(\"ensemble\", \"Hybrid 0.7/0.3\"),\n",
|
| 565 |
+
" ]\n",
|
| 566 |
+
" if r\n",
|
| 567 |
+
" ]\n",
|
| 568 |
+
")\n",
|
| 569 |
+
"summary"
|
| 570 |
+
]
|
| 571 |
+
},
|
| 572 |
+
{
|
| 573 |
+
"cell_type": "markdown",
|
| 574 |
+
"metadata": {},
|
| 575 |
+
"source": [
|
| 576 |
+
"## 3. Integrated report"
|
| 577 |
+
]
|
| 578 |
+
},
|
| 579 |
+
{
|
| 580 |
+
"cell_type": "code",
|
| 581 |
+
"execution_count": 4,
|
| 582 |
+
"metadata": {},
|
| 583 |
+
"outputs": [
|
| 584 |
+
{
|
| 585 |
+
"data": {
|
| 586 |
+
"text/markdown": [
|
| 587 |
+
"# Phase 5 Expert Adaptation β 20260524_193947\n",
|
| 588 |
+
"\n",
|
| 589 |
+
"## Targets\n",
|
| 590 |
+
"- Test **F1-toxic** > 0.75\n",
|
| 591 |
+
"- |Train F1-toxic β Test F1-toxic| < 5 pp (0.05)\n",
|
| 592 |
+
"\n",
|
| 593 |
+
"## Holdout test (tuned thresholds on validation)\n",
|
| 594 |
+
"\n",
|
| 595 |
+
"| Model | F1-toxic (test) | F1-toxic (train) | Toxic gap (pp) | Threshold | Gap OK |\n",
|
| 596 |
+
"|-------|-------------------|--------------------|----------------|-----------|--------|\n",
|
| 597 |
+
"| Toxic-BERT | 0.7489 | 0.7907 | 4.18 | 0.33 | β
|\n",
|
| 598 |
+
"| LR-TFIDF (250 feat) | 0.6301 | 0.6309 | 0.08 | 0.05 | β
|\n",
|
| 599 |
+
"| Hybrid 0.7/0.3 | 0.7489 | 0.7917 | 4.28 | 0.38 | β
|\n",
|
| 600 |
+
"\n",
|
| 601 |
+
"## Augmentation\n",
|
| 602 |
+
"- Pivot language: de\n",
|
| 603 |
+
"- Train size: 677 β 886 (+209)\n",
|
| 604 |
+
"\n",
|
| 605 |
+
"## Verdict\n",
|
| 606 |
+
"**Toxic-BERT** toxic gap < 5 pp β
; **Hybrid** toxic gap < 5 pp β
\n",
|
| 607 |
+
"\n",
|
| 608 |
+
"- JSON: `reports/expert/expert_run_20260524_193947.json`\n"
|
| 609 |
+
],
|
| 610 |
+
"text/plain": [
|
| 611 |
+
"<IPython.core.display.Markdown object>"
|
| 612 |
+
]
|
| 613 |
+
},
|
| 614 |
+
"metadata": {},
|
| 615 |
+
"output_type": "display_data"
|
| 616 |
+
}
|
| 617 |
+
],
|
| 618 |
+
"source": [
|
| 619 |
+
"from IPython.display import Markdown, display\n",
|
| 620 |
+
"\n",
|
| 621 |
+
"md_path = reports_dir / f\"integrated_report_{run_id}.md\"\n",
|
| 622 |
+
"if md_path.exists():\n",
|
| 623 |
+
" display(Markdown(md_path.read_text()))\n",
|
| 624 |
+
"else:\n",
|
| 625 |
+
" print(\"Report not found\")"
|
| 626 |
+
]
|
| 627 |
+
},
|
| 628 |
+
{
|
| 629 |
+
"cell_type": "markdown",
|
| 630 |
+
"metadata": {},
|
| 631 |
+
"source": [
|
| 632 |
+
"## Conclusion\n",
|
| 633 |
+
"\n",
|
| 634 |
+
"Phase 5 applies **Toxic-BERT** with a frozen backbone, a **250-feature** LR safety net, **validation threshold tuning** on F1-toxic, and a **0.7/0.3** hybrid.\n",
|
| 635 |
+
"Augmentation uses a **German** pivot for more diverse toxic paraphrases.\n",
|
| 636 |
+
"\n",
|
| 637 |
+
"Success criteria:\n",
|
| 638 |
+
"- **F1-toxic (test) > 0.75**\n",
|
| 639 |
+
"- **|F1-toxic train β F1-toxic test| < 5 pp** (`gap_toxic_ok`)\n",
|
| 640 |
+
"\n",
|
| 641 |
+
"Artifacts: `models/expert_toxic_bert/`, `models/expert_lr_tfidf.joblib`, `reports/expert/expert_run_{run_id}.json`."
|
| 642 |
+
]
|
| 643 |
+
}
|
| 644 |
+
],
|
| 645 |
+
"metadata": {
|
| 646 |
+
"kernelspec": {
|
| 647 |
+
"display_name": ".venv",
|
| 648 |
+
"language": "python",
|
| 649 |
+
"name": "python3"
|
| 650 |
+
},
|
| 651 |
+
"language_info": {
|
| 652 |
+
"codemirror_mode": {
|
| 653 |
+
"name": "ipython",
|
| 654 |
+
"version": 3
|
| 655 |
+
},
|
| 656 |
+
"file_extension": ".py",
|
| 657 |
+
"mimetype": "text/x-python",
|
| 658 |
+
"name": "python",
|
| 659 |
+
"nbconvert_exporter": "python",
|
| 660 |
+
"pygments_lexer": "ipython3",
|
| 661 |
+
"version": "3.12.7"
|
| 662 |
+
}
|
| 663 |
+
},
|
| 664 |
+
"nbformat": 4,
|
| 665 |
+
"nbformat_minor": 5
|
| 666 |
+
}
|
notebooks/archive_attempts/13_hyper_optimization_sprints.ipynb
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# Notebook 13 β Hyper-Optimization Sprints (Break 0.80 F1)\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"Foundation: **Frozen Golden Baseline** (`unitary/toxic-bert`, 6-label sigmoid `toxic` score).\n",
|
| 10 |
+
"\n",
|
| 11 |
+
"**Objective:** Test F1 weighted **> 0.80** with trainβtest gap **< 5%** (briefing rule).\n",
|
| 12 |
+
"\n",
|
| 13 |
+
"| Exp | Method | 5-Fold CV |\n",
|
| 14 |
+
"|-----|--------|----------|\n",
|
| 15 |
+
"| **1** | Multi-pivot aug (DE/FR/ES) + head-only train | β
|\n",
|
| 16 |
+
"| **2** | Advanced TTA (Original + DE + FR weighted) | β
|\n",
|
| 17 |
+
"| **3** | CLS hidden states + style meta β LR C=0.01 | β
|\n",
|
| 18 |
+
"| **4** | Ultra-fine threshold (0.05β0.30, step 0.001) on best of 1β3 | β
|\n",
|
| 19 |
+
"\n",
|
| 20 |
+
"Artifacts: `models/notebook_13/` Β· Reports: `reports/notebook_13/sprint_results.json`\n",
|
| 21 |
+
"\n",
|
| 22 |
+
"```bash\n",
|
| 23 |
+
"uv run python -m src.experiments.notebook_13_sprints\n",
|
| 24 |
+
"```"
|
| 25 |
+
]
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"cell_type": "markdown",
|
| 29 |
+
"metadata": {},
|
| 30 |
+
"source": [
|
| 31 |
+
"## 0. Setup"
|
| 32 |
+
]
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"cell_type": "code",
|
| 36 |
+
"execution_count": null,
|
| 37 |
+
"metadata": {},
|
| 38 |
+
"outputs": [],
|
| 39 |
+
"source": [
|
| 40 |
+
"import json\n",
|
| 41 |
+
"import sys\n",
|
| 42 |
+
"from pathlib import Path\n",
|
| 43 |
+
"\n",
|
| 44 |
+
"import pandas as pd\n",
|
| 45 |
+
"\n",
|
| 46 |
+
"PROJECT_ROOT = Path.cwd().resolve()\n",
|
| 47 |
+
"if not (PROJECT_ROOT / \"configs\").exists() and (PROJECT_ROOT.parent / \"configs\").exists():\n",
|
| 48 |
+
" PROJECT_ROOT = PROJECT_ROOT.parent\n",
|
| 49 |
+
"if str(PROJECT_ROOT) not in sys.path:\n",
|
| 50 |
+
" sys.path.insert(0, str(PROJECT_ROOT))\n",
|
| 51 |
+
"\n",
|
| 52 |
+
"ARTIFACT_DIR = PROJECT_ROOT / \"models\" / \"notebook_13\"\n",
|
| 53 |
+
"REPORT_DIR = PROJECT_ROOT / \"reports\" / \"notebook_13\"\n",
|
| 54 |
+
"RESULTS_PATH = REPORT_DIR / \"sprint_results.json\"\n",
|
| 55 |
+
"print(ARTIFACT_DIR)\n",
|
| 56 |
+
"print(RESULTS_PATH)"
|
| 57 |
+
]
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"cell_type": "markdown",
|
| 61 |
+
"metadata": {},
|
| 62 |
+
"source": [
|
| 63 |
+
"## 1. Run all sprints (long-running β translation + CV)"
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"cell_type": "code",
|
| 68 |
+
"execution_count": null,
|
| 69 |
+
"metadata": {},
|
| 70 |
+
"outputs": [],
|
| 71 |
+
"source": [
|
| 72 |
+
"from src.experiments.notebook_13_sprints import main\n",
|
| 73 |
+
"\n",
|
| 74 |
+
"main()"
|
| 75 |
+
]
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"cell_type": "markdown",
|
| 79 |
+
"metadata": {},
|
| 80 |
+
"source": [
|
| 81 |
+
"## 2. Load results (if already executed)"
|
| 82 |
+
]
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"cell_type": "code",
|
| 86 |
+
"execution_count": null,
|
| 87 |
+
"metadata": {},
|
| 88 |
+
"outputs": [],
|
| 89 |
+
"source": [
|
| 90 |
+
"if not RESULTS_PATH.exists():\n",
|
| 91 |
+
" raise FileNotFoundError(f\"Run sprints first: uv run python -m src.experiments.notebook_13_sprints\")\n",
|
| 92 |
+
"\n",
|
| 93 |
+
"results = json.loads(RESULTS_PATH.read_text())\n",
|
| 94 |
+
"comparison = pd.DataFrame(results[\"comparison_table\"])\n",
|
| 95 |
+
"comparison"
|
| 96 |
+
]
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"cell_type": "markdown",
|
| 100 |
+
"metadata": {},
|
| 101 |
+
"source": [
|
| 102 |
+
"## 3. Per-fold gap monitor"
|
| 103 |
+
]
|
| 104 |
+
},
|
| 105 |
+
{
|
| 106 |
+
"cell_type": "code",
|
| 107 |
+
"execution_count": null,
|
| 108 |
+
"metadata": {},
|
| 109 |
+
"outputs": [],
|
| 110 |
+
"source": [
|
| 111 |
+
"rows = []\n",
|
| 112 |
+
"for key in (\"golden_baseline_cv\", \"exp1\", \"exp2\", \"exp3\", \"exp4\"):\n",
|
| 113 |
+
" block = results.get(key, {})\n",
|
| 114 |
+
" for f in block.get(\"folds\", []):\n",
|
| 115 |
+
" rows.append({\n",
|
| 116 |
+
" \"experiment\": key,\n",
|
| 117 |
+
" \"fold\": f[\"fold\"],\n",
|
| 118 |
+
" \"f1_test\": f[\"f1_test\"],\n",
|
| 119 |
+
" \"gap_pp\": f[\"train_test_gap_pp\"],\n",
|
| 120 |
+
" \"gap_ok\": f[\"gap_ok\"],\n",
|
| 121 |
+
" \"status\": \"PASS\" if f[\"gap_ok\"] else \"FAIL_GAP\",\n",
|
| 122 |
+
" })\n",
|
| 123 |
+
"pd.DataFrame(rows).pivot_table(\n",
|
| 124 |
+
" index=\"experiment\", values=[\"f1_test\", \"gap_pp\"], aggfunc=[\"mean\", \"max\"]\n",
|
| 125 |
+
")"
|
| 126 |
+
]
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"cell_type": "markdown",
|
| 130 |
+
"metadata": {},
|
| 131 |
+
"source": [
|
| 132 |
+
"## 4. Comparison markdown report"
|
| 133 |
+
]
|
| 134 |
+
},
|
| 135 |
+
{
|
| 136 |
+
"cell_type": "code",
|
| 137 |
+
"execution_count": null,
|
| 138 |
+
"metadata": {},
|
| 139 |
+
"outputs": [],
|
| 140 |
+
"source": [
|
| 141 |
+
"from IPython.display import Markdown, display\n",
|
| 142 |
+
"\n",
|
| 143 |
+
"md = REPORT_DIR / \"comparison_table.md\"\n",
|
| 144 |
+
"if md.exists():\n",
|
| 145 |
+
" display(Markdown(md.read_text()))"
|
| 146 |
+
]
|
| 147 |
+
},
|
| 148 |
+
{
|
| 149 |
+
"cell_type": "markdown",
|
| 150 |
+
"metadata": {},
|
| 151 |
+
"source": [
|
| 152 |
+
"## Conclusion\n",
|
| 153 |
+
"\n",
|
| 154 |
+
"**Sprint results:** `reports/notebook_13/sprint_results.json`\n",
|
| 155 |
+
"\n",
|
| 156 |
+
"| Sprint | Mean F1 (test) | Max gap (pp) | All folds gap OK | Mean F1 β₯ 0.80 |\n",
|
| 157 |
+
"|--------|----------------|--------------|------------------|----------------|\n",
|
| 158 |
+
"| Golden Baseline (CV) | 0.7748 | 8.09 | β | β |\n",
|
| 159 |
+
"| Exp1 Multi-Pivot + Head | 0.7493 | 12.42 | β | β |\n",
|
| 160 |
+
"| Exp2 Advanced TTA | 0.7592 | 6.53 | β | β |\n",
|
| 161 |
+
"| Exp3 Meta Stacking | **0.7894** | 9.77 | β | β |\n",
|
| 162 |
+
"| Exp4 Ultra-Fine Thresh | 0.7704 | 9.42 | β | β |\n",
|
| 163 |
+
"\n",
|
| 164 |
+
"**Which sprint reached 0.80?** No sprint passed **both** constraints on all 5 folds. Best single folds: **Exp3 fold 0** (F1=0.8147, gap=3.39 pp) and **Exp4 fold 4** (F1=0.8083, gap=0.18 pp, thresholdβ0.299).\n",
|
| 165 |
+
"\n",
|
| 166 |
+
"**Final trainβtest gap:** Best average gap discipline: Golden Baseline / Exp2 TTA (~3.3β3.6 pp mean). Exp3 has highest mean F1 but **FAIL_GAP** (6.94 pp mean).\n",
|
| 167 |
+
"\n",
|
| 168 |
+
"**Production recommendation:** **Frozen Golden Baseline** for briefing compliance (~0.77β0.79 CV F1, minimal overfit). Exp3+Exp4 threshold tuning is promising on individual folds but not stable across CV.\n",
|
| 169 |
+
"\n",
|
| 170 |
+
"Artifacts: `models/notebook_13/` (augment cache, head-only checkpoints)."
|
| 171 |
+
]
|
| 172 |
+
}
|
| 173 |
+
],
|
| 174 |
+
"metadata": {
|
| 175 |
+
"kernelspec": {
|
| 176 |
+
"display_name": "Python 3",
|
| 177 |
+
"language": "python",
|
| 178 |
+
"name": "python3"
|
| 179 |
+
},
|
| 180 |
+
"language_info": {
|
| 181 |
+
"name": "python",
|
| 182 |
+
"version": "3.12.0"
|
| 183 |
+
}
|
| 184 |
+
},
|
| 185 |
+
"nbformat": 4,
|
| 186 |
+
"nbformat_minor": 5
|
| 187 |
+
}
|
notebooks/archive_attempts/README.md
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Archive β experimental notebooks
|
| 2 |
+
|
| 3 |
+
Notebooks **04β11** and **13** document iterative experiments (ensembles, tuning, augmentation, stable production runs, expert Toxic-BERT, hyper-optimization sprints). They are kept for reproducibility but are **not** part of the primary project narrative.
|
| 4 |
+
|
| 5 |
+
**Primary storyline** (parent `notebooks/` folder):
|
| 6 |
+
|
| 7 |
+
| Notebook | Focus |
|
| 8 |
+
|----------|--------|
|
| 9 |
+
| `01_eda_v2` | Data audit, Safe vs Toxic |
|
| 10 |
+
| `02_preprocessing_v2` | Cleaning pipeline |
|
| 11 |
+
| `03_vectorization_v2` | TF-IDF features |
|
| 12 |
+
| `12_golden_baseline_strategy` | Frozen BERT + golden baseline metrics |
|
| 13 |
+
| `14_final_meta_stacking` | **Production** hybrid meta-feature stacking |
|
| 14 |
+
|
| 15 |
+
Re-run production artifacts:
|
| 16 |
+
|
| 17 |
+
```bash
|
| 18 |
+
uv run python -m src.experiments.notebook_14_final_stack
|
| 19 |
+
```
|
notebooks/logs/pipeline_20260524.log
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-05-24 19:39:47 | INFO | src.pipeline.run_expert_pipeline | ============================================================
|
| 2 |
+
2026-05-24 19:39:47 | INFO | src.pipeline.run_expert_pipeline | EXPERT PIPELINE (Phase 5) β run=20260524_193947
|
| 3 |
+
2026-05-24 19:39:47 | INFO | src.pipeline.run_expert_pipeline | ============================================================
|
| 4 |
+
2026-05-24 19:39:47 | INFO | src.data.loader | Cargando dataset: /Users/miraekang/proyectos/ai-nlp/data/raw/youtoxic_english_1000.csv
|
| 5 |
+
2026-05-24 19:39:47 | INFO | src.data.loader | Shape: (1000, 15)
|
| 6 |
+
2026-05-24 19:39:47 | INFO | src.data.loader | Columnas validadas β
|
| 7 |
+
2026-05-24 19:39:47 | WARNING | src.data.loader | 3 duplicados eliminados
|
| 8 |
+
2026-05-24 19:39:47 | INFO | src.data.loader | Toxicos: 459 (46.0%)
|
| 9 |
+
2026-05-24 19:39:47 | INFO | src.pipeline.run_expert_pipeline | Augmentation ENβDEβEN (toxic only)
|
| 10 |
+
2026-05-24 19:39:47 | INFO | src.features.augmentation | Back-translation: 312 toxic samples
|
| 11 |
+
2026-05-24 19:42:02 | INFO | src.features.augmentation | Back-translation produced 295 samples
|
| 12 |
+
2026-05-24 19:42:08 | INFO | src.features.augmentation | Dedup: kept 209/295 (dropped 86 with cosine > 0.95)
|
| 13 |
+
2026-05-24 19:42:08 | INFO | src.features.augmentation | Train size after augmentation: 886 (+209)
|
| 14 |
+
2026-05-24 19:42:08 | INFO | src.pipeline.run_expert_pipeline | LR-TFIDF (max_features=250) + gap search
|
| 15 |
+
2026-05-24 19:42:08 | INFO | src.models.hybrid_ensemble | Training stable LR β C=0.05
|
| 16 |
+
2026-05-24 19:42:08 | INFO | src.models.hybrid_ensemble | LR gap search β C=0.05 max_features=250 min_df=3 train_f1=0.7703 test_f1=0.6563 gap=0.1139
|
| 17 |
+
2026-05-24 19:42:08 | INFO | src.models.hybrid_ensemble | Training stable LR β C=0.03
|
| 18 |
+
2026-05-24 19:42:08 | INFO | src.models.hybrid_ensemble | LR gap search β C=0.03 max_features=250 min_df=5 train_f1=0.7572 test_f1=0.6563 gap=0.1008
|
| 19 |
+
2026-05-24 19:42:08 | INFO | src.models.hybrid_ensemble | Training stable LR β C=0.02
|
| 20 |
+
2026-05-24 19:42:08 | INFO | src.models.hybrid_ensemble | LR gap search β C=0.02 max_features=250 min_df=5 train_f1=0.7572 test_f1=0.6563 gap=0.1008
|
| 21 |
+
2026-05-24 19:42:08 | INFO | src.models.hybrid_ensemble | Training stable LR β C=0.01
|
| 22 |
+
2026-05-24 19:42:08 | INFO | src.models.hybrid_ensemble | LR gap search β C=0.01 max_features=250 min_df=8 train_f1=0.7570 test_f1=0.6563 gap=0.1006
|
| 23 |
+
2026-05-24 19:42:08 | INFO | src.models.hybrid_ensemble | Training stable LR β C=0.005
|
| 24 |
+
2026-05-24 19:42:08 | INFO | src.models.hybrid_ensemble | LR gap search β C=0.005 max_features=250 min_df=10 train_f1=0.7511 test_f1=0.6509 gap=0.1003
|
| 25 |
+
2026-05-24 19:42:08 | WARNING | src.models.hybrid_ensemble | LR gap still 0.1003 after grid search; using best gap C=0.005
|
| 26 |
+
2026-05-24 19:42:08 | INFO | src.models.hybrid_ensemble | Stable LR saved: /Users/miraekang/proyectos/ai-nlp/models/expert_lr_tfidf.joblib
|
| 27 |
+
2026-05-24 19:42:08 | INFO | src.pipeline.run_expert_pipeline | Toxic-BERT β head-only fine-tune + val threshold tuning
|
| 28 |
+
2026-05-24 19:42:09 | INFO | src.models.transformer_trainer | Head-only freeze β trainable 592,130/109,483,778 (0.54%)
|
| 29 |
+
2026-05-24 19:42:10 | INFO | src.models.transformer_trainer | Training unitary/toxic-bert (head_only freeze)...
|
| 30 |
+
2026-05-24 19:42:31 | INFO | src.models.transformer_trainer | Gap monitor β train_f1=0.7925 val_f1=0.6909 gap=0.1016
|
| 31 |
+
2026-05-24 19:42:49 | INFO | src.models.transformer_trainer | Gap monitor β train_f1=0.7949 val_f1=0.6909 gap=0.1040
|
| 32 |
+
2026-05-24 19:43:09 | INFO | src.models.transformer_trainer | Gap monitor β train_f1=0.7952 val_f1=0.6731 gap=0.1221
|
| 33 |
+
2026-05-24 19:43:29 | INFO | src.models.transformer_trainer | Gap monitor β train_f1=0.7965 val_f1=0.6852 gap=0.1113
|
| 34 |
+
2026-05-24 19:43:29 | INFO | src.models.transformer_trainer | Early stop: no f1_toxic improvement for 3 epochs
|
| 35 |
+
2026-05-24 19:43:31 | INFO | src.models.transformer_trainer | Val threshold tuning β best_t=0.33 val_f1_toxic=0.7313
|
| 36 |
+
2026-05-24 19:43:51 | INFO | src.pipeline.run_expert_pipeline | Expert report: /Users/miraekang/proyectos/ai-nlp/reports/expert/integrated_report_20260524_193947.md
|
| 37 |
+
2026-05-24 19:43:51 | INFO | src.pipeline.run_expert_pipeline | ============================================================
|
| 38 |
+
2026-05-24 19:43:51 | INFO | src.pipeline.run_expert_pipeline | Toxic-BERT-expert: F1-toxic=0.7489 β οΈ | toxic gap=0.0418 β
| threshold=0.33
|
| 39 |
+
2026-05-24 19:43:51 | INFO | src.pipeline.run_expert_pipeline | LR-TFIDF-expert: F1-toxic=0.6301 β οΈ | toxic gap=0.0008 β
| threshold=0.05
|
| 40 |
+
2026-05-24 19:43:51 | INFO | src.pipeline.run_expert_pipeline | Hybrid-ToxicBERT+LR: F1-toxic=0.7489 β οΈ | toxic gap=0.0428 β
| threshold=0.38
|
| 41 |
+
2026-05-24 19:43:51 | INFO | src.pipeline.run_expert_pipeline | ============================================================
|
| 42 |
+
2026-05-24 21:33:42 | INFO | src.pipeline.run_golden_baseline_pipeline | ============================================================
|
| 43 |
+
2026-05-24 21:33:42 | INFO | src.pipeline.run_golden_baseline_pipeline | GOLDEN BASELINE STRATEGY β run=20260524_213342
|
| 44 |
+
2026-05-24 21:33:42 | INFO | src.pipeline.run_golden_baseline_pipeline | ============================================================
|
| 45 |
+
2026-05-24 21:33:42 | INFO | src.data.loader | Cargando dataset: /Users/miraekang/proyectos/ai-nlp/data/raw/youtoxic_english_1000.csv
|
| 46 |
+
2026-05-24 21:33:42 | INFO | src.data.loader | Shape: (1000, 15)
|
| 47 |
+
2026-05-24 21:33:42 | INFO | src.data.loader | Columnas validadas β
|
| 48 |
+
2026-05-24 21:33:42 | WARNING | src.data.loader | 3 duplicados eliminados
|
| 49 |
+
2026-05-24 21:33:42 | INFO | src.data.loader | Toxicos: 459 (46.0%)
|
| 50 |
+
2026-05-24 21:33:42 | INFO | src.data.dual_loader | Loading preprocessed text: /Users/miraekang/proyectos/ai-nlp/data/processed/v2/comments_preprocessed.csv
|
| 51 |
+
2026-05-24 21:33:42 | INFO | src.data.dual_loader | Merging stats: /Users/miraekang/proyectos/ai-nlp/data/processed/v2/comments_with_stats.csv
|
| 52 |
+
2026-05-24 21:33:42 | INFO | src.data.dual_loader | Dual-track ready β rows=997 | clean_text non-empty=997
|
| 53 |
+
2026-05-24 21:33:42 | INFO | src.pipeline.run_golden_baseline_pipeline | Step 1 β Golden Baseline (all layers frozen, zero fine-tuning)
|
| 54 |
+
2026-05-24 21:33:43 | INFO | src.models.transformer_trainer | Inference-only β all 12 encoder blocks + head frozen (zero fine-tuning)
|
| 55 |
+
2026-05-24 21:33:44 | INFO | src.models.transformer_trainer | Golden Baseline β unitary/toxic-bert (inference only, no training)
|
| 56 |
+
2026-05-24 21:33:54 | INFO | src.pipeline.run_golden_baseline_pipeline | Baseline F1w=0.7903 gap_pp=0.16 β
|
| 57 |
+
2026-05-24 21:33:54 | INFO | src.pipeline.run_golden_baseline_pipeline | Step 2 β Performance Squeeze (last 2 layers, R-Drop, lr=5e-06, max_epochs=15)
|
| 58 |
+
2026-05-24 21:33:54 | INFO | src.models.transformer_trainer | Partial freeze: 10/12 blocks frozen β training last 2 + head β trainable 14,767,874/109,483,778 (13.5%)
|
| 59 |
+
2026-05-24 21:33:54 | INFO | src.models.transformer_trainer | Training unitary/toxic-bert (partial_last_2 freeze, enc_lr=5e-06, head_lr=5e-06, R-Drop Ξ±=0.5)...
|
| 60 |
+
2026-05-24 21:34:31 | INFO | src.models.transformer_trainer | Gap monitor β train_f1=0.7871 val_f1=0.7464 gap=0.0407
|
| 61 |
+
2026-05-24 21:35:11 | INFO | src.models.transformer_trainer | Gap monitor β train_f1=0.7851 val_f1=0.7346 gap=0.0505
|
| 62 |
+
2026-05-24 21:35:11 | WARNING | src.models.transformer_trainer | Gap defense β train-val gap 0.0505 > 0.049; stopping and reverting to best checkpoint
|
| 63 |
+
2026-05-24 21:35:15 | INFO | src.models.transformer_trainer | Val threshold tuning β best_t=0.500 val_f1_weighted=0.7464 (step=0.01)
|
| 64 |
+
2026-05-24 21:35:46 | INFO | src.pipeline.run_golden_baseline_pipeline | Step 3 β Hybrid Safety Net (LR C=0.001, max_features=200)
|
| 65 |
+
2026-05-24 21:35:46 | INFO | src.models.metadata_lr | Metadata LR trained β C=0.001 | tfidf_dim=200 | meta_dim=5
|
| 66 |
+
2026-05-24 21:35:46 | INFO | src.models.metadata_lr | Metadata LR saved: /Users/miraekang/proyectos/ai-nlp/models/golden_squeeze_lr.joblib
|
| 67 |
+
2026-05-24 21:35:46 | INFO | src.pipeline.run_golden_baseline_pipeline | Report: /Users/miraekang/proyectos/ai-nlp/reports/golden_baseline/integrated_report_20260524_213342.md
|
| 68 |
+
2026-05-24 21:35:46 | INFO | src.pipeline.run_golden_baseline_pipeline | ============================================================
|
| 69 |
+
2026-05-24 21:35:46 | INFO | src.pipeline.run_golden_baseline_pipeline | BASELINE F1w=0.7903 gap_pp=0.16 (β
<1%)
|
| 70 |
+
2026-05-24 21:35:46 | INFO | src.pipeline.run_golden_baseline_pipeline | HYBRID F1w=0.7479 gap_pp=4.39 (β οΈ below target)
|
| 71 |
+
2026-05-24 21:35:46 | INFO | src.pipeline.run_golden_baseline_pipeline | ============================================================
|