Mirae Kang committed on
Commit ·
975d796
1
Parent(s): 2b1423b
fix: modify evaluation pipeline, #7
Browse files- reports/pipeline/lr/cm_lr.png +3 -0
- reports/pipeline/lr/errors_lr.csv +53 -0
- reports/pipeline/lr/exp_20260523_163516_lr.json +26 -0
- reports/pipeline/lr/exp_20260523_163600_lr.json +26 -0
- reports/pipeline/lr/roc_lr.png +3 -0
- reports/summary.csv +10 -0
- src/evaluation/__init__.py +5 -0
- src/evaluation/evaluator.py +121 -31
- src/pipeline/run_pipeline.py +14 -18
reports/pipeline/lr/cm_lr.png
ADDED
|
Git LFS Details
|
reports/pipeline/lr/errors_lr.csv
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
text,real,pred,prob_toxic,tipo_error
|
| 2 |
+
fuck time freeway,False,True,0.6415007346665947,falso_positivo
|
| 3 |
+
man officer get lucky life still ruin juror would convict black way,False,True,0.5716710269810534,falso_positivo
|
| 4 |
+
police car purposefully leave destroy get anger instead business cost destruction car pass state taxpayer,False,True,0.503612868866628,falso_positivo
|
| 5 |
+
traffic bad enough,False,True,0.5106084760625215,falso_positivo
|
| 6 |
+
people get shoot everyday come one get attention,False,True,0.5624723994474933,falso_positivo
|
| 7 |
+
feel black america understand everything,False,True,0.5146301249872574,falso_positivo
|
| 8 |
+
head read protester take far,False,True,0.513207971133486,falso_positivo
|
| 9 |
+
stand side freeway instead block thousand car,False,True,0.5251542626479927,falso_positivo
|
| 10 |
+
lose shit guy lmao,False,True,0.5746623918084022,falso_positivo
|
| 11 |
+
take medical cannabis guy couple face book fight page mean necessarily aggressive,False,True,0.5036380541041356,falso_positivo
|
| 12 |
+
cop shoot white guy would riot since shoot black guy everyone seem think black look evidence skin color everyone always throw race card crap,False,True,0.5970904349206777,falso_positivo
|
| 13 |
+
good job guy raise taxis,False,True,0.5008660396035933,falso_positivo
|
| 14 |
+
people translate conversation funniest sad shit ever funny get wrong sad people reach every direction find hole get cop,False,True,0.5340463945332806,falso_positivo
|
| 15 |
+
weird white pretext police officer white must less risk would black police officer would kill white would take year prison I may white white black equal,False,True,0.5870245290128406,falso_positivo
|
| 16 |
+
obama show hos face city yet support sad,False,True,0.5103506243372347,falso_positivo
|
| 17 |
+
everyone say black people white people white woman get shoot rubber bullet come next day two black people help walk,False,True,0.5578356563804213,falso_positivo
|
| 18 |
+
real question need ask would mike brown rush cop already draw weapon already shoot way answer show true color think black male would disregard life kill cop escalate punishment probation petty theft death penalty kill cop utterly absurd truly believe someone go start college next week would sorry racist,False,True,0.5443162086132329,falso_positivo
|
| 19 |
+
agree police action kill boy mean protest loot destroy neighbourhood stupid make people want support struggle,False,True,0.5236958135962575,falso_positivo
|
| 20 |
+
call peacefully protest burn car lol,False,True,0.5116161189286824,falso_positivo
|
| 21 |
+
spread word year unarmed white guy shoot black cop,False,True,0.5538455860954972,falso_positivo
|
| 22 |
+
gas grenade pepper spray knight stick would apply,False,True,0.5147355292873231,falso_positivo
|
| 23 |
+
police shoot black guy black riot black shoot black happen,False,True,0.5956566810148107,falso_positivo
|
| 24 |
+
amen peggy hubbard color make difference person inside innocent nine year old little girl one attention people want cause chaos black white go exactly obama want look country mess admire courage tell peggy hubbard,True,False,0.40495127807343234,falso_negativo
|
| 25 |
+
ah beautiful,True,False,0.46757257821393866,falso_negativo
|
| 26 |
+
bullshit stef police officer pepper spray stun gun number non lethal method immediate disposal real question version event hold true gun first option officer go,True,False,0.4753839163064213,falso_negativo
|
| 27 |
+
stefan ever victim police discrimination game white supremacy deny involvement play victim black complain never fail,True,False,0.4502233884252265,falso_negativo
|
| 28 |
+
bottom line michael brown drug addict criminal violent criminal anyone feel bad get kill unless criminal,True,False,0.4974324511920543,falso_negativo
|
| 29 |
+
jimmy dore dickhead square,True,False,0.46757257821393866,falso_negativo
|
| 30 |
+
blacklivesmatter build deceit lie,True,False,0.45884138016676057,falso_negativo
|
| 31 |
+
educate dumb assss rap rap real people dispise,True,False,0.42647914768536693,falso_negativo
|
| 32 |
+
push clerk around really make I lose sympathy definitely tell guy likely dick.but hell need new tv might well riot go loot one honor,True,False,0.4460833711504373,falso_negativo
|
| 33 |
+
michael brown thug god bless thin blue line graduate police academy december,True,False,0.44310473756276547,falso_negativo
|
| 34 |
+
protect serve people michael brown guy deserve kill cry,True,False,0.4930152565241669,falso_negativo
|
| 35 |
+
zimmerman case full shit smell hate seep bring rap music listen sound cloud nothing make charcter look foolish dangerous possible ever want respond fuck az well> go much effort destroy mike browns character bring smoking habit nothing get shoot tone perfectly choose word use really make brown look unstable look much detail go trhu slay browns character,True,False,0.4835229120264079,falso_negativo
|
| 36 |
+
far base even call propaganda bs,True,False,0.43974023289369363,falso_negativo
|
| 37 |
+
patient ever ambulance get injure bad make hospital protestor liable die protestor charge man slaughter,True,False,0.46034973043545707,falso_negativo
|
| 38 |
+
police problem protest cause black criminal let forget blame address elephant room,True,False,0.4891559683461648,falso_negativo
|
| 39 |
+
think gentle giant would kill song rap talk bad,True,False,0.45653873903001546,falso_negativo
|
| 40 |
+
black person say thank speak truth many black cause trouble dress idiot bring trouble kill eachother cause crime complain police good glad parent grow I properly stay away garbage,True,False,0.46329948394601783,falso_negativo
|
| 41 |
+
rich brat,True,False,0.48192463449669426,falso_negativo
|
| 42 |
+
dude lie azz racist cracka,True,False,0.4780534833910638,falso_negativo
|
| 43 |
+
hahahahaha guy ed bassmaster character always teste ed bassmaster semi retarded character cross eyed guy,True,False,0.47755160633262406,falso_negativo
|
| 44 |
+
revolution set neighbourhood fire good job burn suburb cop live make homeless achive anything,True,False,0.47050296796324714,falso_negativo
|
| 45 |
+
one pair work boot steal ferguson riot,True,False,0.4605386101821508,falso_negativo
|
| 46 |
+
wish mr masri dead,True,False,0.46328501485243195,falso_negativo
|
| 47 |
+
cop pig,True,False,0.4794382794479078,falso_negativo
|
| 48 |
+
know chimper compassion mud pump heart,True,False,0.41790271212899205,falso_negativo
|
| 49 |
+
niggas love victim situation separate black people need,True,False,0.4715061903422615,falso_negativo
|
| 50 |
+
let get straight present fact mass media beginning yr old legal aspect adult rob convince store criminal later flee seen aprehende police resist arrest altercation occur two police every case similar use force criminal kill killing go court randomly select jury view evidence hear witness pertain case radom jury reach unanimous verdict guilty sorry protestor democracy,True,False,0.4488888614734203,falso_negativo
|
| 51 |
+
motherfucker angry understand,True,False,0.4807313546725058,falso_negativo
|
| 52 |
+
hell care people peacefully protest police feel threaten go act say blow fucker face tg follow riot,True,False,0.49462955392154545,falso_negativo
|
| 53 |
+
let elite divide conquer tactic work want keep racially divide never rise,True,False,0.46885478185552015,falso_negativo
|
reports/pipeline/lr/exp_20260523_163516_lr.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "LR",
|
| 3 |
+
"timestamp": "2026-05-23T16:35:21.125402",
|
| 4 |
+
"f1_weighted": 0.7387,
|
| 5 |
+
"f1_toxic": 0.7045,
|
| 6 |
+
"precision": 0.7399,
|
| 7 |
+
"recall": 0.74,
|
| 8 |
+
"accuracy": 0.74,
|
| 9 |
+
"roc_auc": 0.7838,
|
| 10 |
+
"fp": 22,
|
| 11 |
+
"fn": 30,
|
| 12 |
+
"n_test": 200,
|
| 13 |
+
"f1_train": 0.8984,
|
| 14 |
+
"train_test_gap_pp": 15.97,
|
| 15 |
+
"cv_f1_mean": 0.7193,
|
| 16 |
+
"cv_f1_std": 0.0382,
|
| 17 |
+
"cv_test_gap_pp": 1.94,
|
| 18 |
+
"cm_plot": "/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/cm_lr.png",
|
| 19 |
+
"roc_plot": "/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/roc_lr.png",
|
| 20 |
+
"top_fp_terms": "black(14), would(9), white(9), shoot(8), get(7), people(7), guy(7), cop(6), police(5), car(4)",
|
| 21 |
+
"top_fn_terms": "police(8), make(6), black(6), criminal(6), people(5), kill(5), want(4), cause(4), look(4), brown(4)",
|
| 22 |
+
"run_id": "20260523_163516",
|
| 23 |
+
"model_path": "/Users/miraekang/proyectos/ai-nlp/models/experiments/lr/lr_pipeline_20260523_163516.joblib",
|
| 24 |
+
"model_type": "lr",
|
| 25 |
+
"model_family": "sklearn_baseline"
|
| 26 |
+
}
|
reports/pipeline/lr/exp_20260523_163600_lr.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "LR",
|
| 3 |
+
"timestamp": "2026-05-23T16:36:05.009624",
|
| 4 |
+
"f1_weighted": 0.7387,
|
| 5 |
+
"f1_toxic": 0.7045,
|
| 6 |
+
"precision": 0.7399,
|
| 7 |
+
"recall": 0.74,
|
| 8 |
+
"accuracy": 0.74,
|
| 9 |
+
"roc_auc": 0.7838,
|
| 10 |
+
"fp": 22,
|
| 11 |
+
"fn": 30,
|
| 12 |
+
"n_test": 200,
|
| 13 |
+
"f1_train": 0.8984,
|
| 14 |
+
"train_test_gap_pp": 15.97,
|
| 15 |
+
"cv_f1_mean": 0.7193,
|
| 16 |
+
"cv_f1_std": 0.0382,
|
| 17 |
+
"cv_test_gap_pp": 1.94,
|
| 18 |
+
"cm_plot": "/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/cm_lr.png",
|
| 19 |
+
"roc_plot": "/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/roc_lr.png",
|
| 20 |
+
"top_fp_terms": "black(14), would(9), white(9), shoot(8), get(7), people(7), guy(7), cop(6), police(5), car(4)",
|
| 21 |
+
"top_fn_terms": "police(8), make(6), black(6), criminal(6), people(5), kill(5), want(4), cause(4), look(4), brown(4)",
|
| 22 |
+
"run_id": "20260523_163600",
|
| 23 |
+
"model_path": "/Users/miraekang/proyectos/ai-nlp/models/experiments/lr/lr_pipeline_20260523_163600.joblib",
|
| 24 |
+
"model_type": "lr",
|
| 25 |
+
"model_family": "sklearn_baseline"
|
| 26 |
+
}
|
reports/pipeline/lr/roc_lr.png
ADDED
|
Git LFS Details
|
reports/summary.csv
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model,model_family,f1_weighted,roc_auc,fp,fn,cv_test_gap_pp,train_test_gap_pp,f1_train,evaluation_source,production_default,notes,timestamp,f1_toxic,precision,recall,accuracy,n_test,cv_f1_mean,cv_f1_std,cm_plot,roc_plot,top_fp_terms,top_fn_terms
|
| 2 |
+
LR + TF-IDF (tuned),sklearn_baseline,0.7579,0.81,18.0,30.0,4.76,14.07,0.8987,configs/best_params.yaml Optuna,true,Best sklearn model on held-out test split (IsToxic),,,,,,,,,,,,
|
| 3 |
+
LR + TF-IDF (local),sklearn_baseline,0.7579,0.81,18.0,30.0,4.76,14.07,0.8987,models/final_model.joblib,true,Served by FastAPI and Streamlit via ModelService,,,,,,,,,,,,
|
| 4 |
+
LR,,0.7387,0.7838,22.0,30.0,1.94,15.97,0.8984,,,,2026-05-23T16:35:21.125402,0.7045,0.7399,0.74,0.74,200.0,0.7193,0.0382,/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/cm_lr.png,/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/roc_lr.png,"black(14), would(9), white(9), shoot(8), get(7), people(7), guy(7), cop(6), police(5), car(4)","police(8), make(6), black(6), criminal(6), people(5), kill(5), want(4), cause(4), look(4), brown(4)"
|
| 5 |
+
LR,,0.7387,0.7838,22.0,30.0,1.94,15.97,0.8984,,,,2026-05-23T16:36:05.009624,0.7045,0.7399,0.74,0.74,200.0,0.7193,0.0382,/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/cm_lr.png,/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/roc_lr.png,"black(14), would(9), white(9), shoot(8), get(7), people(7), guy(7), cop(6), police(5), car(4)","police(8), make(6), black(6), criminal(6), people(5), kill(5), want(4), cause(4), look(4), brown(4)"
|
| 6 |
+
DistilBERT Toxicity,transformers_hf,,,,,,,ModelService catalog,false,Remote HF martin-ha/toxic-comment-model β switch via PUT /model/{name},,,,,,,,,,,,,
|
| 7 |
+
toxic-bert (multilabel),transformers_hf,,,,,,,ModelService catalog,false,Remote HF unitary/toxic-bert β multilabel Jigsaw,,,,,,,,,,,,,
|
| 8 |
+
RoBERTa Toxicity,transformers_hf,,,,,,,ModelService catalog,false,Remote HF s-nlp/roberta_toxicity_classifier,,,,,,,,,,,,,
|
| 9 |
+
RF,sklearn_baseline,,,,,,,pipeline --model rf,false,Train and evaluate: python -m src.pipeline.run_pipeline --model rf,,,,,,,,,,,,,
|
| 10 |
+
XGBoost,sklearn_baseline,,,,,,,pipeline --model xgboost,false,Train and evaluate: python -m src.pipeline.run_pipeline --model xgboost,,,,,,,,,,,,,
|
src/evaluation/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Model evaluation and comparison."""
|
| 2 |
+
|
| 3 |
+
from src.evaluation.evaluator import Evaluator
|
| 4 |
+
|
| 5 |
+
__all__ = ["Evaluator"]
|
src/evaluation/evaluator.py
CHANGED
|
@@ -5,13 +5,17 @@ Evaluación estandarizada de modelos.
|
|
| 5 |
Genera métricas, visualizaciones e informes JSON.
|
| 6 |
|
| 7 |
Uso:
|
| 8 |
-
evaluator = Evaluator(output_dir="reports/pipeline")
|
| 9 |
-
metrics = evaluator.
|
| 10 |
-
|
| 11 |
-
|
|
|
|
| 12 |
"""
|
| 13 |
|
| 14 |
import json
|
|
|
|
|
|
|
|
|
|
| 15 |
import numpy as np
|
| 16 |
import pandas as pd
|
| 17 |
import matplotlib.pyplot as plt
|
|
@@ -28,6 +32,9 @@ from src.utils.logger import get_logger
|
|
| 28 |
|
| 29 |
logger = get_logger(__name__)
|
| 30 |
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
class Evaluator:
|
| 33 |
"""
|
|
@@ -109,6 +116,56 @@ class Evaluator:
|
|
| 109 |
self._print_summary(metrics)
|
| 110 |
return metrics
|
| 111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
# ββ Visualizaciones ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 113 |
def plot_confusion_matrix(
|
| 114 |
self,
|
|
@@ -116,6 +173,7 @@ class Evaluator:
|
|
| 116 |
y_pred,
|
| 117 |
model_name: str,
|
| 118 |
save: bool = True,
|
|
|
|
| 119 |
) -> Path | None:
|
| 120 |
"""Genera y guarda la matriz de confusión."""
|
| 121 |
cm = confusion_matrix(y_test, y_pred)
|
|
@@ -126,21 +184,21 @@ class Evaluator:
|
|
| 126 |
yticklabels=["No tΓ³xico", "TΓ³xico"],
|
| 127 |
linewidths=0.5,
|
| 128 |
)
|
| 129 |
-
ax.set_title(f"{model_name} β
|
| 130 |
ax.set_xlabel("PredicciΓ³n")
|
| 131 |
ax.set_ylabel("Real")
|
| 132 |
plt.tight_layout()
|
| 133 |
|
|
|
|
|
|
|
| 134 |
if save:
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
plt.show()
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
plt.show()
|
| 143 |
-
return None
|
| 144 |
|
| 145 |
def plot_roc_curve(
|
| 146 |
self,
|
|
@@ -148,27 +206,28 @@ class Evaluator:
|
|
| 148 |
y_proba,
|
| 149 |
model_name: str,
|
| 150 |
save: bool = True,
|
|
|
|
| 151 |
) -> Path | None:
|
| 152 |
"""Genera y guarda la curva ROC."""
|
| 153 |
fig, ax = plt.subplots(figsize=(6, 5))
|
| 154 |
RocCurveDisplay.from_predictions(
|
| 155 |
y_test, y_proba, ax=ax, name=model_name, color="#7F77DD"
|
| 156 |
)
|
| 157 |
-
ax.plot([0, 1], [0, 1], "--", color="gray", alpha=0.5, label="
|
| 158 |
ax.set_title(f"{model_name} β Curva ROC", fontweight="bold")
|
| 159 |
ax.legend()
|
| 160 |
plt.tight_layout()
|
| 161 |
|
|
|
|
|
|
|
| 162 |
if save:
|
| 163 |
-
|
| 164 |
-
path = self.output_dir / f"roc_{safe}.png"
|
| 165 |
-
plt.savefig(path, dpi=150, bbox_inches="tight")
|
| 166 |
-
plt.show()
|
| 167 |
logger.info(f"Curva ROC guardada: {path}")
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
|
|
|
| 172 |
|
| 173 |
# ββ AnΓ‘lisis de errores ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 174 |
def error_analysis(
|
|
@@ -177,6 +236,7 @@ class Evaluator:
|
|
| 177 |
y_test,
|
| 178 |
y_pred,
|
| 179 |
y_proba,
|
|
|
|
| 180 |
n_examples: int = 5,
|
| 181 |
) -> dict:
|
| 182 |
"""
|
|
@@ -198,24 +258,44 @@ class Evaluator:
|
|
| 198 |
fp = error_df[(error_df["real"] == 0) & (error_df["pred"] == 1)]
|
| 199 |
fn = error_df[(error_df["real"] == 1) & (error_df["pred"] == 0)]
|
| 200 |
|
| 201 |
-
|
|
|
|
|
|
|
|
|
|
| 202 |
|
| 203 |
print(f"\n{'='*65}")
|
| 204 |
-
print(f"FALSOS NEGATIVOS β
|
|
|
|
|
|
|
| 205 |
print(f"{'='*65}")
|
| 206 |
for _, row in fn.nsmallest(n_examples, "prob_toxic").iterrows():
|
| 207 |
-
print(f" Prob: {row['prob_toxic']:.3f} | {row['text'][:110]}")
|
| 208 |
print()
|
| 209 |
|
| 210 |
print(f"{'='*65}")
|
| 211 |
-
print(f"FALSOS POSITIVOS β
|
|
|
|
|
|
|
| 212 |
print(f"{'='*65}")
|
| 213 |
for _, row in fp.nlargest(n_examples, "prob_toxic").iterrows():
|
| 214 |
-
print(f" Prob: {row['prob_toxic']:.3f} | {row['text'][:110]}")
|
| 215 |
print()
|
| 216 |
|
| 217 |
-
|
| 218 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
|
| 220 |
# ββ Reports ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 221 |
def save_report(self, metrics: dict, experiment_id: str) -> Path:
|
|
@@ -232,7 +312,8 @@ class Evaluator:
|
|
| 232 |
Si summary.csv ya existe, agrega nuevas filas.
|
| 233 |
"""
|
| 234 |
|
| 235 |
-
path = Path(path or
|
|
|
|
| 236 |
|
| 237 |
# Nuevo dataframe
|
| 238 |
new_df = pd.DataFrame(all_metrics)
|
|
@@ -247,13 +328,15 @@ class Evaluator:
|
|
| 247 |
# Evitar duplicados por run_id si existe
|
| 248 |
if "run_id" in df.columns:
|
| 249 |
df = df.drop_duplicates(subset=["run_id"], keep="last")
|
|
|
|
|
|
|
| 250 |
|
| 251 |
else:
|
| 252 |
df = new_df
|
| 253 |
|
| 254 |
# Ordenar por F1 descendente
|
| 255 |
if "f1_weighted" in df.columns:
|
| 256 |
-
df = df.sort_values("f1_weighted", ascending=False)
|
| 257 |
|
| 258 |
# Guardar actualizado
|
| 259 |
df.to_csv(path, index=False)
|
|
@@ -265,6 +348,13 @@ class Evaluator:
|
|
| 265 |
|
| 266 |
return path
|
| 267 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
# ββ Interno ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 269 |
def _print_summary(self, metrics: dict) -> None:
|
| 270 |
gap_str = ""
|
|
|
|
| 5 |
Genera métricas, visualizaciones e informes JSON.
|
| 6 |
|
| 7 |
Uso:
|
| 8 |
+
evaluator = Evaluator(output_dir="reports/pipeline/lr")
|
| 9 |
+
metrics = evaluator.evaluate_and_report(
|
| 10 |
+
model, X_test, y_test, model_name="LR",
|
| 11 |
+
summary_path="reports/summary.csv",
|
| 12 |
+
)
|
| 13 |
"""
|
| 14 |
|
| 15 |
import json
|
| 16 |
+
import re
|
| 17 |
+
from collections import Counter
|
| 18 |
+
|
| 19 |
import numpy as np
|
| 20 |
import pandas as pd
|
| 21 |
import matplotlib.pyplot as plt
|
|
|
|
| 32 |
|
| 33 |
logger = get_logger(__name__)
|
| 34 |
|
| 35 |
+
DEFAULT_SUMMARY_PATH = Path("reports/summary.csv")
|
| 36 |
+
_TOKEN_RE = re.compile(r"[a-zÑéΓΓ³ΓΊΓ±'][a-zÑéΓΓ³ΓΊΓ±]{2,}")
|
| 37 |
+
|
| 38 |
|
| 39 |
class Evaluator:
|
| 40 |
"""
|
|
|
|
| 116 |
self._print_summary(metrics)
|
| 117 |
return metrics
|
| 118 |
|
| 119 |
+
def evaluate_and_report(
|
| 120 |
+
self,
|
| 121 |
+
model,
|
| 122 |
+
X_test,
|
| 123 |
+
y_test,
|
| 124 |
+
model_name: str,
|
| 125 |
+
X_train=None,
|
| 126 |
+
y_train=None,
|
| 127 |
+
cv_results: dict = None,
|
| 128 |
+
summary_path: str | Path | None = None,
|
| 129 |
+
n_error_examples: int = 5,
|
| 130 |
+
show_plots: bool = False,
|
| 131 |
+
) -> dict:
|
| 132 |
+
"""
|
| 133 |
+
Evaluación completa: métricas, gráficos, análisis de errores y summary.csv.
|
| 134 |
+
|
| 135 |
+
Usado por run_pipeline; actualiza reports/summary.csv por defecto del proyecto.
|
| 136 |
+
"""
|
| 137 |
+
metrics = self.evaluate(
|
| 138 |
+
model, X_test, y_test, model_name,
|
| 139 |
+
X_train=X_train, y_train=y_train, cv_results=cv_results,
|
| 140 |
+
)
|
| 141 |
+
|
| 142 |
+
y_pred = model.predict(X_test)
|
| 143 |
+
y_proba = model.predict_proba(X_test)[:, 1]
|
| 144 |
+
|
| 145 |
+
cm_path = self.plot_confusion_matrix(
|
| 146 |
+
y_test, y_pred, model_name, save=True, show=show_plots,
|
| 147 |
+
)
|
| 148 |
+
roc_path = self.plot_roc_curve(
|
| 149 |
+
y_test, y_proba, model_name, save=True, show=show_plots,
|
| 150 |
+
)
|
| 151 |
+
errors = self.error_analysis(
|
| 152 |
+
X_test, y_test, y_pred, y_proba,
|
| 153 |
+
model_name=model_name, n_examples=n_error_examples,
|
| 154 |
+
)
|
| 155 |
+
|
| 156 |
+
metrics["cm_plot"] = str(cm_path) if cm_path else ""
|
| 157 |
+
metrics["roc_plot"] = str(roc_path) if roc_path else ""
|
| 158 |
+
metrics["top_fp_terms"] = ", ".join(
|
| 159 |
+
f"{t}({c})" for t, c in errors.get("top_fp_terms", [])
|
| 160 |
+
)
|
| 161 |
+
metrics["top_fn_terms"] = ", ".join(
|
| 162 |
+
f"{t}({c})" for t, c in errors.get("top_fn_terms", [])
|
| 163 |
+
)
|
| 164 |
+
|
| 165 |
+
out = Path(summary_path or DEFAULT_SUMMARY_PATH)
|
| 166 |
+
self.save_summary([metrics], path=out)
|
| 167 |
+
return metrics
|
| 168 |
+
|
| 169 |
# ββ Visualizaciones ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 170 |
def plot_confusion_matrix(
|
| 171 |
self,
|
|
|
|
| 173 |
y_pred,
|
| 174 |
model_name: str,
|
| 175 |
save: bool = True,
|
| 176 |
+
show: bool = False,
|
| 177 |
) -> Path | None:
|
| 178 |
"""Genera y guarda la matriz de confusión."""
|
| 179 |
cm = confusion_matrix(y_test, y_pred)
|
|
|
|
| 184 |
yticklabels=["No tΓ³xico", "TΓ³xico"],
|
| 185 |
linewidths=0.5,
|
| 186 |
)
|
| 187 |
+
ax.set_title(f"{model_name} β Matriz de confusiΓ³n", fontweight="bold")
|
| 188 |
ax.set_xlabel("PredicciΓ³n")
|
| 189 |
ax.set_ylabel("Real")
|
| 190 |
plt.tight_layout()
|
| 191 |
|
| 192 |
+
safe = model_name.lower().replace(" ", "_").replace("/", "_")
|
| 193 |
+
path = self.output_dir / f"cm_{safe}.png"
|
| 194 |
if save:
|
| 195 |
+
fig.savefig(path, dpi=150, bbox_inches="tight")
|
| 196 |
+
logger.info(f"Matriz de confusiΓ³n guardada: {path}")
|
| 197 |
+
if show:
|
| 198 |
plt.show()
|
| 199 |
+
else:
|
| 200 |
+
plt.close(fig)
|
| 201 |
+
return path if save else None
|
|
|
|
|
|
|
| 202 |
|
| 203 |
def plot_roc_curve(
|
| 204 |
self,
|
|
|
|
| 206 |
y_proba,
|
| 207 |
model_name: str,
|
| 208 |
save: bool = True,
|
| 209 |
+
show: bool = False,
|
| 210 |
) -> Path | None:
|
| 211 |
"""Genera y guarda la curva ROC."""
|
| 212 |
fig, ax = plt.subplots(figsize=(6, 5))
|
| 213 |
RocCurveDisplay.from_predictions(
|
| 214 |
y_test, y_proba, ax=ax, name=model_name, color="#7F77DD"
|
| 215 |
)
|
| 216 |
+
ax.plot([0, 1], [0, 1], "--", color="gray", alpha=0.5, label="Azar")
|
| 217 |
ax.set_title(f"{model_name} β Curva ROC", fontweight="bold")
|
| 218 |
ax.legend()
|
| 219 |
plt.tight_layout()
|
| 220 |
|
| 221 |
+
safe = model_name.lower().replace(" ", "_").replace("/", "_")
|
| 222 |
+
path = self.output_dir / f"roc_{safe}.png"
|
| 223 |
if save:
|
| 224 |
+
fig.savefig(path, dpi=150, bbox_inches="tight")
|
|
|
|
|
|
|
|
|
|
| 225 |
logger.info(f"Curva ROC guardada: {path}")
|
| 226 |
+
if show:
|
| 227 |
+
plt.show()
|
| 228 |
+
else:
|
| 229 |
+
plt.close(fig)
|
| 230 |
+
return path if save else None
|
| 231 |
|
| 232 |
# ββ AnΓ‘lisis de errores ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 233 |
def error_analysis(
|
|
|
|
| 236 |
y_test,
|
| 237 |
y_pred,
|
| 238 |
y_proba,
|
| 239 |
+
model_name: str = "modelo",
|
| 240 |
n_examples: int = 5,
|
| 241 |
) -> dict:
|
| 242 |
"""
|
|
|
|
| 258 |
fp = error_df[(error_df["real"] == 0) & (error_df["pred"] == 1)]
|
| 259 |
fn = error_df[(error_df["real"] == 1) & (error_df["pred"] == 0)]
|
| 260 |
|
| 261 |
+
top_fp_terms = self._most_common_terms(fp["text"].tolist())
|
| 262 |
+
top_fn_terms = self._most_common_terms(fn["text"].tolist())
|
| 263 |
+
|
| 264 |
+
logger.info(f"Errores {model_name}: FP={len(fp)} | FN={len(fn)}")
|
| 265 |
|
| 266 |
print(f"\n{'='*65}")
|
| 267 |
+
print(f"FALSOS NEGATIVOS β tΓ³xico no detectado ({len(fn)} total)")
|
| 268 |
+
if top_fn_terms:
|
| 269 |
+
print(" TΓ©rminos mΓ‘s frecuentes:", ", ".join(f"{w}({c})" for w, c in top_fn_terms[:8]))
|
| 270 |
print(f"{'='*65}")
|
| 271 |
for _, row in fn.nsmallest(n_examples, "prob_toxic").iterrows():
|
| 272 |
+
print(f" Prob: {row['prob_toxic']:.3f} | {str(row['text'])[:110]}")
|
| 273 |
print()
|
| 274 |
|
| 275 |
print(f"{'='*65}")
|
| 276 |
+
print(f"FALSOS POSITIVOS β seguro marcado como tΓ³xico ({len(fp)} total)")
|
| 277 |
+
if top_fp_terms:
|
| 278 |
+
print(" TΓ©rminos mΓ‘s frecuentes:", ", ".join(f"{w}({c})" for w, c in top_fp_terms[:8]))
|
| 279 |
print(f"{'='*65}")
|
| 280 |
for _, row in fp.nlargest(n_examples, "prob_toxic").iterrows():
|
| 281 |
+
print(f" Prob: {row['prob_toxic']:.3f} | {str(row['text'])[:110]}")
|
| 282 |
print()
|
| 283 |
|
| 284 |
+
safe = model_name.lower().replace(" ", "_").replace("/", "_")
|
| 285 |
+
errors_path = self.output_dir / f"errors_{safe}.csv"
|
| 286 |
+
pd.concat([
|
| 287 |
+
fp.assign(tipo_error="falso_positivo"),
|
| 288 |
+
fn.assign(tipo_error="falso_negativo"),
|
| 289 |
+
], ignore_index=True).to_csv(errors_path, index=False)
|
| 290 |
+
logger.info(f"Errores guardados: {errors_path}")
|
| 291 |
+
|
| 292 |
+
return {
|
| 293 |
+
"top_fp_terms": top_fp_terms,
|
| 294 |
+
"top_fn_terms": top_fn_terms,
|
| 295 |
+
"fp_examples": fp.head(n_examples).to_dict("records"),
|
| 296 |
+
"fn_examples": fn.head(n_examples).to_dict("records"),
|
| 297 |
+
"errors_csv": str(errors_path),
|
| 298 |
+
}
|
| 299 |
|
| 300 |
# ββ Reports ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 301 |
def save_report(self, metrics: dict, experiment_id: str) -> Path:
|
|
|
|
| 312 |
Si summary.csv ya existe, agrega nuevas filas.
|
| 313 |
"""
|
| 314 |
|
| 315 |
+
path = Path(path or DEFAULT_SUMMARY_PATH)
|
| 316 |
+
path.parent.mkdir(parents=True, exist_ok=True)
|
| 317 |
|
| 318 |
# Nuevo dataframe
|
| 319 |
new_df = pd.DataFrame(all_metrics)
|
|
|
|
| 328 |
# Evitar duplicados por run_id si existe
|
| 329 |
if "run_id" in df.columns:
|
| 330 |
df = df.drop_duplicates(subset=["run_id"], keep="last")
|
| 331 |
+
elif "model" in df.columns and "timestamp" in df.columns:
|
| 332 |
+
df = df.drop_duplicates(subset=["model", "timestamp"], keep="last")
|
| 333 |
|
| 334 |
else:
|
| 335 |
df = new_df
|
| 336 |
|
| 337 |
# Ordenar por F1 descendente
|
| 338 |
if "f1_weighted" in df.columns:
|
| 339 |
+
df = df.sort_values("f1_weighted", ascending=False, na_position="last")
|
| 340 |
|
| 341 |
# Guardar actualizado
|
| 342 |
df.to_csv(path, index=False)
|
|
|
|
| 348 |
|
| 349 |
return path
|
| 350 |
|
| 351 |
+
@staticmethod
|
| 352 |
+
def _most_common_terms(texts: list, top_n: int = 10) -> list[tuple[str, int]]:
|
| 353 |
+
counter: Counter[str] = Counter()
|
| 354 |
+
for text in texts:
|
| 355 |
+
counter.update(_TOKEN_RE.findall(str(text).lower()))
|
| 356 |
+
return counter.most_common(top_n)
|
| 357 |
+
|
| 358 |
# ββ Interno ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 359 |
def _print_summary(self, metrics: dict) -> None:
|
| 360 |
gap_str = ""
|
src/pipeline/run_pipeline.py
CHANGED
|
@@ -122,24 +122,20 @@ def run_pipeline(model_type: str = "lr") -> dict:
|
|
| 122 |
|
| 123 |
# ββ FASE 6: EvaluaciΓ³n en test ββββββββββββββββββββββββββββββββββββββββββββ
|
| 124 |
logger.info("FASE 6 β EvaluaciΓ³n en test")
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
model_name
|
| 133 |
-
X_train
|
| 134 |
-
y_train
|
| 135 |
-
cv_results
|
|
|
|
| 136 |
)
|
| 137 |
|
| 138 |
-
# Visualizaciones
|
| 139 |
-
evaluator.plot_confusion_matrix(y_test, y_pred, model_type.upper())
|
| 140 |
-
evaluator.plot_roc_curve(y_test, y_proba, model_type.upper())
|
| 141 |
-
evaluator.error_analysis(X_test_clean, y_test, y_pred, y_proba)
|
| 142 |
-
|
| 143 |
# ββ FASE 7: Guardado del modelo βββββββββββββββββββββββββββββββββββββββββββ
|
| 144 |
logger.info("FASE 7 β Guardado del modelo")
|
| 145 |
model_path = EXPERIMENTS_DIR / f"{model_type}_pipeline_{run_id}.joblib"
|
|
@@ -160,10 +156,10 @@ def run_pipeline(model_type: str = "lr") -> dict:
|
|
| 160 |
logger.info("FASE 9 β Generando informes")
|
| 161 |
metrics["run_id"] = run_id
|
| 162 |
metrics["model_path"]= str(model_path)
|
| 163 |
-
evaluator.save_report(metrics, f"exp_{run_id}_{model_type}")
|
| 164 |
metrics["model_type"] = model_type
|
| 165 |
metrics["run_id"] = run_id
|
| 166 |
-
|
|
|
|
| 167 |
|
| 168 |
logger.info("=" * 60)
|
| 169 |
logger.info(f"β
Pipeline completado β F1={metrics['f1_weighted']:.4f}")
|
|
|
|
| 122 |
|
| 123 |
# ββ FASE 6: EvaluaciΓ³n en test ββββββββββββββββββββββββββββββββββββββββββββ
|
| 124 |
logger.info("FASE 6 β EvaluaciΓ³n en test")
|
| 125 |
+
report_dir = PROJECT_ROOT / "reports" / "pipeline" / model_type
|
| 126 |
+
evaluator = Evaluator(output_dir=report_dir)
|
| 127 |
+
|
| 128 |
+
metrics = evaluator.evaluate_and_report(
|
| 129 |
+
model,
|
| 130 |
+
X_test_clean,
|
| 131 |
+
y_test,
|
| 132 |
+
model_name=model_type.upper(),
|
| 133 |
+
X_train=X_train_clean,
|
| 134 |
+
y_train=y_train,
|
| 135 |
+
cv_results=cv_results,
|
| 136 |
+
summary_path=PROJECT_ROOT / "reports" / "summary.csv",
|
| 137 |
)
|
| 138 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
# ββ FASE 7: Guardado del modelo βββββββββββββββββββββββββββββββββββββββββββ
|
| 140 |
logger.info("FASE 7 β Guardado del modelo")
|
| 141 |
model_path = EXPERIMENTS_DIR / f"{model_type}_pipeline_{run_id}.joblib"
|
|
|
|
| 156 |
logger.info("FASE 9 β Generando informes")
|
| 157 |
metrics["run_id"] = run_id
|
| 158 |
metrics["model_path"]= str(model_path)
|
|
|
|
| 159 |
metrics["model_type"] = model_type
|
| 160 |
metrics["run_id"] = run_id
|
| 161 |
+
metrics["model_family"] = "sklearn_baseline"
|
| 162 |
+
evaluator.save_report(metrics, f"exp_{run_id}_{model_type}")
|
| 163 |
|
| 164 |
logger.info("=" * 60)
|
| 165 |
logger.info(f"β
Pipeline completado β F1={metrics['f1_weighted']:.4f}")
|