Mirae Kang committed on
Commit
975d796
·
1 Parent(s): 2b1423b

fix: modify evaluation pipeline, #7

Browse files
reports/pipeline/lr/cm_lr.png ADDED

Git LFS Details

  • SHA256: b64abbfe6125a4afd8f95a411aa2f655ffbab819afe041ddd94d363960c52512
  • Pointer size: 130 Bytes
  • Size of remote file: 26.9 kB
reports/pipeline/lr/errors_lr.csv ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ text,real,pred,prob_toxic,tipo_error
2
+ fuck time freeway,False,True,0.6415007346665947,falso_positivo
3
+ man officer get lucky life still ruin juror would convict black way,False,True,0.5716710269810534,falso_positivo
4
+ police car purposefully leave destroy get anger instead business cost destruction car pass state taxpayer,False,True,0.503612868866628,falso_positivo
5
+ traffic bad enough,False,True,0.5106084760625215,falso_positivo
6
+ people get shoot everyday come one get attention,False,True,0.5624723994474933,falso_positivo
7
+ feel black america understand everything,False,True,0.5146301249872574,falso_positivo
8
+ head read protester take far,False,True,0.513207971133486,falso_positivo
9
+ stand side freeway instead block thousand car,False,True,0.5251542626479927,falso_positivo
10
+ lose shit guy lmao,False,True,0.5746623918084022,falso_positivo
11
+ take medical cannabis guy couple face book fight page mean necessarily aggressive,False,True,0.5036380541041356,falso_positivo
12
+ cop shoot white guy would riot since shoot black guy everyone seem think black look evidence skin color everyone always throw race card crap,False,True,0.5970904349206777,falso_positivo
13
+ good job guy raise taxis,False,True,0.5008660396035933,falso_positivo
14
+ people translate conversation funniest sad shit ever funny get wrong sad people reach every direction find hole get cop,False,True,0.5340463945332806,falso_positivo
15
+ weird white pretext police officer white must less risk would black police officer would kill white would take year prison I may white white black equal,False,True,0.5870245290128406,falso_positivo
16
+ obama show hos face city yet support sad,False,True,0.5103506243372347,falso_positivo
17
+ everyone say black people white people white woman get shoot rubber bullet come next day two black people help walk,False,True,0.5578356563804213,falso_positivo
18
+ real question need ask would mike brown rush cop already draw weapon already shoot way answer show true color think black male would disregard life kill cop escalate punishment probation petty theft death penalty kill cop utterly absurd truly believe someone go start college next week would sorry racist,False,True,0.5443162086132329,falso_positivo
19
+ agree police action kill boy mean protest loot destroy neighbourhood stupid make people want support struggle,False,True,0.5236958135962575,falso_positivo
20
+ call peacefully protest burn car lol,False,True,0.5116161189286824,falso_positivo
21
+ spread word year unarmed white guy shoot black cop,False,True,0.5538455860954972,falso_positivo
22
+ gas grenade pepper spray knight stick would apply,False,True,0.5147355292873231,falso_positivo
23
+ police shoot black guy black riot black shoot black happen,False,True,0.5956566810148107,falso_positivo
24
+ amen peggy hubbard color make difference person inside innocent nine year old little girl one attention people want cause chaos black white go exactly obama want look country mess admire courage tell peggy hubbard,True,False,0.40495127807343234,falso_negativo
25
+ ah beautiful,True,False,0.46757257821393866,falso_negativo
26
+ bullshit stef police officer pepper spray stun gun number non lethal method immediate disposal real question version event hold true gun first option officer go,True,False,0.4753839163064213,falso_negativo
27
+ stefan ever victim police discrimination game white supremacy deny involvement play victim black complain never fail,True,False,0.4502233884252265,falso_negativo
28
+ bottom line michael brown drug addict criminal violent criminal anyone feel bad get kill unless criminal,True,False,0.4974324511920543,falso_negativo
29
+ jimmy dore dickhead square,True,False,0.46757257821393866,falso_negativo
30
+ blacklivesmatter build deceit lie,True,False,0.45884138016676057,falso_negativo
31
+ educate dumb assss rap rap real people dispise,True,False,0.42647914768536693,falso_negativo
32
+ push clerk around really make I lose sympathy definitely tell guy likely dick.but hell need new tv might well riot go loot one honor,True,False,0.4460833711504373,falso_negativo
33
+ michael brown thug god bless thin blue line graduate police academy december,True,False,0.44310473756276547,falso_negativo
34
+ protect serve people michael brown guy deserve kill cry,True,False,0.4930152565241669,falso_negativo
35
+ zimmerman case full shit smell hate seep bring rap music listen sound cloud nothing make charcter look foolish dangerous possible ever want respond fuck az well> go much effort destroy mike browns character bring smoking habit nothing get shoot tone perfectly choose word use really make brown look unstable look much detail go trhu slay browns character,True,False,0.4835229120264079,falso_negativo
36
+ far base even call propaganda bs,True,False,0.43974023289369363,falso_negativo
37
+ patient ever ambulance get injure bad make hospital protestor liable die protestor charge man slaughter,True,False,0.46034973043545707,falso_negativo
38
+ police problem protest cause black criminal let forget blame address elephant room,True,False,0.4891559683461648,falso_negativo
39
+ think gentle giant would kill song rap talk bad,True,False,0.45653873903001546,falso_negativo
40
+ black person say thank speak truth many black cause trouble dress idiot bring trouble kill eachother cause crime complain police good glad parent grow I properly stay away garbage,True,False,0.46329948394601783,falso_negativo
41
+ rich brat,True,False,0.48192463449669426,falso_negativo
42
+ dude lie azz racist cracka,True,False,0.4780534833910638,falso_negativo
43
+ hahahahaha guy ed bassmaster character always teste ed bassmaster semi retarded character cross eyed guy,True,False,0.47755160633262406,falso_negativo
44
+ revolution set neighbourhood fire good job burn suburb cop live make homeless achive anything,True,False,0.47050296796324714,falso_negativo
45
+ one pair work boot steal ferguson riot,True,False,0.4605386101821508,falso_negativo
46
+ wish mr masri dead,True,False,0.46328501485243195,falso_negativo
47
+ cop pig,True,False,0.4794382794479078,falso_negativo
48
+ know chimper compassion mud pump heart,True,False,0.41790271212899205,falso_negativo
49
+ niggas love victim situation separate black people need,True,False,0.4715061903422615,falso_negativo
50
+ let get straight present fact mass media beginning yr old legal aspect adult rob convince store criminal later flee seen aprehende police resist arrest altercation occur two police every case similar use force criminal kill killing go court randomly select jury view evidence hear witness pertain case radom jury reach unanimous verdict guilty sorry protestor democracy,True,False,0.4488888614734203,falso_negativo
51
+ motherfucker angry understand,True,False,0.4807313546725058,falso_negativo
52
+ hell care people peacefully protest police feel threaten go act say blow fucker face tg follow riot,True,False,0.49462955392154545,falso_negativo
53
+ let elite divide conquer tactic work want keep racially divide never rise,True,False,0.46885478185552015,falso_negativo
reports/pipeline/lr/exp_20260523_163516_lr.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "LR",
3
+ "timestamp": "2026-05-23T16:35:21.125402",
4
+ "f1_weighted": 0.7387,
5
+ "f1_toxic": 0.7045,
6
+ "precision": 0.7399,
7
+ "recall": 0.74,
8
+ "accuracy": 0.74,
9
+ "roc_auc": 0.7838,
10
+ "fp": 22,
11
+ "fn": 30,
12
+ "n_test": 200,
13
+ "f1_train": 0.8984,
14
+ "train_test_gap_pp": 15.97,
15
+ "cv_f1_mean": 0.7193,
16
+ "cv_f1_std": 0.0382,
17
+ "cv_test_gap_pp": 1.94,
18
+ "cm_plot": "/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/cm_lr.png",
19
+ "roc_plot": "/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/roc_lr.png",
20
+ "top_fp_terms": "black(14), would(9), white(9), shoot(8), get(7), people(7), guy(7), cop(6), police(5), car(4)",
21
+ "top_fn_terms": "police(8), make(6), black(6), criminal(6), people(5), kill(5), want(4), cause(4), look(4), brown(4)",
22
+ "run_id": "20260523_163516",
23
+ "model_path": "/Users/miraekang/proyectos/ai-nlp/models/experiments/lr/lr_pipeline_20260523_163516.joblib",
24
+ "model_type": "lr",
25
+ "model_family": "sklearn_baseline"
26
+ }
reports/pipeline/lr/exp_20260523_163600_lr.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "LR",
3
+ "timestamp": "2026-05-23T16:36:05.009624",
4
+ "f1_weighted": 0.7387,
5
+ "f1_toxic": 0.7045,
6
+ "precision": 0.7399,
7
+ "recall": 0.74,
8
+ "accuracy": 0.74,
9
+ "roc_auc": 0.7838,
10
+ "fp": 22,
11
+ "fn": 30,
12
+ "n_test": 200,
13
+ "f1_train": 0.8984,
14
+ "train_test_gap_pp": 15.97,
15
+ "cv_f1_mean": 0.7193,
16
+ "cv_f1_std": 0.0382,
17
+ "cv_test_gap_pp": 1.94,
18
+ "cm_plot": "/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/cm_lr.png",
19
+ "roc_plot": "/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/roc_lr.png",
20
+ "top_fp_terms": "black(14), would(9), white(9), shoot(8), get(7), people(7), guy(7), cop(6), police(5), car(4)",
21
+ "top_fn_terms": "police(8), make(6), black(6), criminal(6), people(5), kill(5), want(4), cause(4), look(4), brown(4)",
22
+ "run_id": "20260523_163600",
23
+ "model_path": "/Users/miraekang/proyectos/ai-nlp/models/experiments/lr/lr_pipeline_20260523_163600.joblib",
24
+ "model_type": "lr",
25
+ "model_family": "sklearn_baseline"
26
+ }
reports/pipeline/lr/roc_lr.png ADDED

Git LFS Details

  • SHA256: 2944f4eebf52a33fc354d21da1dc1a92b472d459fb2de4a9651def2e5ba0f347
  • Pointer size: 130 Bytes
  • Size of remote file: 39.4 kB
reports/summary.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ model,model_family,f1_weighted,roc_auc,fp,fn,cv_test_gap_pp,train_test_gap_pp,f1_train,evaluation_source,production_default,notes,timestamp,f1_toxic,precision,recall,accuracy,n_test,cv_f1_mean,cv_f1_std,cm_plot,roc_plot,top_fp_terms,top_fn_terms
2
+ LR + TF-IDF (tuned),sklearn_baseline,0.7579,0.81,18.0,30.0,4.76,14.07,0.8987,configs/best_params.yaml Optuna,true,Best sklearn model on held-out test split (IsToxic),,,,,,,,,,,,
3
+ LR + TF-IDF (local),sklearn_baseline,0.7579,0.81,18.0,30.0,4.76,14.07,0.8987,models/final_model.joblib,true,Served by FastAPI and Streamlit via ModelService,,,,,,,,,,,,
4
+ LR,,0.7387,0.7838,22.0,30.0,1.94,15.97,0.8984,,,,2026-05-23T16:35:21.125402,0.7045,0.7399,0.74,0.74,200.0,0.7193,0.0382,/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/cm_lr.png,/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/roc_lr.png,"black(14), would(9), white(9), shoot(8), get(7), people(7), guy(7), cop(6), police(5), car(4)","police(8), make(6), black(6), criminal(6), people(5), kill(5), want(4), cause(4), look(4), brown(4)"
5
+ LR,,0.7387,0.7838,22.0,30.0,1.94,15.97,0.8984,,,,2026-05-23T16:36:05.009624,0.7045,0.7399,0.74,0.74,200.0,0.7193,0.0382,/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/cm_lr.png,/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/roc_lr.png,"black(14), would(9), white(9), shoot(8), get(7), people(7), guy(7), cop(6), police(5), car(4)","police(8), make(6), black(6), criminal(6), people(5), kill(5), want(4), cause(4), look(4), brown(4)"
6
+ DistilBERT Toxicity,transformers_hf,,,,,,,ModelService catalog,false,Remote HF martin-ha/toxic-comment-model — switch via PUT /model/{name},,,,,,,,,,,,,
7
+ toxic-bert (multilabel),transformers_hf,,,,,,,ModelService catalog,false,Remote HF unitary/toxic-bert — multilabel Jigsaw,,,,,,,,,,,,,
8
+ RoBERTa Toxicity,transformers_hf,,,,,,,ModelService catalog,false,Remote HF s-nlp/roberta_toxicity_classifier,,,,,,,,,,,,,
9
+ RF,sklearn_baseline,,,,,,,pipeline --model rf,false,Train and evaluate: python -m src.pipeline.run_pipeline --model rf,,,,,,,,,,,,,
10
+ XGBoost,sklearn_baseline,,,,,,,pipeline --model xgboost,false,Train and evaluate: python -m src.pipeline.run_pipeline --model xgboost,,,,,,,,,,,,,
src/evaluation/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """Model evaluation and comparison."""
2
+
3
+ from src.evaluation.evaluator import Evaluator
4
+
5
+ __all__ = ["Evaluator"]
src/evaluation/evaluator.py CHANGED
@@ -5,13 +5,17 @@ Evaluación estandarizada de modelos.
5
  Genera métricas, visualizaciones e informes JSON.
6
 
7
  Uso:
8
- evaluator = Evaluator(output_dir="reports/pipeline")
9
- metrics = evaluator.evaluate(model, X_test, y_test, model_name="LR")
10
- evaluator.error_analysis(X_test, y_test, preds, probs)
11
- evaluator.save_summary(all_metrics, path="reports/summary.csv")
 
12
  """
13
 
14
  import json
 
 
 
15
  import numpy as np
16
  import pandas as pd
17
  import matplotlib.pyplot as plt
@@ -28,6 +32,9 @@ from src.utils.logger import get_logger
28
 
29
  logger = get_logger(__name__)
30
 
 
 
 
31
 
32
  class Evaluator:
33
  """
@@ -109,6 +116,56 @@ class Evaluator:
109
  self._print_summary(metrics)
110
  return metrics
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  # ── Visualizaciones ──────────────────────────────────────────────────────
113
  def plot_confusion_matrix(
114
  self,
@@ -116,6 +173,7 @@ class Evaluator:
116
  y_pred,
117
  model_name: str,
118
  save: bool = True,
 
119
  ) -> Path | None:
120
  """Genera y guarda la matriz de confusiΓ³n."""
121
  cm = confusion_matrix(y_test, y_pred)
@@ -126,21 +184,21 @@ class Evaluator:
126
  yticklabels=["No tΓ³xico", "TΓ³xico"],
127
  linewidths=0.5,
128
  )
129
- ax.set_title(f"{model_name} β€” Confusion Matrix", fontweight="bold")
130
  ax.set_xlabel("PredicciΓ³n")
131
  ax.set_ylabel("Real")
132
  plt.tight_layout()
133
 
 
 
134
  if save:
135
- safe = model_name.lower().replace(" ", "_").replace("/", "_")
136
- path = self.output_dir / f"cm_{safe}.png"
137
- plt.savefig(path, dpi=150, bbox_inches="tight")
138
  plt.show()
139
- logger.info(f"Confusion matrix guardada: {path}")
140
- return path
141
-
142
- plt.show()
143
- return None
144
 
145
  def plot_roc_curve(
146
  self,
@@ -148,27 +206,28 @@ class Evaluator:
148
  y_proba,
149
  model_name: str,
150
  save: bool = True,
 
151
  ) -> Path | None:
152
  """Genera y guarda la curva ROC."""
153
  fig, ax = plt.subplots(figsize=(6, 5))
154
  RocCurveDisplay.from_predictions(
155
  y_test, y_proba, ax=ax, name=model_name, color="#7F77DD"
156
  )
157
- ax.plot([0, 1], [0, 1], "--", color="gray", alpha=0.5, label="Random")
158
  ax.set_title(f"{model_name} β€” Curva ROC", fontweight="bold")
159
  ax.legend()
160
  plt.tight_layout()
161
 
 
 
162
  if save:
163
- safe = model_name.lower().replace(" ", "_").replace("/", "_")
164
- path = self.output_dir / f"roc_{safe}.png"
165
- plt.savefig(path, dpi=150, bbox_inches="tight")
166
- plt.show()
167
  logger.info(f"Curva ROC guardada: {path}")
168
- return path
169
-
170
- plt.show()
171
- return None
 
172
 
173
  # ── Análisis de errores ──────────────────────────────────────────────────
174
  def error_analysis(
@@ -177,6 +236,7 @@ class Evaluator:
177
  y_test,
178
  y_pred,
179
  y_proba,
 
180
  n_examples: int = 5,
181
  ) -> dict:
182
  """
@@ -198,24 +258,44 @@ class Evaluator:
198
  fp = error_df[(error_df["real"] == 0) & (error_df["pred"] == 1)]
199
  fn = error_df[(error_df["real"] == 1) & (error_df["pred"] == 0)]
200
 
201
- logger.info(f"Errores: FP={len(fp)} | FN={len(fn)}")
 
 
 
202
 
203
  print(f"\n{'='*65}")
204
- print(f"FALSOS NEGATIVOS β€” hate speech que NO detectΓ³ ({len(fn)} total)")
 
 
205
  print(f"{'='*65}")
206
  for _, row in fn.nsmallest(n_examples, "prob_toxic").iterrows():
207
- print(f" Prob: {row['prob_toxic']:.3f} | {row['text'][:110]}")
208
  print()
209
 
210
  print(f"{'='*65}")
211
- print(f"FALSOS POSITIVOS β€” comentarios OK censurados ({len(fp)} total)")
 
 
212
  print(f"{'='*65}")
213
  for _, row in fp.nlargest(n_examples, "prob_toxic").iterrows():
214
- print(f" Prob: {row['prob_toxic']:.3f} | {row['text'][:110]}")
215
  print()
216
 
217
- return {"fp_examples": fp.head(n_examples).to_dict("records"),
218
- "fn_examples": fn.head(n_examples).to_dict("records")}
 
 
 
 
 
 
 
 
 
 
 
 
 
219
 
220
  # ── Reports ──────────────────────────────────────────────────────────────
221
  def save_report(self, metrics: dict, experiment_id: str) -> Path:
@@ -232,7 +312,8 @@ class Evaluator:
232
  Si summary.csv ya existe, agrega nuevas filas.
233
  """
234
 
235
- path = Path(path or self.output_dir / "summary.csv")
 
236
 
237
  # Nuevo dataframe
238
  new_df = pd.DataFrame(all_metrics)
@@ -247,13 +328,15 @@ class Evaluator:
247
  # Evitar duplicados por run_id si existe
248
  if "run_id" in df.columns:
249
  df = df.drop_duplicates(subset=["run_id"], keep="last")
 
 
250
 
251
  else:
252
  df = new_df
253
 
254
  # Ordenar por F1 descendente
255
  if "f1_weighted" in df.columns:
256
- df = df.sort_values("f1_weighted", ascending=False)
257
 
258
  # Guardar actualizado
259
  df.to_csv(path, index=False)
@@ -265,6 +348,13 @@ class Evaluator:
265
 
266
  return path
267
 
 
 
 
 
 
 
 
268
  # ── Interno ──────────────────────────────────────────────────────────────
269
  def _print_summary(self, metrics: dict) -> None:
270
  gap_str = ""
 
5
  Genera métricas, visualizaciones e informes JSON.
6
 
7
  Uso:
8
+ evaluator = Evaluator(output_dir="reports/pipeline/lr")
9
+ metrics = evaluator.evaluate_and_report(
10
+ model, X_test, y_test, model_name="LR",
11
+ summary_path="reports/summary.csv",
12
+ )
13
  """
14
 
15
  import json
16
+ import re
17
+ from collections import Counter
18
+
19
  import numpy as np
20
  import pandas as pd
21
  import matplotlib.pyplot as plt
 
32
 
33
  logger = get_logger(__name__)
34
 
35
+ DEFAULT_SUMMARY_PATH = Path("reports/summary.csv")
36
+ _TOKEN_RE = re.compile(r"[a-zÑéíóúñ'][a-zÑéíóúñ]{2,}")
37
+
38
 
39
  class Evaluator:
40
  """
 
116
  self._print_summary(metrics)
117
  return metrics
118
 
119
+ def evaluate_and_report(
120
+ self,
121
+ model,
122
+ X_test,
123
+ y_test,
124
+ model_name: str,
125
+ X_train=None,
126
+ y_train=None,
127
+ cv_results: dict = None,
128
+ summary_path: str | Path | None = None,
129
+ n_error_examples: int = 5,
130
+ show_plots: bool = False,
131
+ ) -> dict:
132
+ """
133
+ Evaluación completa: métricas, gráficos, análisis de errores y summary.csv.
134
+
135
+ Usado por run_pipeline; actualiza reports/summary.csv por defecto del proyecto.
136
+ """
137
+ metrics = self.evaluate(
138
+ model, X_test, y_test, model_name,
139
+ X_train=X_train, y_train=y_train, cv_results=cv_results,
140
+ )
141
+
142
+ y_pred = model.predict(X_test)
143
+ y_proba = model.predict_proba(X_test)[:, 1]
144
+
145
+ cm_path = self.plot_confusion_matrix(
146
+ y_test, y_pred, model_name, save=True, show=show_plots,
147
+ )
148
+ roc_path = self.plot_roc_curve(
149
+ y_test, y_proba, model_name, save=True, show=show_plots,
150
+ )
151
+ errors = self.error_analysis(
152
+ X_test, y_test, y_pred, y_proba,
153
+ model_name=model_name, n_examples=n_error_examples,
154
+ )
155
+
156
+ metrics["cm_plot"] = str(cm_path) if cm_path else ""
157
+ metrics["roc_plot"] = str(roc_path) if roc_path else ""
158
+ metrics["top_fp_terms"] = ", ".join(
159
+ f"{t}({c})" for t, c in errors.get("top_fp_terms", [])
160
+ )
161
+ metrics["top_fn_terms"] = ", ".join(
162
+ f"{t}({c})" for t, c in errors.get("top_fn_terms", [])
163
+ )
164
+
165
+ out = Path(summary_path or DEFAULT_SUMMARY_PATH)
166
+ self.save_summary([metrics], path=out)
167
+ return metrics
168
+
169
  # ── Visualizaciones ──────────────────────────────────────────────────────
170
  def plot_confusion_matrix(
171
  self,
 
173
  y_pred,
174
  model_name: str,
175
  save: bool = True,
176
+ show: bool = False,
177
  ) -> Path | None:
178
  """Genera y guarda la matriz de confusiΓ³n."""
179
  cm = confusion_matrix(y_test, y_pred)
 
184
  yticklabels=["No tΓ³xico", "TΓ³xico"],
185
  linewidths=0.5,
186
  )
187
+ ax.set_title(f"{model_name} β€” Matriz de confusiΓ³n", fontweight="bold")
188
  ax.set_xlabel("PredicciΓ³n")
189
  ax.set_ylabel("Real")
190
  plt.tight_layout()
191
 
192
+ safe = model_name.lower().replace(" ", "_").replace("/", "_")
193
+ path = self.output_dir / f"cm_{safe}.png"
194
  if save:
195
+ fig.savefig(path, dpi=150, bbox_inches="tight")
196
+ logger.info(f"Matriz de confusiΓ³n guardada: {path}")
197
+ if show:
198
  plt.show()
199
+ else:
200
+ plt.close(fig)
201
+ return path if save else None
 
 
202
 
203
  def plot_roc_curve(
204
  self,
 
206
  y_proba,
207
  model_name: str,
208
  save: bool = True,
209
+ show: bool = False,
210
  ) -> Path | None:
211
  """Genera y guarda la curva ROC."""
212
  fig, ax = plt.subplots(figsize=(6, 5))
213
  RocCurveDisplay.from_predictions(
214
  y_test, y_proba, ax=ax, name=model_name, color="#7F77DD"
215
  )
216
+ ax.plot([0, 1], [0, 1], "--", color="gray", alpha=0.5, label="Azar")
217
  ax.set_title(f"{model_name} β€” Curva ROC", fontweight="bold")
218
  ax.legend()
219
  plt.tight_layout()
220
 
221
+ safe = model_name.lower().replace(" ", "_").replace("/", "_")
222
+ path = self.output_dir / f"roc_{safe}.png"
223
  if save:
224
+ fig.savefig(path, dpi=150, bbox_inches="tight")
 
 
 
225
  logger.info(f"Curva ROC guardada: {path}")
226
+ if show:
227
+ plt.show()
228
+ else:
229
+ plt.close(fig)
230
+ return path if save else None
231
 
232
  # ── Análisis de errores ──────────────────────────────────────────────────
233
  def error_analysis(
 
236
  y_test,
237
  y_pred,
238
  y_proba,
239
+ model_name: str = "modelo",
240
  n_examples: int = 5,
241
  ) -> dict:
242
  """
 
258
  fp = error_df[(error_df["real"] == 0) & (error_df["pred"] == 1)]
259
  fn = error_df[(error_df["real"] == 1) & (error_df["pred"] == 0)]
260
 
261
+ top_fp_terms = self._most_common_terms(fp["text"].tolist())
262
+ top_fn_terms = self._most_common_terms(fn["text"].tolist())
263
+
264
+ logger.info(f"Errores {model_name}: FP={len(fp)} | FN={len(fn)}")
265
 
266
  print(f"\n{'='*65}")
267
+ print(f"FALSOS NEGATIVOS β€” tΓ³xico no detectado ({len(fn)} total)")
268
+ if top_fn_terms:
269
+ print(" TΓ©rminos mΓ‘s frecuentes:", ", ".join(f"{w}({c})" for w, c in top_fn_terms[:8]))
270
  print(f"{'='*65}")
271
  for _, row in fn.nsmallest(n_examples, "prob_toxic").iterrows():
272
+ print(f" Prob: {row['prob_toxic']:.3f} | {str(row['text'])[:110]}")
273
  print()
274
 
275
  print(f"{'='*65}")
276
+ print(f"FALSOS POSITIVOS β€” seguro marcado como tΓ³xico ({len(fp)} total)")
277
+ if top_fp_terms:
278
+ print(" TΓ©rminos mΓ‘s frecuentes:", ", ".join(f"{w}({c})" for w, c in top_fp_terms[:8]))
279
  print(f"{'='*65}")
280
  for _, row in fp.nlargest(n_examples, "prob_toxic").iterrows():
281
+ print(f" Prob: {row['prob_toxic']:.3f} | {str(row['text'])[:110]}")
282
  print()
283
 
284
+ safe = model_name.lower().replace(" ", "_").replace("/", "_")
285
+ errors_path = self.output_dir / f"errors_{safe}.csv"
286
+ pd.concat([
287
+ fp.assign(tipo_error="falso_positivo"),
288
+ fn.assign(tipo_error="falso_negativo"),
289
+ ], ignore_index=True).to_csv(errors_path, index=False)
290
+ logger.info(f"Errores guardados: {errors_path}")
291
+
292
+ return {
293
+ "top_fp_terms": top_fp_terms,
294
+ "top_fn_terms": top_fn_terms,
295
+ "fp_examples": fp.head(n_examples).to_dict("records"),
296
+ "fn_examples": fn.head(n_examples).to_dict("records"),
297
+ "errors_csv": str(errors_path),
298
+ }
299
 
300
  # ── Reports ──────────────────────────────────────────────────────────────
301
  def save_report(self, metrics: dict, experiment_id: str) -> Path:
 
312
  Si summary.csv ya existe, agrega nuevas filas.
313
  """
314
 
315
+ path = Path(path or DEFAULT_SUMMARY_PATH)
316
+ path.parent.mkdir(parents=True, exist_ok=True)
317
 
318
  # Nuevo dataframe
319
  new_df = pd.DataFrame(all_metrics)
 
328
  # Evitar duplicados por run_id si existe
329
  if "run_id" in df.columns:
330
  df = df.drop_duplicates(subset=["run_id"], keep="last")
331
+ elif "model" in df.columns and "timestamp" in df.columns:
332
+ df = df.drop_duplicates(subset=["model", "timestamp"], keep="last")
333
 
334
  else:
335
  df = new_df
336
 
337
  # Ordenar por F1 descendente
338
  if "f1_weighted" in df.columns:
339
+ df = df.sort_values("f1_weighted", ascending=False, na_position="last")
340
 
341
  # Guardar actualizado
342
  df.to_csv(path, index=False)
 
348
 
349
  return path
350
 
351
+ @staticmethod
352
+ def _most_common_terms(texts: list, top_n: int = 10) -> list[tuple[str, int]]:
353
+ counter: Counter[str] = Counter()
354
+ for text in texts:
355
+ counter.update(_TOKEN_RE.findall(str(text).lower()))
356
+ return counter.most_common(top_n)
357
+
358
  # ── Interno ──────────────────────────────────────────────────────────────
359
  def _print_summary(self, metrics: dict) -> None:
360
  gap_str = ""
src/pipeline/run_pipeline.py CHANGED
@@ -122,24 +122,20 @@ def run_pipeline(model_type: str = "lr") -> dict:
122
 
123
  # ── FASE 6: Evaluación en test ────────────────────────────────────────────
124
  logger.info("FASE 6 β€” EvaluaciΓ³n en test")
125
- evaluator = Evaluator(output_dir=PROJECT_ROOT / "reports" / "v2" / "pipeline")
126
-
127
- y_pred = model.predict(X_test_clean)
128
- y_proba = model.predict_proba(X_test_clean)[:, 1]
129
-
130
- metrics = evaluator.evaluate(
131
- model, X_test_clean, y_test,
132
- model_name = model_type.upper(),
133
- X_train = X_train_clean,
134
- y_train = y_train,
135
- cv_results = cv_results,
 
136
  )
137
 
138
- # Visualizaciones
139
- evaluator.plot_confusion_matrix(y_test, y_pred, model_type.upper())
140
- evaluator.plot_roc_curve(y_test, y_proba, model_type.upper())
141
- evaluator.error_analysis(X_test_clean, y_test, y_pred, y_proba)
142
-
143
  # ── FASE 7: Guardado del modelo ───────────────────────────────────────────
144
  logger.info("FASE 7 β€” Guardado del modelo")
145
  model_path = EXPERIMENTS_DIR / f"{model_type}_pipeline_{run_id}.joblib"
@@ -160,10 +156,10 @@ def run_pipeline(model_type: str = "lr") -> dict:
160
  logger.info("FASE 9 β€” Generando informes")
161
  metrics["run_id"] = run_id
162
  metrics["model_path"]= str(model_path)
163
- evaluator.save_report(metrics, f"exp_{run_id}_{model_type}")
164
  metrics["model_type"] = model_type
165
  metrics["run_id"] = run_id
166
- evaluator.save_summary([metrics])
 
167
 
168
  logger.info("=" * 60)
169
  logger.info(f"βœ… Pipeline completado β€” F1={metrics['f1_weighted']:.4f}")
 
122
 
123
  # ── FASE 6: Evaluación en test ────────────────────────────────────────────
124
  logger.info("FASE 6 β€” EvaluaciΓ³n en test")
125
+ report_dir = PROJECT_ROOT / "reports" / "pipeline" / model_type
126
+ evaluator = Evaluator(output_dir=report_dir)
127
+
128
+ metrics = evaluator.evaluate_and_report(
129
+ model,
130
+ X_test_clean,
131
+ y_test,
132
+ model_name=model_type.upper(),
133
+ X_train=X_train_clean,
134
+ y_train=y_train,
135
+ cv_results=cv_results,
136
+ summary_path=PROJECT_ROOT / "reports" / "summary.csv",
137
  )
138
 
 
 
 
 
 
139
  # ── FASE 7: Guardado del modelo ───────────────────────────────────────────
140
  logger.info("FASE 7 β€” Guardado del modelo")
141
  model_path = EXPERIMENTS_DIR / f"{model_type}_pipeline_{run_id}.joblib"
 
156
  logger.info("FASE 9 β€” Generando informes")
157
  metrics["run_id"] = run_id
158
  metrics["model_path"]= str(model_path)
 
159
  metrics["model_type"] = model_type
160
  metrics["run_id"] = run_id
161
+ metrics["model_family"] = "sklearn_baseline"
162
+ evaluator.save_report(metrics, f"exp_{run_id}_{model_type}")
163
 
164
  logger.info("=" * 60)
165
  logger.info(f"βœ… Pipeline completado β€” F1={metrics['f1_weighted']:.4f}")