Spaces:

devrup404
/

SignalMod

Running

App Files Community

Mirae Kang committed 5 days ago

Commit

975d796

1 Parent(s): 2b1423b

fix: modify evaluation pipeline, #7

Browse files

Files changed (9) hide show

reports/pipeline/lr/cm_lr.png +3 -0
reports/pipeline/lr/errors_lr.csv +53 -0
reports/pipeline/lr/exp_20260523_163516_lr.json +26 -0
reports/pipeline/lr/exp_20260523_163600_lr.json +26 -0
reports/pipeline/lr/roc_lr.png +3 -0
reports/summary.csv +10 -0
src/evaluation/__init__.py +5 -0
src/evaluation/evaluator.py +121 -31
src/pipeline/run_pipeline.py +14 -18

reports/pipeline/lr/cm_lr.png ADDED Viewed

Git LFS Details

SHA256: b64abbfe6125a4afd8f95a411aa2f655ffbab819afe041ddd94d363960c52512
Pointer size: 130 Bytes
Size of remote file: 26.9 kB

reports/pipeline/lr/errors_lr.csv ADDED Viewed

	@@ -0,0 +1,53 @@

+text,real,pred,prob_toxic,tipo_error
+fuck time freeway,False,True,0.6415007346665947,falso_positivo
+man officer get lucky life still ruin juror would convict black way,False,True,0.5716710269810534,falso_positivo
+police car purposefully leave destroy get anger instead business cost destruction car pass state taxpayer,False,True,0.503612868866628,falso_positivo
+traffic bad enough,False,True,0.5106084760625215,falso_positivo
+people get shoot everyday come one get attention,False,True,0.5624723994474933,falso_positivo
+feel black america understand everything,False,True,0.5146301249872574,falso_positivo
+head read protester take far,False,True,0.513207971133486,falso_positivo
+stand side freeway instead block thousand car,False,True,0.5251542626479927,falso_positivo
+lose shit guy lmao,False,True,0.5746623918084022,falso_positivo
+take medical cannabis guy couple face book fight page mean necessarily aggressive,False,True,0.5036380541041356,falso_positivo
+cop shoot white guy would riot since shoot black guy everyone seem think black look evidence skin color everyone always throw race card crap,False,True,0.5970904349206777,falso_positivo
+good job guy raise taxis,False,True,0.5008660396035933,falso_positivo
+people translate conversation funniest sad shit ever funny get wrong sad people reach every direction find hole get cop,False,True,0.5340463945332806,falso_positivo
+weird white pretext police officer white must less risk would black police officer would kill white would take year prison I may white white black equal,False,True,0.5870245290128406,falso_positivo
+obama show hos face city yet support sad,False,True,0.5103506243372347,falso_positivo
+everyone say black people white people white woman get shoot rubber bullet come next day two black people help walk,False,True,0.5578356563804213,falso_positivo
+real question need ask would mike brown rush cop already draw weapon already shoot way answer show true color think black male would disregard life kill cop escalate punishment probation petty theft death penalty kill cop utterly absurd truly believe someone go start college next week would sorry racist,False,True,0.5443162086132329,falso_positivo
+agree police action kill boy mean protest loot destroy neighbourhood stupid make people want support struggle,False,True,0.5236958135962575,falso_positivo
+call peacefully protest burn car lol,False,True,0.5116161189286824,falso_positivo
+spread word year unarmed white guy shoot black cop,False,True,0.5538455860954972,falso_positivo
+gas grenade pepper spray knight stick would apply,False,True,0.5147355292873231,falso_positivo
+police shoot black guy black riot black shoot black happen,False,True,0.5956566810148107,falso_positivo
+amen peggy hubbard color make difference person inside innocent nine year old little girl one attention people want cause chaos black white go exactly obama want look country mess admire courage tell peggy hubbard,True,False,0.40495127807343234,falso_negativo
+ah beautiful,True,False,0.46757257821393866,falso_negativo
+bullshit stef police officer pepper spray stun gun number non lethal method immediate disposal real question version event hold true gun first option officer go,True,False,0.4753839163064213,falso_negativo
+stefan ever victim police discrimination game white supremacy deny involvement play victim black complain never fail,True,False,0.4502233884252265,falso_negativo
+bottom line michael brown drug addict criminal violent criminal anyone feel bad get kill unless criminal,True,False,0.4974324511920543,falso_negativo
+jimmy dore dickhead square,True,False,0.46757257821393866,falso_negativo
+blacklivesmatter build deceit lie,True,False,0.45884138016676057,falso_negativo
+educate dumb assss rap rap real people dispise,True,False,0.42647914768536693,falso_negativo
+push clerk around really make I lose sympathy definitely tell guy likely dick.but hell need new tv might well riot go loot one honor,True,False,0.4460833711504373,falso_negativo
+michael brown thug god bless thin blue line graduate police academy december,True,False,0.44310473756276547,falso_negativo
+protect serve people michael brown guy deserve kill cry,True,False,0.4930152565241669,falso_negativo
+zimmerman case full shit smell hate seep bring rap music listen sound cloud nothing make charcter look foolish dangerous possible ever want respond fuck az well> go much effort destroy mike browns character bring smoking habit nothing get shoot tone perfectly choose word use really make brown look unstable look much detail go trhu slay browns character,True,False,0.4835229120264079,falso_negativo
+far base even call propaganda bs,True,False,0.43974023289369363,falso_negativo
+patient ever ambulance get injure bad make hospital protestor liable die protestor charge man slaughter,True,False,0.46034973043545707,falso_negativo
+police problem protest cause black criminal let forget blame address elephant room,True,False,0.4891559683461648,falso_negativo
+think gentle giant would kill song rap talk bad,True,False,0.45653873903001546,falso_negativo
+black person say thank speak truth many black cause trouble dress idiot bring trouble kill eachother cause crime complain police good glad parent grow I properly stay away garbage,True,False,0.46329948394601783,falso_negativo
+rich brat,True,False,0.48192463449669426,falso_negativo
+dude lie azz racist cracka,True,False,0.4780534833910638,falso_negativo
+hahahahaha guy ed bassmaster character always teste ed bassmaster semi retarded character cross eyed guy,True,False,0.47755160633262406,falso_negativo
+revolution set neighbourhood fire good job burn suburb cop live make homeless achive anything,True,False,0.47050296796324714,falso_negativo
+one pair work boot steal ferguson riot,True,False,0.4605386101821508,falso_negativo
+wish mr masri dead,True,False,0.46328501485243195,falso_negativo
+cop pig,True,False,0.4794382794479078,falso_negativo
+know chimper compassion mud pump heart,True,False,0.41790271212899205,falso_negativo
+niggas love victim situation separate black people need,True,False,0.4715061903422615,falso_negativo
+let get straight present fact mass media beginning yr old legal aspect adult rob convince store criminal later flee seen aprehende police resist arrest altercation occur two police every case similar use force criminal kill killing go court randomly select jury view evidence hear witness pertain case radom jury reach unanimous verdict guilty sorry protestor democracy,True,False,0.4488888614734203,falso_negativo
+motherfucker angry understand,True,False,0.4807313546725058,falso_negativo
+hell care people peacefully protest police feel threaten go act say blow fucker face tg follow riot,True,False,0.49462955392154545,falso_negativo
+let elite divide conquer tactic work want keep racially divide never rise,True,False,0.46885478185552015,falso_negativo

reports/pipeline/lr/exp_20260523_163516_lr.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "model": "LR",
+  "timestamp": "2026-05-23T16:35:21.125402",
+  "f1_weighted": 0.7387,
+  "f1_toxic": 0.7045,
+  "precision": 0.7399,
+  "recall": 0.74,
+  "accuracy": 0.74,
+  "roc_auc": 0.7838,
+  "fp": 22,
+  "fn": 30,
+  "n_test": 200,
+  "f1_train": 0.8984,
+  "train_test_gap_pp": 15.97,
+  "cv_f1_mean": 0.7193,
+  "cv_f1_std": 0.0382,
+  "cv_test_gap_pp": 1.94,
+  "cm_plot": "/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/cm_lr.png",
+  "roc_plot": "/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/roc_lr.png",
+  "top_fp_terms": "black(14), would(9), white(9), shoot(8), get(7), people(7), guy(7), cop(6), police(5), car(4)",
+  "top_fn_terms": "police(8), make(6), black(6), criminal(6), people(5), kill(5), want(4), cause(4), look(4), brown(4)",
+  "run_id": "20260523_163516",
+  "model_path": "/Users/miraekang/proyectos/ai-nlp/models/experiments/lr/lr_pipeline_20260523_163516.joblib",
+  "model_type": "lr",
+  "model_family": "sklearn_baseline"
+}

reports/pipeline/lr/exp_20260523_163600_lr.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "model": "LR",
+  "timestamp": "2026-05-23T16:36:05.009624",
+  "f1_weighted": 0.7387,
+  "f1_toxic": 0.7045,
+  "precision": 0.7399,
+  "recall": 0.74,
+  "accuracy": 0.74,
+  "roc_auc": 0.7838,
+  "fp": 22,
+  "fn": 30,
+  "n_test": 200,
+  "f1_train": 0.8984,
+  "train_test_gap_pp": 15.97,
+  "cv_f1_mean": 0.7193,
+  "cv_f1_std": 0.0382,
+  "cv_test_gap_pp": 1.94,
+  "cm_plot": "/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/cm_lr.png",
+  "roc_plot": "/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/roc_lr.png",
+  "top_fp_terms": "black(14), would(9), white(9), shoot(8), get(7), people(7), guy(7), cop(6), police(5), car(4)",
+  "top_fn_terms": "police(8), make(6), black(6), criminal(6), people(5), kill(5), want(4), cause(4), look(4), brown(4)",
+  "run_id": "20260523_163600",
+  "model_path": "/Users/miraekang/proyectos/ai-nlp/models/experiments/lr/lr_pipeline_20260523_163600.joblib",
+  "model_type": "lr",
+  "model_family": "sklearn_baseline"
+}

reports/pipeline/lr/roc_lr.png ADDED Viewed

Git LFS Details

SHA256: 2944f4eebf52a33fc354d21da1dc1a92b472d459fb2de4a9651def2e5ba0f347
Pointer size: 130 Bytes
Size of remote file: 39.4 kB

reports/summary.csv ADDED Viewed

	@@ -0,0 +1,10 @@

+model,model_family,f1_weighted,roc_auc,fp,fn,cv_test_gap_pp,train_test_gap_pp,f1_train,evaluation_source,production_default,notes,timestamp,f1_toxic,precision,recall,accuracy,n_test,cv_f1_mean,cv_f1_std,cm_plot,roc_plot,top_fp_terms,top_fn_terms
+LR + TF-IDF (tuned),sklearn_baseline,0.7579,0.81,18.0,30.0,4.76,14.07,0.8987,configs/best_params.yaml Optuna,true,Best sklearn model on held-out test split (IsToxic),,,,,,,,,,,,
+LR + TF-IDF (local),sklearn_baseline,0.7579,0.81,18.0,30.0,4.76,14.07,0.8987,models/final_model.joblib,true,Served by FastAPI and Streamlit via ModelService,,,,,,,,,,,,
+LR,,0.7387,0.7838,22.0,30.0,1.94,15.97,0.8984,,,,2026-05-23T16:35:21.125402,0.7045,0.7399,0.74,0.74,200.0,0.7193,0.0382,/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/cm_lr.png,/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/roc_lr.png,"black(14), would(9), white(9), shoot(8), get(7), people(7), guy(7), cop(6), police(5), car(4)","police(8), make(6), black(6), criminal(6), people(5), kill(5), want(4), cause(4), look(4), brown(4)"
+LR,,0.7387,0.7838,22.0,30.0,1.94,15.97,0.8984,,,,2026-05-23T16:36:05.009624,0.7045,0.7399,0.74,0.74,200.0,0.7193,0.0382,/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/cm_lr.png,/Users/miraekang/proyectos/ai-nlp/reports/pipeline/lr/roc_lr.png,"black(14), would(9), white(9), shoot(8), get(7), people(7), guy(7), cop(6), police(5), car(4)","police(8), make(6), black(6), criminal(6), people(5), kill(5), want(4), cause(4), look(4), brown(4)"
+DistilBERT Toxicity,transformers_hf,,,,,,,ModelService catalog,false,Remote HF martin-ha/toxic-comment-model — switch via PUT /model/{name},,,,,,,,,,,,,
+toxic-bert (multilabel),transformers_hf,,,,,,,ModelService catalog,false,Remote HF unitary/toxic-bert — multilabel Jigsaw,,,,,,,,,,,,,
+RoBERTa Toxicity,transformers_hf,,,,,,,ModelService catalog,false,Remote HF s-nlp/roberta_toxicity_classifier,,,,,,,,,,,,,
+RF,sklearn_baseline,,,,,,,pipeline --model rf,false,Train and evaluate: python -m src.pipeline.run_pipeline --model rf,,,,,,,,,,,,,
+XGBoost,sklearn_baseline,,,,,,,pipeline --model xgboost,false,Train and evaluate: python -m src.pipeline.run_pipeline --model xgboost,,,,,,,,,,,,,

src/evaluation/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+"""Model evaluation and comparison."""
+from src.evaluation.evaluator import Evaluator
+__all__ = ["Evaluator"]

src/evaluation/evaluator.py CHANGED Viewed

@@ -5,13 +5,17 @@ Evaluación estandarizada de modelos.
 Genera métricas, visualizaciones e informes JSON.
 Uso:
-    evaluator = Evaluator(output_dir="reports/pipeline")
-    metrics = evaluator.evaluate(model, X_test, y_test, model_name="LR")
-    evaluator.error_analysis(X_test, y_test, preds, probs)
-    evaluator.save_summary(all_metrics, path="reports/summary.csv")
 """
 import json
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
@@ -28,6 +32,9 @@ from src.utils.logger import get_logger
 logger = get_logger(__name__)
 class Evaluator:
     """
@@ -109,6 +116,56 @@ class Evaluator:
         self._print_summary(metrics)
         return metrics
     # ── Visualizaciones ──────────────────────────────────────────────────────
     def plot_confusion_matrix(
         self,
@@ -116,6 +173,7 @@ class Evaluator:
         y_pred,
         model_name: str,
         save: bool = True,
     ) -> Path | None:
         """Genera y guarda la matriz de confusión."""
         cm = confusion_matrix(y_test, y_pred)
@@ -126,21 +184,21 @@ class Evaluator:
             yticklabels=["No tóxico", "Tóxico"],
             linewidths=0.5,
         )
-        ax.set_title(f"{model_name} — Confusion Matrix", fontweight="bold")
         ax.set_xlabel("Predicción")
         ax.set_ylabel("Real")
         plt.tight_layout()
         if save:
-            safe = model_name.lower().replace(" ", "_").replace("/", "_")
-            path = self.output_dir / f"cm_{safe}.png"
-            plt.savefig(path, dpi=150, bbox_inches="tight")
             plt.show()
-            logger.info(f"Confusion matrix guardada: {path}")
-            return path
-        plt.show()
-        return None
     def plot_roc_curve(
         self,
@@ -148,27 +206,28 @@ class Evaluator:
         y_proba,
         model_name: str,
         save: bool = True,
     ) -> Path | None:
         """Genera y guarda la curva ROC."""
         fig, ax = plt.subplots(figsize=(6, 5))
         RocCurveDisplay.from_predictions(
             y_test, y_proba, ax=ax, name=model_name, color="#7F77DD"
         )
-        ax.plot([0, 1], [0, 1], "--", color="gray", alpha=0.5, label="Random")
         ax.set_title(f"{model_name} — Curva ROC", fontweight="bold")
         ax.legend()
         plt.tight_layout()
         if save:
-            safe = model_name.lower().replace(" ", "_").replace("/", "_")
-            path = self.output_dir / f"roc_{safe}.png"
-            plt.savefig(path, dpi=150, bbox_inches="tight")
-            plt.show()
             logger.info(f"Curva ROC guardada: {path}")
-            return path
-        plt.show()
-        return None
     # ── Análisis de errores ──────────────────────────────────────────────────
     def error_analysis(
@@ -177,6 +236,7 @@ class Evaluator:
         y_test,
         y_pred,
         y_proba,
         n_examples: int = 5,
     ) -> dict:
         """
@@ -198,24 +258,44 @@ class Evaluator:
         fp = error_df[(error_df["real"] == 0) & (error_df["pred"] == 1)]
         fn = error_df[(error_df["real"] == 1) & (error_df["pred"] == 0)]
-        logger.info(f"Errores: FP={len(fp)} | FN={len(fn)}")
         print(f"\n{'='*65}")
-        print(f"FALSOS NEGATIVOS — hate speech que NO detectó ({len(fn)} total)")
         print(f"{'='*65}")
         for _, row in fn.nsmallest(n_examples, "prob_toxic").iterrows():
-            print(f"  Prob: {row['prob_toxic']:.3f} | {row['text'][:110]}")
             print()
         print(f"{'='*65}")
-        print(f"FALSOS POSITIVOS — comentarios OK censurados ({len(fp)} total)")
         print(f"{'='*65}")
         for _, row in fp.nlargest(n_examples, "prob_toxic").iterrows():
-            print(f"  Prob: {row['prob_toxic']:.3f} | {row['text'][:110]}")
             print()
-        return {"fp_examples": fp.head(n_examples).to_dict("records"),
-                "fn_examples": fn.head(n_examples).to_dict("records")}
     # ── Reports ──────────────────────────────────────────────────────────────
     def save_report(self, metrics: dict, experiment_id: str) -> Path:
@@ -232,7 +312,8 @@ class Evaluator:
         Si summary.csv ya existe, agrega nuevas filas.
         """
-        path = Path(path or self.output_dir / "summary.csv")
         # Nuevo dataframe
         new_df = pd.DataFrame(all_metrics)
@@ -247,13 +328,15 @@ class Evaluator:
             # Evitar duplicados por run_id si existe
             if "run_id" in df.columns:
                 df = df.drop_duplicates(subset=["run_id"], keep="last")
         else:
             df = new_df
         # Ordenar por F1 descendente
         if "f1_weighted" in df.columns:
-            df = df.sort_values("f1_weighted", ascending=False)
         # Guardar actualizado
         df.to_csv(path, index=False)
@@ -265,6 +348,13 @@ class Evaluator:
         return path
     # ── Interno ──────────────────────────────────────────────────────────────
     def _print_summary(self, metrics: dict) -> None:
         gap_str = ""

 Genera métricas, visualizaciones e informes JSON.
 Uso:
+    evaluator = Evaluator(output_dir="reports/pipeline/lr")
+    metrics = evaluator.evaluate_and_report(
+        model, X_test, y_test, model_name="LR",
+        summary_path="reports/summary.csv",
+    )
 """
 import json
+import re
+from collections import Counter
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 logger = get_logger(__name__)
+# Fallback location of the aggregated results table; used when no explicit
+# summary path is given. The path is relative, so it resolves against the
+# current working directory.
+DEFAULT_SUMMARY_PATH = Path("reports/summary.csv")
+# Token pattern for the frequent-term counts in the error analysis. A token is
+# at least three characters long: the first may be a lowercase ASCII letter,
+# one of á é í ó ú ñ, or an apostrophe; the rest must be letters from that same
+# set (no apostrophe). Uppercase letters and digits never match, so callers
+# lowercase the text before applying it.
+_TOKEN_RE = re.compile(r"[a-záéíóúñ'][a-záéíóúñ]{2,}")
 class Evaluator:
     """
         self._print_summary(metrics)
         return metrics
+    def evaluate_and_report(
+        self,
+        model,
+        X_test,
+        y_test,
+        model_name: str,
+        X_train=None,
+        y_train=None,
+        cv_results: dict | None = None,
+        summary_path: str | Path | None = None,
+        n_error_examples: int = 5,
+        show_plots: bool = False,
+    ) -> dict:
+        """
+        Run the full evaluation: metrics, plots, error analysis and summary CSV.
+
+        Used by run_pipeline. Steps, in order:
+          1. compute the metrics via ``evaluate``;
+          2. save the confusion-matrix and ROC plots;
+          3. run ``error_analysis`` (which also writes the errors CSV);
+          4. add plot paths and most-frequent error terms to the metrics;
+          5. append the metrics as one row to the summary CSV.
+
+        Args:
+            model: Fitted estimator; must provide ``predict`` and
+                ``predict_proba``.
+            X_test: Test inputs passed to the model.
+            y_test: True labels for ``X_test``.
+            model_name: Label used in plot titles and output file names.
+            X_train: Optional training inputs, forwarded to ``evaluate``.
+            y_train: Optional training labels, forwarded to ``evaluate``.
+            cv_results: Optional cross-validation results, forwarded to
+                ``evaluate``.
+            summary_path: Summary CSV to update; falls back to
+                ``DEFAULT_SUMMARY_PATH`` when omitted or empty.
+            n_error_examples: Number of error examples per error type passed
+                to ``error_analysis``.
+            show_plots: Whether to display the figures in addition to saving
+                them.
+
+        Returns:
+            The metrics dict from ``evaluate`` extended with the keys
+            ``cm_plot``, ``roc_plot``, ``top_fp_terms`` and ``top_fn_terms``.
+        """
+        metrics = self.evaluate(
+            model, X_test, y_test, model_name,
+            X_train=X_train, y_train=y_train, cv_results=cv_results,
+        )
+        # Predictions are computed here for the plots and the error analysis;
+        # column 1 of predict_proba is used as the positive-class score.
+        y_pred = model.predict(X_test)
+        y_proba = model.predict_proba(X_test)[:, 1]
+        cm_path = self.plot_confusion_matrix(
+            y_test, y_pred, model_name, save=True, show=show_plots,
+        )
+        roc_path = self.plot_roc_curve(
+            y_test, y_proba, model_name, save=True, show=show_plots,
+        )
+        errors = self.error_analysis(
+            X_test, y_test, y_pred, y_proba,
+            model_name=model_name, n_examples=n_error_examples,
+        )
+        # Paths are stored as plain strings (empty when no file was produced)
+        # so the metrics stay serialisable to CSV/JSON.
+        metrics["cm_plot"] = str(cm_path) if cm_path else ""
+        metrics["roc_plot"] = str(roc_path) if roc_path else ""
+        # Flatten the (term, count) pairs into "term(count), term(count), ...".
+        metrics["top_fp_terms"] = ", ".join(
+            f"{t}({c})" for t, c in errors.get("top_fp_terms", [])
+        )
+        metrics["top_fn_terms"] = ", ".join(
+            f"{t}({c})" for t, c in errors.get("top_fn_terms", [])
+        )
+        # NOTE(review): the summary row is written here, before run_pipeline
+        # attaches run_id / model_family to the metrics, so those fields do not
+        # reach the summary CSV and run_id-based de-duplication cannot apply to
+        # this row — confirm whether that is intended.
+        out = Path(summary_path or DEFAULT_SUMMARY_PATH)
+        self.save_summary([metrics], path=out)
+        return metrics
     # ── Visualizaciones ──────────────────────────────────────────────────────
     def plot_confusion_matrix(
         self,
         y_pred,
         model_name: str,
         save: bool = True,
+        show: bool = False,
     ) -> Path | None:
         """Genera y guarda la matriz de confusión."""
         cm = confusion_matrix(y_test, y_pred)
             yticklabels=["No tóxico", "Tóxico"],
             linewidths=0.5,
         )
+        ax.set_title(f"{model_name} — Matriz de confusión", fontweight="bold")
         ax.set_xlabel("Predicción")
         ax.set_ylabel("Real")
         plt.tight_layout()
+        safe = model_name.lower().replace(" ", "_").replace("/", "_")
+        path = self.output_dir / f"cm_{safe}.png"
         if save:
+            fig.savefig(path, dpi=150, bbox_inches="tight")
+            logger.info(f"Matriz de confusión guardada: {path}")
+        if show:
             plt.show()
+        else:
+            plt.close(fig)
+        return path if save else None
     def plot_roc_curve(
         self,
         y_proba,
         model_name: str,
         save: bool = True,
+        show: bool = False,
     ) -> Path | None:
         """Genera y guarda la curva ROC."""
         fig, ax = plt.subplots(figsize=(6, 5))
         RocCurveDisplay.from_predictions(
             y_test, y_proba, ax=ax, name=model_name, color="#7F77DD"
         )
+        ax.plot([0, 1], [0, 1], "--", color="gray", alpha=0.5, label="Azar")
         ax.set_title(f"{model_name} — Curva ROC", fontweight="bold")
         ax.legend()
         plt.tight_layout()
+        safe = model_name.lower().replace(" ", "_").replace("/", "_")
+        path = self.output_dir / f"roc_{safe}.png"
         if save:
+            fig.savefig(path, dpi=150, bbox_inches="tight")
             logger.info(f"Curva ROC guardada: {path}")
+        if show:
+            plt.show()
+        else:
+            plt.close(fig)
+        return path if save else None
     # ── Análisis de errores ──────────────────────────────────────────────────
     def error_analysis(
         y_test,
         y_pred,
         y_proba,
+        model_name: str = "modelo",
         n_examples: int = 5,
     ) -> dict:
         """
         fp = error_df[(error_df["real"] == 0) & (error_df["pred"] == 1)]
         fn = error_df[(error_df["real"] == 1) & (error_df["pred"] == 0)]
+        top_fp_terms = self._most_common_terms(fp["text"].tolist())
+        top_fn_terms = self._most_common_terms(fn["text"].tolist())
+        logger.info(f"Errores {model_name}: FP={len(fp)} | FN={len(fn)}")
         print(f"\n{'='*65}")
+        print(f"FALSOS NEGATIVOS — tóxico no detectado ({len(fn)} total)")
+        if top_fn_terms:
+            print("  Términos más frecuentes:", ", ".join(f"{w}({c})" for w, c in top_fn_terms[:8]))
         print(f"{'='*65}")
         for _, row in fn.nsmallest(n_examples, "prob_toxic").iterrows():
+            print(f"  Prob: {row['prob_toxic']:.3f} | {str(row['text'])[:110]}")
             print()
         print(f"{'='*65}")
+        print(f"FALSOS POSITIVOS — seguro marcado como tóxico ({len(fp)} total)")
+        if top_fp_terms:
+            print("  Términos más frecuentes:", ", ".join(f"{w}({c})" for w, c in top_fp_terms[:8]))
         print(f"{'='*65}")
         for _, row in fp.nlargest(n_examples, "prob_toxic").iterrows():
+            print(f"  Prob: {row['prob_toxic']:.3f} | {str(row['text'])[:110]}")
             print()
+        safe = model_name.lower().replace(" ", "_").replace("/", "_")
+        errors_path = self.output_dir / f"errors_{safe}.csv"
+        pd.concat([
+            fp.assign(tipo_error="falso_positivo"),
+            fn.assign(tipo_error="falso_negativo"),
+        ], ignore_index=True).to_csv(errors_path, index=False)
+        logger.info(f"Errores guardados: {errors_path}")
+        return {
+            "top_fp_terms": top_fp_terms,
+            "top_fn_terms": top_fn_terms,
+            "fp_examples": fp.head(n_examples).to_dict("records"),
+            "fn_examples": fn.head(n_examples).to_dict("records"),
+            "errors_csv": str(errors_path),
+        }
     # ── Reports ──────────────────────────────────────────────────────────────
     def save_report(self, metrics: dict, experiment_id: str) -> Path:
         Si summary.csv ya existe, agrega nuevas filas.
         """
+        path = Path(path or DEFAULT_SUMMARY_PATH)
+        path.parent.mkdir(parents=True, exist_ok=True)
         # Nuevo dataframe
         new_df = pd.DataFrame(all_metrics)
             # Evitar duplicados por run_id si existe
             if "run_id" in df.columns:
                 df = df.drop_duplicates(subset=["run_id"], keep="last")
+            elif "model" in df.columns and "timestamp" in df.columns:
+                df = df.drop_duplicates(subset=["model", "timestamp"], keep="last")
         else:
             df = new_df
         # Ordenar por F1 descendente
         if "f1_weighted" in df.columns:
+            df = df.sort_values("f1_weighted", ascending=False, na_position="last")
         # Guardar actualizado
         df.to_csv(path, index=False)
         return path
+    @staticmethod
+    def _most_common_terms(texts: list, top_n: int = 10) -> list[tuple[str, int]]:
+        counter: Counter[str] = Counter()
+        for text in texts:
+            counter.update(_TOKEN_RE.findall(str(text).lower()))
+        return counter.most_common(top_n)
     # ── Interno ──────────────────────────────────────────────────────────────
     def _print_summary(self, metrics: dict) -> None:
         gap_str = ""

src/pipeline/run_pipeline.py CHANGED Viewed

@@ -122,24 +122,20 @@ def run_pipeline(model_type: str = "lr") -> dict:
     # ── FASE 6: Evaluación en test ────────────────────────────────────────────
     logger.info("FASE 6 — Evaluación en test")
-    evaluator = Evaluator(output_dir=PROJECT_ROOT / "reports" / "v2" / "pipeline")
-    y_pred  = model.predict(X_test_clean)
-    y_proba = model.predict_proba(X_test_clean)[:, 1]
-    metrics = evaluator.evaluate(
-        model, X_test_clean, y_test,
-        model_name  = model_type.upper(),
-        X_train     = X_train_clean,
-        y_train     = y_train,
-        cv_results  = cv_results,
     )
-    # Visualizaciones
-    evaluator.plot_confusion_matrix(y_test, y_pred, model_type.upper())
-    evaluator.plot_roc_curve(y_test, y_proba, model_type.upper())
-    evaluator.error_analysis(X_test_clean, y_test, y_pred, y_proba)
     # ── FASE 7: Guardado del modelo ───────────────────────────────────────────
     logger.info("FASE 7 — Guardado del modelo")
     model_path = EXPERIMENTS_DIR / f"{model_type}_pipeline_{run_id}.joblib"
@@ -160,10 +156,10 @@ def run_pipeline(model_type: str = "lr") -> dict:
     logger.info("FASE 9 — Generando informes")
     metrics["run_id"]    = run_id
     metrics["model_path"]= str(model_path)
-    evaluator.save_report(metrics, f"exp_{run_id}_{model_type}")
     metrics["model_type"] = model_type
     metrics["run_id"] = run_id
-    evaluator.save_summary([metrics])
     logger.info("=" * 60)
     logger.info(f"✅ Pipeline completado — F1={metrics['f1_weighted']:.4f}")

     # ── FASE 6: Evaluación en test ────────────────────────────────────────────
     logger.info("FASE 6 — Evaluación en test")
+    report_dir = PROJECT_ROOT / "reports" / "pipeline" / model_type
+    evaluator = Evaluator(output_dir=report_dir)
+    metrics = evaluator.evaluate_and_report(
+        model,
+        X_test_clean,
+        y_test,
+        model_name=model_type.upper(),
+        X_train=X_train_clean,
+        y_train=y_train,
+        cv_results=cv_results,
+        summary_path=PROJECT_ROOT / "reports" / "summary.csv",
     )
     # ── FASE 7: Guardado del modelo ───────────────────────────────────────────
     logger.info("FASE 7 — Guardado del modelo")
     model_path = EXPERIMENTS_DIR / f"{model_type}_pipeline_{run_id}.joblib"
     logger.info("FASE 9 — Generando informes")
     metrics["run_id"]    = run_id
     metrics["model_path"]= str(model_path)
     metrics["model_type"] = model_type
     metrics["run_id"] = run_id
+    metrics["model_family"] = "sklearn_baseline"
+    evaluator.save_report(metrics, f"exp_{run_id}_{model_type}")
     logger.info("=" * 60)
     logger.info(f"✅ Pipeline completado — F1={metrics['f1_weighted']:.4f}")