# argus-mlops / src/models/evaluator.py
# (last change: "Fix scroll reset for HF Spaces double-iframe context", commit 1aa566a)
"""Champion vs. challenger model comparison."""
from __future__ import annotations
import numpy as np
import pandas as pd
from typing import Any
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from src.data.preprocessing import Preprocessor
from src.utils.logging_config import get_logger
log = get_logger(__name__)
class ModelEvaluator:
    """Evaluate regression models and compare a champion against a challenger."""

    def __init__(self) -> None:
        # Shared preprocessor so both models are scored on identical features.
        self.preprocessor = Preprocessor()

    def evaluate_single(self, model: Any, df: pd.DataFrame) -> dict:
        """Compute regression metrics for a single model."""
        features, target = self.preprocessor.transform_with_target(df)
        if target is None:
            raise ValueError("DataFrame must contain the target column.")
        predictions = model.predict(features)
        return self._metrics(target.to_numpy(), predictions)

    def compare(self, champion: Any, challenger: Any, df: pd.DataFrame) -> dict:
        """Side-by-side comparison returning metrics and a promotion recommendation."""
        champion_metrics = self.evaluate_single(champion, df)
        challenger_metrics = self.evaluate_single(challenger, df)

        # Negative delta means the challenger has the lower (better) RMSE.
        rmse_delta = challenger_metrics["rmse"] - champion_metrics["rmse"]
        # Guard against a zero champion RMSE before dividing.
        pct_improvement = -rmse_delta / max(champion_metrics["rmse"], 1e-9) * 100
        if pct_improvement > 0:
            verdict = "promote"
        else:
            verdict = "keep_champion"

        log.info(
            "Model comparison: Champion RMSE=%.4f | Challenger RMSE=%.4f | "
            "delta=%.4f (%.2f%%) -> %s",
            champion_metrics["rmse"],
            challenger_metrics["rmse"],
            rmse_delta,
            pct_improvement,
            verdict.upper(),
        )
        return {
            "champion_metrics": champion_metrics,
            "challenger_metrics": challenger_metrics,
            "delta_rmse": round(rmse_delta, 4),
            "improvement_pct": round(pct_improvement, 2),
            "recommendation": verdict,
        }

    def _metrics(self, y_true: np.ndarray, y_pred: np.ndarray) -> dict:
        """Return rounded RMSE / MAE / R2 for a pair of target arrays."""
        rmse = float(np.sqrt(mean_squared_error(y_true, y_pred)))
        mae = float(mean_absolute_error(y_true, y_pred))
        r2 = float(r2_score(y_true, y_pred))
        return {"rmse": round(rmse, 4), "mae": round(mae, 4), "r2": round(r2, 4)}