""" src.evaluation.metrics ====================== Comprehensive evaluation metrics for battery lifecycle prediction. Provides: - Regression metrics: MAE, MSE, RMSE, R², MAPE, tolerance accuracy - Classification metrics: accuracy, F1-macro, confusion matrix - Per-battery evaluation for cross-entity analysis - Summary table builder """ from __future__ import annotations from typing import Any import numpy as np import pandas as pd from sklearn.metrics import ( accuracy_score, confusion_matrix, f1_score, mean_absolute_error, mean_squared_error, r2_score, ) def regression_metrics( y_true: np.ndarray, y_pred: np.ndarray, prefix: str = "", ) -> dict[str, float]: """Compute full regression metric suite. Returns dict with keys: MAE, MSE, RMSE, R2, MAPE """ y_true = np.asarray(y_true).ravel() y_pred = np.asarray(y_pred).ravel() mae = mean_absolute_error(y_true, y_pred) mse = mean_squared_error(y_true, y_pred) rmse = np.sqrt(mse) r2 = r2_score(y_true, y_pred) # MAPE — avoid division by zero mask = y_true != 0 mape = np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100 if mask.any() else np.nan p = f"{prefix}_" if prefix else "" return { f"{p}MAE": mae, f"{p}MSE": mse, f"{p}RMSE": rmse, f"{p}R2": r2, f"{p}MAPE": mape, } def tolerance_accuracy( y_true: np.ndarray, y_pred: np.ndarray, tolerance: float = 2.0, ) -> float: """Fraction of predictions within ±tolerance of true values. Parameters ---------- tolerance : float Absolute tolerance (e.g., 2.0 for ±2% SOH or ±2 cycles RUL). """ return float(np.mean(np.abs(y_true - y_pred) <= tolerance)) def classification_metrics( y_true: np.ndarray, y_pred: np.ndarray, labels: list | None = None, ) -> dict[str, Any]: """Compute classification metrics.""" return { "accuracy": accuracy_score(y_true, y_pred), "f1_macro": f1_score(y_true, y_pred, average="macro", zero_division=0), "f1_weighted": f1_score(y_true, y_pred, average="weighted", zero_division=0), "confusion_matrix": confusion_matrix(y_true, y_pred, labels=labels), } def per_battery_evaluation( y_true: np.ndarray, y_pred: np.ndarray, battery_ids: np.ndarray | pd.Series, ) -> pd.DataFrame: """Compute regression metrics for each battery separately. Returns ------- pd.DataFrame One row per battery, columns = metrics. """ results = [] for bid in np.unique(battery_ids): mask = np.asarray(battery_ids) == bid if mask.sum() < 2: continue m = regression_metrics(y_true[mask], y_pred[mask]) m["battery_id"] = bid m["n_samples"] = int(mask.sum()) results.append(m) return pd.DataFrame(results) def build_summary_table( results: dict[str, dict[str, float]], ) -> pd.DataFrame: """Merge per-model results into one summary table. Parameters ---------- results : dict ``{model_name: {metric: value, ...}, ...}`` Returns ------- pd.DataFrame One row per model. """ rows = [] for name, metrics in results.items(): row = {"model": name} row.update(metrics) rows.append(row) return pd.DataFrame(rows).set_index("model")