| """ | |
| Evaluation Metrics | |
| ================== | |
| Comprehensive metrics for classification and regression tasks. | |
| Author: UW MSIM Team | |
| Date: November 2025 | |
| """ | |
| import numpy as np | |
| from sklearn.metrics import ( | |
| roc_auc_score, accuracy_score, f1_score, precision_score, recall_score, | |
| r2_score, mean_squared_error, mean_absolute_error, log_loss | |
| ) | |
| from typing import Dict, Optional | |
| import logging | |
| logger = logging.getLogger(__name__) | |


def calculate_classification_metrics(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    y_proba: Optional[np.ndarray] = None
) -> Dict[str, float]:
    """
    Calculate all classification metrics.

    Parameters
    ----------
    y_true : np.ndarray
        True labels
    y_pred : np.ndarray
        Predicted labels
    y_proba : np.ndarray, optional
        Predicted probabilities (n_samples, n_classes)

    Returns
    -------
    metrics : dict
        Dictionary of metric names and values
    """
    metrics = {
        'accuracy': accuracy_score(y_true, y_pred),
        'f1_macro': f1_score(y_true, y_pred, average='macro', zero_division=0),
        'f1_weighted': f1_score(y_true, y_pred, average='weighted', zero_division=0),
        'precision_macro': precision_score(y_true, y_pred, average='macro', zero_division=0),
        'recall_macro': recall_score(y_true, y_pred, average='macro', zero_division=0)
    }
    # ROC-AUC and log loss (only if probabilities are available)
    if y_proba is not None:
        try:
            n_classes = len(np.unique(y_true))
            if n_classes == 2:
                # Binary classification: score the positive-class column
                metrics['roc_auc'] = roc_auc_score(y_true, y_proba[:, 1])
            else:
                # Multi-class classification: macro-averaged one-vs-rest AUC
                metrics['roc_auc'] = roc_auc_score(
                    y_true, y_proba,
                    multi_class='ovr',
                    average='macro'
                )
            # Log loss
            metrics['log_loss'] = log_loss(y_true, y_proba)
        except Exception as e:
            logger.warning(f"Probability-based metrics failed: {e}")
            metrics['roc_auc'] = np.nan
            metrics['log_loss'] = np.nan

    return metrics
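
# Usage sketch (illustrative only; the arrays below are made-up sample data,
# with y_proba following the (n_samples, n_classes) shape documented above):
#
#   >>> y_true = np.array([0, 1, 1, 0])
#   >>> y_pred = np.array([0, 1, 0, 0])
#   >>> proba = np.array([[0.8, 0.2], [0.3, 0.7], [0.6, 0.4], [0.9, 0.1]])
#   >>> calculate_classification_metrics(y_true, y_pred, proba)['accuracy']
#   0.75
#
# See the __main__ block at the bottom of this module for a runnable demo.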


def calculate_regression_metrics(
    y_true: np.ndarray,
    y_pred: np.ndarray
) -> Dict[str, float]:
    """
    Calculate all regression metrics.

    Parameters
    ----------
    y_true : np.ndarray
        True values
    y_pred : np.ndarray
        Predicted values

    Returns
    -------
    metrics : dict
        Dictionary of metric names and values
    """
    metrics = {
        'r2': r2_score(y_true, y_pred),
        'rmse': np.sqrt(mean_squared_error(y_true, y_pred)),
        'mae': mean_absolute_error(y_true, y_pred),
        'mse': mean_squared_error(y_true, y_pred)
    }

    # MAPE (avoid division by zero by masking out zero targets)
    try:
        non_zero_mask = y_true != 0
        if np.any(non_zero_mask):
            mape = np.mean(np.abs((y_true[non_zero_mask] - y_pred[non_zero_mask]) / y_true[non_zero_mask])) * 100
            metrics['mape'] = mape
        else:
            metrics['mape'] = np.nan
    except Exception as e:
        logger.warning(f"MAPE calculation failed: {e}")
        metrics['mape'] = np.nan

    return metrics
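

if __name__ == "__main__":
    # Minimal smoke test, a sketch with synthetic data; the random labels and
    # targets below are illustrative and not part of the module's API.
    logging.basicConfig(level=logging.INFO)
    rng = np.random.default_rng(0)

    # Classification: 100 samples, 2 classes, row-normalized "probabilities".
    y_true_cls = rng.integers(0, 2, size=100)
    y_proba = rng.random((100, 2))
    y_proba = y_proba / y_proba.sum(axis=1, keepdims=True)
    y_pred_cls = y_proba.argmax(axis=1)
    print(calculate_classification_metrics(y_true_cls, y_pred_cls, y_proba))

    # Regression: positive targets (keeps MAPE well-behaved) plus small noise.
    y_true_reg = rng.uniform(1.0, 10.0, size=100)
    y_pred_reg = y_true_reg + rng.normal(scale=0.5, size=100)
    print(calculate_regression_metrics(y_true_reg, y_pred_reg))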