Spaces:
Sleeping
Sleeping
File size: 3,537 Bytes
1e5b98a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
# src/cross_validation.py
from typing import Dict, List, Tuple
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score, f1_score, precision_score, recall_score, accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline
def perform_cross_validation(
    model: Pipeline,
    X: pd.DataFrame,
    y: np.ndarray,
    n_splits: int = 5,
    random_state: int = 42,
    threshold: float = 0.5,
) -> Dict[str, Dict[str, float]]:
    """
    Perform k-fold stratified cross-validation and return aggregated metrics.

    The model is refit in place on every fold, so after this function returns
    it holds the fit from the last fold's training split.

    Args:
        model: Scikit-learn pipeline or model exposing fit and predict_proba
        X: Feature matrix (a DataFrame; other array-likes are indexed by position)
        y: Target array (array-like; a pandas Series with any index is accepted)
        n_splits: Number of folds for cross-validation
        random_state: Random state for reproducibility
        threshold: Probability threshold for binary classification
    Returns:
        Dictionary keyed by metric name ("auc", "f1", "precision", "recall",
        "accuracy"). Each entry holds "mean" and "std" (population standard
        deviation, numpy default) across folds, plus "values", the list of
        per-fold scores.
    """
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    # The splitter yields positional indices. Indexing a pandas Series with
    # them directly would be label-based and can pick wrong rows or raise
    # KeyError, so the targets are converted to a plain array once up front.
    y_values = np.asarray(y)
    # DataFrames are sliced by position via .iloc; anything else is treated as
    # an array and indexed positionally.
    has_iloc = hasattr(X, "iloc")
    X_values = X if has_iloc else np.asarray(X)

    # Store metrics for each fold
    fold_metrics = {
        "auc": [],
        "f1": [],
        "precision": [],
        "recall": [],
        "accuracy": [],
    }

    print(f"\n{'='*60}")
    print(f"Performing {n_splits}-Fold Stratified Cross-Validation")
    print(f"{'='*60}\n")

    for fold_idx, (train_idx, val_idx) in enumerate(skf.split(X_values, y_values), 1):
        if has_iloc:
            X_train_fold, X_val_fold = X_values.iloc[train_idx], X_values.iloc[val_idx]
        else:
            X_train_fold, X_val_fold = X_values[train_idx], X_values[val_idx]
        y_train_fold, y_val_fold = y_values[train_idx], y_values[val_idx]

        # Train model on fold
        model.fit(X_train_fold, y_train_fold)

        # Column 1 of predict_proba is used as the positive-class score, and
        # hard labels are obtained by thresholding it.
        y_proba_fold = model.predict_proba(X_val_fold)[:, 1]
        y_pred_fold = (y_proba_fold >= threshold).astype(int)

        # AUC is computed from the scores; the other metrics from hard labels.
        auc = roc_auc_score(y_val_fold, y_proba_fold)
        f1 = f1_score(y_val_fold, y_pred_fold)
        precision = precision_score(y_val_fold, y_pred_fold)
        recall = recall_score(y_val_fold, y_pred_fold)
        accuracy = accuracy_score(y_val_fold, y_pred_fold)

        # Store metrics
        fold_scores = {
            "auc": auc,
            "f1": f1,
            "precision": precision,
            "recall": recall,
            "accuracy": accuracy,
        }
        for metric_name, score in fold_scores.items():
            fold_metrics[metric_name].append(score)

        print(f"Fold {fold_idx}/{n_splits}:")
        print(f" AUC: {auc:.4f} | F1: {f1:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f} | Accuracy: {accuracy:.4f}")

    # Calculate mean and std across folds
    cv_results = {
        metric_name: {
            "mean": float(np.mean(values)),
            "std": float(np.std(values)),
            "values": [float(v) for v in values],
        }
        for metric_name, values in fold_metrics.items()
    }

    print(f"\n{'='*60}")
    print("Cross-Validation Results (Mean ± Std)")
    print(f"{'='*60}")
    for metric_name, stats in cv_results.items():
        print(f"{metric_name.upper():12s}: {stats['mean']:.4f} ± {stats['std']:.4f}")
    print(f"{'='*60}\n")

    return cv_results
def get_cv_summary(cv_results: Dict[str, Dict[str, float]]) -> Dict[str, float]:
    """
    Reduce cross-validation results to their per-metric means for logging.

    Args:
        cv_results: Mapping of metric name to a stats dictionary that
            contains a "mean" entry
    Returns:
        Dictionary mapping "cv_<metric>_mean" to that metric's mean, in the
        same order as the input
    """
    summary = {}
    for metric, stats in cv_results.items():
        key = f"cv_{metric}_mean"
        summary[key] = stats["mean"]
    return summary
|