SignalMod / src /evaluation /stable_cv.py
Mirae Kang
feat: implement new models and improve UI, #23
46cc63a
"""
Stratified K-fold cross-validation for the stable production pipeline.
"""
from __future__ import annotations
from typing import Any, Callable
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.model_selection import StratifiedKFold
from src.utils.logger import get_logger
logger = get_logger(__name__)
def stratified_kfold_cv(
    X: pd.Series,
    y: pd.Series,
    *,
    n_splits: int,
    random_state: int,
    fit_predict_fn: Callable[[pd.Series, pd.Series, pd.Series, pd.Series], dict[str, float]],
) -> dict[str, Any]:
    """
    Run stratified K-fold CV with a caller-provided fit/eval hook.

    The data is split with a shuffled ``StratifiedKFold``; for every fold the
    train/validation slices are taken positionally (``.iloc``) and re-indexed
    from zero before being handed to ``fit_predict_fn``.

    Args:
        X: Features; must support ``.iloc`` and ``.reset_index``.
        y: Labels aligned positionally with ``X``; used for stratification.
        n_splits: Number of folds.
        random_state: Seed for the shuffled split.
        fit_predict_fn: Hook receiving ``(X_tr, y_tr, X_val, y_val)`` and
            returning per-fold metrics including at least ``f1_weighted``.
            ``train_val_gap`` and ``roc_auc`` are optional; a missing gap is
            treated as ``0.0`` and a missing ROC AUC as NaN.

    Returns:
        A summary dict with ``n_splits``, mean/std/min/max of the fold F1
        scores, mean/std/max of the train/val gaps, ``roc_auc_mean`` (NaN when
        no fold reported a ROC AUC), the per-fold metric dicts under ``folds``
        (each tagged with its zero-based ``fold`` number), and
        ``stable_across_folds``, which is True when both ``f1_std`` and
        ``gap_max`` are below 0.05. All aggregates are rounded to 4 decimals.

    Raises:
        KeyError: If a fold's metrics lack ``f1_weighted``.
    """
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    fold_metrics: list[dict[str, float]] = []

    for fold, (tr_idx, val_idx) in enumerate(skf.split(X, y)):
        X_tr = X.iloc[tr_idx].reset_index(drop=True)
        y_tr = y.iloc[tr_idx].reset_index(drop=True)
        X_val = X.iloc[val_idx].reset_index(drop=True)
        y_val = y.iloc[val_idx].reset_index(drop=True)

        # Copy before tagging: the hook may reuse or keep a reference to the
        # dict it returned, and we must not mutate an object we do not own.
        m = dict(fit_predict_fn(X_tr, y_tr, X_val, y_val))
        m["fold"] = fold
        fold_metrics.append(m)
        logger.info(
            f"CV fold {fold + 1}/{n_splits} — F1={m['f1_weighted']:.4f} "
            f"gap={m.get('train_val_gap', 0):.4f}"
        )

    f1s = [m["f1_weighted"] for m in fold_metrics]
    gaps = [m.get("train_val_gap", 0.0) for m in fold_metrics]
    rocs = [m.get("roc_auc", np.nan) for m in fold_metrics]

    # np.nanmean warns ("Mean of empty slice") when every entry is NaN, which
    # is the normal case for hooks that only report the required f1_weighted.
    if np.all(np.isnan(rocs)):
        roc_auc_mean = float("nan")
    else:
        roc_auc_mean = round(float(np.nanmean(rocs)), 4)

    summary = {
        "n_splits": n_splits,
        "f1_mean": round(float(np.mean(f1s)), 4),
        "f1_std": round(float(np.std(f1s)), 4),
        "f1_min": round(float(np.min(f1s)), 4),
        "f1_max": round(float(np.max(f1s)), 4),
        "gap_mean": round(float(np.mean(gaps)), 4),
        "gap_std": round(float(np.std(gaps)), 4),
        "gap_max": round(float(np.max(gaps)), 4),
        "roc_auc_mean": roc_auc_mean,
        "folds": fold_metrics,
    }
    # Stability is judged on the rounded aggregates: low F1 spread across
    # folds and no single fold with a large train/val gap.
    stable = summary["f1_std"] < 0.05 and summary["gap_max"] < 0.05
    summary["stable_across_folds"] = stable
    return summary
def evaluate_lr_fold(
    lr_model_factory: Callable[[], Any],
    X_tr: pd.Series,
    y_tr: pd.Series,
    X_val: pd.Series,
    y_val: pd.Series,
    *,
    augment_fn: Callable[..., Any] | None = None,
    cfg: dict | None = None,
    seed: int = 42,
) -> dict[str, float]:
    """Fit LR on (optionally augmented) fold train; score fold val.

    Args:
        lr_model_factory: Zero-argument callable returning a fresh, unfitted
            model exposing ``fit``, ``predict`` and ``predict_proba``.
        X_tr: Fold training features.
        y_tr: Fold training labels.
        X_val: Fold validation features.
        y_val: Fold validation labels; cast to ``int`` before scoring.
        augment_fn: Optional callable invoked as
            ``augment_fn(X_tr, y_tr, cfg, seed=seed)`` and returning the
            ``(X_fit, y_fit)`` pair to train on. Only applied when ``cfg`` is
            also provided; otherwise the raw fold train data is used.
        cfg: Configuration forwarded to ``augment_fn``.
        seed: Seed forwarded to ``augment_fn``.

    Returns:
        Dict with ``f1_weighted`` (validation weighted F1), ``f1_train``
        (weighted F1 on the data the model was fitted on),
        ``train_val_gap`` (absolute difference, rounded to 4 decimals),
        ``train_val_gap_pp`` (the same gap times 100, rounded to 2 decimals)
        and ``roc_auc`` (rounded to 4 decimals, or NaN when the validation
        fold contains fewer than two classes).
    """
    if augment_fn and cfg is not None:
        X_fit, y_fit = augment_fn(X_tr, y_tr, cfg, seed=seed)
    else:
        X_fit, y_fit = X_tr, y_tr

    model = lr_model_factory()
    model.fit(X_fit, y_fit)

    y_val_arr = y_val.astype(int).values
    preds_val = model.predict(X_val)
    # Train-side predictions are made on the fitted data (augmented or not),
    # so the gap compares validation F1 with F1 on what the model actually saw.
    preds_train = model.predict(X_fit)
    # Second probability column — presumably the positive class of a binary
    # classifier; confirm against the models produced by lr_model_factory.
    probs_val = model.predict_proba(X_val)[:, 1]

    f1_val = float(f1_score(y_val_arr, preds_val, average="weighted", zero_division=0))
    f1_train = float(
        f1_score(y_fit.astype(int), preds_train, average="weighted", zero_division=0)
    )
    gap = abs(f1_train - f1_val)

    # ROC AUC is undefined when the validation fold holds a single class (this
    # can happen when a class has fewer samples than there are folds). Report
    # NaN instead of letting the scorer abort the whole CV run; NaN fold
    # values are skipped by NaN-aware aggregation.
    if np.unique(y_val_arr).size < 2:
        roc_auc = float("nan")
    else:
        roc_auc = round(float(roc_auc_score(y_val_arr, probs_val)), 4)

    return {
        "f1_weighted": f1_val,
        "f1_train": f1_train,
        "train_val_gap": round(gap, 4),
        "train_val_gap_pp": round(gap * 100, 2),
        "roc_auc": roc_auc,
    }