# File size: 1,771 Bytes
# 02dbc0d
# src/models/meta.py
import numpy as np
from sklearn.linear_model import RidgeCV
def fit_meta(oof_matrix: np.ndarray, y_train: np.ndarray,
             test_matrix: np.ndarray) -> tuple:
    """Fit a RidgeCV meta-learner on out-of-fold predictions.

    The regularisation strength is chosen by 5-fold cross-validation over
    50 log-spaced candidates between 1e-3 and 1e3. The selected alpha and
    the smallest/largest fitted coefficient are printed as a quick
    sanity check.

    Args:
        oof_matrix: Training features for the meta-learner (presumably one
            column of OOF predictions per base model; confirm with caller).
        y_train: Training targets aligned with the rows of ``oof_matrix``.
        test_matrix: Features to predict on, with the same columns as
            ``oof_matrix``.

    Returns:
        ``(meta, preds)``: the fitted ``RidgeCV`` estimator and its
        predictions for ``test_matrix``.
    """
    alpha_grid = np.logspace(-3, 3, 50)
    ridge = RidgeCV(alphas=alpha_grid, cv=5).fit(oof_matrix, y_train)
    test_predictions = ridge.predict(test_matrix)

    # Report the chosen alpha and the spread of the blend weights.
    coef_low = ridge.coef_.min()
    coef_high = ridge.coef_.max()
    print(f" Meta alpha: {ridge.alpha_:.4f} "
          f"coef range: [{coef_low:.3f}, {coef_high:.3f}]")
    return ridge, test_predictions
# src/models/calibration.py
import numpy as np
from sklearn.isotonic import IsotonicRegression
from sklearn.model_selection import KFold
def fit_isotonic(oof_preds: np.ndarray, y_train: np.ndarray,
                 test_preds: np.ndarray) -> tuple:
    """Calibrate meta-predictions with isotonic regression.

    The calibrator is fitted only on out-of-fold predictions and training
    targets; ``test_preds`` is transformed, never fitted on.

    Before the final fit, a shuffled 5-fold CV over the OOF predictions
    estimates how much calibration helps: RMSE of the raw predictions vs.
    RMSE after a fold-wise isotonic fit. The estimate is printed so the
    caller can compare it with whatever improvement is later observed on
    held-out data (this function receives no test targets, so it cannot
    make that comparison itself). If the CV estimate shows no improvement
    a ``RuntimeWarning`` is issued; calibration is still applied so the
    return contract is unchanged.

    Args:
        oof_preds: Out-of-fold predictions for the training rows. Any
            array-like is accepted and flattened to 1-D.
        y_train: Training targets, same length as ``oof_preds``.
        test_preds: Predictions to calibrate. Values outside the range
            seen in ``oof_preds`` are clipped (``out_of_bounds='clip'``).

    Returns:
        ``(iso_full, preds_cal)``: the ``IsotonicRegression`` fitted on all
        OOF predictions and the calibrated ``test_preds``.

    Raises:
        ValueError: If ``oof_preds`` and ``y_train`` differ in length.
    """
    import warnings

    # Work on flat ndarrays so the positional fold indices below always
    # index by position (a pandas Series with a non-default index, or a
    # plain list, would otherwise be mis-indexed or fail).
    oof_preds = np.asarray(oof_preds).ravel()
    y_train = np.asarray(y_train).ravel()
    test_preds = np.asarray(test_preds).ravel()

    # Fail fast: a longer y_train would otherwise pass through the CV
    # loop silently with misaligned (prediction, target) pairs.
    if oof_preds.shape[0] != y_train.shape[0]:
        raise ValueError(
            f"oof_preds and y_train must have the same length, got "
            f"{oof_preds.shape[0]} and {y_train.shape[0]}"
        )

    def _rmse(pred, target):
        # Root-mean-squared error between two equal-length arrays.
        return np.sqrt(np.mean((pred - target) ** 2))

    # CV estimate of benefit
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    cv_raw, cv_cal = [], []
    for tri, vali in kf.split(oof_preds):
        iso = IsotonicRegression(out_of_bounds='clip')
        iso.fit(oof_preds[tri], y_train[tri])
        cv_raw.append(_rmse(oof_preds[vali], y_train[vali]))
        cv_cal.append(_rmse(iso.predict(oof_preds[vali]), y_train[vali]))

    raw_rmse = np.mean(cv_raw)
    cal_rmse = np.mean(cv_cal)
    cv_gain = raw_rmse - cal_rmse
    print(f" Isotonic CV RMSE: {raw_rmse:.4f} → {cal_rmse:.4f} "
          f"(gain={cv_gain:+.4f})")

    # Flag the case where calibration does not pay off under CV.
    if cv_gain <= 0:
        warnings.warn(
            f"Isotonic calibration shows no CV improvement "
            f"(gain={cv_gain:+.4f}); calibrated predictions may be worse "
            f"than the raw ones.",
            RuntimeWarning,
            stacklevel=2,
        )

    # Fit on full OOF
    iso_full = IsotonicRegression(out_of_bounds='clip')
    iso_full.fit(oof_preds, y_train)
    preds_cal = iso_full.predict(test_preds)
    return iso_full, preds_cal
|