VeloBind / src /models /meta.py
ym59's picture
Upload src/models/meta.py with huggingface_hub
02dbc0d verified
# src/models/meta.py
import numpy as np
from sklearn.linear_model import RidgeCV
def fit_meta(oof_matrix: np.ndarray, y_train: np.ndarray,
test_matrix: np.ndarray) -> tuple:
"""RidgeCV meta-learner on OOF predictions."""
meta = RidgeCV(alphas=np.logspace(-3, 3, 50), cv=5)
meta.fit(oof_matrix, y_train)
preds = meta.predict(test_matrix)
print(f" Meta alpha: {meta.alpha_:.4f} "
f"coef range: [{meta.coef_.min():.3f}, {meta.coef_.max():.3f}]")
return meta, preds
# src/models/calibration.py
import numpy as np
from sklearn.isotonic import IsotonicRegression
from sklearn.model_selection import KFold
def fit_isotonic(oof_preds: np.ndarray, y_train: np.ndarray,
test_preds: np.ndarray) -> tuple:
"""
Fits isotonic regression on OOF meta-predictions.
OOF predictions are unbiased — no test leakage.
Includes CV check: if test improves >> CV estimate, flag it.
"""
# CV estimate of benefit
kf = KFold(n_splits=5, shuffle=True, random_state=42)
cv_raw, cv_cal = [], []
for tri, vali in kf.split(oof_preds):
iso = IsotonicRegression(out_of_bounds='clip')
iso.fit(oof_preds[tri], y_train[tri])
p = iso.predict(oof_preds[vali])
cv_raw.append(np.sqrt(np.mean((oof_preds[vali] - y_train[vali])**2)))
cv_cal.append(np.sqrt(np.mean((p - y_train[vali])**2)))
cv_gain = np.mean(cv_raw) - np.mean(cv_cal)
print(f" Isotonic CV RMSE: {np.mean(cv_raw):.4f}{np.mean(cv_cal):.4f} "
f"(gain={cv_gain:+.4f})")
# Fit on full OOF
iso_full = IsotonicRegression(out_of_bounds='clip')
iso_full.fit(oof_preds, y_train)
preds_cal = iso_full.predict(test_preds)
return iso_full, preds_cal