Upload src/models/meta.py with huggingface_hub
Browse files- src/models/meta.py +49 -0
src/models/meta.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# src/models/meta.py
|
| 2 |
+
import numpy as np
|
| 3 |
+
from sklearn.linear_model import RidgeCV
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def fit_meta(oof_matrix: np.ndarray, y_train: np.ndarray,
             test_matrix: np.ndarray) -> tuple:
    """Fit a RidgeCV stacker on out-of-fold predictions and score the test set.

    The regularisation strength is chosen by 5-fold cross-validation over
    50 log-spaced candidates between 1e-3 and 1e3. A one-line summary of the
    selected alpha and the coefficient range is printed.

    Args:
        oof_matrix: Training features for the meta-learner (presumably one
            column per base model's OOF predictions; confirm with caller).
        y_train: Targets aligned with the rows of ``oof_matrix``.
        test_matrix: Features to predict on, laid out like ``oof_matrix``.

    Returns:
        A ``(meta, preds)`` tuple: the fitted ``RidgeCV`` estimator and its
        predictions for ``test_matrix``.
    """
    alpha_grid = np.logspace(-3, 3, 50)
    meta = RidgeCV(alphas=alpha_grid, cv=5).fit(oof_matrix, y_train)
    preds = meta.predict(test_matrix)

    summary = (
        f" Meta alpha: {meta.alpha_:.4f} "
        f"coef range: [{meta.coef_.min():.3f}, {meta.coef_.max():.3f}]"
    )
    print(summary)

    return meta, preds
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# src/models/calibration.py
|
| 18 |
+
import numpy as np
|
| 19 |
+
from sklearn.isotonic import IsotonicRegression
|
| 20 |
+
from sklearn.model_selection import KFold
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def fit_isotonic(oof_preds: np.ndarray, y_train: np.ndarray,
                 test_preds: np.ndarray) -> tuple:
    """Calibrate meta-predictions with isotonic regression.

    First estimates the benefit of calibration with a shuffled 5-fold split
    (seed 42) of the OOF predictions: in each fold an isotonic model is fit
    on the training part and the RMSE on the held-out part is measured both
    before and after calibration. The mean RMSEs and their difference are
    printed; nothing else is done with the gain here, so any comparison
    against a test-set improvement is left to the caller.

    The returned calibrator is then fit on all of ``oof_preds``/``y_train``.
    ``test_preds`` is only ever passed to ``predict``, never used for
    fitting.

    Args:
        oof_preds: Out-of-fold meta-predictions for the training rows.
        y_train: Targets aligned with ``oof_preds``.
        test_preds: Meta-predictions to calibrate.

    Returns:
        A ``(iso_full, preds_cal)`` tuple: the isotonic model fit on the full
        OOF data and the calibrated values for ``test_preds``.
    """

    def rmse(pred, target):
        # Root-mean-squared error between two aligned arrays.
        return np.sqrt(np.mean((pred - target) ** 2))

    # Cross-validated estimate of how much calibration helps.
    cv_raw, cv_cal = [], []
    splitter = KFold(n_splits=5, shuffle=True, random_state=42)
    for fit_idx, held_idx in splitter.split(oof_preds):
        fold_iso = IsotonicRegression(out_of_bounds='clip')
        fold_iso.fit(oof_preds[fit_idx], y_train[fit_idx])
        held_cal = fold_iso.predict(oof_preds[held_idx])
        held_y = y_train[held_idx]
        cv_raw.append(rmse(oof_preds[held_idx], held_y))
        cv_cal.append(rmse(held_cal, held_y))

    cv_gain = np.mean(cv_raw) - np.mean(cv_cal)
    report = (
        f" Isotonic CV RMSE: {np.mean(cv_raw):.4f} → {np.mean(cv_cal):.4f} "
        f"(gain={cv_gain:+.4f})"
    )
    print(report)

    # Final calibrator uses every OOF row; out-of-range inputs are clipped.
    calibrator = IsotonicRegression(out_of_bounds='clip')
    calibrator.fit(oof_preds, y_train)
    return calibrator, calibrator.predict(test_preds)
|