Update src/streamlit_app.py
Browse files- src/streamlit_app.py +47 -7
src/streamlit_app.py
CHANGED
|
@@ -654,19 +654,59 @@ with tabs[4]:
|
|
| 654 |
# evaluate stacked ensemble on a holdout split
|
| 655 |
X_tr, X_val, y_tr, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
|
| 656 |
# predict with base models -> create meta inputs
|
|
|
|
|
|
|
|
|
|
| 657 |
meta_inputs = []
|
|
|
|
| 658 |
for fam in selected:
|
| 659 |
-
bm =
|
| 660 |
-
if bm is
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 666 |
meta_inputs.append(np.full(len(X_val), y_tr.mean()))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 667 |
X_meta_val = np.column_stack(meta_inputs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 668 |
y_meta_pred = meta.predict(X_meta_val)
|
| 669 |
|
|
|
|
| 670 |
final_r2 = r2_score(y_val, y_meta_pred)
|
| 671 |
final_rmse = mean_squared_error(y_val, y_meta_pred, squared=False)
|
| 672 |
|
|
|
|
| 654 |
# Evaluate the stacked ensemble on a holdout split.
# NOTE(review): the base models and `meta` are fitted outside this fragment. If they
# were trained on all of X, this split overlaps their training data and the scores
# below are optimistic — confirm against the training code.
X_tr, X_val, y_tr, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Map each model family to its fitted base model.
base_model_map = {bm["family"]: bm["model"] for bm in base_models}

n_val = len(X_val)
# Constant prediction used whenever a base model is missing or unusable.
fallback_value = y_tr.mean()

# Build one meta-feature column per selected family, in `selected` order.
meta_inputs = []
missing_families = []
failed_families = []
for fam in selected:
    bm = base_model_map.get(fam)
    if bm is None:
        # No fitted model for this family: use the training mean as a safe fallback.
        missing_families.append(fam)
        meta_inputs.append(np.full(n_val, fallback_value))
        continue

    try:
        # Flatten so (n, 1) outputs become 1-D columns.
        preds = np.asarray(bm.predict(X_val)).reshape(-1)
    except Exception:
        # Best-effort evaluation: keep going, but report the failure below
        # instead of hiding it.
        failed_families.append(fam)
        preds = np.full(n_val, fallback_value)
    else:
        if len(preds) != n_val:
            # Wrong number of predictions (e.g. multi-output model): not usable as a column.
            failed_families.append(fam)
            preds = np.full(n_val, fallback_value)
    meta_inputs.append(preds)

if missing_families:
    st.warning(f"Warning: missing base models for families: {missing_families}. Filled with mean predictions.")
if failed_families:
    st.warning(f"Base model prediction failed for families: {failed_families}. Filled with mean predictions.")

# Stack into shape (n_samples, n_models_selected).
X_meta_val = np.column_stack(meta_inputs)

# Defensive check: the meta-model needs as many columns as it was trained on.
# NOTE(review): padding/truncating only fixes the column COUNT. It does not guarantee
# that column i here corresponds to column i of X_stack — verify the ordering of
# `selected` against how X_stack was built.
n_meta_features_trained = X_stack.shape[1]
n_meta_features_val = X_meta_val.shape[1]
if n_meta_features_val != n_meta_features_trained:
    st.warning(f"Meta feature mismatch: trained on {n_meta_features_trained} cols, validating with {n_meta_features_val} cols. Aligning by padding/truncating.")
    if n_meta_features_val < n_meta_features_trained:
        # Too few columns: pad on the right with constant mean columns.
        pad_cols = n_meta_features_trained - n_meta_features_val
        pad = np.full((n_val, pad_cols), fallback_value)
        X_meta_val = np.hstack([X_meta_val, pad])
    else:
        # Too many columns: keep the leftmost ones (selected order).
        X_meta_val = X_meta_val[:, :n_meta_features_trained]

# Predict with the meta-model and score against the holdout targets.
y_meta_pred = meta.predict(X_meta_val)

final_r2 = r2_score(y_val, y_meta_pred)
# `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6; taking the
# square root of the MSE gives the same RMSE for a single target on every version.
final_rmse = float(np.sqrt(mean_squared_error(y_val, y_meta_pred)))
|
| 712 |
|