Update src/streamlit_app.py
Browse files- src/streamlit_app.py +7 -2
src/streamlit_app.py
CHANGED
|
@@ -471,6 +471,9 @@ with tabs[4]:
|
|
| 471 |
|
| 472 |
features = st.multiselect("Model input features (auto-suggested)", numeric_cols, default=suggested)
|
| 473 |
st.markdown(f"Auto target: `{target}` · Suggested family hint: `{model_hint}`")
|
|
|
|
|
|
|
|
|
|
| 474 |
|
| 475 |
# ---------- SAFE target & X preparation ----------
|
| 476 |
# Ensure target is a single column name (string). If it's a list, pick the first and warn.
|
|
@@ -499,7 +502,8 @@ with tabs[4]:
|
|
| 499 |
st.error(f"Multi-output target detected (shape {y.shape}). Select a single target column.")
|
| 500 |
st.stop()
|
| 501 |
|
| 502 |
-
y = np.ravel(y)
|
|
|
|
| 503 |
|
| 504 |
# Drop known leak or identifier columns
|
| 505 |
leak_cols = ["furnace_temp_next", "pred_temp_30s", "run_timestamp", "timestamp", "batch_id_numeric", "batch_id"]
|
|
@@ -642,7 +646,8 @@ with tabs[4]:
|
|
| 642 |
oof = np.zeros(X_sel.shape[0])
|
| 643 |
for tr_idx, val_idx in kf.split(X_sel):
|
| 644 |
X_tr, X_val = X_sel.iloc[tr_idx], X_sel.iloc[val_idx]
|
| 645 |
-
y_tr = y.iloc[tr_idx]
|
|
|
|
| 646 |
try:
|
| 647 |
model_obj.fit(X_tr, y_tr)
|
| 648 |
preds = model_obj.predict(X_val)
|
|
|
|
| 471 |
|
| 472 |
features = st.multiselect("Model input features (auto-suggested)", numeric_cols, default=suggested)
|
| 473 |
st.markdown(f"Auto target: `{target}` · Suggested family hint: `{model_hint}`")
|
| 474 |
+
# --- Sampling configuration ---
|
| 475 |
+
max_rows = min(df.shape[0], 20000)
|
| 476 |
+
sample_size = st.slider("Sample rows", 500, max_rows, min(1500, max_rows), step=100)
|
| 477 |
|
| 478 |
# ---------- SAFE target & X preparation ----------
|
| 479 |
# Ensure target is a single column name (string). If it's a list, pick the first and warn.
|
|
|
|
| 502 |
st.error(f"Multi-output target detected (shape {y.shape}). Select a single target column.")
|
| 503 |
st.stop()
|
| 504 |
|
| 505 |
+
y = pd.Series(np.ravel(y), name=target)
|
| 506 |
+
|
| 507 |
|
| 508 |
# Drop known leak or identifier columns
|
| 509 |
leak_cols = ["furnace_temp_next", "pred_temp_30s", "run_timestamp", "timestamp", "batch_id_numeric", "batch_id"]
|
|
|
|
| 646 |
oof = np.zeros(X_sel.shape[0])
|
| 647 |
for tr_idx, val_idx in kf.split(X_sel):
|
| 648 |
X_tr, X_val = X_sel.iloc[tr_idx], X_sel.iloc[val_idx]
|
| 649 |
+
y_tr = y[tr_idx] if not hasattr(y, "iloc") else y.iloc[tr_idx]
|
| 650 |
+
|
| 651 |
try:
|
| 652 |
model_obj.fit(X_tr, y_tr)
|
| 653 |
preds = model_obj.predict(X_val)
|