Update src/streamlit_app.py
Browse files- src/streamlit_app.py +45 -5
src/streamlit_app.py
CHANGED
|
@@ -472,11 +472,51 @@ with tabs[4]:
|
|
| 472 |
features = st.multiselect("Model input features (auto-suggested)", numeric_cols, default=suggested)
|
| 473 |
st.markdown(f"Auto target: `{target}` · Suggested family hint: `{model_hint}`")
|
| 474 |
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 480 |
|
| 481 |
st.markdown("### Ensemble & AutoML Settings")
|
| 482 |
max_trials = st.slider("Optuna trials per family", 5, 80, 20, step=5)
|
|
|
|
| 472 |
features = st.multiselect("Model input features (auto-suggested)", numeric_cols, default=suggested)
|
| 473 |
st.markdown(f"Auto target: `{target}` · Suggested family hint: `{model_hint}`")
|
| 474 |
|
| 475 |
+
# ---------- SAFE target & X preparation ----------
# Builds a sampled feature frame `X` and a 1-D target array `y` from the
# user's feature selection and the auto-detected target. Every unusable
# input is reported with st.error() and the run is halted with st.stop(),
# so nothing below this section executes on bad data.
# Reads (defined earlier in the script): df, features, target, sample_size.
# Writes: target (normalised to one label), cols_needed, sub_df, X, y.

# Ensure target is a single column name. If it's a list/tuple, pick the first
# element and warn; an empty one has no element to fall back on.
if isinstance(target, (list, tuple)):
    if not target:
        st.error("No target column provided. Select a single target column.")
        st.stop()
    st.warning(f"Target provided as list/tuple; using first element `{target[0]}` as target.")
    target = target[0]

if target not in df.columns:
    st.error(f"Target `{target}` not found in dataframe columns.")
    st.stop()

# Select only valid feature columns. The target is excluded on purpose: left
# in, it would leak the label into X and duplicate the column in sub_df,
# which makes sub_df[[target]] two columns wide and trips the multi-output
# check below.
cols_needed = [c for c in features if c in df.columns and c != target]

# Build sub_df safely. DataFrame.sample raises ValueError when n exceeds the
# number of rows (replace=False), so cap the request at len(df).
# NOTE(review): assumes sample_size is a non-negative int -- confirm upstream.
n_rows = min(sample_size, len(df))
sub_df = df[cols_needed + [target]].sample(n=n_rows, random_state=42).reset_index(drop=True)

# Construct y as a 1-D array. More than one column can only come back here
# when df itself carries duplicate labels for the target name.
y = sub_df[[target]].copy()
if y.shape[1] != 1:
    st.error(f"Multi-output target detected (shape {y.shape}). Select a single target column.")
    st.stop()
y = np.ravel(y.iloc[:, 0])

# Construct X, dropping known leak or identifier columns in one call;
# errors="ignore" skips the names that are not present.
leak_cols = ["furnace_temp_next", "pred_temp_30s", "run_timestamp", "timestamp", "batch_id_numeric", "batch_id"]
X = sub_df[cols_needed].drop(columns=leak_cols, errors="ignore")

# Remove constant columns: at most one distinct value, with NaN counted as a
# value because dropna=False.
nunique = X.nunique(dropna=False)
const_cols = nunique[nunique <= 1].index.tolist()
if const_cols:
    X = X.drop(columns=const_cols)

if X.shape[1] == 0:
    st.error("No valid feature columns remain after cleaning. Check feature selection.")
    st.stop()
|
| 519 |
+
|
| 520 |
|
| 521 |
st.markdown("### Ensemble & AutoML Settings")
|
| 522 |
max_trials = st.slider("Optuna trials per family", 5, 80, 20, step=5)
|