singhn9 committed on
Commit
1e28a2d
verified
1 Parent(s): 14df0e9

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +45 -5
src/streamlit_app.py CHANGED
@@ -472,11 +472,51 @@ with tabs[4]:
472
  features = st.multiselect("Model input features (auto-suggested)", numeric_cols, default=suggested)
473
  st.markdown(f"Auto target: `{target}` · Suggested family hint: `{model_hint}`")
474
 
475
- max_rows = min(df.shape[0], 20000)
476
- sample_size = st.slider("Sample rows", 500, max_rows, min(1500, max_rows), step=100)
477
- sub_df = df[features + [target]].sample(n=sample_size, random_state=42).reset_index(drop=True)
478
- X = sub_df[features].fillna(0)
479
- y = sub_df[target].fillna(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
480
 
481
  st.markdown("### Ensemble & AutoML Settings")
482
  max_trials = st.slider("Optuna trials per family", 5, 80, 20, step=5)
 
472
  features = st.multiselect("Model input features (auto-suggested)", numeric_cols, default=suggested)
473
  st.markdown(f"Auto target: `{target}` · Suggested family hint: `{model_hint}`")
474
 
475
+ # ---------- SAFE target & X preparation ----------
476
+ # Ensure target is a single column name (string). If it's a list, pick the first and warn.
477
+ if isinstance(target, (list, tuple)):
478
+ st.warning(f"Target provided as list/tuple; using first element `{target[0]}` as target.")
479
+ target = target[0]
480
+
481
+ # Select only valid feature columns
482
+ cols_needed = [c for c in features if c in df.columns]
483
+ if target not in df.columns:
484
+ st.error(f"Target `{target}` not found in dataframe columns.")
485
+ st.stop()
486
+
487
+ # Build sub_df safely
488
+ sub_df = df[cols_needed + [target]].sample(n=sample_size, random_state=42).reset_index(drop=True)
489
+
490
+ # Construct X and y
491
+ X = sub_df[cols_needed].copy()
492
+ y = sub_df[[target]].copy()
493
+
494
+ # Convert y to 1-D Series
495
+ if isinstance(y, pd.DataFrame):
496
+ if y.shape[1] == 1:
497
+ y = y.iloc[:, 0]
498
+ else:
499
+ st.error(f"Multi-output target detected (shape {y.shape}). Select a single target column.")
500
+ st.stop()
501
+
502
+ y = np.ravel(y)
503
+
504
+ # Drop known leak or identifier columns
505
+ leak_cols = ["furnace_temp_next", "pred_temp_30s", "run_timestamp", "timestamp", "batch_id_numeric", "batch_id"]
506
+ for lc in leak_cols:
507
+ if lc in X.columns:
508
+ X.drop(columns=[lc], inplace=True)
509
+
510
+ # Remove constant or near-constant columns
511
+ nunique = X.nunique(dropna=False)
512
+ const_cols = nunique[nunique <= 1].index.tolist()
513
+ if const_cols:
514
+ X.drop(columns=const_cols, inplace=True)
515
+
516
+ if X.shape[1] == 0:
517
+ st.error("No valid feature columns remain after cleaning. Check feature selection.")
518
+ st.stop()
519
+
520
 
521
  st.markdown("### Ensemble & AutoML Settings")
522
  max_trials = st.slider("Optuna trials per family", 5, 80, 20, step=5)