Spaces:

singhn9
/

SteelAI_Module2_EAF_Intelligence_Explorer

Sleeping

App Files Files Community

singhn9 committed on Nov 9, 2025

Commit

1e28a2d

verified ·

1 Parent(s): 14df0e9

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +45 -5

src/streamlit_app.py CHANGED Viewed

@@ -472,11 +472,51 @@ with tabs[4]:
     features = st.multiselect("Model input features (auto-suggested)", numeric_cols, default=suggested)
     st.markdown(f"Auto target: `{target}` · Suggested family hint: `{model_hint}`")
-    max_rows = min(df.shape[0], 20000)
-    sample_size = st.slider("Sample rows", 500, max_rows, min(1500, max_rows), step=100)
-    sub_df = df[features + [target]].sample(n=sample_size, random_state=42).reset_index(drop=True)
-    X = sub_df[features].fillna(0)
-    y = sub_df[target].fillna(0)
     st.markdown("### Ensemble & AutoML Settings")
     max_trials = st.slider("Optuna trials per family", 5, 80, 20, step=5)

     features = st.multiselect("Model input features (auto-suggested)", numeric_cols, default=suggested)
     st.markdown(f"Auto target: `{target}` · Suggested family hint: `{model_hint}`")
+    # ---------- SAFE target & X preparation ----------
+    # Builds a cleaned feature frame `X` and a 1-D target array `y` from a random
+    # sample of `df`. Unusable input is reported via st.error and the run is halted.
+    # Ensure target is a single column name (string). If it's a list, pick the first and warn.
+    if isinstance(target, (list, tuple)):
+        if not target:
+            st.error("Target list is empty. Select a single target column.")
+            st.stop()
+        st.warning(f"Target provided as list/tuple; using first element `{target[0]}` as target.")
+        target = target[0]
+    if target not in df.columns:
+        st.error(f"Target `{target}` not found in dataframe columns.")
+        st.stop()
+    # Keep only selected features that exist in df. The target itself is excluded:
+    # leaving it in would duplicate the column in sub_df (tripping the multi-output
+    # check below) and would put the target values into X.
+    cols_needed = [c for c in features if c in df.columns and c != target]
+    # Row-sampling control. This change removed the earlier `max_rows` / `sample_size`
+    # definitions while still sampling with `sample_size`, so they are defined here.
+    max_rows = min(df.shape[0], 20000)
+    if max_rows == 0:
+        st.error("Dataframe is empty; nothing to sample.")
+        st.stop()
+    if max_rows > 500:
+        # Explicit key avoids a widget-ID clash with any other "Sample rows" slider.
+        sample_size = st.slider(
+            "Sample rows", 500, max_rows, min(1500, max_rows), step=100, key="automl_sample_rows"
+        )
+    else:
+        # A slider starting at 500 cannot be built for a smaller frame; use every row.
+        sample_size = max_rows
+    # Build sub_df safely (fixed seed keeps the sample stable across reruns)
+    sub_df = df[cols_needed + [target]].sample(n=sample_size, random_state=42).reset_index(drop=True)
+    # Target: a unique label yields a Series; a DataFrame here means df carries
+    # several columns with the same label as `target`.
+    y = sub_df[target]
+    if isinstance(y, pd.DataFrame):
+        st.error(f"Multi-output target detected (shape {y.shape}). Select a single target column.")
+        st.stop()
+    # Flatten y to a 1-D numpy array
+    y = np.ravel(y)
+    # Drop known leak or identifier columns (drop returns a new frame; sub_df is untouched)
+    leak_cols = ["furnace_temp_next", "pred_temp_30s", "run_timestamp", "timestamp", "batch_id_numeric", "batch_id"]
+    X = sub_df[cols_needed].drop(columns=[lc for lc in leak_cols if lc in cols_needed])
+    # Remove constant columns (at most one distinct value, counting NaN as a value)
+    nunique = X.nunique(dropna=False)
+    const_cols = nunique[nunique <= 1].index.tolist()
+    if const_cols:
+        X = X.drop(columns=const_cols)
+    if X.shape[1] == 0:
+        st.error("No valid feature columns remain after cleaning. Check feature selection.")
+        st.stop()
     st.markdown("### Ensemble & AutoML Settings")
     max_trials = st.slider("Optuna trials per family", 5, 80, 20, step=5)