Spaces:

singhn9
/

SteelAI_Module2_EAF_Intelligence_Explorer

Sleeping

App Files Community

singhn9 committed on Nov 9, 2025

Commit

85836c8

verified ·

1 Parent(s): d49d8b0

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +53 -27

src/streamlit_app.py CHANGED Viewed

@@ -320,6 +320,8 @@ def load_data(csv_path=CSV_PATH, meta_path=META_PATH):
     return df_local, pd.DataFrame(meta_local)
 df, meta_df = load_data()
 # -------------------------
 # Sidebar filters & UI
 # -------------------------
@@ -483,37 +485,61 @@ with tabs[4]:
     # Select only valid feature columns
     cols_needed = [c for c in features if c in df.columns]
-    # --- Build sub_df safely (force exact column match) ---
-    if target not in df.columns:
-        # try case-insensitive or partial fallback once
-        matches = [c for c in df.columns if target.lower() in c.lower()]
-        if len(matches) == 1:
-            target = matches[0]
-            st.info(f"Auto-corrected target to exact match: `{target}`")
-        elif len(matches) > 1:
-            st.warning(f"Multiple columns match '{target}': {matches}. Using first: {matches[0]}")
-            target = matches[0]
         else:
-            st.error(f"Target `{target}` not found in dataframe columns.")
-            st.stop()
-    # Now build sub_df strictly
-    sub_df = df.loc[:, cols_needed + [target]].sample(n=sample_size, random_state=42).reset_index(drop=True)
     # Construct X and y
-    X = sub_df[cols_needed].copy()
-    y = sub_df[[target]].copy()
-    # Convert y to 1-D Series
-    if isinstance(y, pd.DataFrame):
-        if y.shape[1] == 1:
-            y = y.iloc[:, 0]
-        else:
-            st.error(f"Multi-output target detected (shape {y.shape}). Select a single target column.")
-            st.stop()
-    y = pd.Series(np.ravel(y), name=target)
     # Drop known leak or identifier columns

     return df_local, pd.DataFrame(meta_local)
 df, meta_df = load_data()
+df = df.loc[:, ~df.columns.duplicated()]
 # -------------------------
 # Sidebar filters & UI
 # -------------------------
     # Select only valid feature columns
     cols_needed = [c for c in features if c in df.columns]
+    # Match exact name first
+    if isinstance(target, (list, tuple)):
+        st.warning(f"Target provided as list/tuple; using first element `{target[0]}` as target.")
+        target = target[0]
+    # Select only valid feature columns
+    cols_needed = [c for c in features if c in df.columns]
+    # --- Force single exact target column ---
+    if target in df.columns:
+        target_col = target
+    else:
+        # Case-insensitive exact match
+        matches = [c for c in df.columns if c.lower() == target.lower()]
+        if matches:
+            target_col = matches[0]
+            st.info(f"Auto-corrected to exact match: `{target_col}`")
         else:
+            # Partial substring match (e.g., 'furnace_temp' vs 'furnace_temp_next')
+            matches = [c for c in df.columns if target.lower() in c.lower()]
+            if len(matches) == 1:
+                target_col = matches[0]
+                st.info(f"Auto-corrected to closest match: `{target_col}`")
+            elif len(matches) > 1:
+                # Prefer '_temp', '_ratio', or exact substring equality
+                preferred = [m for m in matches if m.endswith("_temp") or m.endswith("_ratio") or m == target]
+                if preferred:
+                    target_col = preferred[0]
+                    st.warning(f"Multiple matches found {matches}. Using `{target_col}`.")
+                else:
+                    target_col = matches[0]
+                    st.warning(f"Multiple matches found {matches}. Using first: `{target_col}`.")
+            else:
+                st.error(f"Target `{target}` not found in dataframe columns.")
+                st.stop()
+    # --- Build sub_df safely — ensure unique and valid target ---
+    valid_features = [c for c in cols_needed if c in df.columns and c != target_col]
+    if not valid_features:
+        st.error("No valid feature columns remain after cleaning. Check feature selection.")
+        st.stop()
+    sub_df = df.loc[:, valid_features + [target_col]].copy()
+    sub_df = sub_df.sample(n=sample_size, random_state=42).reset_index(drop=True)
+    # --- Construct clean X and y ---
+    X = sub_df.drop(columns=[target_col])
+    y = pd.Series(np.ravel(sub_df[target_col]), name=target_col)
     # Construct X and y
+    X = sub_df.drop(columns=[target_col])
+    y = sub_df[target_col]
+    y = pd.Series(np.ravel(y), name=target_col)
     # Drop known leak or identifier columns