Spaces:

singhn9
/

SteelAI_Module2_EAF_Intelligence_Explorer

Sleeping

App Files Files Community

singhn9 committed on Nov 10, 2025

Commit

a2bb6e5

verified ·

1 Parent(s): a838781

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +21 -18

src/streamlit_app.py CHANGED Viewed

@@ -770,29 +770,32 @@ with tabs[4]:
                 if top_base and hasattr(top_base["model"], "predict"):
                     # --- Ensure numeric dtypes for SHAP ---
                     sample_X = X_val.sample(min(300, len(X_val)), random_state=42).copy()
                     for col in sample_X.columns:
-                        if sample_X[col].dtype == object:
-                            # Clean any bracketed, comma, or sci-notation strings
-                            sample_X[col] = (
-                                sample_X[col]
-                                .astype(str)
-                                .str.replace("[", "", regex=False)
-                                .str.replace("]", "", regex=False)
-                                .str.replace(",", "", regex=False)
-                                .str.replace("E", "e", regex=False)
-                                .str.replace("nan", "0", regex=False)
-                                .str.strip()
-                            )
-                        # Force numeric conversion for all columns
-                        sample_X[col] = pd.to_numeric(sample_X[col], errors="coerce")
-                    # Replace NaN with 0 for SHAP stability
-                    sample_X = sample_X.fillna(0)
-                    # Optional: show columns that were coerced
                     non_numeric_cols = [c for c in sample_X.columns if not np.issubdtype(sample_X[c].dtype, np.number)]
                     if non_numeric_cols:
-                        st.warning(f"Non-numeric columns coerced: {non_numeric_cols}")
                     # --- SHAP computation ---

                 if top_base and hasattr(top_base["model"], "predict"):
                     # --- Ensure numeric dtypes for SHAP ---
                     sample_X = X_val.sample(min(300, len(X_val)), random_state=42).copy()
+                    def _clean_to_float(x):
+                        """Safely convert any numeric-looking string (even '[1.55E3]') to float."""
+                        if isinstance(x, (int, float, np.floating)):
+                            return float(x)
+                        try:
+                            x_str = str(x).replace("[", "").replace("]", "").replace(",", "").strip()
+                            # handle common non-numeric tokens
+                            if x_str.lower() in ("nan", "none", "", "null", "na", "n/a"):
+                                return 0.0
+                            return float(x_str.replace("E", "e"))
+                        except Exception:
+                            return 0.0
+                    # Apply cleaning to every column
                     for col in sample_X.columns:
+                        sample_X[col] = sample_X[col].map(_clean_to_float)
+                    # Verify numeric dtype and replace NaN
+                    sample_X = sample_X.apply(pd.to_numeric, errors="coerce").fillna(0)
+                    # Optional diagnostic
                     non_numeric_cols = [c for c in sample_X.columns if not np.issubdtype(sample_X[c].dtype, np.number)]
                     if non_numeric_cols:
+                        st.warning(f"Cleaned {len(non_numeric_cols)} potential non-numeric columns: {non_numeric_cols}")
                     # --- SHAP computation ---