Update src/streamlit_app.py
Browse files- src/streamlit_app.py +21 -18
src/streamlit_app.py
CHANGED
|
@@ -770,29 +770,32 @@ with tabs[4]:
|
|
| 770 |
if top_base and hasattr(top_base["model"], "predict"):
|
| 771 |
# --- Ensure numeric dtypes for SHAP ---
|
| 772 |
sample_X = X_val.sample(min(300, len(X_val)), random_state=42).copy()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 773 |
for col in sample_X.columns:
|
| 774 |
-
|
| 775 |
-
# Clean any bracketed, comma, or sci-notation strings
|
| 776 |
-
sample_X[col] = (
|
| 777 |
-
sample_X[col]
|
| 778 |
-
.astype(str)
|
| 779 |
-
.str.replace("[", "", regex=False)
|
| 780 |
-
.str.replace("]", "", regex=False)
|
| 781 |
-
.str.replace(",", "", regex=False)
|
| 782 |
-
.str.replace("E", "e", regex=False)
|
| 783 |
-
.str.replace("nan", "0", regex=False)
|
| 784 |
-
.str.strip()
|
| 785 |
-
)
|
| 786 |
-
# Force numeric conversion for all columns
|
| 787 |
-
sample_X[col] = pd.to_numeric(sample_X[col], errors="coerce")
|
| 788 |
|
| 789 |
-
#
|
| 790 |
-
sample_X = sample_X.fillna(0)
|
| 791 |
|
| 792 |
-
# Optional
|
| 793 |
non_numeric_cols = [c for c in sample_X.columns if not np.issubdtype(sample_X[c].dtype, np.number)]
|
| 794 |
if non_numeric_cols:
|
| 795 |
-
st.warning(f"
|
|
|
|
| 796 |
|
| 797 |
|
| 798 |
# --- SHAP computation ---
|
|
|
|
| 770 |
if top_base and hasattr(top_base["model"], "predict"):
|
| 771 |
# --- Ensure numeric dtypes for SHAP ---
|
| 772 |
sample_X = X_val.sample(min(300, len(X_val)), random_state=42).copy()
|
| 773 |
+
|
| 774 |
+
def _clean_to_float(x):
    """Convert one cell value to a float, falling back to 0.0.

    Numeric inputs are cast directly. Anything else is stringified, stripped
    of "[", "]" and "," characters and surrounding whitespace, and parsed with
    ``float()`` (so values such as ``'[1.55E3]'`` or ``'1,234.5'`` parse).

    Args:
        x: Any scalar-like value (number, string, ``None``, ...).

    Returns:
        The parsed float, or ``0.0`` when the value is a missing-value token
        ("nan", "none", "", "null", "na", "n/a"), cannot be parsed, or is NaN.
    """
    if isinstance(x, (int, float, np.integer, np.floating)):
        # Fast path: already numeric, no string round-trip needed.
        value = float(x)
    else:
        # NOTE: commas are removed outright, i.e. treated as thousands
        # separators; a decimal comma such as "1,5" therefore parses as 15.0.
        text = str(x).replace("[", "").replace("]", "").replace(",", "").strip()
        # Common spellings of "missing" map to 0.0 without attempting a parse.
        if text.lower() in ("nan", "none", "", "null", "na", "n/a"):
            return 0.0
        try:
            # float() accepts both "e" and "E" exponents, so no case fix-up.
            value = float(text)
        except ValueError:
            # Not numeric-looking after cleaning: best-effort fallback.
            return 0.0
    # Treat numeric NaN the same way as the "nan" string token above.
    return 0.0 if np.isnan(value) else value
|
| 786 |
+
|
| 787 |
+
# Apply cleaning to every column
|
| 788 |
for col in sample_X.columns:
|
| 789 |
+
sample_X[col] = sample_X[col].map(_clean_to_float)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 790 |
|
| 791 |
+
# Verify numeric dtype and replace NaN
|
| 792 |
+
sample_X = sample_X.apply(pd.to_numeric, errors="coerce").fillna(0)
|
| 793 |
|
| 794 |
+
# Optional diagnostic
|
| 795 |
non_numeric_cols = [c for c in sample_X.columns if not np.issubdtype(sample_X[c].dtype, np.number)]
|
| 796 |
if non_numeric_cols:
|
| 797 |
+
st.warning(f"Cleaned {len(non_numeric_cols)} potential non-numeric columns: {non_numeric_cols}")
|
| 798 |
+
|
| 799 |
|
| 800 |
|
| 801 |
# --- SHAP computation ---
|