singhn9 committed on
Commit
a2bb6e5
·
verified ·
1 Parent(s): a838781

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +21 -18
src/streamlit_app.py CHANGED
@@ -770,29 +770,32 @@ with tabs[4]:
770
  if top_base and hasattr(top_base["model"], "predict"):
771
  # --- Ensure numeric dtypes for SHAP ---
772
  sample_X = X_val.sample(min(300, len(X_val)), random_state=42).copy()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
773
  for col in sample_X.columns:
774
- if sample_X[col].dtype == object:
775
- # Clean any bracketed, comma, or sci-notation strings
776
- sample_X[col] = (
777
- sample_X[col]
778
- .astype(str)
779
- .str.replace("[", "", regex=False)
780
- .str.replace("]", "", regex=False)
781
- .str.replace(",", "", regex=False)
782
- .str.replace("E", "e", regex=False)
783
- .str.replace("nan", "0", regex=False)
784
- .str.strip()
785
- )
786
- # Force numeric conversion for all columns
787
- sample_X[col] = pd.to_numeric(sample_X[col], errors="coerce")
788
 
789
- # Replace NaN with 0 for SHAP stability
790
- sample_X = sample_X.fillna(0)
791
 
792
- # Optional: show columns that were coerced
793
  non_numeric_cols = [c for c in sample_X.columns if not np.issubdtype(sample_X[c].dtype, np.number)]
794
  if non_numeric_cols:
795
- st.warning(f"Non-numeric columns coerced: {non_numeric_cols}")
 
796
 
797
 
798
  # --- SHAP computation ---
 
770
  if top_base and hasattr(top_base["model"], "predict"):
771
  # --- Ensure numeric dtypes for SHAP ---
772
  sample_X = X_val.sample(min(300, len(X_val)), random_state=42).copy()
773
+
774
+ def _clean_to_float(x):
775
+ """Safely convert any numeric-looking string (even '[1.55E3]') to float."""
776
+ if isinstance(x, (int, float, np.floating)):
777
+ return float(x)
778
+ try:
779
+ x_str = str(x).replace("[", "").replace("]", "").replace(",", "").strip()
780
+ # handle common non-numeric tokens
781
+ if x_str.lower() in ("nan", "none", "", "null", "na", "n/a"):
782
+ return 0.0
783
+ return float(x_str.replace("E", "e"))
784
+ except Exception:
785
+ return 0.0
786
+
787
+ # Apply cleaning to every column
788
  for col in sample_X.columns:
789
+ sample_X[col] = sample_X[col].map(_clean_to_float)
 
 
 
 
 
 
 
 
 
 
 
 
 
790
 
791
+ # Verify numeric dtype and replace NaN
792
+ sample_X = sample_X.apply(pd.to_numeric, errors="coerce").fillna(0)
793
 
794
+ # Optional diagnostic
795
  non_numeric_cols = [c for c in sample_X.columns if not np.issubdtype(sample_X[c].dtype, np.number)]
796
  if non_numeric_cols:
797
+ st.warning(f"Cleaned {len(non_numeric_cols)} potential non-numeric columns: {non_numeric_cols}")
798
+
799
 
800
 
801
  # --- SHAP computation ---