Update src/streamlit_app.py
Browse files- src/streamlit_app.py +33 -20
src/streamlit_app.py
CHANGED
|
@@ -331,34 +331,46 @@ df, meta_df = load_data()
|
|
| 331 |
|
| 332 |
|
| 333 |
# -------------------------
|
| 334 |
-
# Sidebar filters & UI (
|
| 335 |
# -------------------------
|
| 336 |
st.sidebar.title("Feature Explorer - Advanced + SHAP")
|
| 337 |
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
"
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
|
|
|
|
| 358 |
|
| 359 |
# Build sidebar safely
|
| 360 |
feat_types = sorted(meta_df["source_type"].dropna().unique().tolist())
|
| 361 |
selected_types = st.sidebar.multiselect("Feature type", feat_types, default=feat_types)
|
|
|
|
| 362 |
if "source_type" not in meta_df.columns or meta_df["source_type"].dropna().empty:
|
| 363 |
filtered_meta = meta_df.copy()
|
| 364 |
else:
|
|
@@ -366,6 +378,7 @@ else:
|
|
| 366 |
|
| 367 |
numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
|
| 368 |
|
|
|
|
| 369 |
# -------------------------
|
| 370 |
# Features tab (robust)
|
| 371 |
# -------------------------
|
|
|
|
| 331 |
|
| 332 |
|
| 333 |
# -------------------------
|
| 334 |
+
# Sidebar filters & UI (FINAL ROBUST VERSION)
|
| 335 |
# -------------------------
|
| 336 |
st.sidebar.title("Feature Explorer - Advanced + SHAP")
|
| 337 |
|
| 338 |
+
def ensure_feature_metadata(df: pd.DataFrame, meta_df: pd.DataFrame) -> pd.DataFrame:
    """Return feature-level metadata with one row per column of ``df``.

    The result always contains the columns ``feature_name``, ``source_type``,
    ``formula`` and ``remarks``.

    * If ``meta_df`` is ``None`` or has fewer rows than ``df`` has columns,
      the metadata is rebuilt from ``df.columns``: a column whose name
      contains ``poly``, ``pca``, ``roll`` or ``lag`` is tagged
      ``"engineered"``, everything else ``"measured"``, and a sidebar
      warning is shown.
    * Otherwise a copy of ``meta_df`` is clipped to the first
      ``len(df.columns)`` rows, any missing required column is added
      (filled with ``None``), and ``feature_name`` is filled from
      ``df.columns`` when every value in it is missing.

    Args:
        df: Feature table; only its column labels are read.
        meta_df: Existing metadata, or ``None``. It is never modified.

    Returns:
        A metadata frame with ``len(df.columns)`` rows.
    """
    required_cols = ["feature_name", "source_type", "formula", "remarks"]
    engineered_markers = ("poly", "pca", "roll", "lag")
    n_features = len(df.columns)

    # If metadata is missing or too short, rebuild it entirely.
    if meta_df is None or len(meta_df) < n_features:
        rebuilt = pd.DataFrame({
            "feature_name": df.columns,
            "source_type": [
                # str() so non-string column labels (e.g. ints) do not raise.
                "engineered"
                if any(marker in str(col) for marker in engineered_markers)
                else "measured"
                for col in df.columns
            ],
            "formula": ["" for _ in df.columns],
            "remarks": ["auto-inferred synthetic feature metadata" for _ in df.columns],
        })
        st.sidebar.warning("Metadata was summary-only — rebuilt feature-level metadata.")
        return rebuilt

    # Clip BEFORE filling feature_name: assigning df.columns to a longer frame
    # raises a length-mismatch ValueError. The copy keeps the caller's frame
    # untouched when columns are added below.
    result = meta_df.iloc[:n_features].copy()

    # Ensure required columns exist.
    for col in required_cols:
        if col not in result.columns:
            result[col] = None

    # Fill feature_name if it carries no usable values; lengths now match.
    if result["feature_name"].isna().all():
        result["feature_name"] = df.columns

    return result
|
| 367 |
|
| 368 |
+
meta_df = ensure_feature_metadata(df, meta_df)
|
| 369 |
|
| 370 |
# Build sidebar safely
|
| 371 |
feat_types = sorted(meta_df["source_type"].dropna().unique().tolist())
|
| 372 |
selected_types = st.sidebar.multiselect("Feature type", feat_types, default=feat_types)
|
| 373 |
+
|
| 374 |
if "source_type" not in meta_df.columns or meta_df["source_type"].dropna().empty:
|
| 375 |
filtered_meta = meta_df.copy()
|
| 376 |
else:
|
|
|
|
| 378 |
|
| 379 |
numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
|
| 380 |
|
| 381 |
+
|
| 382 |
# -------------------------
|
| 383 |
# Features tab (robust)
|
| 384 |
# -------------------------
|