singhn9 committed on
Commit
c775649
·
verified ·
1 Parent(s): 75c10e3

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +33 -20
src/streamlit_app.py CHANGED
@@ -331,34 +331,46 @@ df, meta_df = load_data()
331
 
332
 
333
  # -------------------------
334
- # Sidebar filters & UI (fault-tolerant for minimal metadata)
335
  # -------------------------
336
  st.sidebar.title("Feature Explorer - Advanced + SHAP")
337
 
338
- # Ensure meta_df always has at least placeholder columns
339
- required_cols = ["feature_name", "source_type", "formula", "remarks"]
340
- for col in required_cols:
341
- if col not in meta_df.columns:
342
- meta_df[col] = None
343
-
344
-
345
- # Populate placeholders if metadata is summary-like (not feature-level)
346
- if len(meta_df) < len(df.columns) or "feature_name" not in meta_df.columns:
347
- st.warning("Metadata appears to be summary-only. Rebuilding feature-level metadata.")
348
- meta_df = pd.DataFrame({
349
- "feature_name": df.columns,
350
- "source_type": [
351
- "engineered" if any(x in c for x in ["poly", "pca", "roll", "lag"]) else "measured"
352
- for c in df.columns
353
- ],
354
- "formula": ["" for _ in df.columns],
355
- "remarks": ["auto-inferred synthetic feature metadata" for _ in df.columns]
356
- })
 
 
 
 
 
 
 
 
 
 
357
 
 
358
 
359
  # Build sidebar safely
360
  feat_types = sorted(meta_df["source_type"].dropna().unique().tolist())
361
  selected_types = st.sidebar.multiselect("Feature type", feat_types, default=feat_types)
 
362
  if "source_type" not in meta_df.columns or meta_df["source_type"].dropna().empty:
363
  filtered_meta = meta_df.copy()
364
  else:
@@ -366,6 +378,7 @@ else:
366
 
367
  numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
368
 
 
369
  # -------------------------
370
  # Features tab (robust)
371
  # -------------------------
 
331
 
332
 
333
  # -------------------------
334
+ # Sidebar filters & UI (FINAL ROBUST VERSION)
335
  # -------------------------
336
  st.sidebar.title("Feature Explorer - Advanced + SHAP")
337
 
338
def ensure_feature_metadata(df: pd.DataFrame, meta_df: pd.DataFrame) -> pd.DataFrame:
    """Return feature-level metadata aligned with the columns of ``df``.

    If ``meta_df`` is ``None`` or has fewer rows than ``df`` has columns, the
    metadata is rebuilt from ``df.columns`` and a warning is shown in the
    Streamlit sidebar. Otherwise a copy of ``meta_df`` is clipped to the
    number of features, any missing required column is added (filled with
    ``None``), and ``feature_name`` is filled from ``df.columns`` when it
    holds no values at all.

    Args:
        df: Feature dataframe; one metadata row is expected per column.
        meta_df: Existing metadata, possibly ``None``, too short, or missing
            some of the required columns.

    Returns:
        A dataframe with at least the columns ``feature_name``,
        ``source_type``, ``formula`` and ``remarks`` and at most
        ``len(df.columns)`` rows. The caller's ``meta_df`` is never modified.
    """
    required_cols = ["feature_name", "source_type", "formula", "remarks"]
    # Substrings that mark a column name as an engineered feature.
    engineered_markers = ("poly", "pca", "roll", "lag")
    n_features = len(df.columns)

    # If metadata is missing or too short, rebuild it entirely.
    if meta_df is None or len(meta_df) < n_features:
        rebuilt = pd.DataFrame({
            "feature_name": df.columns,
            "source_type": [
                # str() keeps non-string column labels (e.g. ints) from raising.
                "engineered"
                if any(marker in str(col) for marker in engineered_markers)
                else "measured"
                for col in df.columns
            ],
            "formula": [""] * n_features,
            "remarks": ["auto-inferred synthetic feature metadata"] * n_features,
        })
        st.sidebar.warning("Metadata was summary-only — rebuilt feature-level metadata.")
        return rebuilt

    # Clip first so the feature_name fill below always matches the row count,
    # and copy so the caller's dataframe (possibly a cached object) is untouched.
    aligned = meta_df.iloc[:n_features].copy()

    # Ensure required columns exist.
    for col in required_cols:
        if col not in aligned.columns:
            aligned[col] = None

    # Fill feature_name only when it carries no information at all.
    if aligned["feature_name"].isna().all():
        aligned["feature_name"] = df.columns

    return aligned
367
 
368
+ meta_df = ensure_feature_metadata(df, meta_df)
369
 
370
  # Build sidebar safely
371
  feat_types = sorted(meta_df["source_type"].dropna().unique().tolist())
372
  selected_types = st.sidebar.multiselect("Feature type", feat_types, default=feat_types)
373
+
374
  if "source_type" not in meta_df.columns or meta_df["source_type"].dropna().empty:
375
  filtered_meta = meta_df.copy()
376
  else:
 
378
 
379
  numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
380
 
381
+
382
  # -------------------------
383
  # Features tab (robust)
384
  # -------------------------