Spaces:

singhn9
/

SteelAI_Module2_EAF_Intelligence_Explorer

Sleeping

App Files Files Community

singhn9 committed on Nov 7, 2025

Commit

a66dff8

verified ·

1 Parent(s): 6ed7e1a

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +35 -8

src/streamlit_app.py CHANGED Viewed

@@ -27,7 +27,8 @@ import shap
 # Config & paths
 # -------------------------
 st.set_page_config(page_title="AI Feature Universe Explorer — Advanced + SHAP", layout="wide")
-DATA_DIR = "/mnt/data"
 CSV_PATH = os.path.join(DATA_DIR, "flatfile_universe_advanced.csv")
 META_PATH = os.path.join(DATA_DIR, "feature_metadata_advanced.json")
 PDF_PATH = os.path.join(DATA_DIR, "annotated_bibliography.pdf")
@@ -249,7 +250,7 @@ def generate_advanced_flatfile(n_rows=3000, random_seed=42, max_polynomial_new=6
 # Ensure dataset exists
 # -------------------------
 if not os.path.exists(CSV_PATH) or not os.path.exists(META_PATH):
-    with st.spinner("Generating advanced feature universe (this may take ~20-60s)..."):
         CSV_PATH, META_PATH, PDF_PATH = generate_advanced_flatfile(n_rows=3000, random_seed=42, max_polynomial_new=80)
         st.success(f"Generated dataset and metadata: {CSV_PATH}")
@@ -268,7 +269,7 @@ df, meta_df = load_data()
 # -------------------------
 # Sidebar filters & UI
 # -------------------------
-st.sidebar.title("🔎 Feature Explorer - Advanced + SHAP")
 feat_types = sorted(meta_df["source_type"].unique().tolist())
 selected_types = st.sidebar.multiselect("Feature type", feat_types, default=feat_types)
 numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
@@ -332,6 +333,31 @@ with tabs[4]:
     features = st.multiselect("Model input features (select many; start with defaults)", numeric_cols, default=default_features)
     sample_size = st.slider("Sample rows to use for training (speed vs fidelity)", min_value=200, max_value=min(4000, df.shape[0]), value=1000, step=100)
     train_button = st.button("Train ensemble & compute SHAP (recommended sample only)")
     if train_button:
         with st.spinner("Preparing data and training ensemble..."):
@@ -457,9 +483,10 @@ with tabs[4]:
                 st.warning(f"Could not plot waterfall: {e}")
-# ----- 📌 Target & Business Impact tab
 with tabs[5]:
-    st.subheader("🎯 Recommended Target Variables by Use Case")
     st.markdown("Each use case maps to a practical target variable that drives measurable business impact.")
     target_table = pd.DataFrame([
@@ -475,7 +502,7 @@ with tabs[5]:
     st.dataframe(target_table,  use_container_width=True)
     st.markdown("---")
-    st.subheader(" Business Framing for Clients")
     st.markdown("These metrics show approximate annual benefits from small process improvements.")
     business_table = pd.DataFrame([
@@ -489,9 +516,9 @@ with tabs[5]:
     st.dataframe(business_table, use_container_width=True)
     st.info("These numbers are indicative averages; actual benefits depend on plant capacity and process efficiency.")
-# ----- 📚 Bibliography tab
 with tabs[6]:
-    st.subheader("📚 Annotated Bibliography & Feature Justification")
     st.markdown("""
 This section summarizes published research supporting the feature design and modeling choices.
     """)

 # Config & paths
 # -------------------------
 st.set_page_config(page_title="AI Feature Universe Explorer — Advanced + SHAP", layout="wide")
+DATA_DIR = os.getenv("DATA_DIR", "./data")
+os.makedirs(DATA_DIR, exist_ok=True)
 CSV_PATH = os.path.join(DATA_DIR, "flatfile_universe_advanced.csv")
 META_PATH = os.path.join(DATA_DIR, "feature_metadata_advanced.json")
 PDF_PATH = os.path.join(DATA_DIR, "annotated_bibliography.pdf")
 # Ensure dataset exists
 # -------------------------
 if not os.path.exists(CSV_PATH) or not os.path.exists(META_PATH):
+    with st.spinner("Generating synthetic features (this may take ~20-60s)..."):
         CSV_PATH, META_PATH, PDF_PATH = generate_advanced_flatfile(n_rows=3000, random_seed=42, max_polynomial_new=80)
         st.success(f"Generated dataset and metadata: {CSV_PATH}")
 # -------------------------
 # Sidebar filters & UI
 # -------------------------
+st.sidebar.title("Feature Explorer - Advanced + SHAP")
 feat_types = sorted(meta_df["source_type"].unique().tolist())
 selected_types = st.sidebar.multiselect("Feature type", feat_types, default=feat_types)
 numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
     features = st.multiselect("Model input features (select many; start with defaults)", numeric_cols, default=default_features)
     sample_size = st.slider("Sample rows to use for training (speed vs fidelity)", min_value=200, max_value=min(4000, df.shape[0]), value=1000, step=100)
     train_button = st.button("Train ensemble & compute SHAP (recommended sample only)")
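+    # Model Remediation & Tuning Options
+    # NOTE(review): furnace_temp_sd / arc_power_sd are collected below but are not among the arguments of the regenerate call to generate_advanced_flatfile() — confirm they are wired in elsewhere.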
+    st.markdown("###  Model Remediation & Tuning Options")
+    st.info("Use these to improve flat or low-variance predictions without editing code.")
+    colA, colB, colC = st.columns(3)
+    with colA:
+        apply_scaling = st.checkbox("Apply StandardScaler()", value=False)
+        feature_filter = st.checkbox("Use key furnace-relevant features", value=True)
+    with colB:
+        random_seed = st.number_input("Random Seed", min_value=0, max_value=9999, value=42)
+        n_estimators = st.slider("n_estimators (trees)", 50, 300, 150, step=25)
+    with colC:
+        furnace_temp_sd = st.slider("Synthetic Furnace Temp σ (spread)", 20, 200, 50, step=10)
+        arc_power_sd = st.slider("Synthetic Arc Power σ (spread)", 50, 300, 120, step=10)
+    st.markdown("---")
+    if st.button("Regenerate Synthetic Dataset with Updated Variance"):
+        with st.spinner("Regenerating synthetic data..."):
+            CSV_PATH, META_PATH, PDF_PATH = generate_advanced_flatfile(
+                n_rows=3000,
+                random_seed=random_seed,
+                max_polynomial_new=60
+            )
+            df, meta_df = load_data()
+            st.success("Synthetic dataset regenerated with new variance settings.")
     if train_button:
         with st.spinner("Preparing data and training ensemble..."):
                 st.warning(f"Could not plot waterfall: {e}")
+# ----- Target & Business Impact tab
 with tabs[5]:
+    st.subheader("Recommended Target Variables by Use Case")
     st.markdown("Each use case maps to a practical target variable that drives measurable business impact.")
     target_table = pd.DataFrame([
     st.dataframe(target_table,  use_container_width=True)
     st.markdown("---")
+    st.subheader("Business Framing for Clients")
     st.markdown("These metrics show approximate annual benefits from small process improvements.")
     business_table = pd.DataFrame([
     st.dataframe(business_table, use_container_width=True)
     st.info("These numbers are indicative averages; actual benefits depend on plant capacity and process efficiency.")
+# ----- Bibliography tab
 with tabs[6]:
+    st.subheader("Annotated Bibliography & Feature Justification")
     st.markdown("""
 This section summarizes published research supporting the feature design and modeling choices.
     """)