Commit: "Update src/streamlit_app.py" — changed file: src/streamlit_app.py (+122 −7)
|
@@ -31,6 +31,9 @@ import shap
|
|
| 31 |
# -------------------------
|
| 32 |
|
| 33 |
st.set_page_config(page_title="Steel Authority of India Limited (MODEX)", layout="wide")
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
LOG_DIR = "./logs"
|
| 36 |
os.makedirs(LOG_DIR, exist_ok=True)
|
|
@@ -414,15 +417,37 @@ with tabs[0]:
|
|
| 414 |
|
| 415 |
# ----- Visualize tab
|
| 416 |
with tabs[1]:
|
| 417 |
-
st.subheader("Feature
|
| 418 |
col = st.selectbox("Choose numeric feature", numeric_cols, index=0)
|
| 419 |
bins = st.slider("Histogram bins", 10, 200, 50)
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
ax.
|
| 423 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 424 |
st.write(df[col].describe().to_frame().T)
|
| 425 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 426 |
# ----- Correlations tab
|
| 427 |
with tabs[2]:
|
| 428 |
st.subheader("Correlation explorer")
|
|
@@ -431,8 +456,14 @@ with tabs[2]:
|
|
| 431 |
if len(corr_sel) >= 2:
|
| 432 |
corr = df[corr_sel].corr()
|
| 433 |
fig, ax = plt.subplots(figsize=(10,8))
|
| 434 |
-
sns.heatmap(
|
| 435 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 436 |
else:
|
| 437 |
st.info("Choose at least 2 numeric features to compute correlation.")
|
| 438 |
|
|
@@ -659,6 +690,21 @@ with tabs[4]:
|
|
| 659 |
lb = lb.sort_values("cv_r2", ascending=False).reset_index(drop=True)
|
| 660 |
st.markdown("### Tuning Leaderboard (by CV R²)")
|
| 661 |
st.dataframe(lb[["family","cv_r2"]].round(4))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 662 |
|
| 663 |
# --- Build base-models and collect out-of-fold preds for stacking ---
|
| 664 |
st.markdown("### Building base models & out-of-fold predictions for stacking")
|
|
@@ -849,6 +895,75 @@ with tabs[4]:
|
|
| 849 |
|
| 850 |
st.success(" AutoML + Stacking complete — metrics, artifacts, and SHAP ready.")
|
| 851 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 852 |
|
| 853 |
|
| 854 |
# ----- Target & Business Impact tab
|
|
|
|
# -------------------------
# App-wide setup: page config, global plot styling, and log directory.
# NOTE(review): `st`, `plt`, `sns`, and `os` are imported earlier in the
# file (outside this chunk) — confirm against the full import block.
# -------------------------
# Streamlit page settings must be the first st.* call in the script.
st.set_page_config(page_title="Steel Authority of India Limited (MODEX)", layout="wide")
# Consistent look for every matplotlib/seaborn figure in the app.
plt.style.use("seaborn-v0_8-muted")
sns.set_palette("muted")
sns.set_style("whitegrid")

# All artifacts (logs, cached models) live under ./logs; created idempotently.
LOG_DIR = "./logs"
os.makedirs(LOG_DIR, exist_ok=True)
|
|
|
# ----- Visualize tab
with tabs[1]:
    st.subheader("Feature Visualization")
    col = st.selectbox("Choose numeric feature", numeric_cols, index=0)
    bins = st.slider("Histogram bins", 10, 200, 50)

    # Human-friendly label ("flow_rate" -> "Flow Rate"), computed once.
    pretty_label = col.replace("_", " ").title()

    # Histogram of the chosen feature with a KDE overlay.
    hist_fig, hist_ax = plt.subplots(figsize=(8, 4))
    sns.histplot(df[col], bins=bins, kde=True, ax=hist_ax, color="#2C6E91", alpha=0.8)
    hist_ax.set_title(f"Distribution of {pretty_label}", fontsize=12)
    hist_ax.set_xlabel(pretty_label, fontsize=10)
    hist_ax.set_ylabel("Frequency", fontsize=10)
    sns.despine()
    st.pyplot(hist_fig, clear_figure=True)
    # One-row summary table (count, mean, std, quartiles, ...).
    st.write(df[col].describe().to_frame().T)

    # PCA scatter, shown only when the projection columns exist in df.
    if {"pca_1", "pca_2", "operating_mode"}.issubset(df.columns):
        st.markdown("### PCA Feature Space — Colored by Operating Mode")
        # Cap at 1000 points (fixed seed) so the scatter stays responsive.
        scatter_data = df.sample(min(1000, len(df)), random_state=42)
        pca_fig, pca_ax = plt.subplots(figsize=(6, 5))
        sns.scatterplot(
            data=scatter_data,
            x="pca_1", y="pca_2", hue="operating_mode",
            palette="tab10", alpha=0.7, s=40, ax=pca_ax
        )
        pca_ax.set_title("Operating Mode Clusters (PCA Projection)", fontsize=12)
        pca_ax.set_xlabel("PCA 1")
        pca_ax.set_ylabel("PCA 2")
        # Legend outside the axes so it never covers points.
        pca_ax.legend(title="Operating Mode", bbox_to_anchor=(1.05, 1), loc="upper left")
        sns.despine()
        st.pyplot(pca_fig, clear_figure=True)

# ----- Correlations tab
|
| 452 |
with tabs[2]:
|
| 453 |
st.subheader("Correlation explorer")
|
|
|
|
| 456 |
if len(corr_sel) >= 2:
|
| 457 |
corr = df[corr_sel].corr()
|
| 458 |
fig, ax = plt.subplots(figsize=(10,8))
|
| 459 |
+
sns.heatmap(
|
| 460 |
+
corr, cmap="RdBu_r", center=0, annot=True, fmt=".2f",
|
| 461 |
+
linewidths=0.5, cbar_kws={"shrink": 0.7}, ax=ax
|
| 462 |
+
)
|
| 463 |
+
ax.set_title("Feature Correlation Matrix", fontsize=12)
|
| 464 |
+
sns.despine()
|
| 465 |
+
st.pyplot(fig, clear_figure=True)
|
| 466 |
+
|
| 467 |
else:
|
| 468 |
st.info("Choose at least 2 numeric features to compute correlation.")
|
| 469 |
|
|
|
|
| 690 |
lb = lb.sort_values("cv_r2", ascending=False).reset_index(drop=True)
|
| 691 |
st.markdown("### Tuning Leaderboard (by CV R²)")
|
| 692 |
st.dataframe(lb[["family","cv_r2"]].round(4))
|
| 693 |
+
# --- Bonus Visualization: Model Performance Summary ---
|
| 694 |
+
if not lb.empty:
|
| 695 |
+
st.markdown("#### Model Performance Summary (CV R²)")
|
| 696 |
+
fig_perf, ax_perf = plt.subplots(figsize=(7, 4))
|
| 697 |
+
colors = ["#2C6E91" if fam != lb.iloc[0]["family"] else "#C65F00" for fam in lb["family"]]
|
| 698 |
+
ax_perf.barh(lb["family"], lb["cv_r2"], color=colors, alpha=0.85)
|
| 699 |
+
ax_perf.set_xlabel("Cross-Validated R² Score", fontsize=10)
|
| 700 |
+
ax_perf.set_ylabel("Model Family", fontsize=10)
|
| 701 |
+
ax_perf.set_title("Performance Comparison Across Model Families", fontsize=12)
|
| 702 |
+
ax_perf.invert_yaxis()
|
| 703 |
+
for i, v in enumerate(lb["cv_r2"]):
|
| 704 |
+
ax_perf.text(v + 0.005, i, f"{v:.3f}", va="center", fontsize=9)
|
| 705 |
+
sns.despine()
|
| 706 |
+
st.pyplot(fig_perf, clear_figure=True)
|
| 707 |
+
|
| 708 |
|
| 709 |
# --- Build base-models and collect out-of-fold preds for stacking ---
|
| 710 |
st.markdown("### Building base models & out-of-fold predictions for stacking")
|
|
|
|
    st.success(" AutoML + Stacking complete — metrics, artifacts, and SHAP ready.")

    # Persist a compact AutoML summary in session state so the optional LLM
    # advisory below (and any later rerun) can read it without recomputing.
    # NOTE(review): `lb`, `final_r2`, `final_rmse`, `target`, and `use_case`
    # are defined earlier in this tab, outside this chunk — confirm they are
    # always bound before this point.
    st.session_state["automl_summary"] = {
        "leaderboard": lb[["family", "cv_r2"]].round(4).to_dict(orient="records"),
        "final_r2": float(final_r2),
        "final_rmse": float(final_rmse),
        "target": target,
        "use_case": use_case
    }

    # Optional offline "AI recommendation" feature: a tiny causal LM cached
    # under ./logs so repeat runs need no network access.
    st.markdown("---")
    st.subheader("AI Recommendation Assistant (cached local model)")
    st.caption("Get quick local AI suggestions without internet — cached inside ./logs")

    if st.button("Get AI Recommendation (tiny local LLM)", key="ai_reco"):
        # Falls back to an empty dict if AutoML has not been run this session.
        summary = st.session_state.get("automl_summary", {})
        st.info("Loading local model... first time may take ~10s.")
        try:
            import importlib.util, os
            from pathlib import Path

            # Degrade gracefully when the optional dependency is missing
            # instead of raising ImportError at the top of the file.
            if importlib.util.find_spec("transformers") is None:
                st.error("Transformers not installed. Run `pip install transformers`.")
            else:
                from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

                MODEL_NAME = "sshleifer/tiny-gpt2"  # very small 6 MB model
                # Cache directory lives inside LOG_DIR (module-level constant).
                MODEL_DIR = Path(LOG_DIR) / "cached_tiny_llm"
                os.makedirs(MODEL_DIR, exist_ok=True)

                # config.json is used as the cache sentinel: if present, the
                # full model snapshot was saved on a previous run.
                if (MODEL_DIR / "config.json").exists():
                    st.caption("Loading tiny model from local cache...")
                    model = AutoModelForCausalLM.from_pretrained(MODEL_DIR)
                    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
                else:
                    # First run: download from the Hub, then persist both
                    # model and tokenizer so later runs are fully offline.
                    st.caption("☁️ Downloading tiny model (once only)...")
                    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
                    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
                    model.save_pretrained(MODEL_DIR)
                    tokenizer.save_pretrained(MODEL_DIR)
                    st.success("Cached tiny LLM in ./logs/cached_tiny_llm")

                assistant = pipeline("text-generation", model=model, tokenizer=tokenizer)

                # Prompt embeds the stored AutoML summary; .get() keeps this
                # safe (None placeholders) even when the summary is empty.
                prompt = f"""
You are an ML model tuning assistant.
Given this AutoML summary, provide 3 actionable steps for improvement if overfitting,
underfitting, or data quality issues are suspected.

Use case: {summary.get('use_case')}
Target: {summary.get('target')}
Final R²: {summary.get('final_r2')}
Final RMSE: {summary.get('final_rmse')}
Leaderboard: {summary.get('leaderboard')}

Respond in concise numbered steps.
"""
                # Sampled generation (not deterministic); tiny-gpt2 output is
                # essentially a smoke test, not a real advisory model.
                out = assistant(prompt, max_new_tokens=90, temperature=0.7, do_sample=True)[0]["generated_text"]
                st.success("LLM Recommendation:")
                # NOTE(review): raw model text rendered as markdown — any
                # markdown in the generated text will be interpreted.
                st.markdown(out)
                # NOTE(review): `log` is a helper defined elsewhere in this
                # file (not visible in this chunk).
                log("Tiny LLM recommendation generated successfully.")
        except Exception as e:
            # Broad catch is deliberate: download/IO/generation failures all
            # surface in the UI instead of crashing the Streamlit script.
            st.error(f"LLM generation failed: {e}")
            st.info("If the model download failed, rerun once — it will cache afterward.")

| 969 |
# ----- Target & Business Impact tab
|