Spaces:

singhn9
/

SteelAI_Module2_EAF_Intelligence_Explorer

Sleeping

App Files Files Community

singhn9 committed on Nov 10, 2025

Commit

a2c0d56

verified ·

1 Parent(s): 09c46b2

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +161 -279

src/streamlit_app.py CHANGED Viewed

@@ -436,6 +436,29 @@ with tabs[3]:
 with tabs[4]:
     st.subheader("AutoML Ensemble — Expanded Families + Stacking + SHAP")
     use_case = st.selectbox(
         "Select Use Case",
         [
@@ -446,9 +469,9 @@ with tabs[4]:
             "Surface Defect Detection (Vision AI)",
             "Material Composition & Alloy Mix AI",
             "Inventory & Yield Optimization",
-            "Refractory & Cooling Loss Prediction"
         ],
-        index=1
     )
     use_case_config = {
@@ -461,13 +484,13 @@ with tabs[4]:
         "Inventory & Yield Optimization": {"target": "yield_ratio", "model_hint": "GradientBoosting"},
         "Refractory & Cooling Loss Prediction": {"target": "lining_thickness", "model_hint": "ExtraTrees"},
     }
     cfg = use_case_config.get(use_case, {"target": numeric_cols[0], "model_hint": "RandomForest"})
-    target = cfg["target"]
-    model_hint = cfg["model_hint"]
-    suggested = [c for c in numeric_cols if any(k in c for k in target.split('_'))]
     if len(suggested) < 6:
-        suggested = [c for c in numeric_cols if any(k in c for k in ["temp","power","energy","pressure","yield"])]
     if len(suggested) < 6:
         suggested = numeric_cols[:50]
@@ -478,62 +501,24 @@ with tabs[4]:
     max_rows = min(df.shape[0], 20000)
     sample_size = st.slider("Sample rows", 500, max_rows, min(1500, max_rows), step=100)
-    # ---------- SAFE target & X preparation ----------
-    if isinstance(target, (list, tuple)):
-        st.warning(f"Target provided as list/tuple; using first element `{target[0]}` as target.")
-        target = target[0]
-    cols_needed = [c for c in features if c in df.columns]
-    if target in df.columns:
-        target_col = target
-    else:
-        matches = [c for c in df.columns if c.lower() == target.lower()]
-        if matches:
-            target_col = matches[0]
-            st.info(f"Auto-corrected to exact match: `{target_col}`")
-        else:
-            matches = [c for c in df.columns if target.lower() in c.lower()]
-            if len(matches) == 1:
-                target_col = matches[0]
-                st.info(f"Auto-corrected to closest match: `{target_col}`")
-            elif len(matches) > 1:
-                preferred = [m for m in matches if m.endswith("_temp") or m.endswith("_ratio") or m == target]
-                if preferred:
-                    target_col = preferred[0]
-                    st.warning(f"Multiple matches found {matches}. Using `{target_col}`.")
-                else:
-                    target_col = matches[0]
-                    st.warning(f"Multiple matches found {matches}. Using first: `{target_col}`.")
-            else:
-                st.error(f"Target `{target}` not found in dataframe columns.")
-                st.stop()
-    valid_features = [c for c in cols_needed if c in df.columns and c != target_col]
-    if not valid_features:
-        st.error("No valid feature columns remain after cleaning. Check feature selection.")
         st.stop()
-    sub_df = df.loc[:, valid_features + [target_col]].copy()
-    sub_df = sub_df.sample(n=sample_size, random_state=42).reset_index(drop=True)
     X = sub_df.drop(columns=[target_col])
     y = pd.Series(np.ravel(sub_df[target_col]), name=target_col)
     leak_cols = ["furnace_temp_next", "pred_temp_30s", "run_timestamp", "timestamp", "batch_id_numeric", "batch_id"]
-    for lc in leak_cols:
-        if lc in X.columns:
-            X.drop(columns=[lc], inplace=True)
-    nunique = X.nunique(dropna=False)
-    const_cols = nunique[nunique <= 1].index.tolist()
-    if const_cols:
-        X.drop(columns=const_cols, inplace=True)
-    if X.shape[1] == 0:
-        st.error("No valid feature columns remain after cleaning. Check feature selection.")
-        st.stop()
     st.markdown("### Ensemble & AutoML Settings")
     max_trials = st.slider("Optuna trials per family", 5, 80, 20, step=5)
     top_k = st.slider("Max base models in ensemble", 2, 8, 5)
@@ -552,267 +537,164 @@ with tabs[4]:
             import catboost as cb; optional_families["CatBoost"] = True; available_models.append("CatBoost")
         except Exception: optional_families["CatBoost"] = False
-    st.markdown(f"Available model families: {', '.join(available_models)}")
-    def tune_family(family_name, X_local, y_local, n_trials=20, random_state=42):
-        """Tune one model family using Optuna."""
         def obj(trial):
-            if family_name == "RandomForest":
-                n_estimators = trial.suggest_int("n_estimators", 100, 800)
-                max_depth = trial.suggest_int("max_depth", 4, 30)
-                m = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth, n_jobs=-1, random_state=random_state)
-            elif family_name == "ExtraTrees":
-                n_estimators = trial.suggest_int("n_estimators", 100, 800)
-                max_depth = trial.suggest_int("max_depth", 4, 30)
-                m = ExtraTreesRegressor(n_estimators=n_estimators, max_depth=max_depth, n_jobs=-1, random_state=random_state)
-            elif family_name == "XGBoost" and optional_families.get("XGBoost"):
-                n_estimators = trial.suggest_int("n_estimators", 100, 1000)
-                max_depth = trial.suggest_int("max_depth", 3, 12)
-                lr = trial.suggest_float("learning_rate", 0.01, 0.3, log=True)
-                m = xgb.XGBRegressor(n_estimators=n_estimators, max_depth=max_depth, learning_rate=lr, tree_method="hist", verbosity=0)
-            elif family_name == "LightGBM" and optional_families.get("LightGBM"):
-                n_estimators = trial.suggest_int("n_estimators", 100, 1000)
-                max_depth = trial.suggest_int("max_depth", 3, 16)
-                lr = trial.suggest_float("learning_rate", 0.01, 0.3, log=True)
-                m = lgb.LGBMRegressor(n_estimators=n_estimators, max_depth=max_depth, learning_rate=lr, n_jobs=1)
-            elif family_name == "CatBoost" and optional_families.get("CatBoost"):
-                iterations = trial.suggest_int("iterations", 200, 1000)
-                depth = trial.suggest_int("depth", 4, 10)
-                lr = trial.suggest_float("learning_rate", 0.01, 0.3, log=True)
-                m = cb.CatBoostRegressor(iterations=iterations, depth=depth, learning_rate=lr, verbose=0)
             else:
-                m = RandomForestRegressor(n_estimators=200, max_depth=8, random_state=random_state)
             try:
-                scores = cross_val_score(m, X_local, y_local, scoring="r2", cv=3)
-                return float(np.mean(scores))
             except Exception:
-                return -999.0
         study = optuna.create_study(direction="maximize")
         study.optimize(obj, n_trials=n_trials, show_progress_bar=False)
-        best = study.best_trial.params if study.trials else {}
-        try:
-            if family_name == "RandomForest":
-                model = RandomForestRegressor(**{**{"random_state":42,"n_jobs":-1}, **best})
-            elif family_name == "ExtraTrees":
-                model = ExtraTreesRegressor(**{**{"random_state":42,"n_jobs":-1}, **best})
-            elif family_name == "XGBoost" and optional_families.get("XGBoost"):
-                model = xgb.XGBRegressor(**{**{"verbosity":0,"tree_method":"hist"}, **best})
-            elif family_name == "LightGBM" and optional_families.get("LightGBM"):
-                model = lgb.LGBMRegressor(**{**{"n_jobs":1}, **best})
-            elif family_name == "CatBoost" and optional_families.get("CatBoost"):
-                model = cb.CatBoostRegressor(**{**{"verbose":0}, **best})
-            else:
-                model = RandomForestRegressor(random_state=42)
-        except Exception:
-            model = RandomForestRegressor(random_state=42)
-        try:
-            score = float(np.mean(cross_val_score(model, X_local, y_local, scoring="r2", cv=3)))
-        except Exception:
-            score = -999.0
-        return {"model_obj": model, "cv_score": score, "best_params": best, "family": family_name}
-    if st.button("Run expanded AutoML + Stacking"):
-        st.session_state["run_automl_clicked"] = True
-    if st.session_state["run_automl_clicked"]:
-        log("AutoML + Stacking initiated.")
-        with st.spinner("Tuning multiple families..."):
-            families_to_try = ["RandomForest", "ExtraTrees", "MLP"]
             if allow_advanced:
-                if optional_families.get("XGBoost"): families_to_try.append("XGBoost")
-                if optional_families.get("LightGBM"): families_to_try.append("LightGBM")
-                if optional_families.get("CatBoost"): families_to_try.append("CatBoost")
-            tuned_results = []
-            for fam in families_to_try:
-                log(f"Tuning family: {fam}")
-                st.caption(f"Tuning family: {fam}")
-                result = tune_family(fam, X, y, n_trials=max_trials)
-                model_obj = result.get("model_obj")
-                if hasattr(model_obj, "estimators_"):
-                    delattr(model_obj, "estimators_")
-                result["model_obj"] = model_obj
-                tuned_results.append(result)
-            lb = pd.DataFrame([{"family": r["family"], "cv_r2": r["cv_score"], "params": r["best_params"]} for r in tuned_results])
-            lb = lb.sort_values("cv_r2", ascending=False).reset_index(drop=True)
-            st.markdown("### Tuning Leaderboard (by CV R²)")
-            st.dataframe(lb[["family","cv_r2"]].round(4))
             from sklearn.feature_selection import SelectKBest, f_regression
             from sklearn.linear_model import LinearRegression
-            from sklearn.model_selection import KFold
-            st.markdown("### Building base models & out-of-fold predictions for stacking")
             scaler = StandardScaler()
             X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)
             selector = SelectKBest(f_regression, k=min(40, X_scaled.shape[1]))
-            X_sel = selector.fit_transform(X_scaled, y)
-            selected_feature_names = [X.columns[i] for i in selector.get_support(indices=True)]
-            X_sel = pd.DataFrame(X_sel, columns=selected_feature_names)
             kf = KFold(n_splits=5, shuffle=True, random_state=42)
-            base_models, oof_preds = [], pd.DataFrame(index=X_sel.index)
-            for r in tuned_results:
-                m = r.get("model_obj")
-                if m is not None:
-                    try:
-                        if "__len__" in dir(m) and not hasattr(m, "estimators_"):
-                            setattr(m, "__len__", lambda self=m: 0)
-                    except Exception:
-                        pass
-            for fam, entry in [(r["family"], r) for r in tuned_results if r.get("model_obj") is not None]:
-                model_obj = entry["model_obj"]
-                oof = np.zeros(X_sel.shape[0])
-                for tr_idx, val_idx in kf.split(X_sel):
-                    X_tr, X_val = X_sel.iloc[tr_idx], X_sel.iloc[val_idx]
-                    y_tr = y.iloc[tr_idx]
-                    try:
-                        model_obj.fit(X_tr, y_tr)
-                        preds = model_obj.predict(X_val)
-                        oof[val_idx] = preds
-                    except Exception:
-                        oof[val_idx] = np.mean(y_tr)
-                oof_preds[f"{fam}_oof"] = oof
-                model_obj.fit(X_sel, y)
-                base_models.append({"family": fam, "model": model_obj})
-            if oof_preds.empty:
-                st.error("No base models built.")
-                st.stop()
-            corr = oof_preds.corr().abs()
-            div = {c: 1 - corr[c].drop(c).mean() for c in corr.columns}
-            cv_r2_est = {c: r2_score(y, oof_preds[c]) for c in oof_preds.columns}
-            summary_df = pd.DataFrame({
-                "family": [c.replace("_oof","") for c in oof_preds.columns],
-                "cv_r2": [cv_r2_est[c] for c in oof_preds.columns],
-                "diversity": [div[c] for c in oof_preds.columns]
-            }).sort_values(["cv_r2","diversity"], ascending=[False,False])
-            st.dataframe(summary_df.round(4))
-            selected = summary_df.head(top_k)["family"].tolist()
-            st.markdown(f"Selected for stacking (top {top_k}): {selected}")
             meta = LinearRegression(positive=True)
-            X_stack = oof_preds[[f"{s}_oof" for s in selected]].fillna(0)
-            meta.fit(X_stack, y)
-            X_tr, X_val, y_tr, y_val = train_test_split(X_sel, y, test_size=0.2, random_state=42)
-            meta_inputs = []
-            for fam in selected:
-                mdl = next((b["model"] for b in base_models if b["family"] == fam), None)
-                preds = mdl.predict(X_val) if mdl else np.full(len(X_val), np.mean(y_tr))
-                meta_inputs.append(np.ravel(preds))
-            X_meta_val = pd.DataFrame(np.column_stack(meta_inputs), columns=X_stack.columns)
-            y_meta_pred = meta.predict(X_meta_val)
-            final_r2 = r2_score(y_val, y_meta_pred)
-            final_rmse = np.sqrt(mean_squared_error(y_val, y_meta_pred))
-            st.success(f"Stacked Ensemble — R² = {final_r2:.4f}, RMSE = {final_rmse:.3f}")
-            fig, ax = plt.subplots(figsize=(7,4))
-            ax.scatter(y_val, y_meta_pred, alpha=0.7)
-            ax.plot([y_val.min(), y_val.max()], [y_val.min(), y_val.max()], "r--")
-            st.pyplot(fig, clear_figure=True)
             # --- Operator Advisory ---
             st.markdown("---")
-            st.subheader("Operator Advisory System — Real-Time Shift Recommendations")
             try:
-                top_base = next((b for b in base_models if b["family"] == selected[0]), None)
-                if top_base and hasattr(top_base["model"], "predict"):
-                    sample_X = X_val.sample(min(300, len(X_val)), random_state=42).copy()
-                    def _clean_to_float(x):
-                        if isinstance(x, (int, float, np.floating)):
-                            return float(x)
-                        try:
-                            x_str = str(x).replace("[", "").replace("]", "").replace(",", "").strip()
-                            if x_str.lower() in ("nan", "none", "", "null", "na", "n/a"):
-                                return 0.0
-                            return float(x_str.replace("E", "e"))
-                        except Exception:
-                            return 0.0
-                    for col in sample_X.columns:
-                        sample_X[col] = sample_X[col].map(_clean_to_float)
-                    sample_X = sample_X.apply(pd.to_numeric, errors="coerce").fillna(0)
-                    model = top_base["model"]
-                    expl = shap.TreeExplainer(model)
-                    shap_vals = expl.shap_values(sample_X)
-                    if isinstance(shap_vals, list): shap_vals = shap_vals[0]
-                    shap_vals = np.array(shap_vals)
-                    importance = pd.DataFrame({
-                        "Feature": sample_X.columns,
-                        "Mean |SHAP|": np.abs(shap_vals).mean(axis=0),
-                        "Mean SHAP Sign": np.sign(shap_vals).mean(axis=0)
-                    }).sort_values("Mean |SHAP|", ascending=False)
-                    st.markdown("### Top 5 Operational Drivers")
-                    st.dataframe(importance.head(5))
-                    recommendations = []
-                    for _, row in importance.head(5).iterrows():
-                        f, s = row["Feature"], row["Mean SHAP Sign"]
-                        if s > 0.05:
-                            recommendations.append(f"Increase `{f}` likely increases `{target}`")
-                        elif s < -0.05:
-                            recommendations.append(f"Decrease `{f}` likely increases `{target}`")
-                        else:
-                            recommendations.append(f"`{f}` neutral for `{target}`")
-                    st.markdown("### Suggested Operator Adjustments")
-                    st.write("\n".join(recommendations))
-                    import requests, json, textwrap
-                    HF_TOKEN = os.getenv("HF_TOKEN")
-                    if not HF_TOKEN:
-                        st.error("HF_TOKEN not detected. Check the Secrets tab.")
                     else:
-                        API_URL = "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3-8B-Instruct"
-                        headers = {"Authorization": f"Bearer {HF_TOKEN}"}
-                        prompt = textwrap.dedent(f"""
-                            You are an expert metallurgical process advisor.
-                            Based on these SHAP-derived recommendations:
-                            {recommendations}
-                            Target: {target}
-                            Use case: {use_case}
-                            Summarize in three concise, professional lines what the operator should do this shift.
-                        """)
-                        payload = {"inputs": prompt, "parameters": {"max_new_tokens": 150, "temperature": 0.6}}
-                        with st.spinner("Generating operator note (Llama-3-8B)…"):
-                            resp = requests.post(API_URL, headers=headers, json=payload, timeout=90)
-                            try:
-                                data = resp.json()
-                                st.caption("Raw HF response:")
-                                st.json(data)
-                            except Exception as ex:
-                                st.warning(f"HF raw response parse error: {ex}")
-                                st.text(resp.text)
-                                data = None
                             text = ""
                             if isinstance(data, list) and len(data) > 0 and "generated_text" in data[0]:
                                 text = data[0]["generated_text"].strip()
                             elif isinstance(data, dict) and "generated_text" in data:
                                 text = data["generated_text"].strip()
-                            elif isinstance(data, str):
-                                text = data.strip()
                             if text:
-                                st.success(" Operator Advisory Generated:")
                                 st.info(text)
                             else:
-                                st.warning("Operator advisory skipped: no text returned from model.")
             except Exception as e:
                 st.warning(f"Operator advisory skipped: {e}")

 with tabs[4]:
     st.subheader("AutoML Ensemble — Expanded Families + Stacking + SHAP")
+    # --- Universal numeric cleaner (runs once per tab) ---
+    def clean_entire_df(df):
+        """Cleans dataframe of any bracketed/scientific string numbers like '[1.551E3]'."""
+        df_clean = df.copy()
+        for col in df_clean.columns:
+            if df_clean[col].dtype == object:
+                df_clean[col] = (
+                    df_clean[col]
+                    .astype(str)
+                    .str.replace("[", "", regex=False)
+                    .str.replace("]", "", regex=False)
+                    .str.replace(",", "", regex=False)
+                    .str.strip()
+                    .replace(["nan", "NaN", "None", "null", "N/A", "", " "], np.nan)
+                )
+            df_clean[col] = pd.to_numeric(df_clean[col], errors="coerce")
+        df_clean = df_clean.fillna(0.0).astype(float)
+        return df_clean
+    df = clean_entire_df(df)
+    st.caption("✅ Dataset cleaned globally — all numeric-like values converted safely.")
+    # --- Use Case Selection ---
     use_case = st.selectbox(
         "Select Use Case",
         [
             "Surface Defect Detection (Vision AI)",
             "Material Composition & Alloy Mix AI",
             "Inventory & Yield Optimization",
+            "Refractory & Cooling Loss Prediction",
         ],
+        index=1,
     )
     use_case_config = {
         "Inventory & Yield Optimization": {"target": "yield_ratio", "model_hint": "GradientBoosting"},
         "Refractory & Cooling Loss Prediction": {"target": "lining_thickness", "model_hint": "ExtraTrees"},
     }
     cfg = use_case_config.get(use_case, {"target": numeric_cols[0], "model_hint": "RandomForest"})
+    target, model_hint = cfg["target"], cfg["model_hint"]
+    suggested = [c for c in numeric_cols if any(k in c for k in target.split("_"))]
     if len(suggested) < 6:
+        suggested = [c for c in numeric_cols if any(k in c for k in ["temp", "power", "energy", "pressure", "yield"])]
     if len(suggested) < 6:
         suggested = numeric_cols[:50]
     max_rows = min(df.shape[0], 20000)
     sample_size = st.slider("Sample rows", 500, max_rows, min(1500, max_rows), step=100)
+    # --- Prepare data ---
+    target_col = target if target in df.columns else next((c for c in df.columns if target.lower() in c.lower()), None)
+    if not target_col:
+        st.error(f"Target `{target}` not found in dataframe.")
         st.stop()
+    cols_needed = [c for c in features if c in df.columns and c != target_col]
+    sub_df = df.loc[:, cols_needed + [target_col]].sample(n=sample_size, random_state=42).reset_index(drop=True)
     X = sub_df.drop(columns=[target_col])
     y = pd.Series(np.ravel(sub_df[target_col]), name=target_col)
+    # --- Drop constant or leak columns ---
     leak_cols = ["furnace_temp_next", "pred_temp_30s", "run_timestamp", "timestamp", "batch_id_numeric", "batch_id"]
+    X = X.drop(columns=[c for c in leak_cols if c in X.columns], errors="ignore")
+    X = X.loc[:, X.nunique() > 1]
+    # --- AutoML Settings ---
     st.markdown("### Ensemble & AutoML Settings")
     max_trials = st.slider("Optuna trials per family", 5, 80, 20, step=5)
     top_k = st.slider("Max base models in ensemble", 2, 8, 5)
             import catboost as cb; optional_families["CatBoost"] = True; available_models.append("CatBoost")
         except Exception: optional_families["CatBoost"] = False
+    st.markdown(f"Available families: {', '.join(available_models)}")
+    # --- Family tuner ---
+    def tune_family(fam, X_local, y_local, n_trials=20):
+        import optuna
+        from sklearn.model_selection import cross_val_score
+        from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
         def obj(trial):
+            if fam == "RandomForest":
+                m = RandomForestRegressor(
+                    n_estimators=trial.suggest_int("n_estimators", 100, 800),
+                    max_depth=trial.suggest_int("max_depth", 4, 30),
+                    random_state=42, n_jobs=-1,
+                )
+            elif fam == "ExtraTrees":
+                m = ExtraTreesRegressor(
+                    n_estimators=trial.suggest_int("n_estimators", 100, 800),
+                    max_depth=trial.suggest_int("max_depth", 4, 30),
+                    random_state=42, n_jobs=-1,
+                )
+            elif fam == "XGBoost" and optional_families.get("XGBoost"):
+                m = xgb.XGBRegressor(
+                    n_estimators=trial.suggest_int("n_estimators", 100, 800),
+                    max_depth=trial.suggest_int("max_depth", 3, 12),
+                    learning_rate=trial.suggest_float("lr", 0.01, 0.3, log=True),
+                    tree_method="hist", verbosity=0
+                )
+            elif fam == "LightGBM" and optional_families.get("LightGBM"):
+                m = lgb.LGBMRegressor(
+                    n_estimators=trial.suggest_int("n_estimators", 100, 800),
+                    max_depth=trial.suggest_int("max_depth", 3, 16),
+                    learning_rate=trial.suggest_float("lr", 0.01, 0.3, log=True)
+                )
+            elif fam == "CatBoost" and optional_families.get("CatBoost"):
+                m = cb.CatBoostRegressor(
+                    iterations=trial.suggest_int("iterations", 200, 800),
+                    depth=trial.suggest_int("depth", 4, 10),
+                    learning_rate=trial.suggest_float("lr", 0.01, 0.3, log=True),
+                    verbose=0
+                )
             else:
+                m = RandomForestRegressor(random_state=42)
             try:
+                return np.mean(cross_val_score(m, X_local, y_local, cv=3, scoring="r2"))
             except Exception:
+                return -999
         study = optuna.create_study(direction="maximize")
         study.optimize(obj, n_trials=n_trials, show_progress_bar=False)
+        params = study.best_trial.params if study.trials else {}
+        model = RandomForestRegressor(random_state=42)
+        return {"family": fam, "model_obj": model, "best_params": params, "cv_score": study.best_value}
+    # --- Run button ---
+    if st.button("Run AutoML + SHAP"):
+        with st.spinner("Training and stacking..."):
+            tuned_results = []
+            families = ["RandomForest", "ExtraTrees"]
             if allow_advanced:
+                for f in ["XGBoost", "LightGBM", "CatBoost"]:
+                    if optional_families.get(f): families.append(f)
+            for fam in families:
+                tuned_results.append(tune_family(fam, X, y, n_trials=max_trials))
+            lb = pd.DataFrame([{"family": r["family"], "cv_r2": r["cv_score"]} for r in tuned_results]).sort_values("cv_r2", ascending=False)
+            st.dataframe(lb.round(4))
+            # --- Stacking ---
             from sklearn.feature_selection import SelectKBest, f_regression
             from sklearn.linear_model import LinearRegression
+            from sklearn.model_selection import KFold, train_test_split
+            from sklearn.metrics import r2_score
             scaler = StandardScaler()
             X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)
             selector = SelectKBest(f_regression, k=min(40, X_scaled.shape[1]))
+            X_sel = pd.DataFrame(selector.fit_transform(X_scaled, y), columns=[X.columns[i] for i in selector.get_support(indices=True)])
             kf = KFold(n_splits=5, shuffle=True, random_state=42)
+            oof_preds, base_models = pd.DataFrame(index=X_sel.index), []
+            for fam, entry in [(r["family"], r) for r in tuned_results if r.get("model_obj")]:
+                model = entry["model_obj"]
+                preds = np.zeros(X_sel.shape[0])
+                for tr, va in kf.split(X_sel):
+                    model.fit(X_sel.iloc[tr], y.iloc[tr])
+                    preds[va] = model.predict(X_sel.iloc[va])
+                oof_preds[f"{fam}_oof"] = preds
+                model.fit(X_sel, y)
+                base_models.append({"family": fam, "model": model})
             meta = LinearRegression(positive=True)
+            meta.fit(oof_preds, y)
+            y_pred = meta.predict(oof_preds)
+            final_r2 = r2_score(y, y_pred)
+            st.success(f"Stacked Ensemble R² = {final_r2:.4f}")
             # --- Operator Advisory ---
             st.markdown("---")
+            st.subheader("Operator Advisory — Real-Time Recommendations")
             try:
+                top_base = base_models[0]["model"]
+                sample_X = X_sel.sample(min(300, len(X_sel)), random_state=42)
+                expl = shap.TreeExplainer(top_base)
+                shap_vals = expl.shap_values(sample_X)
+                if isinstance(shap_vals, list):
+                    shap_vals = shap_vals[0]
+                imp = pd.DataFrame({
+                    "Feature": sample_X.columns,
+                    "Mean |SHAP|": np.abs(shap_vals).mean(axis=0),
+                    "Mean SHAP Sign": np.sign(shap_vals).mean(axis=0)
+                }).sort_values("Mean |SHAP|", ascending=False)
+                st.dataframe(imp.head(5))
+                recs = []
+                for _, r in imp.head(5).iterrows():
+                    if r["Mean SHAP Sign"] > 0.05:
+                        recs.append(f"Increase `{r['Feature']}` likely increases `{target}`")
+                    elif r["Mean SHAP Sign"] < -0.05:
+                        recs.append(f"Decrease `{r['Feature']}` likely increases `{target}`")
                     else:
+                        recs.append(f"`{r['Feature']}` neutral for `{target}`")
+                st.write("\n".join(recs))
+                # --- Hugging Face advisory ---
+                import requests, json, textwrap
+                HF_TOKEN = os.getenv("HF_TOKEN")
+                if not HF_TOKEN:
+                    st.error("HF_TOKEN not detected.")
+                else:
+                    API_URL = "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3-8B-Instruct"
+                    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
+                    prompt = textwrap.dedent(f"""
+                        You are an expert metallurgical advisor.
+                        Recommendations: {recs}
+                        Target: {target}
+                        Use case: {use_case}
+                        Summarize in three professional lines for the shift operator.
+                    """)
+                    payload = {"inputs": prompt, "parameters": {"max_new_tokens": 120, "temperature": 0.6}}
+                    with st.spinner("Generating advisory (Llama-3-8B)…"):
+                        resp = requests.post(API_URL, headers=headers, json=payload, timeout=90)
+                        try:
+                            data = resp.json()
                             text = ""
                             if isinstance(data, list) and len(data) > 0 and "generated_text" in data[0]:
                                 text = data[0]["generated_text"].strip()
                             elif isinstance(data, dict) and "generated_text" in data:
                                 text = data["generated_text"].strip()
                             if text:
+                                st.success("✅ Operator Advisory Generated:")
                                 st.info(text)
                             else:
+                                st.warning("Operator advisory skipped: no text returned.")
+                        except Exception as e:
+                            st.warning(f"Operator advisory skipped: {e}")
             except Exception as e:
                 st.warning(f"Operator advisory skipped: {e}")