Max_Hz_Stresses

Paused

App Files Community

UCS2014 committed on Sep 12, 2025

Commit

862a511

verified ·

1 Parent(s): 428329a

Update app.py

Browse files

Files changed (1) hide show

app.py +118 -94

app.py CHANGED Viewed

@@ -28,15 +28,15 @@ from sklearn.impute import SimpleImputer
 APP_NAME = "ST_GeoMech_SHmax"
 TAGLINE  = "Real-Time Maximum Horizontal Stress Prediction"
-# Canonical feature names (match your files); target = actual MaxStress
 FEATURES   = ["Q (gpm)", "SPP (psi)", "T (kft.lbf)", "WOB (klbf)", "ROP (ft/h)"]
-TARGET     = "MaxStress"
 PRED_COL   = "SHmax_Pred"
 ACTUAL_COL = TARGET
 TRANSFORM  = "none"     # "none" | "log10" | "ln"
 UNITS      = "Psi"
-# --- Fixed "best" model params (from your MaxStress RF notebook) ---
 BEST_PARAMS = dict(
     n_estimators=100,
     max_depth=22,
@@ -81,7 +81,7 @@ TABLE_CENTER_CSS = [
 ]
 # =========================
-# Password gate (optional)
 # =========================
 def inline_logo(path="logo.png") -> str:
     try:
@@ -96,10 +96,13 @@ def add_password_gate() -> None:
         required = st.secrets.get("APP_PASSWORD", "")
     except Exception:
         required = os.environ.get("APP_PASSWORD", "")
     if not required:
-        return
     if st.session_state.get("auth_ok", False):
         return
     st.sidebar.markdown(f"""
         <div class="centered-container">
             <img src="{inline_logo('logo.png')}" class="brand-logo">
@@ -116,7 +119,7 @@ def add_password_gate() -> None:
             st.error("Incorrect key.")
     st.stop()
-# add_password_gate()   # enable if you want a password
 # =========================
 # Utilities
@@ -186,26 +189,6 @@ def ensure_cols(df: pd.DataFrame, cols: list[str]) -> bool:
 def _nice_tick0(xmin: float, step: float = 0.1) -> float:
     return step * math.floor(xmin / step) if np.isfinite(xmin) else xmin
-# ---------- Column normalization / aliases ----------
-ALIASES = {
-    "Q (gpm)":        ["Q, gpm", "Q_gpm", "Q(gpm)", "Q  (gpm)"],
-    "SPP (psi)":      ["SPP(psi)", "SPP  (psi)", "SPP psi"],
-    "T (kft.lbf)":    ["T(kft.lbf)", "T  (kft.lbf)"],
-    "WOB (klbf)":     ["WOB(klbf)", "WOB  (klbf)"],
-    "ROP (ft/h)":     ["ROP(ft/h)", "ROP  (ft/h)"],
-    "MaxStress":      ["Max Stress", "MAXStress", "SHmax", "SHmax_Actual"],
-}
-def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
-    out = df.copy()
-    out.columns = [str(c).strip().replace(" ,", ",").replace(",  ", ", ").replace("  ", " ")
-                   .replace("psi)", "psi)").replace("(psi", "(psi") for c in out.columns]
-    mapping = {}
-    for canonical, alts in ALIASES.items():
-        for a in alts:
-            if a in out.columns and canonical != a:
-                mapping[a] = canonical
-    return out.rename(columns=mapping)
 # ---------- Transform helpers ----------
 def _inv_transform(x: np.ndarray, transform: str) -> np.ndarray:
     t = (transform or "none").lower()
@@ -346,7 +329,7 @@ def render_export_button(phase_key: str) -> None:
                        disabled=(data is None), key=f"download_{phase_key}")
 # =========================
-# Plots
 # =========================
 def cross_plot_static(actual, pred):
     a = pd.Series(actual, dtype=float)
@@ -365,7 +348,7 @@ def cross_plot_static(actual, pred):
     ax.set_xticks(ticks);  ax.set_yticks(ticks)
     ax.set_aspect("equal", adjustable="box")
-    fmt = FuncFormatter(lambda x, _: f"{x:.0f}")   # no decimals
     ax.xaxis.set_major_formatter(fmt); ax.yaxis.set_major_formatter(fmt)
     ax.set_xlabel(f"Actual Max Stress ({UNITS})",  fontweight="bold", fontsize=10, color="black")
@@ -421,13 +404,14 @@ def track_plot(df, include_actual=True):
                     bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1),
         legend_title_text=""
     )
-    # no decimals on the x ticks
     fig.update_xaxes(
         title_text=f"Max Stress ({UNITS})",
         title_font=dict(size=20, family=BOLD_FONT, color="#000"),
         tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
         side="top", range=[xmin, xmax],
-        ticks="outside", tickformat=",.0f", tickmode="auto", tick0=tick0,
         showline=True, linewidth=1.2, linecolor="#444", mirror=True,
         showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
     )
@@ -447,7 +431,8 @@ def preview_tracks(df: pd.DataFrame, cols: list[str]):
     if n == 0:
         fig, ax = plt.subplots(figsize=(4, 2))
         ax.text(0.5, 0.5, "No selected columns", ha="center", va="center")
-        ax.axis("off"); return fig
     depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
     if depth_col is not None:
@@ -463,16 +448,20 @@ def preview_tracks(df: pd.DataFrame, cols: list[str]):
     col_colors = {col: cmap(i % cmap.N) for i, col in enumerate(cols)}
     fig, axes = plt.subplots(1, n, figsize=(2.4 * n, 7.0), sharey=True, dpi=100)
-    if n == 1: axes = [axes]
     for i, (ax, col) in enumerate(zip(axes, cols)):
         x = pd.to_numeric(df[col], errors="coerce")
         ax.plot(x, idx, '-', lw=1.6, color=col_colors[col])
         ax.set_xlabel(col); ax.xaxis.set_label_position('top'); ax.xaxis.tick_top()
         ax.set_ylim(y_max, y_min)  # reversed depth down
         ax.grid(True, linestyle=":", alpha=0.3)
-        if i == 0: ax.set_ylabel(y_label)
         else:
             ax.tick_params(labelleft=False); ax.set_ylabel("")
     fig.tight_layout()
     return fig
@@ -480,7 +469,12 @@ def preview_tracks(df: pd.DataFrame, cols: list[str]):
 # Fixed training pipeline
 # =========================
 def build_pipeline() -> Pipeline:
-    """Impute numeric (median) + RandomForestRegressor with fixed tuned params."""
     model = RandomForestRegressor(**BEST_PARAMS)
     pipe = Pipeline(steps=[
         ("imputer", SimpleImputer(strategy="median")),
@@ -498,8 +492,11 @@ st.session_state.setdefault("dev_file_name","")
 st.session_state.setdefault("dev_file_bytes",b"")
 st.session_state.setdefault("dev_file_loaded",False)
 st.session_state.setdefault("dev_preview",False)
-st.session_state.setdefault("show_preview_modal", False)
-st.session_state.setdefault("fitted_model", None)
 # =========================
 # Sidebar branding
@@ -529,40 +526,49 @@ def sticky_header(title, message):
         unsafe_allow_html=True
     )
-def render_preview_top(book_bytes: bytes|None, upload_obj=None):
-    """Always render the Preview expander near the top (does not stop page)."""
-    if not st.session_state.show_preview_modal:
         return
-    book_to_preview = {}
-    if st.session_state.app_step == "dev":
-        book_to_preview = read_book_bytes(book_bytes or b"")
-    elif st.session_state.app_step in ["validate", "predict"] and upload_obj is not None:
-        book_to_preview = read_book_bytes(upload_obj.getvalue())
-    with st.expander("Preview data", expanded=True):
-        if not book_to_preview:
-            st.markdown('<div class="st-message-box">No data loaded yet.</div>', unsafe_allow_html=True)
-        else:
-            names = list(book_to_preview.keys())
-            tabs = st.tabs(names)
-            for t, name in zip(tabs, names):
-                with t:
-                    df = _normalize_columns(book_to_preview[name])
-                    t1, t2 = st.tabs(["Tracks", "Summary"])
-                    with t1:
-                        st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)
-                    with t2:
-                        feat_present = [c for c in FEATURES if c in df.columns]
-                        if not feat_present:
-                            st.info("No feature columns found to summarize.")
-                        else:
-                            tbl = (
-                                df[feat_present]
-                                  .agg(['min','max','mean','std'])
-                                  .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
-                                  .reset_index(names="Feature")
-                            )
-                            df_centered_rounded(tbl)
-    # keep it open; do not reset flag so user can collapse if they want
 # =========================
 # INTRO
@@ -572,7 +578,7 @@ if st.session_state.app_step == "intro":
     st.markdown(f"This software is developed by *Smart Thinking AI-Solutions Team* to estimate **Maximum Horizontal Stress** ({UNITS}) from drilling/offset data.")
     st.subheader("How It Works")
     st.markdown(
-        "1) **Upload your Train/Test file** and click **Run Model** to fit the baked-in pipeline.  \n"
         "2) **Validate** on held-out wells (with actual).  \n"
         "3) **Predict** on wells without actual."
     )
@@ -590,7 +596,7 @@ def _find_sheet(book, names):
 if st.session_state.app_step == "dev":
     st.sidebar.header("Case Building")
-    up = st.sidebar.file_uploader("Upload Your Data File (Train/Test sheets)", type=["xlsx","xls"])
     if up is not None:
         st.session_state.dev_file_bytes = up.getvalue()
         st.session_state.dev_file_name = up.name
@@ -604,18 +610,25 @@ if st.session_state.app_step == "dev":
             df0 = next(iter(tmp.values()))
             st.sidebar.caption(f"**Data loaded:** {st.session_state.dev_file_name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
     if st.sidebar.button("Preview data", use_container_width=True, disabled=not st.session_state.dev_file_loaded):
-        st.session_state.show_preview_modal = True
-        st.session_state.dev_preview = True
     run = st.sidebar.button("Run Model", type="primary", use_container_width=True)
     if st.sidebar.button("Proceed to Validation ▶", use_container_width=True): st.session_state.app_step="validate"; st.rerun()
     if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
-    sticky_header("Case Building", "Upload your data, preview it (top), then click **Run Model**.")
-    # Always render preview first (TOP)
-    render_preview_top(st.session_state.dev_file_bytes)
     if run and st.session_state.dev_file_bytes:
         book = read_book_bytes(st.session_state.dev_file_bytes)
@@ -625,9 +638,10 @@ if st.session_state.app_step == "dev":
             st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
             st.stop()
-        tr0 = _normalize_columns(book[sh_train].copy())
-        te0 = _normalize_columns(book[sh_test].copy())
         if not (ensure_cols(tr0, FEATURES+[TARGET]) and ensure_cols(te0, FEATURES+[TARGET])):
             st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True)
             st.stop()
@@ -639,8 +653,10 @@ if st.session_state.app_step == "dev":
         y_te = pd.to_numeric(te0[TARGET], errors="coerce")
         # Drop rows with NA in y
-        mask_tr = np.isfinite(y_tr); X_tr, y_tr = X_tr.loc[mask_tr], y_tr.loc[mask_tr]
-        mask_te = np.isfinite(y_te); X_te, y_te = X_te.loc[mask_te], y_te.loc[mask_te]
         pipe = build_pipeline()
         pipe.fit(X_tr, y_tr)
@@ -689,7 +705,7 @@ if st.session_state.app_step == "dev":
     if "Train" in st.session_state.results or "Test" in st.session_state.results:
         tab1, tab2 = st.tabs(["Training", "Testing"])
-        if "Train" in st.session_state.results:
             with tab1: _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
         if "Test" in st.session_state.results:
             with tab2: _dev_block(st.session_state.results["Test"],  st.session_state.results["m_test"])
@@ -706,25 +722,29 @@ if st.session_state.app_step == "validate":
         if book:
             df0 = next(iter(book.values()))
             st.sidebar.caption(f"**Data loaded:** {up.name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
     if st.sidebar.button("Preview data", use_container_width=True, disabled=(up is None)):
-        st.session_state.show_preview_modal = True
     go_btn = st.sidebar.button("Predict & Validate", type="primary", use_container_width=True)
     if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
     if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
-    sticky_header("Validate the Model", "Upload a dataset with the same **features** and **MaxStress** to evaluate performance.")
-    # Preview on top
-    render_preview_top(None, upload_obj=up)
     if go_btn and up is not None:
         if st.session_state.fitted_model is None:
-            st.error("Please train the model first in Case Building."); st.stop()
         book = read_book_bytes(up.getvalue())
         names = list(book.keys())
         name = next((s for s in names if s.lower() in ("validation","validate","validation2","val","val2")), names[0])
-        df0 = _normalize_columns(book[name].copy())
         if not ensure_cols(df0, FEATURES+[TARGET]):
             st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
@@ -793,22 +813,26 @@ if st.session_state.app_step == "predict":
         if book:
             df0 = next(iter(book.values()))
             st.sidebar.caption(f"**Data loaded:** {up.name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
     if st.sidebar.button("Preview data", use_container_width=True, disabled=(up is None)):
-        st.session_state.show_preview_modal = True
     go_btn = st.sidebar.button("Predict", type="primary", use_container_width=True)
     if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
     sticky_header("Prediction", "Upload a dataset with the 5 feature columns (no actual column).")
-    # Preview on top
-    render_preview_top(None, upload_obj=up)
     if go_btn and up is not None:
         if st.session_state.fitted_model is None:
-            st.error("Please train the model first in Case Building."); st.stop()
         book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
-        df0 = _normalize_columns(book[name].copy())
         if not ensure_cols(df0, FEATURES):
             st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
         df = df0.copy()

 APP_NAME = "ST_GeoMech_SHmax"
 TAGLINE  = "Real-Time Maximum Horizontal Stress Prediction"
+# -------- Canonical names (match your files) --------
 FEATURES   = ["Q (gpm)", "SPP (psi)", "T (kft.lbf)", "WOB (klbf)", "ROP (ft/h)"]
+TARGET     = "MaxStress_Actual"     # <-- matches your sheet (was 'MaxStress', causing the error)
 PRED_COL   = "SHmax_Pred"
 ACTUAL_COL = TARGET
 TRANSFORM  = "none"     # "none" | "log10" | "ln"
 UNITS      = "Psi"
+# ---- Fixed ("best") model params baked into the code ----
 BEST_PARAMS = dict(
     n_estimators=100,
     max_depth=22,
 ]
 # =========================
+# Password gate (same as shmin)
 # =========================
 def inline_logo(path="logo.png") -> str:
     try:
         required = st.secrets.get("APP_PASSWORD", "")
     except Exception:
         required = os.environ.get("APP_PASSWORD", "")
     if not required:
+        return  # no password configured
     if st.session_state.get("auth_ok", False):
         return
     st.sidebar.markdown(f"""
         <div class="centered-container">
             <img src="{inline_logo('logo.png')}" class="brand-logo">
             st.error("Incorrect key.")
     st.stop()
+add_password_gate()
 # =========================
 # Utilities
 def _nice_tick0(xmin: float, step: float = 0.1) -> float:
     return step * math.floor(xmin / step) if np.isfinite(xmin) else xmin
 # ---------- Transform helpers ----------
 def _inv_transform(x: np.ndarray, transform: str) -> np.ndarray:
     t = (transform or "none").lower()
                        disabled=(data is None), key=f"download_{phase_key}")
 # =========================
+# Plots (no decimals on X)
 # =========================
 def cross_plot_static(actual, pred):
     a = pd.Series(actual, dtype=float)
     ax.set_xticks(ticks);  ax.set_yticks(ticks)
     ax.set_aspect("equal", adjustable="box")
+    fmt = FuncFormatter(lambda x, _: f"{x:.0f}")  # no decimals
     ax.xaxis.set_major_formatter(fmt); ax.yaxis.set_major_formatter(fmt)
     ax.set_xlabel(f"Actual Max Stress ({UNITS})",  fontweight="bold", fontsize=10, color="black")
                     bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1),
         legend_title_text=""
     )
     fig.update_xaxes(
         title_text=f"Max Stress ({UNITS})",
         title_font=dict(size=20, family=BOLD_FONT, color="#000"),
         tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
         side="top", range=[xmin, xmax],
+        ticks="outside",
+        tickformat=",.0f",      # <— no decimals on ticks
+        tickmode="auto", tick0=tick0,
         showline=True, linewidth=1.2, linecolor="#444", mirror=True,
         showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
     )
     if n == 0:
         fig, ax = plt.subplots(figsize=(4, 2))
         ax.text(0.5, 0.5, "No selected columns", ha="center", va="center")
+        ax.axis("off")
+        return fig
     depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
     if depth_col is not None:
     col_colors = {col: cmap(i % cmap.N) for i, col in enumerate(cols)}
     fig, axes = plt.subplots(1, n, figsize=(2.4 * n, 7.0), sharey=True, dpi=100)
+    if n == 1:
+        axes = [axes]
     for i, (ax, col) in enumerate(zip(axes, cols)):
         x = pd.to_numeric(df[col], errors="coerce")
         ax.plot(x, idx, '-', lw=1.6, color=col_colors[col])
         ax.set_xlabel(col); ax.xaxis.set_label_position('top'); ax.xaxis.tick_top()
         ax.set_ylim(y_max, y_min)  # reversed depth down
         ax.grid(True, linestyle=":", alpha=0.3)
+        if i == 0:
+            ax.set_ylabel(y_label)
         else:
             ax.tick_params(labelleft=False); ax.set_ylabel("")
     fig.tight_layout()
     return fig
 # Fixed training pipeline
 # =========================
 def build_pipeline() -> Pipeline:
+    """
+    Fixed, optimized pipeline:
+      - Numeric imputation (median)
+      - RandomForestRegressor with tuned params (BEST_PARAMS)
+    Trees don't need scaling; robust to feature distributions.
+    """
     model = RandomForestRegressor(**BEST_PARAMS)
     pipe = Pipeline(steps=[
         ("imputer", SimpleImputer(strategy="median")),
 st.session_state.setdefault("dev_file_bytes",b"")
 st.session_state.setdefault("dev_file_loaded",False)
 st.session_state.setdefault("dev_preview",False)
+st.session_state.setdefault("fitted_model", None)      # cache trained pipeline
+# NEW: persistent top-of-page preview panel state (same as shmin)
+st.session_state.setdefault("show_preview_panel", False)
+st.session_state.setdefault("preview_book", {})        # parsed Excel sheets to preview
 # =========================
 # Sidebar branding
         unsafe_allow_html=True
     )
+# ---------- Top-of-page Preview Panel ----------
def _hide_preview_panel():
    """Clear the preview state in the session and rerun the script."""
    st.session_state.show_preview_panel = False
    st.session_state.preview_book = {}
    st.rerun()


def render_preview_panel():
    """Draw the data-preview panel when the session flag enables it.

    Reads two session-state entries:
      - ``show_preview_panel``: when falsy, nothing is rendered.
      - ``preview_book``: mapping of sheet name -> DataFrame to preview.

    Each sheet gets its own tab holding a "Tracks" plot and a "Summary"
    table (min / max / mean / std of the FEATURES columns present in the
    sheet). A final tab holds the "Hide preview" button, which clears the
    preview state and reruns the script.
    """
    if not st.session_state.get("show_preview_panel"):
        return

    st.markdown("## 🔎 Data preview")
    book = st.session_state.get("preview_book", {}) or {}

    if not book:
        # Nothing to show: still offer a way to dismiss the empty panel.
        st.info("No data loaded yet.")
        col = st.columns(2)[1]
        with col:
            if st.button("Hide preview"):
                _hide_preview_panel()
        return

    names = list(book.keys())
    # One tab per sheet, plus a trailing tab that hosts the hide button.
    tabs = st.tabs(names + ["✖ Hide preview"])
    for tab, name in zip(tabs, names):
        with tab:
            df = book[name]
            t1, t2 = st.tabs(["Tracks", "Summary"])
            with t1:
                st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)
            with t2:
                feat_present = [c for c in FEATURES if c in df.columns]
                if not feat_present:
                    st.info("No feature columns found to summarize.")
                else:
                    # agg() labels its rows with the lowercase function names,
                    # so the rename keys must be lowercase as well; otherwise
                    # the min/max columns keep their raw names.
                    tbl = (
                        df[feat_present]
                          .agg(['min', 'max', 'mean', 'std'])
                          .T.rename(columns={"min": "Min", "max": "Max", "mean": "Mean", "std": "Std"})
                          .reset_index(names="Feature")
                    )
                    df_centered_rounded(tbl)

    with tabs[-1]:
        if st.button("Hide preview", use_container_width=True):
            _hide_preview_panel()
 # =========================
 # INTRO
     st.markdown(f"This software is developed by *Smart Thinking AI-Solutions Team* to estimate **Maximum Horizontal Stress** ({UNITS}) from drilling/offset data.")
     st.subheader("How It Works")
     st.markdown(
+        "1) **Upload your data file** and click **Run Model** to fit the baked-in pipeline.  \n"
         "2) **Validate** on held-out wells (with actual).  \n"
         "3) **Predict** on wells without actual."
     )
 if st.session_state.app_step == "dev":
     st.sidebar.header("Case Building")
+    up = st.sidebar.file_uploader("Upload Your Data File", type=["xlsx","xls"])
     if up is not None:
         st.session_state.dev_file_bytes = up.getvalue()
         st.session_state.dev_file_name = up.name
             df0 = next(iter(tmp.values()))
             st.sidebar.caption(f"**Data loaded:** {st.session_state.dev_file_name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
+    # PREVIEW button -> show preview panel at top
     if st.sidebar.button("Preview data", use_container_width=True, disabled=not st.session_state.dev_file_loaded):
+        st.session_state.preview_book = read_book_bytes(st.session_state.dev_file_bytes) if st.session_state.dev_file_bytes else {}
+        st.session_state.show_preview_panel = True
+        st.rerun()
     run = st.sidebar.button("Run Model", type="primary", use_container_width=True)
     if st.sidebar.button("Proceed to Validation ▶", use_container_width=True): st.session_state.app_step="validate"; st.rerun()
     if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
+    if st.session_state.dev_file_loaded and st.session_state.show_preview_panel:
+        sticky_header("Case Building", "Previewed ✓ — now click **Run Model**.")
+    elif st.session_state.dev_file_loaded:
+        sticky_header("Case Building", "📄 **Preview uploaded data** using the sidebar button, then click **Run Model**.")
+    else:
+        sticky_header("Case Building", "**Upload your data to build a case, then run the model to review performance.**")
+    # Render the preview panel at the very top (above results)
+    render_preview_panel()
     if run and st.session_state.dev_file_bytes:
         book = read_book_bytes(st.session_state.dev_file_bytes)
             st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
             st.stop()
+        tr0 = book[sh_train].copy()
+        te0 = book[sh_test].copy()
+        # Ensure columns exist
         if not (ensure_cols(tr0, FEATURES+[TARGET]) and ensure_cols(te0, FEATURES+[TARGET])):
             st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True)
             st.stop()
         y_te = pd.to_numeric(te0[TARGET], errors="coerce")
         # Drop rows with NA in y
+        mask_tr = np.isfinite(y_tr)
+        X_tr, y_tr = X_tr.loc[mask_tr], y_tr.loc[mask_tr]
+        mask_te = np.isfinite(y_te)
+        X_te, y_te = X_te.loc[mask_te], y_te.loc[mask_te]
         pipe = build_pipeline()
         pipe.fit(X_tr, y_tr)
     if "Train" in st.session_state.results or "Test" in st.session_state.results:
         tab1, tab2 = st.tabs(["Training", "Testing"])
+        if "Train" in st.session_state.results:
             with tab1: _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
         if "Test" in st.session_state.results:
             with tab2: _dev_block(st.session_state.results["Test"],  st.session_state.results["m_test"])
         if book:
             df0 = next(iter(book.values()))
             st.sidebar.caption(f"**Data loaded:** {up.name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
+    # PREVIEW button -> show preview panel at top
     if st.sidebar.button("Preview data", use_container_width=True, disabled=(up is None)):
+        st.session_state.preview_book = read_book_bytes(up.getvalue()) if up is not None else {}
+        st.session_state.show_preview_panel = True
+        st.rerun()
     go_btn = st.sidebar.button("Predict & Validate", type="primary", use_container_width=True)
     if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
     if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
+    sticky_header("Validate the Model", "Upload a dataset with the same **features** and **MaxStress_Actual** to evaluate performance.")
+    render_preview_panel()  # top-of-page preview
     if go_btn and up is not None:
         if st.session_state.fitted_model is None:
+            st.error("Please train the model first in Case Building.")
+            st.stop()
         book = read_book_bytes(up.getvalue())
         names = list(book.keys())
         name = next((s for s in names if s.lower() in ("validation","validate","validation2","val","val2")), names[0])
+        df0 = book[name].copy()
         if not ensure_cols(df0, FEATURES+[TARGET]):
             st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
         if book:
             df0 = next(iter(book.values()))
             st.sidebar.caption(f"**Data loaded:** {up.name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
+    # PREVIEW button -> show preview panel at top
     if st.sidebar.button("Preview data", use_container_width=True, disabled=(up is None)):
+        st.session_state.preview_book = read_book_bytes(up.getvalue()) if up is not None else {}
+        st.session_state.show_preview_panel = True
+        st.rerun()
     go_btn = st.sidebar.button("Predict", type="primary", use_container_width=True)
     if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
     sticky_header("Prediction", "Upload a dataset with the 5 feature columns (no actual column).")
+    render_preview_panel()  # top-of-page preview
     if go_btn and up is not None:
         if st.session_state.fitted_model is None:
+            st.error("Please train the model first in Case Building.")
+            st.stop()
         book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
+        df0 = book[name].copy()
         if not ensure_cols(df0, FEATURES):
             st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
         df = df0.copy()