Fracture_Pressure

Paused

App Files Files Community

UCS2014 committed on Sep 12, 2025

Commit

4d43e75

verified ·

1 Parent(s): fa1bd3b

Update app.py

Browse files

Files changed (1) hide show

app.py +451 -174

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # app.py — ST_Min_Horizontal_Stress (σhmin)
-# Streamlit app that LOADS THE MODEL/META FROM USER UPLOADS (memory only; no auth, no saving).
-# After the model is in memory, the rest of the workflow (Train/Test/Validate/Predict) is unchanged.
 import io, json, os, base64, math
 from pathlib import Path
@@ -29,7 +29,7 @@ TAGLINE  = "Real-Time Minimum Horizontal Stress Prediction"
 FEATURES   = ["Q (gpm)", "SPP (psi)", "ST (kft.lbf)", "WOB (klbf)", "ROP (ft/h)"]
 TARGET     = "σhmin (MPa)"
 PRED_COL   = "σhmin_Pred"
-ACTUAL_COL = None               # If your workbook has a separate actual column, set it via meta.json (actual_col)
 TRANSFORM  = "none"             # "none" | "log10" | "ln"
 UNITS      = "MPa"
@@ -42,6 +42,9 @@ BOLD_FONT = "Arial Black, Arial, sans-serif"
 STRICT_VERSION_CHECK = True
 # =========================
 # Page / CSS
 # =========================
@@ -71,7 +74,7 @@ TABLE_CENTER_CSS = [
 ]
 # =========================
-# Password gate (optional)
 # =========================
 def inline_logo(path="logo.png") -> str:
     try:
@@ -88,8 +91,8 @@ def add_password_gate() -> None:
         required = os.environ.get("APP_PASSWORD", "")
     if not required:
-        return  # disable gate if no password set
     if st.session_state.get("auth_ok", False):
         return
@@ -205,6 +208,266 @@ def _make_X(df: pd.DataFrame, features: list[str]) -> pd.DataFrame:
         X[c] = pd.to_numeric(X[c], errors="coerce")
     return X
 # =========================
 # Session state
 # =========================
@@ -216,12 +479,9 @@ st.session_state.setdefault("dev_file_bytes",b"")
 st.session_state.setdefault("dev_file_loaded",False)
 st.session_state.setdefault("dev_preview",False)
 st.session_state.setdefault("show_preview_modal", False)
-st.session_state.setdefault("model_loaded", False)
-st.session_state.setdefault("model_obj", None)
-st.session_state.setdefault("meta_dict", {})
 # =========================
-# Sidebar: branding + model upload
 # =========================
 st.sidebar.markdown(f"""
     <div class="centered-container">
@@ -231,68 +491,6 @@ st.sidebar.markdown(f"""
     </div>
 """, unsafe_allow_html=True)
-with st.sidebar.expander("① Load model (upload)", expanded=True):
-    up_model = st.file_uploader("Model file (.joblib)", type=["joblib","pkl"], key="mdl_up")
-    up_meta  = st.file_uploader("Meta file (.json)", type=["json"], key="meta_up")
-    load_btn = st.button("Load model", type="primary")
-    if load_btn:
-        if not up_model:
-            st.error("Please upload the model .joblib file.")
-            st.stop()
-        try:
-            st.session_state.model_obj = joblib.load(io.BytesIO(up_model.getvalue()))
-            st.session_state.model_loaded = True
-        except Exception as e:
-            st.error(f"Failed to load model: {e}")
-            st.stop()
-        if up_meta:
-            try:
-                st.session_state.meta_dict = json.loads(up_meta.getvalue().decode("utf-8"))
-            except Exception as e:
-                st.warning(f"Could not parse meta.json: {e}")
-                st.session_state.meta_dict = {}
-        else:
-            st.warning("No meta.json uploaded — using app defaults.")
-            st.session_state.meta_dict = {}
-        st.success("Model loaded in memory ✓")
-# Apply meta (if provided)
-meta = st.session_state.meta_dict
-if meta:
-    FEATURES   = meta.get("features", FEATURES)
-    TARGET     = meta.get("target", TARGET)
-    PRED_COL   = meta.get("pred_col", PRED_COL)
-    ACTUAL_COL = meta.get("actual_col", ACTUAL_COL)
-    TRANSFORM  = meta.get("transform", TRANSFORM)
-    UNITS      = meta.get("units", UNITS)
-    ALIASES    = meta.get("feature_aliases")
-    if STRICT_VERSION_CHECK and meta.get("versions"):
-        import numpy as _np, sklearn as _skl
-        mv = meta["versions"]; msg=[]
-        if mv.get("numpy") and mv["numpy"] != _np.__version__:
-            msg.append(f"NumPy {mv['numpy']} expected, running {_np.__version__}")
-        if mv.get("scikit_learn") and mv["scikit_learn"] != _skl.__version__:
-            msg.append(f"scikit-learn {mv['scikit_learn']} expected, running {_skl.__version__}")
-        if msg:
-            st.warning("Environment mismatch: " + " | ".join(msg))
-else:
-    ALIASES = None
-# Guard: require model first
-if not st.session_state.model_loaded:
-    st.header("Welcome!")
-    st.info("Upload your **model** (.joblib) and optional **meta.json** in the left sidebar, then click **Load model**.")
-    st.stop()
-# Keep a short alias
-model = st.session_state.model_obj
-# =========================
-# Sticky header helper
-# =========================
 def sticky_header(title, message):
     st.markdown(
         f"""
@@ -310,20 +508,161 @@ def sticky_header(title, message):
         unsafe_allow_html=True
     )
 # =========================
 # INTRO
 # =========================
 if st.session_state.app_step == "intro":
-    st.header("Model ready ✓")
     st.markdown(
-        f"This software estimates **Minimum Horizontal Stress** ({UNITS}). "
-        "Now build a case, validate, or predict."
     )
     if st.button("Start Showcase", type="primary"):
         st.session_state.app_step = "dev"; st.rerun()
 # =========================
-# CASE BUILDING (Train/Test)
 # =========================
 def _find_sheet(book, names):
     low2orig = {k.lower(): k for k in book.keys()}
@@ -332,8 +671,8 @@ def _find_sheet(book, names):
     return None
 if st.session_state.app_step == "dev":
-    st.sidebar.header("② Case Building")
-    up = st.sidebar.file_uploader("Upload Train/Test Excel", type=["xlsx","xls"])
     if up is not None:
         st.session_state.dev_file_bytes = up.getvalue()
         st.session_state.dev_file_name = up.name
@@ -368,8 +707,8 @@ if st.session_state.app_step == "dev":
             st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
             st.stop()
-        tr0 = _normalize_columns(book[sh_train].copy(), FEATURES, TARGET, ALIASES)
-        te0 = _normalize_columns(book[sh_test].copy(),  FEATURES, TARGET, ALIASES)
         actual_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in tr0.columns) else TARGET
         if not (ensure_cols(tr0, FEATURES+[actual_col]) and ensure_cols(te0, FEATURES+[actual_col])):
@@ -393,8 +732,8 @@ if st.session_state.app_step == "dev":
             "MAPE%": mape(te[actual_col], te[PRED_COL]),
         }
-        tr_min = tr[FEATURES].min().to_dict(); tr_max = tr[FEATURES].max().to_dict()
-        st.session_state.train_ranges = {f:(float(tr_min[f]), float(tr_max[f])) for f in FEATURES}
         st.markdown('<div class="st-message-box st-success">Case has been built and results are displayed below.</div>', unsafe_allow_html=True)
     def _dev_block(df, m):
@@ -414,8 +753,8 @@ if st.session_state.app_step == "dev":
             st.plotly_chart(track_plot(df, include_actual=True),
                             use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
         with col_cross:
-            act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df.columns) else TARGET
-            st.pyplot(cross_plot_static(df[act_col], df[PRED_COL]), use_container_width=False)
     if "Train" in st.session_state.results or "Test" in st.session_state.results:
         tab1, tab2 = st.tabs(["Training", "Testing"])
@@ -423,61 +762,13 @@ if st.session_state.app_step == "dev":
             with tab1: _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
         if "Test" in st.session_state.results:
             with tab2: _dev_block(st.session_state.results["Test"],  st.session_state.results["m_test"])
-        # Export
-        st.divider()
-        st.markdown("### Export to Excel")
-        options = ["Training","Training_Metrics","Training_Summary","Testing","Testing_Metrics","Testing_Summary","Info"]
-        selected = st.multiselect("Sheets to include", options=options, default=[])
-        def _summary_table(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
-            cols = [c for c in cols if c in df.columns]
-            if not cols: return pd.DataFrame()
-            tbl = (df[cols].agg(['min','max','mean','std'])
-                   .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
-                   .reset_index(names="Field"))
-            return _round_numeric(tbl, 3)
-        def build_export(selected: list[str]) -> tuple[bytes|None, str|None]:
-            res = st.session_state.get("results", {})
-            if not res: return None, None
-            sheets, order = {}, []
-            def _add(n, d):
-                if isinstance(d, pd.DataFrame) and not d.empty: sheets[n]=_round_numeric(d,3); order.append(n)
-            if "Training" in selected and "Train" in res: _add("Training", res["Train"])
-            if "Training_Metrics" in selected and res.get("m_train"): _add("Training_Metrics", pd.DataFrame([res["m_train"]]))
-            if "Training_Summary" in selected and "Train" in res:
-                tr_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in res["Train"].columns]
-                _add("Training_Summary", _summary_table(res["Train"], tr_cols))
-            if "Testing" in selected and "Test" in res: _add("Testing", res["Test"])
-            if "Testing_Metrics" in selected and res.get("m_test"): _add("Testing_Metrics", pd.DataFrame([res["m_test"]]))
-            if "Testing_Summary" in selected and "Test" in res:
-                te_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in res["Test"].columns]
-                _add("Testing_Summary", _summary_table(res["Test"], te_cols))
-            if "Info" in selected:
-                info = pd.DataFrame([
-                    {"Key":"AppName","Value":APP_NAME},
-                    {"Key":"Tagline","Value":TAGLINE},
-                    {"Key":"Target","Value":TARGET},
-                    {"Key":"PredColumn","Value":PRED_COL},
-                    {"Key":"Features","Value":", ".join(FEATURES)},
-                    {"Key":"ExportedAt","Value":datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
-                ])
-                _add("Info", info)
-            if not order: return None, None
-            bio = io.BytesIO()
-            with pd.ExcelWriter(bio, engine=_excel_engine()) as w:
-                for name in order:
-                    df = sheets[name]; df.to_excel(w, sheet_name=_excel_safe_name(name), index=False)
-            bio.seek(0)
-            return bio.getvalue(), f"MinStress_Export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
-        data, fname = build_export(selected)
-        st.download_button("⬇️ Export Excel", data=(data or b""), file_name=(fname or "MinStress_Export.xlsx"),
-                           mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
-                           disabled=(data is None))
 # =========================
 # VALIDATION (with actual)
 # =========================
 if st.session_state.app_step == "validate":
-    st.sidebar.header("③ Validate the Model")
     up = st.sidebar.file_uploader("Upload Validation Excel", type=["xlsx","xls"])
     if up is not None:
         book = read_book_bytes(up.getvalue())
@@ -496,10 +787,12 @@ if st.session_state.app_step == "validate":
         book = read_book_bytes(up.getvalue())
         names = list(book.keys())
         name = next((s for s in names if s.lower() in ("validation","validate","validation2","val","val2")), names[0])
-        df0 = _normalize_columns(book[name].copy(), FEATURES, TARGET, ALIASES)
-        act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df0.columns) else TARGET
-        if not ensure_cols(df0, FEATURES+[act_col]):
             st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
         df = df0.copy()
         df[PRED_COL] = _inv_transform(model.predict(_make_X(df0, FEATURES)), TRANSFORM)
         st.session_state.results["Validate"] = df
@@ -517,9 +810,9 @@ if st.session_state.app_step == "validate":
                 )
         st.session_state.results["m_val"] = {
-            "R":     pearson_r(df[act_col], df[PRED_COL]),
-            "RMSE":  rmse(df[act_col], df[PRED_COL]),
-            "MAPE%": mape(df[act_col], df[PRED_COL]),
         }
         st.session_state.results["sv_val"] = {"n":len(df), "pred_min":float(df[PRED_COL].min()), "pred_max":float(df[PRED_COL].max()), "oor":oor_pct}
         st.session_state.results["oor_tbl"] = tbl
@@ -541,28 +834,24 @@ if st.session_state.app_step == "validate":
             st.plotly_chart(track_plot(st.session_state.results["Validate"], include_actual=True),
                             use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
         with col_cross:
-            act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in st.session_state.results["Validate"].columns) else TARGET
-            st.pyplot(cross_plot_static(st.session_state.results["Validate"][act_col],
                                         st.session_state.results["Validate"][PRED_COL]),
                       use_container_width=False)
-        # Export button
-        st.divider()
-        val_tbl = st.session_state.results["Validate"]
-        bio = io.BytesIO()
-        with pd.ExcelWriter(bio, engine=_excel_engine()) as w:
-            val_tbl.to_excel(w, sheet_name="Validation", index=False)
-            pd.DataFrame([m]).to_excel(w, sheet_name="Validation_Metrics", index=False)
-        bio.seek(0)
-        st.download_button("⬇️ Export Excel", data=bio.getvalue(),
-                           file_name=f"Validation_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx",
-                           mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
 # =========================
 # PREDICTION (no actual)
 # =========================
 if st.session_state.app_step == "predict":
-    st.sidebar.header("④ Prediction (No Actual)")
     up = st.sidebar.file_uploader("Upload Prediction Excel", type=["xlsx","xls"])
     if up is not None:
         book = read_book_bytes(up.getvalue())
@@ -578,7 +867,7 @@ if st.session_state.app_step == "predict":
     if go_btn and up is not None:
         book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
-        df0 = _normalize_columns(book[name].copy(), FEATURES, TARGET, ALIASES)
         if not ensure_cols(df0, FEATURES):
             st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
         df = df0.copy()
@@ -613,6 +902,7 @@ if st.session_state.app_step == "predict":
         with col_right:
             st.plotly_chart(track_plot(df, include_actual=False),
                             use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
 # =========================
 # Preview modal
@@ -632,36 +922,23 @@ if st.session_state.show_preview_modal:
             tabs = st.tabs(names)
             for t, name in zip(tabs, names):
                 with t:
-                    df = _normalize_columns(book_to_preview[name], FEATURES, TARGET, ALIASES)
                     t1, t2 = st.tabs(["Tracks", "Summary"])
                     with t1:
-                        # small quick-look plot of the features
-                        cols = [c for c in FEATURES if c in df.columns]
-                        if not cols:
-                            st.info("No feature columns to preview.")
-                        else:
-                            idx = np.arange(1, len(df)+1)
-                            fig, axes = plt.subplots(1, len(cols), figsize=(2.4*len(cols), 7.0), sharey=True, dpi=100)
-                            if len(cols)==1: axes=[axes]
-                            for ax, col in zip(axes, cols):
-                                x = pd.to_numeric(df[col], errors="coerce")
-                                ax.plot(x, idx, '-', lw=1.6)
-                                ax.set_xlabel(col); ax.xaxis.set_label_position('top'); ax.xaxis.tick_top()
-                                ax.set_ylim(idx.max(), idx.min()); ax.grid(True, linestyle=":", alpha=0.3)
-                            fig.tight_layout()
-                            st.pyplot(fig, use_container_width=True)
                     with t2:
-                        cols = [c for c in FEATURES if c in df.columns]
-                        if not cols:
                             st.info("No feature columns found to summarize.")
                         else:
                             tbl = (
-                                df[cols]
                                   .agg(['min','max','mean','std'])
                                   .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
                                   .reset_index(names="Feature")
                             )
                             df_centered_rounded(tbl)
     st.session_state.show_preview_modal = False
 # =========================

 # app.py — ST_Min_Horizontal_Stress (σhmin)
+# Full Streamlit app — trains the model inside the app (fixed best params or optional GridSearch).
+# No external model file is required. Users can still download the trained .joblib + meta.json.
 import io, json, os, base64, math
 from pathlib import Path
 FEATURES   = ["Q (gpm)", "SPP (psi)", "ST (kft.lbf)", "WOB (klbf)", "ROP (ft/h)"]
 TARGET     = "σhmin (MPa)"
 PRED_COL   = "σhmin_Pred"
+ACTUAL_COL = None               # If your workbook has a separate actual column, set via meta.json (actual_col)
 TRANSFORM  = "none"             # "none" | "log10" | "ln"
 UNITS      = "MPa"
 STRICT_VERSION_CHECK = True
+# Local (optional) — only used for Excel export helper sizing
+MODELS_DIR = Path("models")
 # =========================
 # Page / CSS
 # =========================
 ]
 # =========================
+# Password gate
 # =========================
 def inline_logo(path="logo.png") -> str:
     try:
         required = os.environ.get("APP_PASSWORD", "")
     if not required:
+        st.warning("Set APP_PASSWORD in Secrets (or environment) and restart.")
+        st.stop()
     if st.session_state.get("auth_ok", False):
         return
         X[c] = pd.to_numeric(X[c], errors="coerce")
     return X
+# =========================
+# Export helpers
+# =========================
def _summary_table(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
    """Build a Min/Max/Mean/Std table for the requested columns of ``df``.

    Names in ``cols`` that are not columns of ``df`` are ignored. When none
    of them are present an empty DataFrame is returned. Otherwise the result
    has one row per column (named in a "Field" column) and is passed through
    ``_round_numeric(..., 3)`` before being returned.
    """
    present = [name for name in cols if name in df.columns]
    if not present:
        return pd.DataFrame()

    # agg() yields one row per statistic; transpose so each field is a row.
    stats = df[present].agg(['min', 'max', 'mean', 'std']).T
    stats = stats.rename(columns={"min": "Min", "max": "Max", "mean": "Mean", "std": "Std"})
    stats = stats.reset_index(names="Field")
    return _round_numeric(stats, 3)
def _train_ranges_df(ranges: dict[str, tuple[float, float]]) -> pd.DataFrame:
    """Turn a ``{feature: (min, max)}`` mapping into a Feature/Min/Max table.

    An empty (or otherwise falsy) mapping yields an empty DataFrame. The
    table is passed through ``_round_numeric(..., 3)`` before being returned.
    """
    if not ranges:
        return pd.DataFrame()

    # Features arrive as dict keys (columns); transpose so each becomes a row.
    table = pd.DataFrame(ranges).transpose().reset_index()
    table.columns = ["Feature", "Min", "Max"]
    return _round_numeric(table, 3)
+def _excel_autofit(writer, sheet_name: str, df: pd.DataFrame, min_w: int = 8, max_w: int = 40):
+    try:
+        import xlsxwriter  # noqa: F401
+    except Exception:
+        return
+    ws = writer.sheets[sheet_name]
+    for i, col in enumerate(df.columns):
+        series = df[col].astype(str)
+        max_len = max([len(str(col))] + series.map(len).tolist())
+        ws.set_column(i, i, max(min_w, min(max_len + 2, max_w)))
+    ws.freeze_panes(1, 0)
def _available_sections() -> list[str]:
    """List the export sheet names that can currently be offered.

    Sheet groups are included according to which result keys exist in
    ``st.session_state["results"]``. "Training_Ranges" is added when
    ``train_ranges`` is set in session state, and "Info" is always last.
    """
    results = st.session_state.get("results", {})
    groups = (
        ("Train",       ["Training", "Training_Metrics", "Training_Summary"]),
        ("Test",        ["Testing", "Testing_Metrics", "Testing_Summary"]),
        ("Validate",    ["Validation", "Validation_Metrics", "Validation_Summary", "Validation_OOR"]),
        ("PredictOnly", ["Prediction", "Prediction_Summary"]),
    )
    names = [sheet for key, sheets in groups if key in results for sheet in sheets]
    if st.session_state.get("train_ranges"):
        names.append("Training_Ranges")
    names.append("Info")
    return names
def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: bool = True) -> tuple[bytes|None, str|None, list[str]]:
    """Assemble the selected result tables into an in-memory Excel workbook.

    Args:
        selected: Sheet names the user asked for.
        ndigits: Precision handed to ``_round_numeric`` for every sheet.
        do_autofit: When true, ``_excel_autofit`` is applied to each sheet.

    Returns:
        ``(workbook_bytes, file_name, sheet_names)``. ``(None, None, [])`` is
        returned when session state holds no results or when none of the
        selected sheets produced a non-empty table.
    """
    results = st.session_state.get("results", {})
    if not results:
        return None, None, []

    frames: dict[str, pd.DataFrame] = {}
    sheet_order: list[str] = []

    def _add(name: str, df: pd.DataFrame):
        # Missing or empty tables are left out of the workbook entirely.
        if df is None:
            return
        if isinstance(df, pd.DataFrame) and df.empty:
            return
        frames[name] = _round_numeric(df, ndigits)
        sheet_order.append(name)

    def _one_row(mapping) -> pd.DataFrame:
        # Metrics/summary dicts become a single-row table.
        return pd.DataFrame([mapping])

    def _feature_summary(result_key: str) -> pd.DataFrame:
        # Min/Max/Mean/Std over the features plus target/prediction if present.
        frame = results[result_key]
        extras = [c for c in [TARGET, PRED_COL] if c in frame.columns]
        return _summary_table(frame, FEATURES + extras)

    # --- Training ---
    if "Training" in selected and "Train" in results:
        _add("Training", results["Train"])
    if "Training_Metrics" in selected and results.get("m_train"):
        _add("Training_Metrics", _one_row(results["m_train"]))
    if "Training_Summary" in selected and "Train" in results:
        _add("Training_Summary", _feature_summary("Train"))

    # --- Testing ---
    if "Testing" in selected and "Test" in results:
        _add("Testing", results["Test"])
    if "Testing_Metrics" in selected and results.get("m_test"):
        _add("Testing_Metrics", _one_row(results["m_test"]))
    if "Testing_Summary" in selected and "Test" in results:
        _add("Testing_Summary", _feature_summary("Test"))

    # --- Validation ---
    if "Validation" in selected and "Validate" in results:
        _add("Validation", results["Validate"])
    if "Validation_Metrics" in selected and results.get("m_val"):
        _add("Validation_Metrics", _one_row(results["m_val"]))
    if "Validation_Summary" in selected and results.get("sv_val"):
        _add("Validation_Summary", _one_row(results["sv_val"]))
    oor_table = results.get("oor_tbl")
    if "Validation_OOR" in selected and isinstance(oor_table, pd.DataFrame) and not oor_table.empty:
        _add("Validation_OOR", oor_table.reset_index(drop=True))

    # --- Prediction ---
    if "Prediction" in selected and "PredictOnly" in results:
        _add("Prediction", results["PredictOnly"])
    if "Prediction_Summary" in selected and results.get("sv_pred"):
        _add("Prediction_Summary", _one_row(results["sv_pred"]))

    # --- Training ranges / run info ---
    if "Training_Ranges" in selected and st.session_state.get("train_ranges"):
        _add("Training_Ranges", _train_ranges_df(st.session_state["train_ranges"]))
    if "Info" in selected:
        info_rows = [
            ("AppName",    APP_NAME),
            ("Tagline",    TAGLINE),
            ("Target",     TARGET),
            ("PredColumn", PRED_COL),
            ("Features",   ", ".join(FEATURES)),
            ("ExportedAt", datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
        ]
        _add("Info", pd.DataFrame([{"Key": key, "Value": value} for key, value in info_rows]))

    if not sheet_order:
        return None, None, []

    buffer = io.BytesIO()
    with pd.ExcelWriter(buffer, engine=_excel_engine()) as writer:
        for name in sheet_order:
            frame = frames[name]
            safe_name = _excel_safe_name(name)
            frame.to_excel(writer, sheet_name=safe_name, index=False)
            if do_autofit:
                _excel_autofit(writer, safe_name, frame)
    buffer.seek(0)

    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    return buffer.getvalue(), f"MinStress_Export_{stamp}.xlsx", sheet_order
def render_export_button(phase_key: str) -> None:
    """Render the "Export to Excel" sheet picker and download button.

    Nothing is drawn while session state holds no results. ``phase_key`` is
    used to suffix the widget keys (``sheets_<phase_key>`` and
    ``download_<phase_key>``). The download button is disabled until at least
    one sheet is chosen and a workbook could be built.
    """
    if not st.session_state.get("results", {}):
        return

    xlsx_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    button_key = f"download_{phase_key}"

    st.divider()
    st.markdown("### Export to Excel")
    chosen = st.multiselect(
        "Sheets to include",
        options=_available_sections(),
        default=[],
        placeholder="Choose option(s)",
        help="Pick the sheets you want in the Excel export.",
        key=f"sheets_{phase_key}",
    )

    if chosen:
        data, fname, names = build_export_workbook(selected=chosen, ndigits=3, do_autofit=True)
        if names:
            st.caption("Will include: " + ", ".join(names))
        st.download_button(
            "⬇️ Export Excel",
            data=(data or b""),
            file_name=(fname or "MinStress_Export.xlsx"),
            mime=xlsx_mime,
            disabled=(data is None),
            key=button_key,
        )
        return

    # Nothing selected yet: show a disabled placeholder button.
    st.caption("Select one or more sheets above to enable export.")
    st.download_button(
        "⬇️ Export Excel",
        data=b"",
        file_name="MinStress_Export.xlsx",
        mime=xlsx_mime,
        disabled=True,
        key=button_key,
    )
+# =========================
+# Plots
+# =========================
def cross_plot_static(actual, pred):
    """Draw a square actual-vs-predicted scatter with a dashed 1:1 line.

    Both inputs are converted to float Series. The shared axis range is the
    min/max over all finite values of both series, padded by 3 %. If there
    are no finite values at all (empty or all-NaN input) the range falls
    back to 0..1 so the figure can still be produced instead of failing on
    NaN axis limits.

    Args:
        actual: Measured values (x axis).
        pred: Predicted values (y axis).

    Returns:
        The matplotlib figure.
    """
    a = pd.Series(actual, dtype=float)
    p = pd.Series(pred,   dtype=float)

    # Pool both series so the limits do not depend on which one holds NaN;
    # +/-inf would make the axis limits invalid just like NaN does.
    finite = pd.concat([a, p], ignore_index=True).replace([np.inf, -np.inf], np.nan).dropna()
    if finite.empty:
        lo, hi = 0.0, 1.0
    else:
        lo, hi = float(finite.min()), float(finite.max())

    # A degenerate (single-valued) range still gets a visible margin.
    pad = 0.03 * (hi - lo if hi > lo else 1.0)
    lo2, hi2 = lo - pad, hi + pad
    ticks = np.linspace(lo2, hi2, 5)

    dpi = 110
    fig, ax = plt.subplots(figsize=(CROSS_W / dpi, CROSS_H / dpi), dpi=dpi, constrained_layout=False)
    ax.scatter(a, p, s=14, c=COLORS["pred"], alpha=0.9, linewidths=0)
    ax.plot([lo2, hi2], [lo2, hi2], linestyle="--", linewidth=1.2, color=COLORS["ref"])

    # Identical limits/ticks on both axes plus equal aspect keep the 1:1 line at 45 degrees.
    ax.set_xlim(lo2, hi2); ax.set_ylim(lo2, hi2)
    ax.set_xticks(ticks);  ax.set_yticks(ticks)
    ax.set_aspect("equal", adjustable="box")

    fmt = FuncFormatter(lambda x, _: f"{x:.2f}")
    ax.xaxis.set_major_formatter(fmt); ax.yaxis.set_major_formatter(fmt)
    ax.set_xlabel(f"Actual Min Stress ({UNITS})",  fontweight="bold", fontsize=10, color="black")
    ax.set_ylabel(f"Predicted Min Stress ({UNITS})", fontweight="bold", fontsize=10, color="black")
    ax.tick_params(labelsize=6, colors="black")
    ax.grid(True, linestyle=":", alpha=0.3)
    for spine in ax.spines.values():
        spine.set_linewidth(1.1); spine.set_color("#444")

    fig.subplots_adjust(left=0.16, bottom=0.16, right=0.98, top=0.98)
    return fig
def track_plot(df, include_actual=True):
    """Build the Plotly track: predicted (and optionally actual) value vs. depth.

    The vertical axis uses the first column whose name contains "depth"
    (case-insensitive); without one, a 1-based point index is used. The
    vertical range is reversed so values increase downwards. The actual
    trace is drawn only when ``include_actual`` is true and the actual
    column (``ACTUAL_COL`` if set and present, else ``TARGET``) exists.
    """
    depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
    if depth_col is None:
        y = pd.Series(np.arange(1, len(df) + 1))
        ylab = "Point Index"
        y_range = [float(y.max()), float(y.min())]
    else:
        y = pd.to_numeric(df[depth_col], errors="coerce")
        ylab = depth_col
        y_range = [float(np.nanmax(y)), float(np.nanmin(y))]  # reversed

    act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df.columns) else TARGET
    show_actual = include_actual and act_col in df.columns

    # Horizontal range spans every value that will be drawn, plus 3 % padding.
    x_series = pd.Series(df.get(PRED_COL, pd.Series(dtype=float))).astype(float)
    if show_actual:
        x_series = pd.concat([x_series, pd.Series(df[act_col]).astype(float)], ignore_index=True)
    x_lo, x_hi = float(x_series.min()), float(x_series.max())
    span = x_hi - x_lo if x_hi > x_lo else 1.0
    x_pad = 0.03 * span
    xmin, xmax = x_lo - x_pad, x_hi + x_pad
    tick0 = _nice_tick0(xmin, step=max((xmax - xmin) / 10.0, 0.1))

    def _hover(label):
        # Same hover text layout for both traces.
        return f"{label}: " + "%{x:.2f}<br>" + ylab + ": %{y}<extra></extra>"

    def _axis_style():
        # Styling shared by both axes; fresh dicts for each call.
        return dict(
            title_font=dict(size=20, family=BOLD_FONT, color="#000"),
            tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
            ticks="outside",
            showline=True, linewidth=1.2, linecolor="#444", mirror=True,
            showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True,
        )

    fig = go.Figure()
    if PRED_COL in df.columns:
        fig.add_trace(go.Scatter(
            x=df[PRED_COL], y=y, mode="lines",
            line=dict(color=COLORS["pred"], width=1.8),
            name=PRED_COL,
            hovertemplate=_hover(PRED_COL),
        ))
    if show_actual:
        fig.add_trace(go.Scatter(
            x=df[act_col], y=y, mode="lines",
            line=dict(color=COLORS["actual"], width=2.0, dash="dot"),
            name=f"{act_col} (actual)",
            hovertemplate=_hover(act_col),
        ))

    fig.update_layout(
        height=TRACK_H, width=TRACK_W, autosize=False,
        paper_bgcolor="#fff", plot_bgcolor="#fff",
        margin=dict(l=64, r=16, t=36, b=48), hovermode="closest",
        font=dict(size=FONT_SZ, color="#000"),
        legend=dict(x=0.98, y=0.05, xanchor="right", yanchor="bottom",
                    bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1),
        legend_title_text=""
    )
    fig.update_xaxes(
        title_text=f"Min Stress ({UNITS})",
        side="top", range=[xmin, xmax],
        tickformat=",.2f", tickmode="auto", tick0=tick0,
        **_axis_style()
    )
    fig.update_yaxes(
        title_text=ylab,
        range=y_range,
        **_axis_style()
    )
    return fig
def preview_tracks(df: pd.DataFrame, cols: list[str]):
    """Plot each requested column as its own side-by-side track.

    Columns missing from ``df`` are ignored; if none remain, a small
    placeholder figure reading "No selected columns" is returned. The shared
    vertical axis is the first column whose name contains "depth"
    (case-insensitive), or a 1-based point index otherwise, and is reversed
    so values increase downwards.
    """
    present = [c for c in cols if c in df.columns]
    n = len(present)
    if not present:
        fig, ax = plt.subplots(figsize=(4, 2))
        ax.text(0.5, 0.5, "No selected columns", ha="center", va="center")
        ax.axis("off")
        return fig

    depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
    if depth_col is None:
        idx = pd.Series(np.arange(1, len(df) + 1))
        y_label = "Point Index"
        y_min, y_max = float(idx.min()), float(idx.max())
    else:
        idx = pd.to_numeric(df[depth_col], errors="coerce")
        y_label = depth_col
        y_min, y_max = float(np.nanmin(idx)), float(np.nanmax(idx))

    # One colour per column, cycling through the palette if there are many.
    palette = plt.get_cmap("tab20")
    col_colors = dict(zip(present, (palette(i % palette.N) for i in range(n))))

    # squeeze=False always returns a 2-D grid, so a single track needs no special case.
    fig, grid = plt.subplots(1, n, figsize=(2.4 * n, 7.0), sharey=True, dpi=100, squeeze=False)
    for position, (ax, col) in enumerate(zip(grid[0], present)):
        values = pd.to_numeric(df[col], errors="coerce")
        ax.plot(values, idx, '-', lw=1.6, color=col_colors[col])
        ax.set_xlabel(col)
        ax.xaxis.set_label_position('top')
        ax.xaxis.tick_top()
        ax.set_ylim(y_max, y_min)  # reversed depth down
        ax.grid(True, linestyle=":", alpha=0.3)
        if position:
            # Only the left-most track carries the vertical axis labelling.
            ax.tick_params(labelleft=False)
            ax.set_ylabel("")
        else:
            ax.set_ylabel(y_label)
    fig.tight_layout()
    return fig
 # =========================
 # Session state
 # =========================
 st.session_state.setdefault("dev_file_loaded",False)
 st.session_state.setdefault("dev_preview",False)
 st.session_state.setdefault("show_preview_modal", False)
 # =========================
+# Sidebar branding
 # =========================
 st.sidebar.markdown(f"""
     <div class="centered-container">
     </div>
 """, unsafe_allow_html=True)
 def sticky_header(title, message):
     st.markdown(
         f"""
         unsafe_allow_html=True
     )
+# ===============================================================
+# TRAIN THE MODEL IN-APP (no external pickle needed)
+# ===============================================================
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.model_selection import train_test_split
+# ❶ Set YOUR optimized hyperparameters here
# Hyperparameters used for the fixed ("Train now") option; they are unpacked
# directly into RandomForestRegressor(**params).
BEST_PARAMS = dict(
    n_estimators=300,
    max_depth=22,
    max_features="sqrt",    # or "log2" / float in (0,1]
    min_samples_split=2,
    min_samples_leaf=1,
    bootstrap=True,
    random_state=42,
    n_jobs=-1,
)

# Sidebar: how the model is obtained.
st.sidebar.markdown("### Model source")
source = st.sidebar.radio(
    label="Choose how to get the model",
    options=["Train now (fixed best params)", "Train with Grid Search (optional)"],
    help="Avoids uploading big pickles. Deterministic best-params training is recommended.",
)

# Sidebar: workbook that provides the training rows.
st.sidebar.markdown("### Training data")
file_train = st.sidebar.file_uploader(
    label="Upload Excel for training (has Train sheet or any sheet)",
    type=["xlsx", "xls"],
)
+def _train_model_fixed(X: pd.DataFrame, y: pd.Series, params: dict) -> RandomForestRegressor:
+    rf = RandomForestRegressor(**params)
+    rf.fit(X, y)
+    return rf
+def _download_buttons(model_obj, meta_dict):
+    # model
+    buf_model = io.BytesIO()
+    joblib.dump(model_obj, buf_model)
+    buf_model.seek(0)
+    st.download_button("⬇️ Download trained model (.joblib)", buf_model.getvalue(), "minstress_model.joblib")
+    # meta
+    meta_bytes = json.dumps(meta_dict, indent=2).encode("utf-8")
+    st.download_button("⬇️ Download meta (.json)", meta_bytes, "minstress_meta.json")
+if not file_train:
+    st.info("Upload a training Excel file in the sidebar to build the model.")
+    st.stop()
+# Load train data
+book_train = read_book_bytes(file_train.getvalue())
+sheet_train = next((s for s in book_train if s.lower() in ("train", "training")), list(book_train)[0])
+df_tr0 = _normalize_columns(book_train[sheet_train].copy(), FEATURES, TARGET, None)
+# Build X/y
+act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df_tr0.columns) else TARGET
+if not ensure_cols(df_tr0, FEATURES + [act_col]):
+    st.stop()
+X_all = _make_X(df_tr0, FEATURES).copy()
+y_all = pd.to_numeric(df_tr0[act_col], errors="coerce")
+# Split for reporting
+tsz = st.sidebar.slider("Validation split for reporting", 0.10, 0.40, 0.20, 0.05)
+seed = st.sidebar.number_input("Random seed", 0, 1_000_000, BEST_PARAMS.get("random_state", 42), step=1)
+Xtr, Xva, ytr, yva = train_test_split(X_all, y_all, test_size=tsz, random_state=seed)
+if source == "Train with Grid Search (optional)":
+    from sklearn.model_selection import GridSearchCV
+    st.sidebar.markdown("### Grid Search")
+    n_list    = st.sidebar.multiselect("n_estimators", [100, 200, 300, 400], default=[BEST_PARAMS["n_estimators"]])
+    depth_list= st.sidebar.multiselect("max_depth",  [12, 16, 20, 22, 26], default=[BEST_PARAMS["max_depth"]])
+    maxf_list = st.sidebar.multiselect("max_features", ["sqrt", "log2"], default=[BEST_PARAMS["max_features"]])
+    param_grid = {
+        "n_estimators": n_list or [BEST_PARAMS["n_estimators"]],
+        "max_depth": depth_list or [BEST_PARAMS["max_depth"]],
+        "max_features": maxf_list or [BEST_PARAMS["max_features"]],
+        "min_samples_split": [BEST_PARAMS["min_samples_split"]],
+        "min_samples_leaf": [BEST_PARAMS["min_samples_leaf"]],
+        "bootstrap": [BEST_PARAMS["bootstrap"]],
+        "random_state": [seed]
+    }
+    base = RandomForestRegressor(n_jobs=-1)
+    with st.spinner("Running GridSearchCV..."):
+        gs = GridSearchCV(base, param_grid=param_grid, cv=3, n_jobs=-1, refit=True)
+        gs.fit(Xtr, ytr)
+    best = gs.best_estimator_
+    st.success(f"GridSearch done. Best params: {gs.best_params_}")
+    # Validation report
+    pred_tr = best.predict(Xtr); pred_va = best.predict(Xva)
+    m_train = {"R": pearson_r(ytr, pred_tr), "RMSE": rmse(ytr, pred_tr), "MAPE%": mape(ytr, pred_tr)}
+    m_valid = {"R": pearson_r(yva, pred_va), "RMSE": rmse(yva, pred_va), "MAPE%": mape(yva, pred_va)}
+    st.write("**Training split metrics**:", m_train)
+    st.write("**Validation split metrics**:", m_valid)
+    # Final fit on all data with best params
+    model = RandomForestRegressor(**{**gs.best_params_, "n_jobs": -1, "random_state": seed})
+    model.fit(X_all, y_all)
+else:
+    # Deterministic fixed-params training (recommended)
+    params = {**BEST_PARAMS, "random_state": seed}
+    with st.spinner("Training fixed-params model..."):
+        tmp_model = _train_model_fixed(Xtr, ytr, params)
+        pred_tr = tmp_model.predict(Xtr); pred_va = tmp_model.predict(Xva)
+        m_train = {"R": pearson_r(ytr, pred_tr), "RMSE": rmse(ytr, pred_tr), "MAPE%": mape(ytr, pred_tr)}
+        m_valid = {"R": pearson_r(yva, pred_va), "RMSE": rmse(yva, pred_va), "MAPE%": mape(yva, pred_va)}
+        st.write("**Training split metrics**:", m_train)
+        st.write("**Validation split metrics**:", m_valid)
+        model = _train_model_fixed(X_all, y_all, params)
+# Create meta + training ranges for OOR checks later
+meta = {
+    "features": FEATURES,
+    "target": TARGET,
+    "pred_col": PRED_COL,
+    "actual_col": ACTUAL_COL,
+    "transform": TRANSFORM,
+    "units": UNITS,
+    "versions": {
+        "numpy": np.__version__,
+        "scikit_learn": __import__("sklearn").__version__
+    },
+    "training": {
+        "n_rows": int(len(X_all)),
+        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+        "used_grid_search": (source == "Train with Grid Search (optional)")
+    }
+}
+tr_min = X_all.min().to_dict()
+tr_max = X_all.max().to_dict()
+st.session_state.train_ranges = {f: (float(tr_min[f]), float(tr_max[f])) for f in FEATURES}
+st.success("Model ready ✓  — proceed to **Case Building**, **Validation**, or **Prediction**.")
+_download_buttons(model, meta)
 # =========================
 # INTRO
 # =========================
 if st.session_state.app_step == "intro":
+    st.header("Welcome!")
+    st.markdown(f"This software is developed by *Smart Thinking AI-Solutions Team* to estimate **Minimum Horizontal Stress** ({UNITS}) from drilling/offset data.")
+    st.subheader("How It Works")
     st.markdown(
+        "1) **Upload your data to build the case and preview the model performance.**  \n"
+        "2) Click **Run Model** to compute metrics and plots.  \n"
+        "3) **Proceed to Validation** (with actual) or **Proceed to Prediction** (no actual)."
     )
     if st.button("Start Showcase", type="primary"):
         st.session_state.app_step = "dev"; st.rerun()
 # =========================
+# CASE BUILDING (Train/Test) — optional evaluation stage
 # =========================
 def _find_sheet(book, names):
     low2orig = {k.lower(): k for k in book.keys()}
     return None
 if st.session_state.app_step == "dev":
+    st.sidebar.header("Case Building")
+    up = st.sidebar.file_uploader("Upload Your Data File (Train/Test sheets)", type=["xlsx","xls"])
     if up is not None:
         st.session_state.dev_file_bytes = up.getvalue()
         st.session_state.dev_file_name = up.name
             st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
             st.stop()
+        tr0 = _normalize_columns(book[sh_train].copy(), FEATURES, TARGET, None)
+        te0 = _normalize_columns(book[sh_test].copy(),  FEATURES, TARGET, None)
         actual_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in tr0.columns) else TARGET
         if not (ensure_cols(tr0, FEATURES+[actual_col]) and ensure_cols(te0, FEATURES+[actual_col])):
             "MAPE%": mape(te[actual_col], te[PRED_COL]),
         }
+        tr_min2 = tr[FEATURES].min().to_dict(); tr_max2 = tr[FEATURES].max().to_dict()
+        st.session_state.train_ranges = {f:(float(tr_min2[f]), float(tr_max2[f])) for f in FEATURES}
         st.markdown('<div class="st-message-box st-success">Case has been built and results are displayed below.</div>', unsafe_allow_html=True)
     def _dev_block(df, m):
             st.plotly_chart(track_plot(df, include_actual=True),
                             use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
         with col_cross:
+            act_col2 = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df.columns) else TARGET
+            st.pyplot(cross_plot_static(df[act_col2], df[PRED_COL]), use_container_width=False)
     if "Train" in st.session_state.results or "Test" in st.session_state.results:
         tab1, tab2 = st.tabs(["Training", "Testing"])
             with tab1: _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
         if "Test" in st.session_state.results:
             with tab2: _dev_block(st.session_state.results["Test"],  st.session_state.results["m_test"])
+        render_export_button(phase_key="dev")
 # =========================
 # VALIDATION (with actual)
 # =========================
 if st.session_state.app_step == "validate":
+    st.sidebar.header("Validate the Model")
     up = st.sidebar.file_uploader("Upload Validation Excel", type=["xlsx","xls"])
     if up is not None:
         book = read_book_bytes(up.getvalue())
         book = read_book_bytes(up.getvalue())
         names = list(book.keys())
         name = next((s for s in names if s.lower() in ("validation","validate","validation2","val","val2")), names[0])
+        df0 = _normalize_columns(book[name].copy(), FEATURES, TARGET, None)
+        act_col2 = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df0.columns) else TARGET
+        if not ensure_cols(df0, FEATURES+[act_col2]):
             st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
         df = df0.copy()
         df[PRED_COL] = _inv_transform(model.predict(_make_X(df0, FEATURES)), TRANSFORM)
         st.session_state.results["Validate"] = df
                 )
         st.session_state.results["m_val"] = {
+            "R":     pearson_r(df[act_col2], df[PRED_COL]),
+            "RMSE":  rmse(df[act_col2], df[PRED_COL]),
+            "MAPE%": mape(df[act_col2], df[PRED_COL]),
         }
         st.session_state.results["sv_val"] = {"n":len(df), "pred_min":float(df[PRED_COL].min()), "pred_max":float(df[PRED_COL].max()), "oor":oor_pct}
         st.session_state.results["oor_tbl"] = tbl
             st.plotly_chart(track_plot(st.session_state.results["Validate"], include_actual=True),
                             use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
         with col_cross:
+            act_col3 = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in st.session_state.results["Validate"].columns) else TARGET
+            st.pyplot(cross_plot_static(st.session_state.results["Validate"][act_col3],
                                         st.session_state.results["Validate"][PRED_COL]),
                       use_container_width=False)
+        render_export_button(phase_key="validate")
+        sv = st.session_state.results["sv_val"]
+        if sv["oor"] > 0: st.markdown('<div class="st-message-box st-warning">Some inputs fall outside **training min–max** ranges.</div>', unsafe_allow_html=True)
+        if st.session_state.results["oor_tbl"] is not None:
+            st.write("*Out-of-range rows (vs. Training min–max):*")
+            df_centered_rounded(st.session_state.results["oor_tbl"])
 # =========================
 # PREDICTION (no actual)
 # =========================
 if st.session_state.app_step == "predict":
+    st.sidebar.header("Prediction (No Actual)")
     up = st.sidebar.file_uploader("Upload Prediction Excel", type=["xlsx","xls"])
     if up is not None:
         book = read_book_bytes(up.getvalue())
     if go_btn and up is not None:
         book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
+        df0 = _normalize_columns(book[name].copy(), FEATURES, TARGET, None)
         if not ensure_cols(df0, FEATURES):
             st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
         df = df0.copy()
         with col_right:
             st.plotly_chart(track_plot(df, include_actual=False),
                             use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
+        render_export_button(phase_key="predict")
 # =========================
 # Preview modal
             tabs = st.tabs(names)
             for t, name in zip(tabs, names):
                 with t:
+                    df = _normalize_columns(book_to_preview[name], FEATURES, TARGET, None)
                     t1, t2 = st.tabs(["Tracks", "Summary"])
                     with t1:
+                        st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)
                     with t2:
+                        feat_present = [c for c in FEATURES if c in df.columns]
+                        if not feat_present:
                             st.info("No feature columns found to summarize.")
                         else:
                             tbl = (
+                                df[feat_present]
                                   .agg(['min','max','mean','std'])
                                   .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
                                   .reset_index(names="Feature")
                             )
                             df_centered_rounded(tbl)
     st.session_state.show_preview_modal = False
 # =========================