UCS2014 commited on
Commit
5f36e3f
·
verified ·
1 Parent(s): 83bd4f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +272 -348
app.py CHANGED
@@ -1,4 +1,5 @@
1
- # app_rhob.py — ST_Log_RHOB (Formation Bulk Density, g/cc) — MAPE version
 
2
 
3
  import io, json, os, base64, math
4
  from pathlib import Path
@@ -7,8 +8,11 @@ import pandas as pd
7
  import numpy as np
8
  import joblib
9
  from datetime import datetime
 
 
 
10
 
11
- # Matplotlib (static plots)
12
  import matplotlib
13
  matplotlib.use("Agg")
14
  import matplotlib.pyplot as plt
@@ -17,31 +21,47 @@ from matplotlib.ticker import FuncFormatter
17
  import plotly.graph_objects as go
18
  from sklearn.metrics import mean_squared_error
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  # =========================
21
- # Constants (RHOB variant)
22
  # =========================
23
- APP_NAME = "ST_Log_RHOB"
24
- TAGLINE = "Real-Time Formation Bulk Density (RHOB) Prediction"
25
 
26
- # Defaults (overridden by rhob_meta.json if present)
27
- FEATURES = [
28
- "WOB (klbf)",
29
- "Torque (kft.lbf)",
30
- "SPP (psi)",
31
- "RPM (1/min)",
32
- "ROP (ft/h)",
33
- "Flow Rate (gpm)",
34
- ]
35
- TARGET = "RHOB" # canonical target name
36
- PRED_COL = "RHOB_Pred"
37
 
38
  MODELS_DIR = Path("models")
39
- DEFAULT_MODEL = MODELS_DIR / "rhob_model.joblib"
 
 
 
 
40
  MODEL_FALLBACKS = [MODELS_DIR / "model.joblib", MODELS_DIR / "model.pkl"]
41
- COLORS = {"pred": "#1f77b4", "actual": "#f2b702", "ref": "#5a5a5a"}
42
 
43
- # Optional env banner from meta
44
- STRICT_VERSION_CHECK = False
45
 
46
  # ---- Plot sizing ----
47
  CROSS_W = 350
@@ -58,11 +78,11 @@ st.set_page_config(page_title=APP_NAME, page_icon="logo.png", layout="wide")
58
  st.markdown("""
59
  <style>
60
  .brand-logo { width: 200px; height: auto; object-fit: contain; }
61
- .centered-container { display: flex; flex-direction: column; align-items: center; text-align: center; }
62
- .st-message-box { background-color: #f0f2f6; color: #333; padding: 10px; border-radius: 10px; border: 1px solid #e6e9ef; }
63
- .st-message-box.st-success { background-color: #d4edda; color: #155724; border-color: #c3e6cb; }
64
- .st-message-box.st-warning { background-color: #fff3cd; color: #856404; border-color: #ffeeba; }
65
- .st-message-box.st-error { background-color: #f8d7da; color: #721c24; border-color: #f5c6cb; }
66
  .main .block-container { overflow: unset !important; }
67
  div[data-testid="stVerticalBlock"] { overflow: unset !important; }
68
  div[data-testid="stExpander"] > details > summary {
@@ -95,22 +115,18 @@ def add_password_gate() -> None:
95
  required = st.secrets.get("APP_PASSWORD", "")
96
  except Exception:
97
  required = os.environ.get("APP_PASSWORD", "")
98
-
99
  if not required:
100
  st.warning("Set APP_PASSWORD in Secrets (or environment) and restart.")
101
  st.stop()
102
-
103
  if st.session_state.get("auth_ok", False):
104
  return
105
-
106
  st.sidebar.markdown(f"""
107
  <div class="centered-container">
108
- <img src="{inline_logo('logo.png')}" style="width: 200px; height: auto; object-fit: contain;">
109
  <div style='font-weight:800;font-size:1.2rem; margin-top: 10px;'>{APP_NAME}</div>
110
  <div style='color:#667085;'>Smart Thinking • Secure Access</div>
111
  </div>
112
- """, unsafe_allow_html=True
113
- )
114
  pwd = st.sidebar.text_input("Access key", type="password", placeholder="••••••••")
115
  if st.sidebar.button("Unlock", type="primary"):
116
  if pwd == required:
@@ -128,6 +144,12 @@ add_password_gate()
128
  def rmse(y_true, y_pred) -> float:
129
  return float(np.sqrt(mean_squared_error(y_true, y_pred)))
130
 
 
 
 
 
 
 
131
  def pearson_r(y_true, y_pred) -> float:
132
  a = np.asarray(y_true, dtype=float)
133
  p = np.asarray(y_pred, dtype=float)
@@ -135,18 +157,6 @@ def pearson_r(y_true, y_pred) -> float:
135
  if np.all(a == a[0]) or np.all(p == p[0]): return float("nan")
136
  return float(np.corrcoef(a, p)[0, 1])
137
 
138
- def mape(y_true, y_pred) -> float:
139
- """
140
- Mean Absolute Percentage Error in PERCENT.
141
- Ignores rows where true==0 or non-finite.
142
- """
143
- a = np.asarray(y_true, dtype=float)
144
- p = np.asarray(y_pred, dtype=float)
145
- mask = np.isfinite(a) & np.isfinite(p) & (a != 0)
146
- if not np.any(mask):
147
- return float("nan")
148
- return float(np.mean(np.abs((p[mask] - a[mask]) / a[mask])) * 100.0)
149
-
150
  @st.cache_resource(show_spinner=False)
151
  def load_model(model_path: str):
152
  return joblib.load(model_path)
@@ -160,62 +170,35 @@ def parse_excel(data_bytes: bytes):
160
  def read_book_bytes(b: bytes):
161
  return parse_excel(b) if b else {}
162
 
163
- # ---- Canonical feature aliasing ------------------------------------------
164
- def _build_alias_map(canonical_features: list[str], target_name: str) -> dict:
165
- """
166
- Map common header variants -> the *canonical* names in canonical_features.
167
- Whatever appears in canonical_features (from rhob_meta.json) wins.
168
- """
169
- def pick(expected_list, variants):
170
- for v in variants:
171
- if v in expected_list:
172
- return v
173
- return variants[0]
174
-
175
- can_WOB = pick(canonical_features, ["WOB (klbf)", "WOB, klbf", "WOB(klbf)", "WOB( klbf)"])
176
- can_TORQUE = pick(canonical_features, ["Torque (kft.lbf)", "Torque(kft.lbf)", "TORQUE(kft.lbf)"])
177
- can_SPP = pick(canonical_features, ["SPP (psi)", "SPP(psi)"])
178
- can_RPM = pick(canonical_features, ["RPM (1/min)", "RPM(1/min)"])
179
- can_ROP = pick(canonical_features, ["ROP (ft/h)", "ROP(ft/h)"])
180
- can_FR = pick(canonical_features, [
181
- "Flow Rate (gpm)","Flow Rate, gpm","Flow Rate,gpm","Flow Rate , gpm","Fow Rate, gpm","Fow Rate, gpm "
182
- ])
183
- can_DEPTH = "Depth (ft)"
184
-
185
- alias = {
186
- # Features
187
- "WOB (klbf)": can_WOB, "WOB, klbf": can_WOB, "WOB(klbf)": can_WOB, "WOB( klbf)": can_WOB,
188
- "Torque (kft.lbf)": can_TORQUE, "Torque(kft.lbf)": can_TORQUE, "TORQUE(kft.lbf)": can_TORQUE,
189
- "SPP (psi)": can_SPP, "SPP(psi)": can_SPP,
190
- "RPM (1/min)": can_RPM, "RPM(1/min)": can_RPM,
191
- "ROP (ft/h)": can_ROP, "ROP(ft/h)": can_ROP,
192
- "Flow Rate (gpm)": can_FR, "Flow Rate, gpm": can_FR, "Flow Rate,gpm": can_FR, "Flow Rate , gpm": can_FR,
193
- "Fow Rate, gpm": can_FR, "Fow Rate, gpm ": can_FR,
194
-
195
- # Depth (plot only)
196
- "Depth (ft)": can_DEPTH, "Depth, ft": can_DEPTH, "Depth(ft)": can_DEPTH, "DEPTH, ft": can_DEPTH,
197
- }
198
-
199
- # ---- Target family (RHOB) ----
200
- target_variants = [
201
- "RHOB", "RHOB (g/cc)", "RHOB (g/cm3)", "RHOB (g/cm³)",
202
- "RHOB_Actual", "RHOB_Actual (g/cc)", "RHOB_Actual (g/cm3)", "RHOB_Actual(g/cc)", "RHOB_Actual(g/cm3)",
203
- "RhoB", "RhoB (g/cc)", "RhoB (g/cm3)",
204
- "RhoB_Actual", "RhoB_Actual (g/cc)", "RhoB_Actual (g/cm3)"
205
- ]
206
- for t in target_variants:
207
- alias[t] = target_name
208
-
209
- return alias
210
-
211
-
212
-
213
- def _normalize_columns(df: pd.DataFrame, canonical_features: list[str], target_name: str) -> pd.DataFrame:
214
  out = df.copy()
215
- out.columns = [str(c).strip().replace(" ,", ",").replace(", ", ", ").replace(" ", " ") for c in out.columns]
216
- alias = _build_alias_map(canonical_features, target_name)
217
- actual = {k: v for k, v in alias.items() if k in out.columns and k != v}
218
- return out.rename(columns=actual)
 
 
 
 
 
 
 
 
 
 
 
219
 
220
  def ensure_cols(df: pd.DataFrame, cols: list[str]) -> bool:
221
  miss = [c for c in cols if c not in df.columns]
@@ -224,70 +207,52 @@ def ensure_cols(df: pd.DataFrame, cols: list[str]) -> bool:
224
  return False
225
  return True
226
 
227
- def find_sheet(book, names):
228
- low2orig = {k.lower(): k for k in book.keys()}
229
- for nm in names:
230
- if nm.lower() in low2orig: return low2orig[nm.lower()]
231
- return None
232
-
233
  def _nice_tick0(xmin: float, step: float = 0.1) -> float:
234
  return step * math.floor(xmin / step) if np.isfinite(xmin) else xmin
235
 
236
- def df_centered_rounded(df: pd.DataFrame, hide_index=True):
 
237
  out = df.copy()
238
- numcols = out.select_dtypes(include=[np.number]).columns
239
- styler = (
240
- out.style
241
- .format({c: "{:.2f}" for c in numcols})
242
- .set_properties(**{"text-align": "center"})
243
- .set_table_styles(TABLE_CENTER_CSS)
244
- )
245
- st.dataframe(styler, use_container_width=True, hide_index=hide_index)
 
 
 
 
 
 
 
 
 
 
246
 
247
  # ---------- Build X exactly as trained ----------
248
  def _make_X(df: pd.DataFrame, features: list[str]) -> pd.DataFrame:
249
- """
250
- Reindex columns to the exact training feature order and coerce to numeric.
251
- Prevents scikit-learn 'feature names should match' errors.
252
- """
253
  X = df.reindex(columns=features, copy=False)
254
  for c in X.columns:
255
  X[c] = pd.to_numeric(X[c], errors="coerce")
256
  return X
257
 
258
- # === Excel export helpers =================================================
259
- def _excel_engine() -> str:
260
- try:
261
- import xlsxwriter # noqa: F401
262
- return "xlsxwriter"
263
- except Exception:
264
- return "openpyxl"
265
-
266
- def _excel_safe_name(name: str) -> str:
267
- bad = '[]:*?/\\'
268
- safe = ''.join('_' if ch in bad else ch for ch in str(name))
269
- return safe[:31]
270
-
271
- def _round_numeric(df: pd.DataFrame, ndigits: int = 3) -> pd.DataFrame:
272
- out = df.copy()
273
- for c in out.columns:
274
- if pd.api.types.is_float_dtype(out[c]) or pd.api.types.is_integer_dtype(out[c]):
275
- out[c] = pd.to_numeric(out[c], errors="coerce").round(ndigits)
276
- return out
277
-
278
  def _summary_table(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
279
  cols = [c for c in cols if c in df.columns]
280
- if not cols:
281
- return pd.DataFrame()
282
  tbl = (df[cols]
283
- .agg(['min','max','mean','std'])
284
- .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
285
- .reset_index(names="Field"))
286
  return _round_numeric(tbl, 3)
287
 
288
  def _train_ranges_df(ranges: dict[str, tuple[float, float]]) -> pd.DataFrame:
289
- if not ranges:
290
- return pd.DataFrame()
291
  df = pd.DataFrame(ranges).T.reset_index()
292
  df.columns = ["Feature", "Min", "Max"]
293
  return _round_numeric(df, 3)
@@ -304,18 +269,13 @@ def _excel_autofit(writer, sheet_name: str, df: pd.DataFrame, min_w: int = 8, ma
304
  ws.set_column(i, i, max(min_w, min(max_len + 2, max_w)))
305
  ws.freeze_panes(1, 0)
306
 
307
- def _add_sheet(sheets: dict, order: list, name: str, df: pd.DataFrame, ndigits: int):
308
- if df is None or df.empty: return
309
- sheets[name] = _round_numeric(df, ndigits)
310
- order.append(name)
311
-
312
  def _available_sections() -> list[str]:
313
  res = st.session_state.get("results", {})
314
  sections = []
315
- if "Train" in res: sections += ["Training","Training_Metrics","Training_Summary"]
316
- if "Test" in res: sections += ["Testing","Testing_Metrics","Testing_Summary"]
317
- if "Validate" in res: sections += ["Validation","Validation_Metrics","Validation_Summary","Validation_OOR"]
318
- if "PredictOnly" in res: sections += ["Prediction","Prediction_Summary"]
319
  if st.session_state.get("train_ranges"): sections += ["Training_Ranges"]
320
  sections += ["Info"]
321
  return sections
@@ -323,43 +283,36 @@ def _available_sections() -> list[str]:
323
  def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: bool = True) -> tuple[bytes|None, str|None, list[str]]:
324
  res = st.session_state.get("results", {})
325
  if not res: return None, None, []
326
-
327
  sheets: dict[str, pd.DataFrame] = {}
328
  order: list[str] = []
329
 
330
- if "Training" in selected and "Train" in res:
331
- _add_sheet(sheets, order, "Training", res["Train"], ndigits)
332
- if "Training_Metrics" in selected and res.get("m_train"):
333
- _add_sheet(sheets, order, "Training_Metrics", pd.DataFrame([res["m_train"]]), ndigits)
 
 
334
  if "Training_Summary" in selected and "Train" in res:
335
  tr_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in res["Train"].columns]
336
- _add_sheet(sheets, order, "Training_Summary", _summary_table(res["Train"], tr_cols), ndigits)
337
 
338
- if "Testing" in selected and "Test" in res:
339
- _add_sheet(sheets, order, "Testing", res["Test"], ndigits)
340
- if "Testing_Metrics" in selected and res.get("m_test"):
341
- _add_sheet(sheets, order, "Testing_Metrics", pd.DataFrame([res["m_test"]]), ndigits)
342
  if "Testing_Summary" in selected and "Test" in res:
343
  te_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in res["Test"].columns]
344
- _add_sheet(sheets, order, "Testing_Summary", _summary_table(res["Test"], te_cols), ndigits)
345
-
346
- if "Validation" in selected and "Validate" in res:
347
- _add_sheet(sheets, order, "Validation", res["Validate"], ndigits)
348
- if "Validation_Metrics" in selected and res.get("m_val"):
349
- _add_sheet(sheets, order, "Validation_Metrics", pd.DataFrame([res["m_val"]]), ndigits)
350
- if "Validation_Summary" in selected and res.get("sv_val"):
351
- _add_sheet(sheets, order, "Validation_Summary", pd.DataFrame([res["sv_val"]]), ndigits)
352
  if "Validation_OOR" in selected and isinstance(res.get("oor_tbl"), pd.DataFrame) and not res["oor_tbl"].empty:
353
- _add_sheet(sheets, order, "Validation_OOR", res["oor_tbl"].reset_index(drop=True), ndigits)
354
 
355
- if "Prediction" in selected and "PredictOnly" in res:
356
- _add_sheet(sheets, order, "Prediction", res["PredictOnly"], ndigits)
357
- if "Prediction_Summary" in selected and res.get("sv_pred"):
358
- _add_sheet(sheets, order, "Prediction_Summary", pd.DataFrame([res["sv_pred"]]), ndigits)
359
 
360
  if "Training_Ranges" in selected and st.session_state.get("train_ranges"):
361
- rr = _train_ranges_df(st.session_state["train_ranges"])
362
- _add_sheet(sheets, order, "Training_Ranges", rr, ndigits)
363
 
364
  if "Info" in selected:
365
  info = pd.DataFrame([
@@ -370,7 +323,7 @@ def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: boo
370
  {"Key": "Features", "Value": ", ".join(FEATURES)},
371
  {"Key": "ExportedAt", "Value": datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
372
  ])
373
- _add_sheet(sheets, order, "Info", info, ndigits)
374
 
375
  if not order: return None, None, []
376
 
@@ -378,86 +331,64 @@ def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: boo
378
  engine = _excel_engine()
379
  with pd.ExcelWriter(bio, engine=engine) as writer:
380
  for name in order:
381
- df = sheets[name]
382
- sheet = _excel_safe_name(name)
383
  df.to_excel(writer, sheet_name=sheet, index=False)
384
- if do_autofit:
385
- _excel_autofit(writer, sheet, df)
386
  bio.seek(0)
387
- fname = f"RHOB_Export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
388
  return bio.getvalue(), fname, order
389
 
390
- # --------- SIMPLE export UI ----------
391
  def render_export_button(phase_key: str) -> None:
392
  res = st.session_state.get("results", {})
393
  if not res: return
394
  st.divider()
395
  st.markdown("### Export to Excel")
396
-
397
  options = _available_sections()
398
  selected_sheets = st.multiselect(
399
  "Sheets to include",
400
  options=options,
401
  default=[],
402
  placeholder="Choose option(s)",
403
- help="Pick the sheets you want to include in the Excel export.",
404
  key=f"sheets_{phase_key}",
405
  )
406
-
407
  if not selected_sheets:
408
- st.caption("Select one or more sheets above to enable the export.")
409
- st.download_button(
410
- label="⬇️ Export Excel",
411
- data=b"",
412
- file_name="RHOB_Export.xlsx",
413
- mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
414
- disabled=True,
415
- key=f"download_{phase_key}",
416
- )
417
  return
418
-
419
  data, fname, names = build_export_workbook(selected=selected_sheets, ndigits=3, do_autofit=True)
420
- if names:
421
- st.caption("Will include: " + ", ".join(names))
422
- st.download_button(
423
- "⬇️ Export Excel",
424
- data=(data or b""),
425
- file_name=(fname or "RHOB_Export.xlsx"),
426
- mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
427
- disabled=(data is None),
428
- key=f"download_{phase_key}",
429
- )
430
 
431
  # =========================
432
  # Cross plot (Matplotlib)
433
  # =========================
434
- def cross_plot_static(actual, pred, xlabel="Actual RHOB (g/cc)", ylabel="Predicted RHOB (g/cc)"):
435
  a = pd.Series(actual, dtype=float)
436
- p = pd.Series(pred, dtype=float)
437
-
438
- lo = float(min(a.min(), p.min()))
439
- hi = float(max(a.max(), p.max()))
440
  pad = 0.03 * (hi - lo if hi > lo else 1.0)
441
  lo2, hi2 = lo - pad, hi + pad
442
-
443
  ticks = np.linspace(lo2, hi2, 5)
444
 
445
  dpi = 110
446
  fig, ax = plt.subplots(figsize=(CROSS_W / dpi, CROSS_H / dpi), dpi=dpi, constrained_layout=False)
447
-
448
  ax.scatter(a, p, s=14, c=COLORS["pred"], alpha=0.9, linewidths=0)
449
  ax.plot([lo2, hi2], [lo2, hi2], linestyle="--", linewidth=1.2, color=COLORS["ref"])
450
 
451
- ax.set_xlim(lo2, hi2)
452
- ax.set_ylim(lo2, hi2)
453
- ax.set_xticks(ticks); ax.set_yticks(ticks)
454
  ax.set_aspect("equal", adjustable="box")
455
 
456
  fmt = FuncFormatter(lambda x, _: f"{x:.2f}")
457
  ax.xaxis.set_major_formatter(fmt); ax.yaxis.set_major_formatter(fmt)
458
 
459
- ax.set_xlabel(xlabel, fontweight="bold", fontsize=10, color="black")
460
- ax.set_ylabel(ylabel, fontweight="bold", fontsize=10, color="black")
461
  ax.tick_params(labelsize=6, colors="black")
462
 
463
  ax.grid(True, linestyle=":", alpha=0.3)
@@ -473,15 +404,16 @@ def cross_plot_static(actual, pred, xlabel="Actual RHOB (g/cc)", ylabel="Predict
473
  def track_plot(df, include_actual=True):
474
  depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
475
  if depth_col is not None:
476
- y = pd.Series(df[depth_col]).astype(float); ylab = depth_col
477
- y_range = [float(y.max()), float(y.min())]
478
  else:
479
  y = pd.Series(np.arange(1, len(df) + 1)); ylab = "Point Index"
480
  y_range = [float(y.max()), float(y.min())]
481
 
482
  x_series = pd.Series(df.get(PRED_COL, pd.Series(dtype=float))).astype(float)
483
- if include_actual and TARGET in df.columns:
484
- x_series = pd.concat([x_series, pd.Series(df[TARGET]).astype(float)], ignore_index=True)
 
485
  x_lo, x_hi = float(x_series.min()), float(x_series.max())
486
  x_pad = 0.03 * (x_hi - x_lo if x_hi > x_lo else 1.0)
487
  xmin, xmax = x_lo - x_pad, x_hi + x_pad
@@ -493,14 +425,15 @@ def track_plot(df, include_actual=True):
493
  x=df[PRED_COL], y=y, mode="lines",
494
  line=dict(color=COLORS["pred"], width=1.8),
495
  name=PRED_COL,
496
- hovertemplate=f"{PRED_COL}: "+"%{x:.3f}<br>"+ylab+": %{y}<extra></extra>"
497
  ))
498
- if include_actual and TARGET in df.columns:
 
499
  fig.add_trace(go.Scatter(
500
- x=df[TARGET], y=y, mode="lines",
501
  line=dict(color=COLORS["actual"], width=2.0, dash="dot"),
502
- name=f"{TARGET} (actual)",
503
- hovertemplate=f"{TARGET}: "+"%{x:.3f}<br>"+ylab+": %{y}<extra></extra>"
504
  ))
505
 
506
  fig.update_layout(
@@ -512,26 +445,15 @@ def track_plot(df, include_actual=True):
512
  bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1),
513
  legend_title_text=""
514
  )
515
-
516
  fig.update_xaxes(
517
- title_text="RHOB (g/cc)",
518
  title_font=dict(size=20, family=BOLD_FONT, color="#000"),
519
  tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
520
- side="top",
521
- range=[xmin, xmax],
522
- ticks="outside",
523
- tickformat=".2f",
524
- tickmode="auto",
525
- tick0=tick0,
526
- showline=True,
527
- linewidth=1.2,
528
- linecolor="#444",
529
- mirror=True,
530
- showgrid=True,
531
- gridcolor="rgba(0,0,0,0.12)",
532
- automargin=True,
533
  )
534
-
535
  fig.update_yaxes(
536
  title_text=ylab,
537
  title_font=dict(size=20, family=BOLD_FONT, color="#000"),
@@ -542,14 +464,8 @@ def track_plot(df, include_actual=True):
542
  )
543
  return fig
544
 
545
- # ---------- Preview (matplotlib) ----------
546
  def preview_tracks(df: pd.DataFrame, cols: list[str]):
547
- """
548
- Quick-look multi-track preview:
549
- - one subplot per selected column
550
- - distinct stable colors per column
551
- - shared & reversed Y-axis (Depth downwards)
552
- """
553
  cols = [c for c in cols if c in df.columns]
554
  n = len(cols)
555
  if n == 0:
@@ -558,38 +474,36 @@ def preview_tracks(df: pd.DataFrame, cols: list[str]):
558
  ax.axis("off")
559
  return fig
560
 
561
- # Depth or fallback to index
562
  depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
563
  if depth_col is not None:
564
  idx = pd.to_numeric(df[depth_col], errors="coerce")
565
  y_label = depth_col
 
566
  else:
567
  idx = pd.Series(np.arange(1, len(df) + 1))
568
  y_label = "Point Index"
 
569
 
570
- y_min, y_max = float(idx.min()), float(idx.max())
571
-
572
- # Stable qualitative palette
573
  cmap = plt.get_cmap("tab20")
574
  col_colors = {col: cmap(i % cmap.N) for i, col in enumerate(cols)}
575
 
576
- fig, axes = plt.subplots(1, n, figsize=(2.3 * n, 7.0), sharey=True, dpi=100)
577
  if n == 1:
578
  axes = [axes]
579
 
580
  for i, (ax, col) in enumerate(zip(axes, cols)):
581
  x = pd.to_numeric(df[col], errors="coerce")
582
- ax.plot(x, idx, '-', lw=1.8, color=col_colors[col])
583
  ax.set_xlabel(col)
584
  ax.xaxis.set_label_position('top')
585
  ax.xaxis.tick_top()
586
- ax.set_ylim(y_max, y_min) # reversed Y (Depth down)
587
  ax.grid(True, linestyle=":", alpha=0.3)
588
 
589
  if i == 0:
590
  ax.set_ylabel(y_label)
591
  else:
592
- ax.tick_params(labelleft=False)
593
  ax.set_ylabel("")
594
 
595
  fig.tight_layout()
@@ -615,9 +529,10 @@ def ensure_model() -> Path|None:
615
  except Exception:
616
  return None
617
 
 
618
  mpath = ensure_model()
619
  if not mpath:
620
- st.error("Model not found. Upload models/rhob_model.joblib (or set MODEL_URL).")
621
  st.stop()
622
  try:
623
  model = load_model(str(mpath))
@@ -625,39 +540,29 @@ except Exception as e:
625
  st.error(f"Failed to load model: {e}")
626
  st.stop()
627
 
628
- # Load meta (prefer RHOB-specific)
629
  meta = {}
630
- meta_candidates = [MODELS_DIR / "rhob_meta.json", MODELS_DIR / "meta.json"]
631
- meta_path = next((p for p in meta_candidates if p.exists()), None)
632
  if meta_path:
633
  try:
634
  meta = json.loads(meta_path.read_text(encoding="utf-8"))
635
- FEATURES = meta.get("features", FEATURES)
636
- TARGET = meta.get("target", TARGET)
637
- PRED_COL = meta.get("pred_col", PRED_COL)
638
- # if training ranges were saved in meta, seed them so OOR works before any dev step
639
- if isinstance(meta.get("train_ranges"), dict) and "train_ranges" not in st.session_state:
640
- st.session_state["train_ranges"] = meta["train_ranges"]
 
641
  except Exception as e:
642
  st.warning(f"Could not parse meta file ({meta_path.name}): {e}")
643
 
644
- # Optional: version banner
645
- if STRICT_VERSION_CHECK and meta.get("versions"):
646
- import numpy as _np, sklearn as _skl
647
- mv = meta["versions"]; msg=[]
648
- if mv.get("numpy") and mv["numpy"] != _np.__version__:
649
- msg.append(f"NumPy {mv['numpy']} expected, running {_np.__version__}")
650
- if mv.get("scikit_learn") and mv["scikit_learn"] != _skl.__version__:
651
- msg.append(f"scikit-learn {mv['scikit_learn']} expected, running {_skl.__version__}")
652
- if msg:
653
- st.warning("Environment mismatch: " + " | ".join(msg))
654
-
655
  # =========================
656
  # Session state
657
  # =========================
658
  st.session_state.setdefault("app_step", "intro")
659
  st.session_state.setdefault("results", {})
660
- st.session_state.setdefault("train_ranges", st.session_state.get("train_ranges", None))
661
  st.session_state.setdefault("dev_file_name","")
662
  st.session_state.setdefault("dev_file_bytes",b"")
663
  st.session_state.setdefault("dev_file_loaded",False)
@@ -669,12 +574,11 @@ st.session_state.setdefault("show_preview_modal", False)
669
  # =========================
670
  st.sidebar.markdown(f"""
671
  <div class="centered-container">
672
- <img src="{inline_logo('logo.png')}" style="width: 200px; height: auto; object-fit: contain;">
673
  <div style='font-weight:800;font-size:1.2rem;'>{APP_NAME}</div>
674
  <div style='color:#667085;'>{TAGLINE}</div>
675
  </div>
676
- """, unsafe_allow_html=True
677
- )
678
 
679
  def sticky_header(title, message):
680
  st.markdown(
@@ -698,12 +602,12 @@ def sticky_header(title, message):
698
  # =========================
699
  if st.session_state.app_step == "intro":
700
  st.header("Welcome!")
701
- st.markdown("This software is developed by *Smart Thinking AI-Solutions Team* to estimate **RHOB (Formation Bulk Density)** from drilling data.")
702
  st.subheader("How It Works")
703
  st.markdown(
704
- "1) **Upload your data to build the case and preview the model performance.** \n"
705
- "2) Click **Run Model** to compute metrics and plots. \n"
706
- "3) **Proceed to Validation** (with actual RHOB) or **Proceed to Prediction** (no RHOB)."
707
  )
708
  if st.button("Start Showcase", type="primary"):
709
  st.session_state.app_step = "dev"; st.rerun()
@@ -713,7 +617,7 @@ if st.session_state.app_step == "intro":
713
  # =========================
714
  if st.session_state.app_step == "dev":
715
  st.sidebar.header("Case Building")
716
- up = st.sidebar.file_uploader("Upload Your Data File", type=["xlsx","xls"])
717
  if up is not None:
718
  st.session_state.dev_file_bytes = up.getvalue()
719
  st.session_state.dev_file_name = up.name
@@ -738,37 +642,49 @@ if st.session_state.app_step == "dev":
738
  elif st.session_state.dev_file_loaded:
739
  sticky_header("Case Building", "📄 **Preview uploaded data** using the sidebar button, then click **Run Model**.")
740
  else:
741
- sticky_header("Case Building", "**Upload your data to build a case, then run the model to review development performance.**")
742
 
743
  if run and st.session_state.dev_file_bytes:
744
  book = read_book_bytes(st.session_state.dev_file_bytes)
 
 
 
 
 
 
745
  sh_train = find_sheet(book, ["Train","Training","training2","train","training"])
746
  sh_test = find_sheet(book, ["Test","Testing","testing2","test","testing"])
747
  if sh_train is None or sh_test is None:
748
- st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training/training2 and Test/Testing/testing2 sheets.</div>', unsafe_allow_html=True)
749
  st.stop()
750
 
751
- tr = _normalize_columns(book[sh_train].copy(), FEATURES, TARGET)
752
- te = _normalize_columns(book[sh_test].copy(), FEATURES, TARGET)
 
 
 
753
 
754
- if not (ensure_cols(tr, FEATURES+[TARGET]) and ensure_cols(te, FEATURES+[TARGET])):
755
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True)
756
  st.stop()
757
 
758
- # Predict with exactly the training feature order
759
- tr[PRED_COL] = model.predict(_make_X(tr, FEATURES))
760
- te[PRED_COL] = model.predict(_make_X(te, FEATURES))
761
-
762
- st.session_state.results["Train"]=tr; st.session_state.results["Test"]=te
763
- st.session_state.results["m_train"]={
764
- "R": pearson_r(tr[TARGET], tr[PRED_COL]),
765
- "RMSE": rmse(tr[TARGET], tr[PRED_COL]),
766
- "MAPE": mape(tr[TARGET], tr[PRED_COL])
 
 
 
767
  }
768
- st.session_state.results["m_test"]={
769
- "R": pearson_r(te[TARGET], te[PRED_COL]),
770
- "RMSE": rmse(te[TARGET], te[PRED_COL]),
771
- "MAPE": mape(te[TARGET], te[PRED_COL])
772
  }
773
 
774
  tr_min = tr[FEATURES].min().to_dict(); tr_max = tr[FEATURES].max().to_dict()
@@ -777,9 +693,9 @@ if st.session_state.app_step == "dev":
777
 
778
  def _dev_block(df, m):
779
  c1,c2,c3 = st.columns(3)
780
- c1.metric("R", f"{m['R']:.3f}")
781
- c2.metric("RMSE", f"{m['RMSE']:.3f}")
782
- c3.metric("MAPE (%)", f"{m['MAPE']:.2f}")
783
  st.markdown("""
784
  <div style='text-align: left; font-size: 0.8em; color: #6b7280; margin-top: -16px; margin-bottom: 8px;'>
785
  <strong>R:</strong> Pearson Correlation Coefficient<br>
@@ -789,20 +705,22 @@ if st.session_state.app_step == "dev":
789
  """, unsafe_allow_html=True)
790
  col_track, col_cross = st.columns([2, 3], gap="large")
791
  with col_track:
792
- st.plotly_chart(track_plot(df, include_actual=True), use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
 
793
  with col_cross:
794
- st.pyplot(cross_plot_static(df[TARGET], df[PRED_COL]), use_container_width=False)
 
795
 
796
  if "Train" in st.session_state.results or "Test" in st.session_state.results:
797
  tab1, tab2 = st.tabs(["Training", "Testing"])
798
- if "Train" in st.session_state.results:
799
  with tab1: _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
800
  if "Test" in st.session_state.results:
801
- with tab2: _dev_block(st.session_state.results["Test"], st.session_state.results["m_test"])
802
  render_export_button(phase_key="dev")
803
 
804
  # =========================
805
- # VALIDATION (with actual RHOB)
806
  # =========================
807
  if st.session_state.app_step == "validate":
808
  st.sidebar.header("Validate the Model")
@@ -818,42 +736,48 @@ if st.session_state.app_step == "validate":
818
  if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
819
  if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
820
 
821
- sticky_header("Validate the Model", "Upload a dataset with the same **features** and **RHOB** to evaluate performance.")
822
 
823
  if go_btn and up is not None:
824
  book = read_book_bytes(up.getvalue())
825
- name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
826
- df = _normalize_columns(book[name].copy(), FEATURES, TARGET)
827
- if not ensure_cols(df, FEATURES+[TARGET]):
 
 
 
 
828
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
829
- df[PRED_COL] = model.predict(_make_X(df, FEATURES))
830
- st.session_state.results["Validate"]=df
831
 
 
 
 
 
 
832
  ranges = st.session_state.train_ranges; oor_pct = 0.0; tbl=None
833
  if ranges:
834
- any_viol = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).any(axis=1)
835
- oor_pct = float(any_viol.mean()*100.0)
836
  if any_viol.any():
837
  tbl = df.loc[any_viol, FEATURES].copy()
838
  for c in FEATURES:
839
  if pd.api.types.is_numeric_dtype(tbl[c]): tbl[c] = tbl[c].round(3)
840
- tbl["Violations"] = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).loc[any_viol].apply(
841
  lambda r:", ".join([c for c,v in r.items() if v]), axis=1
842
  )
843
- st.session_state.results["m_val"]={
844
- "R": pearson_r(df[TARGET], df[PRED_COL]),
845
- "RMSE": rmse(df[TARGET], df[PRED_COL]),
846
- "MAPE": mape(df[TARGET], df[PRED_COL])
 
847
  }
848
- st.session_state.results["sv_val"]={"n":len(df), "pred_min":float(df[PRED_COL].min()), "pred_max":float(df[PRED_COL].max()), "oor":oor_pct}
849
- st.session_state.results["oor_tbl"]=tbl
850
 
851
  if "Validate" in st.session_state.results:
852
  m = st.session_state.results["m_val"]
853
  c1,c2,c3 = st.columns(3)
854
- c1.metric("R", f"{m['R']:.3f}")
855
- c2.metric("RMSE", f"{m['RMSE']:.3f}")
856
- c3.metric("MAPE (%)", f"{m['MAPE']:.2f}")
857
  st.markdown("""
858
  <div style='text-align: left; font-size: 0.8em; color: #6b7280; margin-top: -16px; margin-bottom: 8px;'>
859
  <strong>R:</strong> Pearson Correlation Coefficient<br>
@@ -867,7 +791,8 @@ if st.session_state.app_step == "validate":
867
  st.plotly_chart(track_plot(st.session_state.results["Validate"], include_actual=True),
868
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
869
  with col_cross:
870
- st.pyplot(cross_plot_static(st.session_state.results["Validate"][TARGET],
 
871
  st.session_state.results["Validate"][PRED_COL]),
872
  use_container_width=False)
873
 
@@ -880,10 +805,10 @@ if st.session_state.app_step == "validate":
880
  df_centered_rounded(st.session_state.results["oor_tbl"])
881
 
882
  # =========================
883
- # PREDICTION (no actual RHOB)
884
  # =========================
885
  if st.session_state.app_step == "predict":
886
- st.sidebar.header("Prediction (No Actual RHOB)")
887
  up = st.sidebar.file_uploader("Upload Prediction Excel", type=["xlsx","xls"])
888
  if up is not None:
889
  book = read_book_bytes(up.getvalue())
@@ -895,21 +820,22 @@ if st.session_state.app_step == "predict":
895
  go_btn = st.sidebar.button("Predict", type="primary", use_container_width=True)
896
  if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
897
 
898
- sticky_header("Prediction", "Upload a dataset with the feature columns (no **RHOB**).")
899
 
900
  if go_btn and up is not None:
901
  book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
902
- df = _normalize_columns(book[name].copy(), FEATURES, TARGET)
903
- if not ensure_cols(df, FEATURES):
904
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
905
- df[PRED_COL] = model.predict(_make_X(df, FEATURES))
906
- st.session_state.results["PredictOnly"]=df
 
907
 
908
  ranges = st.session_state.train_ranges; oor_pct = 0.0
909
  if ranges:
910
- any_viol = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).any(axis=1)
911
- oor_pct = float(any_viol.mean()*100.0)
912
- st.session_state.results["sv_pred"]={
913
  "n":len(df),
914
  "pred_min":float(df[PRED_COL].min()),
915
  "pred_max":float(df[PRED_COL].max()),
@@ -920,7 +846,6 @@ if st.session_state.app_step == "predict":
920
 
921
  if "PredictOnly" in st.session_state.results:
922
  df = st.session_state.results["PredictOnly"]; sv = st.session_state.results["sv_pred"]
923
-
924
  col_left, col_right = st.columns([2,3], gap="large")
925
  with col_left:
926
  table = pd.DataFrame({
@@ -934,7 +859,6 @@ if st.session_state.app_step == "predict":
934
  with col_right:
935
  st.plotly_chart(track_plot(df, include_actual=False),
936
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
937
-
938
  render_export_button(phase_key="predict")
939
 
940
  # =========================
@@ -955,7 +879,7 @@ if st.session_state.show_preview_modal:
955
  tabs = st.tabs(names)
956
  for t, name in zip(tabs, names):
957
  with t:
958
- df = _normalize_columns(book_to_preview[name], FEATURES, TARGET)
959
  t1, t2 = st.tabs(["Tracks", "Summary"])
960
  with t1:
961
  st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)
 
1
+ # app.py — ST_Min_Horizontal_Stress (σ_hmin)
2
+ # Mirrors RHOB/TS apps: same flow, preview tracks, export picker, password gate
3
 
4
  import io, json, os, base64, math
5
  from pathlib import Path
 
8
  import numpy as np
9
  import joblib
10
  from datetime import datetime
11
+ import os
12
+ from functools import lru_cache
13
+ from huggingface_hub import hf_hub_download
14
 
15
+ # Matplotlib for static previews & cross-plot
16
  import matplotlib
17
  matplotlib.use("Agg")
18
  import matplotlib.pyplot as plt
 
21
  import plotly.graph_objects as go
22
  from sklearn.metrics import mean_squared_error
23
 
24
# ---- Optional remote model source: Hugging Face Hub ----
REPO_ID = "Smart-Thinking/minstress-model"  # <-- update this
FILENAME = "minstress_model.joblib"

@lru_cache
def load_model_from_hub():
    """Download the σ_hmin model artifact from the HF Hub and unpickle it.

    Renamed from ``load_model``: the original name was shadowed by the
    local-path ``@st.cache_resource def load_model(model_path)`` defined
    further down this file, which silently replaced this loader.  Reads
    ``HF_TOKEN`` from the environment for private repos.
    """
    # if private, we'll add a token in Step 5 and pass it here
    token = os.environ.get("HF_TOKEN", None)
    path = hf_hub_download(
        repo_id=REPO_ID,
        filename=FILENAME,
        repo_type="model",
        token=token,
    )
    return joblib.load(path)

# get your model once and reuse it
# NOTE(review): `model` is reassigned later from the local models/ path;
# this Hub copy only serves as the initial model object — confirm intent.
model = load_model_from_hub()
41
  # =========================
42
+ # Constants / Defaults
43
  # =========================
44
+ APP_NAME = "ST_Min_Horizontal_Stress"
45
+ TAGLINE = "Real-Time Minimum Horizontal Stress Prediction"
46
 
47
+ # Defaults — can be overridden by meta JSON
48
+ FEATURES = ["WOB (klbf)", "Torque (kft.lbf)", "SPP (psi)", "RPM (1/min)", "ROP (ft/h)", "Flow Rate (gpm)"]
49
+ TARGET = "σhmin (MPa)" # actual column in sheets
50
+ PRED_COL = "σhmin_Pred" # prediction column name to create
51
+ ACTUAL_COL = None # sometimes target is also the actual; you can set a dedicated actual column
52
+ TRANSFORM = "none" # "none" | "log10" | "ln"
53
+ UNITS = "MPa" # only for labels
 
 
 
 
54
 
55
  MODELS_DIR = Path("models")
56
+ DEFAULT_MODEL = MODELS_DIR / "minstress_model.joblib"
57
+ META_CANDIDATES = [
58
+ MODELS_DIR / "minstress_meta.json",
59
+ MODELS_DIR / "meta.json"
60
+ ]
61
  MODEL_FALLBACKS = [MODELS_DIR / "model.joblib", MODELS_DIR / "model.pkl"]
 
62
 
63
+ # Colors
64
+ COLORS = {"pred": "#1f77b4", "actual": "#f2b702", "ref": "#5a5a5a"}
65
 
66
  # ---- Plot sizing ----
67
  CROSS_W = 350
 
78
  st.markdown("""
79
  <style>
80
  .brand-logo { width: 200px; height: auto; object-fit: contain; }
81
+ .centered-container { display:flex; flex-direction:column; align-items:center; text-align:center; }
82
+ .st-message-box { background:#f0f2f6; color:#333; padding:10px; border-radius:10px; border:1px solid #e6e9ef; }
83
+ .st-message-box.st-success { background:#d4edda; color:#155724; border-color:#c3e6cb; }
84
+ .st-message-box.st-warning { background:#fff3cd; color:#856404; border-color:#ffeeba; }
85
+ .st-message-box.st-error { background:#f8d7da; color:#721c24; border-color:#f5c6cb; }
86
  .main .block-container { overflow: unset !important; }
87
  div[data-testid="stVerticalBlock"] { overflow: unset !important; }
88
  div[data-testid="stExpander"] > details > summary {
 
115
  required = st.secrets.get("APP_PASSWORD", "")
116
  except Exception:
117
  required = os.environ.get("APP_PASSWORD", "")
 
118
  if not required:
119
  st.warning("Set APP_PASSWORD in Secrets (or environment) and restart.")
120
  st.stop()
 
121
  if st.session_state.get("auth_ok", False):
122
  return
 
123
  st.sidebar.markdown(f"""
124
  <div class="centered-container">
125
+ <img src="{inline_logo('logo.png')}" class="brand-logo">
126
  <div style='font-weight:800;font-size:1.2rem; margin-top: 10px;'>{APP_NAME}</div>
127
  <div style='color:#667085;'>Smart Thinking • Secure Access</div>
128
  </div>
129
+ """, unsafe_allow_html=True)
 
130
  pwd = st.sidebar.text_input("Access key", type="password", placeholder="••••••••")
131
  if st.sidebar.button("Unlock", type="primary"):
132
  if pwd == required:
 
144
def rmse(y_true, y_pred) -> float:
    """Root-mean-square error between actual and predicted values."""
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    return float(np.sqrt(np.mean(np.square(actual - predicted))))
146
 
147
def mape(y_true, y_pred, eps: float = 1e-9) -> float:
    """Mean absolute percentage error, in percent.

    The denominator is clamped to *eps* so rows with an actual value of
    zero do not produce a division-by-zero.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    denom = np.maximum(np.abs(actual), eps)
    relative_error = np.abs(actual - predicted) / denom
    return float(np.mean(relative_error) * 100.0)
152
+
153
  def pearson_r(y_true, y_pred) -> float:
154
  a = np.asarray(y_true, dtype=float)
155
  p = np.asarray(y_pred, dtype=float)
 
157
  if np.all(a == a[0]) or np.all(p == p[0]): return float("nan")
158
  return float(np.corrcoef(a, p)[0, 1])
159
 
 
 
 
 
 
 
 
 
 
 
 
 
160
@st.cache_resource(show_spinner=False)
def load_model(model_path: str):
    """Load and cache (per Streamlit resource cache) a serialized model from a local path.

    NOTE(review): this redefines the Hub-download ``load_model`` declared
    near the top of the file, shadowing it — consider renaming one of them.
    """
    return joblib.load(model_path)
 
170
  def read_book_bytes(b: bytes):
171
  return parse_excel(b) if b else {}
172
 
173
+ def _excel_engine() -> str:
174
+ try:
175
+ import xlsxwriter # noqa: F401
176
+ return "xlsxwriter"
177
+ except Exception:
178
+ return "openpyxl"
179
+
180
+ def _excel_safe_name(name: str) -> str:
181
+ bad = '[]:*?/\\'
182
+ safe = ''.join('_' if ch in bad else ch for ch in str(name))
183
+ return safe[:31]
184
+
185
+ def _round_numeric(df: pd.DataFrame, ndigits: int = 3) -> pd.DataFrame:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  out = df.copy()
187
+ for c in out.columns:
188
+ if pd.api.types.is_float_dtype(out[c]) or pd.api.types.is_integer_dtype(out[c]):
189
+ out[c] = pd.to_numeric(out[c], errors="coerce").round(ndigits)
190
+ return out
191
+
192
def df_centered_rounded(df: pd.DataFrame, hide_index=True):
    """Render *df* in Streamlit with numeric cells formatted to 3 decimals and all cells centered."""
    out = df.copy()
    # only numeric columns receive the fixed-precision formatter
    numcols = out.select_dtypes(include=[np.number]).columns
    styler = (
        out.style
        .format({c: "{:.3f}" for c in numcols})
        .set_properties(**{"text-align": "center"})
        .set_table_styles(TABLE_CENTER_CSS)  # NOTE(review): defined elsewhere in this file, not visible here
    )
    st.dataframe(styler, use_container_width=True, hide_index=hide_index)
202
 
203
  def ensure_cols(df: pd.DataFrame, cols: list[str]) -> bool:
204
  miss = [c for c in cols if c not in df.columns]
 
207
  return False
208
  return True
209
 
 
 
 
 
 
 
210
  def _nice_tick0(xmin: float, step: float = 0.1) -> float:
211
  return step * math.floor(xmin / step) if np.isfinite(xmin) else xmin
212
 
213
+ # ---------- Aliasing / Normalization ----------
214
# ---------- Aliasing / Normalization ----------
def _normalize_columns(df: pd.DataFrame, canonical_features: list[str], target_name: str, aliases: dict|None) -> pd.DataFrame:
    """Clean column headers and rename known aliases to their canonical names.

    *canonical_features* and *target_name* are not referenced in this body;
    they appear to be kept only for call-site signature compatibility.
    NOTE(review): the chained ``.replace(", ", ", ")`` and ``.replace(" ", " ")``
    calls are no-ops as written — they look like double-space cleanups that
    were collapsed during extraction; confirm the intended literals.
    """
    out = df.copy()
    out.columns = [str(c).strip().replace(" ,", ",").replace(", ", ", ").replace(" ", " ") for c in out.columns]
    if not aliases:
        return out
    # build mapping for any alias -> canonical
    mapping = {}
    for can, alist in aliases.items():
        for a in alist:
            if a in out.columns and can != a:
                mapping[a] = can
    # also allow direct canonical name
    return out.rename(columns=mapping)
227
+
228
+ # ---------- Transform helpers ----------
229
+ def _inv_transform(x: np.ndarray, transform: str) -> np.ndarray:
230
+ t = (transform or "none").lower()
231
+ if t in ("log10", "log_10", "log10()"): return np.power(10.0, x)
232
+ if t in ("ln", "log", "loge", "log_e", "natural"): return np.exp(x)
233
+ return x
234
 
235
  # ---------- Build X exactly as trained ----------
236
  def _make_X(df: pd.DataFrame, features: list[str]) -> pd.DataFrame:
 
 
 
 
237
  X = df.reindex(columns=features, copy=False)
238
  for c in X.columns:
239
  X[c] = pd.to_numeric(X[c], errors="coerce")
240
  return X
241
 
242
+ # =========================
243
+ # Export helpers
244
+ # =========================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
def _summary_table(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
    """Build a Min/Max/Mean/Std table (3 dp) for the requested columns present in *df*.

    Columns absent from *df* are silently dropped; an empty DataFrame is
    returned when none remain.
    """
    present = [c for c in cols if c in df.columns]
    if not present:
        return pd.DataFrame()
    stats = df[present].agg(['min', 'max', 'mean', 'std']).T
    stats = stats.rename(columns={"min": "Min", "max": "Max", "mean": "Mean", "std": "Std"})
    tbl = stats.reset_index(names="Field")
    return _round_numeric(tbl, 3)
253
 
254
def _train_ranges_df(ranges: dict[str, tuple[float, float]]) -> pd.DataFrame:
    """Tabulate per-feature training (min, max) ranges as Feature/Min/Max rows, rounded to 3 dp."""
    if not ranges:
        return pd.DataFrame()
    table = pd.DataFrame(ranges).T.reset_index()
    table.columns = ["Feature", "Min", "Max"]
    return _round_numeric(table, 3)
 
269
  ws.set_column(i, i, max(min_w, min(max_len + 2, max_w)))
270
  ws.freeze_panes(1, 0)
271
 
 
 
 
 
 
272
def _available_sections() -> list[str]:
    """List the exportable sheet names, driven by which results exist in session state."""
    res = st.session_state.get("results", {})
    sections = []
    if "Train" in res: sections += ["Training","Training_Metrics","Training_Summary"]
    if "Test" in res: sections += ["Testing","Testing_Metrics","Testing_Summary"]
    if "Validate" in res: sections += ["Validation","Validation_Metrics","Validation_Summary","Validation_OOR"]
    if "PredictOnly" in res: sections += ["Prediction","Prediction_Summary"]
    if st.session_state.get("train_ranges"): sections += ["Training_Ranges"]
    # the Info sheet is always offered
    sections += ["Info"]
    return sections
 
283
  def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: bool = True) -> tuple[bytes|None, str|None, list[str]]:
284
  res = st.session_state.get("results", {})
285
  if not res: return None, None, []
 
286
  sheets: dict[str, pd.DataFrame] = {}
287
  order: list[str] = []
288
 
289
+ def _add(name: str, df: pd.DataFrame):
290
+ if df is None or (isinstance(df, pd.DataFrame) and df.empty): return
291
+ sheets[name] = _round_numeric(df, ndigits); order.append(name)
292
+
293
+ if "Training" in selected and "Train" in res: _add("Training", res["Train"])
294
+ if "Training_Metrics" in selected and res.get("m_train"): _add("Training_Metrics", pd.DataFrame([res["m_train"]]))
295
  if "Training_Summary" in selected and "Train" in res:
296
  tr_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in res["Train"].columns]
297
+ _add("Training_Summary", _summary_table(res["Train"], tr_cols))
298
 
299
+ if "Testing" in selected and "Test" in res: _add("Testing", res["Test"])
300
+ if "Testing_Metrics" in selected and res.get("m_test"): _add("Testing_Metrics", pd.DataFrame([res["m_test"]]))
 
 
301
  if "Testing_Summary" in selected and "Test" in res:
302
  te_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in res["Test"].columns]
303
+ _add("Testing_Summary", _summary_table(res["Test"], te_cols))
304
+
305
+ if "Validation" in selected and "Validate" in res: _add("Validation", res["Validate"])
306
+ if "Validation_Metrics" in selected and res.get("m_val"): _add("Validation_Metrics", pd.DataFrame([res["m_val"]]))
307
+ if "Validation_Summary" in selected and res.get("sv_val"): _add("Validation_Summary", pd.DataFrame([res["sv_val"]]))
 
 
 
308
  if "Validation_OOR" in selected and isinstance(res.get("oor_tbl"), pd.DataFrame) and not res["oor_tbl"].empty:
309
+ _add("Validation_OOR", res["oor_tbl"].reset_index(drop=True))
310
 
311
+ if "Prediction" in selected and "PredictOnly" in res: _add("Prediction", res["PredictOnly"])
312
+ if "Prediction_Summary" in selected and res.get("sv_pred"): _add("Prediction_Summary", pd.DataFrame([res["sv_pred"]]))
 
 
313
 
314
  if "Training_Ranges" in selected and st.session_state.get("train_ranges"):
315
+ _add("Training_Ranges", _train_ranges_df(st.session_state["train_ranges"]))
 
316
 
317
  if "Info" in selected:
318
  info = pd.DataFrame([
 
323
  {"Key": "Features", "Value": ", ".join(FEATURES)},
324
  {"Key": "ExportedAt", "Value": datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
325
  ])
326
+ _add("Info", info)
327
 
328
  if not order: return None, None, []
329
 
 
331
  engine = _excel_engine()
332
  with pd.ExcelWriter(bio, engine=engine) as writer:
333
  for name in order:
334
+ df = sheets[name]; sheet = _excel_safe_name(name)
 
335
  df.to_excel(writer, sheet_name=sheet, index=False)
336
+ if do_autofit: _excel_autofit(writer, sheet, df)
 
337
  bio.seek(0)
338
+ fname = f"MinStress_Export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
339
  return bio.getvalue(), fname, order
340
 
 
341
def render_export_button(phase_key: str) -> None:
    """Render the 'Export to Excel' sheet picker plus download button.

    *phase_key* namespaces the widget keys so this control can appear on
    multiple app steps without Streamlit key collisions.  No-op when there
    are no results in session state yet.
    """
    res = st.session_state.get("results", {})
    if not res: return
    st.divider()
    st.markdown("### Export to Excel")
    options = _available_sections()
    selected_sheets = st.multiselect(
        "Sheets to include",
        options=options,
        default=[],
        placeholder="Choose option(s)",
        help="Pick the sheets you want in the Excel export.",
        key=f"sheets_{phase_key}",
    )
    if not selected_sheets:
        # keep a disabled button visible so the export control is discoverable
        st.caption("Select one or more sheets above to enable export.")
        st.download_button("⬇️ Export Excel", data=b"", file_name="MinStress_Export.xlsx",
                           mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                           disabled=True, key=f"download_{phase_key}")
        return
    data, fname, names = build_export_workbook(selected=selected_sheets, ndigits=3, do_autofit=True)
    if names: st.caption("Will include: " + ", ".join(names))
    st.download_button("⬇️ Export Excel", data=(data or b""), file_name=(fname or "MinStress_Export.xlsx"),
                       mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                       disabled=(data is None), key=f"download_{phase_key}")
 
 
 
 
 
 
366
 
367
  # =========================
368
  # Cross plot (Matplotlib)
369
  # =========================
370
+ def cross_plot_static(actual, pred):
371
  a = pd.Series(actual, dtype=float)
372
+ p = pd.Series(pred, dtype=float)
373
+ lo = float(min(a.min(), p.min())); hi = float(max(a.max(), p.max()))
 
 
374
  pad = 0.03 * (hi - lo if hi > lo else 1.0)
375
  lo2, hi2 = lo - pad, hi + pad
 
376
  ticks = np.linspace(lo2, hi2, 5)
377
 
378
  dpi = 110
379
  fig, ax = plt.subplots(figsize=(CROSS_W / dpi, CROSS_H / dpi), dpi=dpi, constrained_layout=False)
 
380
  ax.scatter(a, p, s=14, c=COLORS["pred"], alpha=0.9, linewidths=0)
381
  ax.plot([lo2, hi2], [lo2, hi2], linestyle="--", linewidth=1.2, color=COLORS["ref"])
382
 
383
+ ax.set_xlim(lo2, hi2); ax.set_ylim(lo2, hi2)
384
+ ax.set_xticks(ticks); ax.set_yticks(ticks)
 
385
  ax.set_aspect("equal", adjustable="box")
386
 
387
  fmt = FuncFormatter(lambda x, _: f"{x:.2f}")
388
  ax.xaxis.set_major_formatter(fmt); ax.yaxis.set_major_formatter(fmt)
389
 
390
+ ax.set_xlabel(f"Actual Min Stress ({UNITS})", fontweight="bold", fontsize=10, color="black")
391
+ ax.set_ylabel(f"Predicted Min Stress ({UNITS})", fontweight="bold", fontsize=10, color="black")
392
  ax.tick_params(labelsize=6, colors="black")
393
 
394
  ax.grid(True, linestyle=":", alpha=0.3)
 
404
  def track_plot(df, include_actual=True):
405
  depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
406
  if depth_col is not None:
407
+ y = pd.to_numeric(df[depth_col], errors="coerce"); ylab = depth_col
408
+ y_range = [float(np.nanmax(y)), float(np.nanmin(y))] # reversed
409
  else:
410
  y = pd.Series(np.arange(1, len(df) + 1)); ylab = "Point Index"
411
  y_range = [float(y.max()), float(y.min())]
412
 
413
  x_series = pd.Series(df.get(PRED_COL, pd.Series(dtype=float))).astype(float)
414
+ if include_actual and (ACTUAL_COL or TARGET) in df.columns:
415
+ act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df.columns) else TARGET
416
+ x_series = pd.concat([x_series, pd.Series(df[act_col]).astype(float)], ignore_index=True)
417
  x_lo, x_hi = float(x_series.min()), float(x_series.max())
418
  x_pad = 0.03 * (x_hi - x_lo if x_hi > x_lo else 1.0)
419
  xmin, xmax = x_lo - x_pad, x_hi + x_pad
 
425
  x=df[PRED_COL], y=y, mode="lines",
426
  line=dict(color=COLORS["pred"], width=1.8),
427
  name=PRED_COL,
428
+ hovertemplate=f"{PRED_COL}: "+"%{x:.2f}<br>"+ylab+": %{y}<extra></extra>"
429
  ))
430
+ if include_actual and ((ACTUAL_COL and ACTUAL_COL in df.columns) or TARGET in df.columns):
431
+ act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df.columns) else TARGET
432
  fig.add_trace(go.Scatter(
433
+ x=df[act_col], y=y, mode="lines",
434
  line=dict(color=COLORS["actual"], width=2.0, dash="dot"),
435
+ name=f"{act_col} (actual)",
436
+ hovertemplate=f"{act_col}: "+"%{x:.2f}<br>"+ylab+": %{y}<extra></extra>"
437
  ))
438
 
439
  fig.update_layout(
 
445
  bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1),
446
  legend_title_text=""
447
  )
 
448
  fig.update_xaxes(
449
+ title_text=f"Min Stress ({UNITS})",
450
  title_font=dict(size=20, family=BOLD_FONT, color="#000"),
451
  tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
452
+ side="top", range=[xmin, xmax],
453
+ ticks="outside", tickformat=",.2f", tickmode="auto", tick0=tick0,
454
+ showline=True, linewidth=1.2, linecolor="#444", mirror=True,
455
+ showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
 
 
 
 
 
 
 
 
 
456
  )
 
457
  fig.update_yaxes(
458
  title_text=ylab,
459
  title_font=dict(size=20, family=BOLD_FONT, color="#000"),
 
464
  )
465
  return fig
466
 
467
+ # ---------- Preview (matplotlib) — colorful tracks, y ticks only on left ----------
468
  def preview_tracks(df: pd.DataFrame, cols: list[str]):
 
 
 
 
 
 
469
  cols = [c for c in cols if c in df.columns]
470
  n = len(cols)
471
  if n == 0:
 
474
  ax.axis("off")
475
  return fig
476
 
 
477
  depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
478
  if depth_col is not None:
479
  idx = pd.to_numeric(df[depth_col], errors="coerce")
480
  y_label = depth_col
481
+ y_min, y_max = float(np.nanmin(idx)), float(np.nanmax(idx))
482
  else:
483
  idx = pd.Series(np.arange(1, len(df) + 1))
484
  y_label = "Point Index"
485
+ y_min, y_max = float(idx.min()), float(idx.max())
486
 
 
 
 
487
  cmap = plt.get_cmap("tab20")
488
  col_colors = {col: cmap(i % cmap.N) for i, col in enumerate(cols)}
489
 
490
+ fig, axes = plt.subplots(1, n, figsize=(2.4 * n, 7.0), sharey=True, dpi=100)
491
  if n == 1:
492
  axes = [axes]
493
 
494
  for i, (ax, col) in enumerate(zip(axes, cols)):
495
  x = pd.to_numeric(df[col], errors="coerce")
496
+ ax.plot(x, idx, '-', lw=1.6, color=col_colors[col])
497
  ax.set_xlabel(col)
498
  ax.xaxis.set_label_position('top')
499
  ax.xaxis.tick_top()
500
+ ax.set_ylim(y_max, y_min) # reversed depth down
501
  ax.grid(True, linestyle=":", alpha=0.3)
502
 
503
  if i == 0:
504
  ax.set_ylabel(y_label)
505
  else:
506
+ ax.tick_params(labelleft=False) # hide ticks on all but left-most
507
  ax.set_ylabel("")
508
 
509
  fig.tight_layout()
 
529
  except Exception:
530
  return None
531
 
532
+ # load model
533
  mpath = ensure_model()
534
  if not mpath:
535
+ st.error("Model not found. Upload models/minstress_model.joblib (or set MODEL_URL).")
536
  st.stop()
537
  try:
538
  model = load_model(str(mpath))
 
540
  st.error(f"Failed to load model: {e}")
541
  st.stop()
542
 
543
+ # meta: features / target / pred_col / aliases / transform / units / actual_col
544
  meta = {}
545
+ meta_path = next((p for p in META_CANDIDATES if p.exists()), None)
546
+ ALIASES = None
547
  if meta_path:
548
  try:
549
  meta = json.loads(meta_path.read_text(encoding="utf-8"))
550
+ FEATURES = meta.get("features", FEATURES)
551
+ TARGET = meta.get("target", TARGET)
552
+ PRED_COL = meta.get("pred_col", PRED_COL)
553
+ ACTUAL_COL = meta.get("actual_col", ACTUAL_COL)
554
+ TRANSFORM = meta.get("transform", TRANSFORM)
555
+ UNITS = meta.get("units", UNITS)
556
+ ALIASES = meta.get("feature_aliases") # {"RPM (1/min)": ["RPM", "RPM(1/min)"], ...}
557
  except Exception as e:
558
  st.warning(f"Could not parse meta file ({meta_path.name}): {e}")
559
 
 
 
 
 
 
 
 
 
 
 
 
560
  # =========================
561
  # Session state
562
  # =========================
563
  st.session_state.setdefault("app_step", "intro")
564
  st.session_state.setdefault("results", {})
565
+ st.session_state.setdefault("train_ranges", None)
566
  st.session_state.setdefault("dev_file_name","")
567
  st.session_state.setdefault("dev_file_bytes",b"")
568
  st.session_state.setdefault("dev_file_loaded",False)
 
574
  # =========================
575
  st.sidebar.markdown(f"""
576
  <div class="centered-container">
577
+ <img src="{inline_logo('logo.png')}" class="brand-logo">
578
  <div style='font-weight:800;font-size:1.2rem;'>{APP_NAME}</div>
579
  <div style='color:#667085;'>{TAGLINE}</div>
580
  </div>
581
+ """, unsafe_allow_html=True)
 
582
 
583
  def sticky_header(title, message):
584
  st.markdown(
 
602
  # =========================
603
  if st.session_state.app_step == "intro":
604
  st.header("Welcome!")
605
+ st.markdown(f"This software is developed by *Smart Thinking AI-Solutions Team* to estimate **Minimum Horizontal Stress** ({UNITS}) from drilling/offset data.")
606
  st.subheader("How It Works")
607
  st.markdown(
608
+ "1) **Upload your data to build the case and preview the model performance.** \n"
609
+ "2) Click **Run Model** to compute metrics and plots. \n"
610
+ "3) **Proceed to Validation** (with actual stress) or **Proceed to Prediction** (no actual)."
611
  )
612
  if st.button("Start Showcase", type="primary"):
613
  st.session_state.app_step = "dev"; st.rerun()
 
617
  # =========================
618
  if st.session_state.app_step == "dev":
619
  st.sidebar.header("Case Building")
620
+ up = st.sidebar.file_uploader("Upload Your Data File (Train/Test sheets)", type=["xlsx","xls"])
621
  if up is not None:
622
  st.session_state.dev_file_bytes = up.getvalue()
623
  st.session_state.dev_file_name = up.name
 
642
  elif st.session_state.dev_file_loaded:
643
  sticky_header("Case Building", "📄 **Preview uploaded data** using the sidebar button, then click **Run Model**.")
644
  else:
645
+ sticky_header("Case Building", "**Upload your data to build a case, then run the model to review performance.**")
646
 
647
  if run and st.session_state.dev_file_bytes:
648
  book = read_book_bytes(st.session_state.dev_file_bytes)
649
+ # Expect Train/Test sheets already prepared (no random split)
650
+ def find_sheet(book, names):
651
+ low2orig = {k.lower(): k for k in book.keys()}
652
+ for nm in names:
653
+ if nm.lower() in low2orig: return low2orig[nm.lower()]
654
+ return None
655
  sh_train = find_sheet(book, ["Train","Training","training2","train","training"])
656
  sh_test = find_sheet(book, ["Test","Testing","testing2","test","testing"])
657
  if sh_train is None or sh_test is None:
658
+ st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
659
  st.stop()
660
 
661
+ tr0 = _normalize_columns(book[sh_train].copy(), FEATURES, TARGET, ALIASES)
662
+ te0 = _normalize_columns(book[sh_test].copy(), FEATURES, TARGET, ALIASES)
663
+
664
+ # Determine actual column: ACTUAL_COL (preferred) else TARGET
665
+ actual_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in tr0.columns) else TARGET
666
 
667
+ if not (ensure_cols(tr0, FEATURES+[actual_col]) and ensure_cols(te0, FEATURES+[actual_col])):
668
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True)
669
  st.stop()
670
 
671
+ # Predict using the trained feature order
672
+ tr = tr0.copy(); te = te0.copy()
673
+ tr[PRED_COL] = _inv_transform(model.predict(_make_X(tr0, FEATURES)), TRANSFORM)
674
+ te[PRED_COL] = _inv_transform(model.predict(_make_X(te0, FEATURES)), TRANSFORM)
675
+
676
+ # Metrics
677
+ st.session_state.results["Train"] = tr
678
+ st.session_state.results["Test"] = te
679
+ st.session_state.results["m_train"] = {
680
+ "R": pearson_r(tr[actual_col], tr[PRED_COL]),
681
+ "RMSE": rmse(tr[actual_col], tr[PRED_COL]),
682
+ "MAPE%": mape(tr[actual_col], tr[PRED_COL]),
683
  }
684
+ st.session_state.results["m_test"] = {
685
+ "R": pearson_r(te[actual_col], te[PRED_COL]),
686
+ "RMSE": rmse(te[actual_col], te[PRED_COL]),
687
+ "MAPE%": mape(te[actual_col], te[PRED_COL]),
688
  }
689
 
690
  tr_min = tr[FEATURES].min().to_dict(); tr_max = tr[FEATURES].max().to_dict()
 
693
 
694
  def _dev_block(df, m):
695
  c1,c2,c3 = st.columns(3)
696
+ c1.metric("R", f"{m['R']:.3f}")
697
+ c2.metric("RMSE", f"{m['RMSE']:.2f}")
698
+ c3.metric("MAPE%", f"{m['MAPE%']:.2f}")
699
  st.markdown("""
700
  <div style='text-align: left; font-size: 0.8em; color: #6b7280; margin-top: -16px; margin-bottom: 8px;'>
701
  <strong>R:</strong> Pearson Correlation Coefficient<br>
 
705
  """, unsafe_allow_html=True)
706
  col_track, col_cross = st.columns([2, 3], gap="large")
707
  with col_track:
708
+ st.plotly_chart(track_plot(df, include_actual=True),
709
+ use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
710
  with col_cross:
711
+ act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df.columns) else TARGET
712
+ st.pyplot(cross_plot_static(df[act_col], df[PRED_COL]), use_container_width=False)
713
 
714
  if "Train" in st.session_state.results or "Test" in st.session_state.results:
715
  tab1, tab2 = st.tabs(["Training", "Testing"])
716
+ if "Train" in st.session_state.results:
717
  with tab1: _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
718
  if "Test" in st.session_state.results:
719
+ with tab2: _dev_block(st.session_state.results["Test"], st.session_state.results["m_test"])
720
  render_export_button(phase_key="dev")
721
 
722
  # =========================
723
+ # VALIDATION (with actual)
724
  # =========================
725
  if st.session_state.app_step == "validate":
726
  st.sidebar.header("Validate the Model")
 
736
  if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
737
  if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
738
 
739
+ sticky_header("Validate the Model", "Upload a dataset with the same **features** and **actual stress** to evaluate performance.")
740
 
741
  if go_btn and up is not None:
742
  book = read_book_bytes(up.getvalue())
743
+ # choose sheet named Validation/Validate/Val or first
744
+ names = list(book.keys())
745
+ name = next((s for s in names if s.lower() in ("validation","validate","validation2","val","val2")), names[0])
746
+ df0 = _normalize_columns(book[name].copy(), FEATURES, TARGET, ALIASES)
747
+
748
+ act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df0.columns) else TARGET
749
+ if not ensure_cols(df0, FEATURES+[act_col]):
750
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
 
 
751
 
752
+ df = df0.copy()
753
+ df[PRED_COL] = _inv_transform(model.predict(_make_X(df0, FEATURES)), TRANSFORM)
754
+ st.session_state.results["Validate"] = df
755
+
756
+ # OOR check vs training ranges
757
  ranges = st.session_state.train_ranges; oor_pct = 0.0; tbl=None
758
  if ranges:
759
+ any_viol = pd.DataFrame({f:(df[f] < ranges[f][0]) | (df[f] > ranges[f][1]) for f in FEATURES}).any(axis=1)
760
+ oor_pct = float(any_viol.mean() * 100.0)
761
  if any_viol.any():
762
  tbl = df.loc[any_viol, FEATURES].copy()
763
  for c in FEATURES:
764
  if pd.api.types.is_numeric_dtype(tbl[c]): tbl[c] = tbl[c].round(3)
765
+ tbl["Violations"] = pd.DataFrame({f:(df[f] < ranges[f][0]) | (df[f] > ranges[f][1]) for f in FEATURES}).loc[any_viol].apply(
766
  lambda r:", ".join([c for c,v in r.items() if v]), axis=1
767
  )
768
+
769
+ st.session_state.results["m_val"] = {
770
+ "R": pearson_r(df[act_col], df[PRED_COL]),
771
+ "RMSE": rmse(df[act_col], df[PRED_COL]),
772
+ "MAPE%": mape(df[act_col], df[PRED_COL]),
773
  }
774
+ st.session_state.results["sv_val"] = {"n":len(df), "pred_min":float(df[PRED_COL].min()), "pred_max":float(df[PRED_COL].max()), "oor":oor_pct}
775
+ st.session_state.results["oor_tbl"] = tbl
776
 
777
  if "Validate" in st.session_state.results:
778
  m = st.session_state.results["m_val"]
779
  c1,c2,c3 = st.columns(3)
780
+ c1.metric("R", f"{m['R']:.3f}"); c2.metric("RMSE", f"{m['RMSE']:.2f}"); c3.metric("MAPE%", f"{m['MAPE%']:.2f}")
 
 
781
  st.markdown("""
782
  <div style='text-align: left; font-size: 0.8em; color: #6b7280; margin-top: -16px; margin-bottom: 8px;'>
783
  <strong>R:</strong> Pearson Correlation Coefficient<br>
 
791
  st.plotly_chart(track_plot(st.session_state.results["Validate"], include_actual=True),
792
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
793
  with col_cross:
794
+ act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in st.session_state.results["Validate"].columns) else TARGET
795
+ st.pyplot(cross_plot_static(st.session_state.results["Validate"][act_col],
796
  st.session_state.results["Validate"][PRED_COL]),
797
  use_container_width=False)
798
 
 
805
  df_centered_rounded(st.session_state.results["oor_tbl"])
806
 
807
  # =========================
808
+ # PREDICTION (no actual)
809
  # =========================
810
  if st.session_state.app_step == "predict":
811
+ st.sidebar.header("Prediction (No Actual)")
812
  up = st.sidebar.file_uploader("Upload Prediction Excel", type=["xlsx","xls"])
813
  if up is not None:
814
  book = read_book_bytes(up.getvalue())
 
820
  go_btn = st.sidebar.button("Predict", type="primary", use_container_width=True)
821
  if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
822
 
823
+ sticky_header("Prediction", "Upload a dataset with the feature columns (no actual column).")
824
 
825
  if go_btn and up is not None:
826
  book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
827
+ df0 = _normalize_columns(book[name].copy(), FEATURES, TARGET, ALIASES)
828
+ if not ensure_cols(df0, FEATURES):
829
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
830
+ df = df0.copy()
831
+ df[PRED_COL] = _inv_transform(model.predict(_make_X(df0, FEATURES)), TRANSFORM)
832
+ st.session_state.results["PredictOnly"] = df
833
 
834
  ranges = st.session_state.train_ranges; oor_pct = 0.0
835
  if ranges:
836
+ any_viol = pd.DataFrame({f:(df[f] < ranges[f][0]) | (df[f] > ranges[f][1]) for f in FEATURES}).any(axis=1)
837
+ oor_pct = float(any_viol.mean() * 100.0)
838
+ st.session_state.results["sv_pred"] = {
839
  "n":len(df),
840
  "pred_min":float(df[PRED_COL].min()),
841
  "pred_max":float(df[PRED_COL].max()),
 
846
 
847
  if "PredictOnly" in st.session_state.results:
848
  df = st.session_state.results["PredictOnly"]; sv = st.session_state.results["sv_pred"]
 
849
  col_left, col_right = st.columns([2,3], gap="large")
850
  with col_left:
851
  table = pd.DataFrame({
 
859
  with col_right:
860
  st.plotly_chart(track_plot(df, include_actual=False),
861
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
 
862
  render_export_button(phase_key="predict")
863
 
864
  # =========================
 
879
  tabs = st.tabs(names)
880
  for t, name in zip(tabs, names):
881
  with t:
882
+ df = _normalize_columns(book_to_preview[name], FEATURES, TARGET, ALIASES)
883
  t1, t2 = st.tabs(["Tracks", "Summary"])
884
  with t1:
885
  st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)