Spaces:

Smart-Thinking
/

UCS

Sleeping

App Files Files Community

UCS2014 committed on Aug 26, 2025

Commit

843472e

verified ·

1 Parent(s): ac61d22

Update app.py

Browse files

Files changed (1) hide show

app.py +395 -493

app.py CHANGED Viewed

@@ -1,83 +1,75 @@
-# app.py
-import io, os, json, base64
 from pathlib import Path
-import numpy as np
-import pandas as pd
 import streamlit as st
 import joblib
-# =========================
-# Constants / defaults
-# =========================
 FEATURES = ["Q, gpm", "SPP(psi)", "T (kft.lbf)", "WOB (klbf)", "ROP (ft/h)"]
 TARGET = "UCS"
 MODELS_DIR = Path("models")
 DEFAULT_MODEL = MODELS_DIR / "ucs_rf.joblib"
 MODEL_FALLBACKS = [MODELS_DIR / "model.joblib", MODELS_DIR / "model.pkl"]
-COLORS = {
-    "pred": "#1f77b4",   # blue
-    "actual": "#f2c94c", # yellow
-    "ref": "#444444",    # 1:1 line
-}
-# =========================
-# Page config + CSS
-# =========================
 st.set_page_config(page_title="ST_GeoMech_UCS", page_icon="logo.png", layout="wide")
-st.markdown("""
-<style>
-/* Hide default header/footer chrome */
-header, footer {visibility: hidden !important;}
-.stApp { background: #ffffff; }
-/* Sidebar look */
-section[data-testid="stSidebar"] { background: #F6F9FC; }
-/* Hero */
-.st-hero { display:flex; align-items:center; gap:14px; padding: 4px 0 2px 0; }
-.st-hero .brand { width:90px; height:90px; object-fit:contain; }
-.st-hero h1 { margin:0; line-height:1.05; }
-.st-hero .tagline { margin:2px 0 0 2px; color:#6b7280; font-size:1.05rem; font-style:italic; }
-/* Keep hero snug to the top */
-[data-testid="stBlock"] { margin-top:0 !important; }
-/* Global primary button style (Run Model stays blue) */
-.stButton > button {
-  background:#2563eb; color:#fff; font-weight:600; border:none; border-radius:8px;
-  padding:9px 18px;
-}
-/* Orange preview button (scoped by wrapper) */
-#preview-btn button {
-  background:#f59e0b !important; color:#fff !important;
-}
-/* Green proceed button (scoped by wrapper) */
-#proceed-btn button {
-  background:#16a34a !important; color:#fff !important;
-}
-/* Info helper chip */
-.helper-note {
-  background:#e7f0ff; border-radius:10px; padding:14px 16px; border:1px solid #d4e3ff;
-  color:#0f172a;
-}
-/* Make tab content tighter */
-[data-baseweb="tab-border"] { margin-top: 0.2rem; }
-/* Plotly charts use white backgrounds via functions below */
-</style>
-""", unsafe_allow_html=True)
-# =========================
-# Utils
-# =========================
 def inline_logo(path="logo.png") -> str:
     try:
         p = Path(path)
@@ -86,32 +78,26 @@ def inline_logo(path="logo.png") -> str:
     except Exception:
         return ""
-def _get_model_url():
-    # Safe access (prevents the "No secrets files" banner)
-    try:
-        return (st.secrets.get("MODEL_URL", "") or os.environ.get("MODEL_URL", "") or "").strip()
-    except Exception:
-        return (os.environ.get("MODEL_URL", "") or "").strip()
 @st.cache_data(show_spinner=False)
-def parse_excel_bytes(data_bytes: bytes):
     bio = io.BytesIO(data_bytes)
     xl = pd.ExcelFile(bio)
     return {sh: xl.parse(sh) for sh in xl.sheet_names}
-def ensure_required_columns(df: pd.DataFrame, cols) -> bool:
     miss = [c for c in cols if c not in df.columns]
     if miss:
         st.error(f"Missing columns: {miss}\nFound: {list(df.columns)}")
         return False
     return True
-@st.cache_resource(show_spinner=False)
-def load_model(model_path: str):
-    return joblib.load(model_path)
 def infer_features_from_model(m):
-    # Try scikit-learn feature names if present
     try:
         if hasattr(m, "feature_names_in_") and len(getattr(m, "feature_names_in_")):
             return [str(x) for x in m.feature_names_in_]
@@ -124,25 +110,114 @@ def infer_features_from_model(m):
     except Exception: pass
     return None
-def rmse(y_true, y_pred):  # convenience
-    from sklearn.metrics import mean_squared_error
-    return float(np.sqrt(mean_squared_error(y_true, y_pred)))
-# =========================
-# Model availability
-# =========================
-MODEL_URL = _get_model_url()
 def ensure_model_present() -> Path | None:
     for p in [DEFAULT_MODEL, *MODEL_FALLBACKS]:
-        if p.exists(): return p
     if MODEL_URL:
         try:
             import requests
-        except Exception:
-            st.error("Downloading the model requires 'requests'. Please add it to requirements.txt.")
-            return None
-        try:
             DEFAULT_MODEL.parent.mkdir(parents=True, exist_ok=True)
             with requests.get(MODEL_URL, stream=True) as r:
                 r.raise_for_status()
@@ -151,22 +226,22 @@ def ensure_model_present() -> Path | None:
                         f.write(chunk)
             return DEFAULT_MODEL
         except Exception as e:
-            st.error(f"Failed to download model from MODEL_URL. {e}")
-            return None
     return None
 model_path = ensure_model_present()
 if not model_path:
-    st.error("Model not found. Upload models/ucs_rf.joblib (or set MODEL_URL in Settings → Variables).")
     st.stop()
 try:
     model = load_model(str(model_path))
 except Exception as e:
     st.error(f"Failed to load model: {model_path}\n{e}")
     st.stop()
-# Optional meta overrides
 meta_path = MODELS_DIR / "meta.json"
 if meta_path.exists():
     try:
@@ -176,159 +251,26 @@ if meta_path.exists():
     except Exception:
         pass
 else:
-    _inf = infer_features_from_model(model)
-    if _inf: FEATURES = _inf
-# =========================
-# Plotly helpers (no titles, white background, safe margins)
-# =========================
-def _apply_plotly_base_layout(fig, *, top=40, left=60):
-    fig.update_layout(
-        margin=dict(l=left, r=10, t=top, b=40),
-        paper_bgcolor="#ffffff",
-        plot_bgcolor="#ffffff",
-        font=dict(size=12),
-    )
-    fig.update_xaxes(automargin=True, title_font=dict(size=12), tickfont=dict(size=11))
-    fig.update_yaxes(automargin=True, title_font=dict(size=12), tickfont=dict(size=11))
-    return fig
-def cross_plotly(actual, pred):
-    import plotly.graph_objects as go
-    lo = float(np.nanmin([actual.min(), pred.min()]))
-    hi = float(np.nanmax([actual.max(), pred.max()]))
-    pad = 0.03 * (hi - lo if hi > lo else 1.0)
-    fig = go.Figure()
-    fig.add_trace(go.Scatter(
-        x=actual, y=pred, mode="markers",
-        marker=dict(size=6, color=COLORS["pred"]),
-        hovertemplate="Actual: %{x:.2f}<br>Pred: %{y:.2f}<extra></extra>",
-        showlegend=False, name="Points",
-    ))
-    fig.add_trace(go.Scatter(
-        x=[lo - pad, hi + pad], y=[lo - pad, hi + pad],
-        mode="lines", line=dict(dash="dash", width=1.5, color=COLORS["ref"]),
-        hoverinfo="skip", showlegend=False,
-    ))
-    _apply_plotly_base_layout(fig, top=10, left=60)
-    fig.update_xaxes(
-        title_text="Actual UCS", title_standoff=10,
-        showgrid=True, gridcolor="rgba(0,0,0,0.12)",
-        zeroline=False, scaleanchor="y", scaleratio=1
-    )
-    fig.update_yaxes(
-        title_text="Predicted UCS", title_standoff=10,
-        showgrid=True, gridcolor="rgba(0,0,0,0.12)",
-        zeroline=False
-    )
-    return fig
-def track_plotly(df, include_actual=True):
-    import plotly.graph_objects as go
-    depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
-    if depth_col is not None:
-        y = df[depth_col]; y_label = depth_col
-    else:
-        y = np.arange(1, len(df) + 1); y_label = "Point Index"
-    fig = go.Figure()
-    fig.add_trace(go.Scatter(
-        x=df["UCS_Pred"], y=y, mode="lines",
-        line=dict(color=COLORS["pred"], width=2),
-        name="UCS_Pred",
-        hovertemplate="UCS_Pred: %{x:.2f}<br>"+y_label+": %{y}<extra></extra>"
-    ))
-    if include_actual and TARGET in df.columns:
-        fig.add_trace(go.Scatter(
-            x=df[TARGET], y=y, mode="lines",
-            line=dict(color=COLORS["actual"], dash="dot", width=2.2),
-            name="UCS (actual)",
-            hovertemplate="UCS (actual): %{x:.2f}<br>"+y_label+": %{y}<extra></extra>"
-        ))
-    _apply_plotly_base_layout(fig, top=60, left=70)
-    fig.update_layout(
-        legend=dict(orientation="h", yanchor="bottom", y=1.02, x=0),
-        height=650
-    )
-    fig.update_xaxes(
-        title_text="UCS", side="top", title_standoff=12,
-        showgrid=True, gridcolor="rgba(0,0,0,0.12)"
-    )
-    fig.update_yaxes(
-        title_text=y_label, autorange="reversed", title_standoff=10,
-        showgrid=True, gridcolor="rgba(0,0,0,0.12)"
-    )
-    return fig
-def make_index_tracks_plotly(df: pd.DataFrame, cols: list[str]):
-    from plotly.subplots import make_subplots
-    import plotly.graph_objects as go
-    cols = [c for c in cols if c in df.columns]
-    if not cols:
-        fig = go.Figure()
-        fig.add_annotation(text="No selected columns in sheet", showarrow=False, x=0.5, y=0.5)
-        fig.update_xaxes(visible=False); fig.update_yaxes(visible=False)
-        fig.update_layout(height=200, margin=dict(l=10,r=10,t=10,b=10),
-                          paper_bgcolor="#ffffff", plot_bgcolor="#ffffff")
-        return fig
-    n = len(cols)
-    # IMPORTANT: shared_yaxes (not shared_y)
-    fig = make_subplots(rows=1, cols=n, shared_yaxes=True, horizontal_spacing=0.05)
-    idx = np.arange(1, len(df) + 1)
-    for i, col in enumerate(cols, start=1):
-        fig.add_trace(
-            go.Scatter(
-                x=df[col], y=idx, mode="lines",
-                line=dict(color="#333333", width=1.2),
-                hovertemplate=f"{col}: "+"%{x:.2f}<br>Index: %{y}<extra></extra>",
-                showlegend=False, name=col,
-            ), row=1, col=i
-        )
-        fig.update_xaxes(
-            title_text=col, side="top", title_standoff=10,
-            tickfont=dict(size=10),
-            showgrid=True, gridcolor="rgba(0,0,0,0.12)",
-            row=1, col=i
-        )
-    fig.update_yaxes(
-        autorange="reversed", title_text="Point Index", title_standoff=10,
-        tickfont=dict(size=10),
-        showgrid=True, gridcolor="rgba(0,0,0,0.12)",
-        row=1, col=1
-    )
-    fig.update_layout(
-        height=650,
-        margin=dict(l=60, r=10, t=60, b=40),
-        paper_bgcolor="#ffffff",
-        plot_bgcolor="#ffffff",
-        font=dict(size=12),
-    )
-    return fig
-# =========================
 # Session state defaults
-# =========================
 ss = st.session_state
-ss.setdefault("app_step", "dev")               # intro/dev/predict (you asked to start at dev)
-ss.setdefault("dev_bytes", None)               # raw uploaded bytes
-ss.setdefault("dev_book", None)                # parsed workbook dict
-ss.setdefault("dev_sheet_train", None)         # chosen train sheet
-ss.setdefault("dev_sheet_test", None)          # chosen test sheet
 ss.setdefault("dev_previewed", False)
 ss.setdefault("dev_ran", False)
 ss.setdefault("results", {})
 ss.setdefault("train_ranges", None)
-# =========================
-# Hero header
-# =========================
 st.markdown(
     f"""
     <div class="st-hero">
@@ -342,309 +284,269 @@ st.markdown(
     unsafe_allow_html=True,
 )
-# =========================
-# INTRO (kept for completeness – you said start in dev)
-# =========================
 if ss.app_step == "intro":
     st.header("Welcome!")
     st.markdown(
-        "1. **Upload your data** to build the case and preview the performance of our model.\n"
-        "2. **Run Model** to compute metrics and plots.\n"
-        "3. **Proceed to Prediction** to validate on a new dataset and export results."
     )
-    if st.button("Start", type="primary"): ss.app_step = "dev"; st.rerun()
-# =========================
-# DEVELOPMENT
-# =========================
-if ss.app_step == "dev":
-    # Sidebar controls
-    st.sidebar.header("Model Development Data")
-    dev_file = st.sidebar.file_uploader("Replace data (Excel)", type=["xlsx","xls"], key="dev_upload")
-    # Cache uploaded file into session (so preview doesn't clear it)
-    if dev_file is not None:
-        ss.dev_bytes = dev_file.getvalue()
-        try:
-            ss.dev_book = parse_excel_bytes(ss.dev_bytes)
-        except Exception as e:
-            st.sidebar.error(f"Failed to read workbook: {e}")
-            ss.dev_book = None
-            ss.dev_previewed = False
-            ss.dev_ran = False
-    # PREVIEW button (orange)
-    st.sidebar.markdown("<div id='preview-btn'>", unsafe_allow_html=True)
-    preview_click = st.sidebar.button("Preview data", use_container_width=True)
-    st.sidebar.markdown("</div>", unsafe_allow_html=True)
-    # RUN button (blue)
-    run_click = st.sidebar.button("Run Model", use_container_width=True)
-    # Proceed button (green; enabled after run)
-    st.sidebar.markdown("<div id='proceed-btn'>", unsafe_allow_html=True)
-    proceed_click = st.sidebar.button(
-        "Proceed to Prediction ▶",
-        use_container_width=True,
-        disabled=not ss.dev_ran
-    )
-    st.sidebar.markdown("</div>", unsafe_allow_html=True)
-    if proceed_click and ss.dev_ran:
-        ss.app_step = "predict"
         st.rerun()
-    # Section heading
     st.subheader("Model Development")
-    # Helper message (sticks here always)
-    helper = st.empty()
-    if ss.dev_book is None:
-        helper.markdown("<div class='helper-note'>Upload your data to build the case and preview the dataset.</div>", unsafe_allow_html=True)
-    elif not ss.dev_previewed:
-        helper.markdown("<div class='helper-note'>Data loaded ✓ — click <b>Preview data</b> to review tracks and summary.</div>", unsafe_allow_html=True)
     elif ss.dev_previewed and not ss.dev_ran:
-        helper.markdown("<div class='helper-note'>Previewed ✓ — now click <b>Run Model</b> to build the case.</div>", unsafe_allow_html=True)
-    else:
-        helper.markdown("<div class='helper-note'>Case built ✓ — results are displayed below.</div>", unsafe_allow_html=True)
-    # ----------------- Preview modal -----------------
-    def preview_modal(book: dict, feature_cols: list[str]):
-        if not book: return
-        with st.expander("▼ Preview (tracks & summary)", expanded=True):
-            # Choose a sheet to preview
-            sheetnames = list(book.keys())
-            sh = st.selectbox("Sheet", options=sheetnames, index=0, key="preview_sheet_sel")
-            df = book[sh].copy()
-            # Tracks tab + Stats tab
-            t1, t2 = st.tabs(["Tracks", "Summary"])
-            with t1:
-                fig = make_index_tracks_plotly(df, feature_cols)
-                st.plotly_chart(fig, use_container_width=True, config={"displayModeBar": False})
-            with t2:
-                stats = df[feature_cols].describe().T[["min", "max", "mean", "std"]].rename(
-                    columns={"min":"Min", "max":"Max", "mean":"Mean", "std":"Std"}
-                )
-                st.dataframe(stats, use_container_width=True)
-    # If preview clicked and we have data
-    if preview_click:
-        if ss.dev_book:
-            preview_modal(ss.dev_book, FEATURES)
-            ss.dev_previewed = True
-            ss.dev_ran = False
-            st.rerun()
         else:
-            st.warning("Please upload an Excel file first.")
-    # If run clicked and we have data
-    if run_click:
-        if not ss.dev_book:
-            st.warning("Please upload and preview your data first.")
         else:
-            # Try to find common sheet names
-            names = list(ss.dev_book.keys())
-            def find_sheet(book, alts):
-                lo = {k.lower(): k for k in book.keys()}
-                for nm in alts:
-                    if nm.lower() in lo: return lo[nm.lower()]
-                return None
-            sh_train = find_sheet(ss.dev_book, ["Train","Training","training2","train","training"]) or names[0]
-            sh_test  = find_sheet(ss.dev_book, ["Test","Testing","testing2","test","testing"]) or (names[1] if len(names)>1 else names[0])
-            ss.dev_sheet_train, ss.dev_sheet_test = sh_train, sh_test
-            df_tr = ss.dev_book[sh_train].copy()
-            df_te = ss.dev_book[sh_test].copy()
-            ok = ensure_required_columns(df_tr, FEATURES+[TARGET]) and ensure_required_columns(df_te, FEATURES+[TARGET])
-            if ok:
                 df_tr["UCS_Pred"] = model.predict(df_tr[FEATURES])
                 df_te["UCS_Pred"] = model.predict(df_te[FEATURES])
-                from sklearn.metrics import r2_score, mean_absolute_error
                 ss.results["Train"] = df_tr
                 ss.results["Test"]  = df_te
                 ss.results["metrics_train"] = {
                     "R2": r2_score(df_tr[TARGET], df_tr["UCS_Pred"]),
                     "RMSE": rmse(df_tr[TARGET], df_tr["UCS_Pred"]),
-                    "MAE": mean_absolute_error(df_tr[TARGET], df_tr["UCS_Pred"]),
                 }
                 ss.results["metrics_test"]  = {
                     "R2": r2_score(df_te[TARGET], df_te["UCS_Pred"]),
                     "RMSE": rmse(df_te[TARGET], df_te["UCS_Pred"]),
-                    "MAE": mean_absolute_error(df_te[TARGET], df_te["UCS_Pred"]),
                 }
                 tr_min = df_tr[FEATURES].min().to_dict()
                 tr_max = df_tr[FEATURES].max().to_dict()
                 ss.train_ranges = {f:(float(tr_min[f]), float(tr_max[f])) for f in FEATURES}
                 ss.dev_ran = True
-                helper.markdown("<div class='helper-note'>Case built ✓ — results are displayed below.</div>", unsafe_allow_html=True)
-            else:
-                ss.dev_ran = False
-    # Show results if available
-    if ss.dev_ran and ("Train" in ss.results or "Test" in ss.results):
-        ttr, tte = st.tabs(["Training", "Testing"])
-        if "Train" in ss.results:
-            with ttr:
-                m = ss.results["metrics_train"]
-                c1,c2,c3 = st.columns([1,1,1])
                 c1.metric("R²", f"{m['R2']:.4f}")
                 c2.metric("RMSE", f"{m['RMSE']:.4f}")
                 c3.metric("MAE", f"{m['MAE']:.4f}")
-                l, r = st.columns([0.55, 0.45])
-                with l:
-                    st.plotly_chart(cross_plotly(ss.results["Train"][TARGET], ss.results["Train"]["UCS_Pred"]),
-                                    use_container_width=True, config={"displayModeBar": False})
-                with r:
-                    st.plotly_chart(track_plotly(ss.results["Train"], include_actual=True),
-                                    use_container_width=True, config={"displayModeBar": False})
-        if "Test" in ss.results:
-            with tte:
-                m = ss.results["metrics_test"]
-                c1,c2,c3 = st.columns([1,1,1])
                 c1.metric("R²", f"{m['R2']:.4f}")
                 c2.metric("RMSE", f"{m['RMSE']:.4f}")
                 c3.metric("MAE", f"{m['MAE']:.4f}")
-                l, r = st.columns([0.55, 0.45])
-                with l:
-                    st.plotly_chart(cross_plotly(ss.results["Test"][TARGET], ss.results["Test"]["UCS_Pred"]),
-                                    use_container_width=True, config={"displayModeBar": False})
-                with r:
-                    st.plotly_chart(track_plotly(ss.results["Test"], include_actual=True),
-                                    use_container_width=True, config={"displayModeBar": False})
-# =========================
-# PREDICTION
-# =========================
-if ss.app_step == "predict":
-    st.sidebar.header("Prediction (Validation)")
-    val_file = st.sidebar.file_uploader("Upload Validation Excel", type=["xlsx","xls"], key="val_upload")
-    predict_click = st.sidebar.button("Predict", use_container_width=True)
-    back_click = st.sidebar.button("⬅ Back", use_container_width=True)
-    if back_click:
-        ss.app_step = "dev"; st.rerun()
-    st.subheader("Prediction")
-    st.markdown("Upload a new dataset to generate UCS predictions and evaluate performance on unseen data.")
-    st.success("Predictions ready ✓" if "Validate" in ss.results else "Waiting for input…")
-    if predict_click and val_file is not None:
-        try:
-            vbook = parse_excel_bytes(val_file.getvalue())
-        except Exception as e:
-            st.error(f"Could not read the Validation Excel: {e}")
-            vbook = {}
-        if vbook:
-            # Pick first sheet by default
-            vname = list(vbook.keys())[0]
-            df_val = vbook[vname].copy()
-            if ensure_required_columns(df_val, FEATURES):
-                df_val["UCS_Pred"] = model.predict(df_val[FEATURES])
-                ss.results["Validate"] = df_val
-                # Out-of-range check vs training ranges
-                ranges = ss.train_ranges; oor_table = None; oor_pct = 0.0
-                if ranges:
-                    viol = {f: (df_val[f] < ranges[f][0]) | (df_val[f] > ranges[f][1]) for f in FEATURES}
-                    any_viol = pd.DataFrame(viol).any(axis=1); oor_pct = float(any_viol.mean()*100.0)
-                    if any_viol.any():
-                        offenders = df_val.loc[any_viol, FEATURES].copy()
-                        offenders["Violations"] = pd.DataFrame(viol).loc[any_viol].apply(
-                            lambda r: ", ".join([c for c,v in r.items() if v]), axis=1)
-                        offenders.index = offenders.index + 1; oor_table = offenders
-                from sklearn.metrics import r2_score, mean_absolute_error
-                metrics_val = None
-                if TARGET in df_val.columns:
-                    metrics_val = {
-                        "R2": r2_score(df_val[TARGET], df_val["UCS_Pred"]),
-                        "RMSE": rmse(df_val[TARGET], df_val["UCS_Pred"]),
-                        "MAE": mean_absolute_error(df_val[TARGET], df_val["UCS_Pred"]),
-                    }
-                ss.results["metrics_val"] = metrics_val
-                ss.results["summary_val"] = {
-                    "n_points": len(df_val),
-                    "pred_min": float(df_val["UCS_Pred"].min()),
-                    "pred_max": float(df_val["UCS_Pred"].max()),
-                    "oor_pct": oor_pct
-                }
-                ss.results["oor_table"] = oor_table
-                st.experimental_rerun()
-    # Show prediction results
-    if "Validate" in ss.results:
         sv = ss.results["summary_val"]; oor_table = ss.results.get("oor_table")
         c1,c2,c3,c4 = st.columns(4)
-        c1.metric("# points", f"{sv['n_points']}")
-        c2.metric("Pred min", f"{sv['pred_min']:.2f}")
-        c3.metric("Pred max", f"{sv['pred_max']:.2f}")
-        c4.metric("OOR %", f"{sv['oor_pct']:.1f}%")
-        if sv["oor_pct"] > 0:
-            st.warning("Some validation rows contain inputs outside the Training min–max ranges. Review the table below.")
-        left, right = st.columns([0.55, 0.45])
         with left:
             if TARGET in ss.results["Validate"].columns:
                 st.plotly_chart(
-                    cross_plotly(ss.results["Validate"][TARGET], ss.results["Validate"]["UCS_Pred"]),
-                    use_container_width=True, config={"displayModeBar": False}
                 )
             else:
-                st.info("Actual UCS values are not available in the validation data. Cross-plot cannot be generated.")
         with right:
             st.plotly_chart(
-                track_plotly(ss.results["Validate"], include_actual=(TARGET in ss.results["Validate"].columns)),
-                use_container_width=True, config={"displayModeBar": False}
             )
         if oor_table is not None:
-            st.write("*Out-of-range rows (vs. Training min–max):*")
             st.dataframe(oor_table, use_container_width=True)
-        # Export
-        def export_workbook(sheets_dict, summary_df=None):
-            try:
-                import openpyxl
-            except Exception:
-                raise RuntimeError("Export requires openpyxl. Please add it to requirements.txt.")
-            buf = io.BytesIO()
-            with pd.ExcelWriter(buf, engine="openpyxl") as xw:
-                for name, frame in sheets_dict.items():
-                    frame.to_excel(xw, sheet_name=name[:31], index=False)
-                if summary_df is not None:
-                    summary_df.to_excel(xw, sheet_name="Summary", index=False)
-            return buf.getvalue()
-        st.markdown("---")
-        sheets_to_save = {"Validate_with_pred": ss.results["Validate"]}
-        rows = []
-        for name, key in [("Train","metrics_train"), ("Test","metrics_test"), ("Validate","metrics_val")]:
-            m = ss.results.get(key)
-            if m: rows.append({"Split": name, **{k: round(v,6) for k,v in m.items()}})
-        summary_df = pd.DataFrame(rows) if rows else None
-        try:
-            data_bytes = export_workbook(sheets_to_save, summary_df)
-            st.download_button("Export Validation Results to Excel",
-                data=data_bytes, file_name="UCS_Validation_Results.xlsx",
-                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
-        except RuntimeError as e:
-            st.warning(str(e))
-# =========================
 # Footer
-# =========================
 st.markdown("---")
 st.markdown(
     "<div style='text-align:center; color:#6b7280;'>"

+import io, json, os, base64
 from pathlib import Path
 import streamlit as st
+import pandas as pd
+import numpy as np
 import joblib
+# --- Plotly (interactive) ---
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
+# =========================================================
+# Defaults (overridden by models/meta.json or model.feature_names_in_)
+# =========================================================
 FEATURES = ["Q, gpm", "SPP(psi)", "T (kft.lbf)", "WOB (klbf)", "ROP (ft/h)"]
 TARGET = "UCS"
 MODELS_DIR = Path("models")
 DEFAULT_MODEL = MODELS_DIR / "ucs_rf.joblib"
 MODEL_FALLBACKS = [MODELS_DIR / "model.joblib", MODELS_DIR / "model.pkl"]
+# =========================================================
+# Page / Theme + CSS
+# =========================================================
 st.set_page_config(page_title="ST_GeoMech_UCS", page_icon="logo.png", layout="wide")
+# Inject the app-wide CSS. unsafe_allow_html=True is what lets the raw <style>
+# tag through instead of being rendered as escaped text.
+# NOTE(review): the sidebar button colours are keyed to button order via
+# :nth-of-type, which counts sibling elements by tag rather than by class.
+# Confirm against the rendered sidebar DOM that each selector hits the
+# intended button.
+st.markdown(
+    """
+    <style>
+    /* App + sidebar background */
+    .stApp { background: #FFFFFF; }
+    section[data-testid="stSidebar"] { background: #F6F9FC; }
+    /* Tighten top spacing */
+    [data-testid="stBlock"]{ margin-top: 0 !important; }
+    /* Hero row */
+    .st-hero { display:flex; align-items:center; gap:16px; padding-top: 6px; }
+    .st-hero .brand { width:90px; height:90px; object-fit:contain; }
+    .st-hero h1 { margin:0; line-height:1.05; }
+    .st-hero .tagline { margin:2px 0 0 2px; color:#6b7280; font-size:1.05rem; font-style:italic; }
+    /* Sidebar button palette (order-based within the Sidebar section)
+       1) Preview (orange)  2) Run (blue)  3) Proceed (green)
+       We scope to the sidebar and to stButton blocks only. */
+    section[data-testid="stSidebar"] div.stButton > button {
+        font-weight:700; border-radius:10px; border:none; padding:10px 20px;
+    }
+    section[data-testid="stSidebar"] div.stButton:nth-of-type(1) > button { /* Preview */
+        background:#f59e0b; color:#fff;
+    }
+    section[data-testid="stSidebar"] div.stButton:nth-of-type(2) > button { /* Run (blue) */
+        background:#2563eb; color:#fff;
+    }
+    section[data-testid="stSidebar"] div.stButton:nth-of-type(3) > button { /* Proceed (green) */
+        background:#10b981; color:#fff;
+    }
+    section[data-testid="stSidebar"] div.stButton:nth-of-type(3) > button:disabled {
+        background:#a7f3d0 !important; color:#064e3b !important; opacity:.7 !important;
+    }
+    /* Modal tabs spacing */
+    .stTabs [data-baseweb="tab-list"] { gap: 6px; }
+    </style>
+    """,
+    unsafe_allow_html=True
+)
+# =========================================================
+# Helpers
+# =========================================================
 def inline_logo(path="logo.png") -> str:
     try:
         p = Path(path)
     except Exception:
         return ""
+def rmse(y_true, y_pred):
+    """Return the root-mean-squared error between ``y_true`` and ``y_pred`` as a float."""
+    mse = mean_squared_error(y_true, y_pred)
+    return float(np.sqrt(mse))
+@st.cache_resource(show_spinner=False)
+def load_model(model_path: str):
+    """Load the serialized model stored at ``model_path`` with joblib.
+
+    Decorated with ``st.cache_resource`` so Streamlit caches the loaded
+    object instead of re-reading the file on each call.
+
+    NOTE(review): ``joblib.load`` unpickles the file, and unpickling can
+    execute arbitrary code. Only point this at model files from a trusted
+    source.
+    """
+    return joblib.load(model_path)
 @st.cache_data(show_spinner=False)
+def parse_excel(data_bytes: bytes):
+    """Parse an Excel workbook held in memory.
+
+    Args:
+        data_bytes: Raw bytes of the workbook file.
+
+    Returns:
+        A dict mapping each sheet name to the DataFrame parsed from that sheet.
+    """
     bio = io.BytesIO(data_bytes)
     xl = pd.ExcelFile(bio)
     return {sh: xl.parse(sh) for sh in xl.sheet_names}
+def ensure_cols(df, cols):
+    """Check that every name in ``cols`` is a column of ``df``.
+
+    When any are absent, shows a Streamlit error listing the missing names
+    and the columns that were found, and returns False. Returns True when
+    nothing is missing.
+    """
     miss = [c for c in cols if c not in df.columns]
     if miss:
         st.error(f"Missing columns: {miss}\nFound: {list(df.columns)}")
         return False
     return True
 def infer_features_from_model(m):
     try:
         if hasattr(m, "feature_names_in_") and len(getattr(m, "feature_names_in_")):
             return [str(x) for x in m.feature_names_in_]
     except Exception: pass
     return None
+def export_workbook(sheets_dict, summary_df=None):
+    """Serialize DataFrames into an in-memory .xlsx workbook.
+
+    Args:
+        sheets_dict: Mapping of sheet name -> DataFrame. Each frame is written
+            without its index, and names are cut to 31 characters (Excel's
+            sheet-name limit).
+        summary_df: Optional DataFrame written to an extra "Summary" sheet.
+
+    Returns:
+        The workbook contents as bytes.
+
+    Raises:
+        RuntimeError: If openpyxl cannot be imported.
+    """
+    try:
+        import openpyxl  # noqa: F401  (only verifies the writer engine is installed)
+    except ImportError as err:
+        # Only a failed import should produce the "add it to requirements"
+        # hint. Chain the cause so the underlying import error stays visible.
+        raise RuntimeError("Export requires openpyxl. Please add it to requirements.txt.") from err
+    buf = io.BytesIO()
+    with pd.ExcelWriter(buf, engine="openpyxl") as xw:
+        for name, frame in sheets_dict.items():
+            frame.to_excel(xw, sheet_name=name[:31], index=False)
+        if summary_df is not None:
+            summary_df.to_excel(xw, sheet_name="Summary", index=False)
+    return buf.getvalue()
+# -------------------- Plotly styling blocks --------------------
+# Axis appearance shared by the figures: style_layout passes these as keyword
+# arguments to both update_xaxes and update_yaxes.
+AXES_STYLE = dict(
+    showline=True, linewidth=1.4, linecolor="#444",
+    mirror=True, ticks="outside", ticklen=4, tickwidth=1,
+    showgrid=True, gridcolor="rgba(0,0,0,0.08)"
+)
+# Base font that style_layout sets on the figure layout.
+FONT = dict(color="#111", size=13)
+def style_layout(fig, width=None, height=None, margins=(12,18,36,12)):
+    """Apply the shared white-background look to a Plotly figure and return it.
+
+    ``margins`` is ordered (top, right, bottom, left). ``width`` and ``height``
+    are applied only when truthy; otherwise that dimension is left untouched.
+    """
+    top, right, bottom, left = margins
+    layout = dict(
+        margin=dict(t=top, r=right, b=bottom, l=left),
+        paper_bgcolor="white",
+        plot_bgcolor="white",
+        font=FONT,
+        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
+    )
+    if width:
+        layout["width"] = width
+    if height:
+        layout["height"] = height
+    fig.update_layout(**layout)
+    # Identical styling for every x and y axis in the figure.
+    for update_axes in (fig.update_xaxes, fig.update_yaxes):
+        update_axes(**AXES_STYLE, title_font=dict(size=14, color="#111"))
+    return fig
+def make_cross_plotly(A, P, height=440, width=640):
+    """Build the actual-vs-predicted UCS cross-plot.
+
+    ``A`` (actual) and ``P`` (predicted) are converted to float Series and
+    drawn as markers alongside a dashed 1:1 line. Both axes are given the same
+    [lo, hi] range, taken from the combined minimum and maximum of the two
+    series. Returns the figure after ``style_layout`` has been applied.
+    """
+    actual = pd.Series(A).astype(float)
+    predicted = pd.Series(P).astype(float)
+    lo = float(np.nanmin([actual.min(), predicted.min()]))
+    hi = float(np.nanmax([actual.max(), predicted.max()]))
+    points = go.Scatter(
+        x=actual, y=predicted, mode="markers", name="Points", marker=dict(size=6)
+    )
+    identity_line = go.Scatter(
+        x=[lo, hi], y=[lo, hi], mode="lines", name="1:1",
+        line=dict(color="#666", width=2, dash="dash")
+    )
+    fig = go.Figure(data=[points, identity_line])
+    fig.update_xaxes(range=[lo, hi], title="Actual UCS")
+    # The y-axis is anchored to x at ratio 1 so both axes use the same scale.
+    fig.update_yaxes(range=[lo, hi], title="Predicted UCS", scaleanchor="x", scaleratio=1)
+    return style_layout(fig, width=width, height=height, margins=(8,10,36,50))
+def make_depth_track_plotly(df, include_actual=True, height=640, width=360):
+    """Plot predicted (and optionally actual) UCS against the 1-based row index.
+
+    The y-axis is the row position in ``df`` (labelled "Point Index") and is
+    reversed so the first row is at the top. The actual trace is added only
+    when ``include_actual`` is true and ``df`` has a ``TARGET`` column.
+    Returns the figure after ``style_layout`` has been applied.
+    """
+    point_index = np.arange(1, len(df) + 1)
+    traces = [
+        # Predicted: solid blue line.
+        go.Scatter(
+            x=df["UCS_Pred"], y=point_index, mode="lines", name="UCS_Pred",
+            line=dict(color="#1f77b4", width=2)
+        )
+    ]
+    if include_actual and TARGET in df.columns:
+        # Actual: dotted yellow line.
+        traces.append(go.Scatter(
+            x=df[TARGET], y=point_index, mode="lines", name="UCS (actual)",
+            line=dict(color="#f2b01e", width=2, dash="dot")
+        ))
+    fig = go.Figure(data=traces)
+    fig.update_xaxes(title="UCS")
+    fig.update_yaxes(autorange="reversed", title="Point Index")
+    return style_layout(fig, width=width, height=height, margins=(8,12,36,60))
+def make_index_tracks_plotly(df, feature_cols, height=640, width=980):
+    n = len(feature_cols)
+    fig = make_subplots(rows=1, cols=n, shared_yaxes=True, horizontal_spacing=0.05)
+    idx = np.arange(1, len(df) + 1)
+    for i, col in enumerate(feature_cols, start=1):
+        fig.add_trace(
+            go.Scatter(x=df[col], y=idx, mode="lines", line=dict(color="#444", width=1.2), name=col, showlegend=False),
+            row=1, col=i
+        )
+        fig.update_xaxes(title=col, row=1, col=i)
+    fig.update_yaxes(autorange="reversed", title="Point Index", row=1, col=1)
+    style_layout(fig, width=width, height=height, margins=(6,8,36,60))
+    return fig
+# =========================================================
+# Model availability (cloud-safe)
+# =========================================================
+def _get_model_url():
+    try:
+        return (st.secrets.get("MODEL_URL", "") or os.environ.get("MODEL_URL", "") or "").strip()
+    except Exception:
+        return (os.environ.get("MODEL_URL", "") or "").strip()
 def ensure_model_present() -> Path | None:
+    """Return the path of a usable model file, or None when none can be obtained.
+    Local candidates (DEFAULT_MODEL, then MODEL_FALLBACKS) are checked first.
+    If none exists and a MODEL_URL is configured, the model is fetched over HTTP
+    and DEFAULT_MODEL is returned. A failed download is reported with st.error
+    and the function falls through to return None.
+    """
+    # Local candidates: the first existing file wins
     for p in [DEFAULT_MODEL, *MODEL_FALLBACKS]:
+        if p.exists():
+            return p
+    # Cloud download: only attempted when a URL is configured
+    MODEL_URL = _get_model_url()
     if MODEL_URL:
         try:
             import requests
             DEFAULT_MODEL.parent.mkdir(parents=True, exist_ok=True)
             with requests.get(MODEL_URL, stream=True) as r:
                 r.raise_for_status()
                         f.write(chunk)
             return DEFAULT_MODEL
         except Exception as e:
+            st.error(f"Failed to download model from MODEL_URL: {e}")
     return None
+# Locate the model file; without one the script run is halted.
 model_path = ensure_model_present()
 if not model_path:
+    st.error("Model not found. Upload models/ucs_rf.joblib (or set MODEL_URL).")
     st.stop()
+# Load the model; on failure show the error and halt the script run.
 try:
     model = load_model(str(model_path))
 except Exception as e:
     st.error(f"Failed to load model: {model_path}\n{e}")
     st.stop()
+# Meta overrides: models/meta.json is consulted when it exists; otherwise the
+# feature list falls back to whatever infer_features_from_model() reports.
 meta_path = MODELS_DIR / "meta.json"
 if meta_path.exists():
     try:
     except Exception:
+        # NOTE(review): any error while handling meta.json is silently ignored.
         pass
 else:
+    infer = infer_features_from_model(model)
+    # Keep the default FEATURES when nothing could be inferred.
+    if infer: FEATURES = infer
+# =========================================================
 # Session state defaults
+# =========================================================
 ss = st.session_state
+ss.setdefault("app_step", "intro")   # ← we start on Intro
+ss.setdefault("dev_bytes", None)
+ss.setdefault("dev_book", None)
 ss.setdefault("dev_previewed", False)
 ss.setdefault("dev_ran", False)
 ss.setdefault("results", {})
 ss.setdefault("train_ranges", None)
+ss.setdefault("val_bytes", None)
+ss.setdefault("val_book", None)
+# =========================================================
+# HERO (logo + title)
+# =========================================================
 st.markdown(
     f"""
     <div class="st-hero">
     unsafe_allow_html=True,
 )
+# =========================================================
+# INTRO PAGE
+# =========================================================
 if ss.app_step == "intro":
     st.header("Welcome!")
     st.markdown(
+        "1. **Upload your data to build the case** and preview the performance of our model.  \n"
+        "2. Click **Run Model** to compute metrics, cross-plots, and the index track.  \n"
+        "3. Click **Proceed to Prediction** to validate on a new dataset."
     )
+    if st.button("Start", type="primary"):
+        ss.app_step = "dev"
         st.rerun()
+# =========================================================
+# Helper banner (stays at top of Development page)
+# =========================================================
+def render_dev_helper():
     st.subheader("Model Development")
+    if not ss.dev_bytes:
+        st.info("Upload your data to build the case and preview the performance of our model.")
+    elif ss.dev_bytes and not ss.dev_previewed and not ss.dev_ran:
+        st.info("File loaded — click **Preview data**.")
     elif ss.dev_previewed and not ss.dev_ran:
+        st.info("Previewed ✓ — now click **Run Model** to build the case.")
+    elif ss.dev_ran:
+        st.success("Case built ✓ — results are displayed below. You can now **Proceed to Prediction**.")
+# =========================================================
+# PREVIEW MODAL
+# =========================================================
+def preview_modal_dev(book, feature_cols):
+    sh_train = None
+    sh_test  = None
+    # try common names
+    low2orig = {k.lower(): k for k in book.keys()}
+    for nm in ["train","training","training2"]:
+        if nm in low2orig: sh_train = low2orig[nm]; break
+    for nm in ["test","testing","testing2"]:
+        if nm in low2orig: sh_test  = low2orig[nm]; break
+    tabs = st.tabs(["Tracks", "Summary"])
+    with tabs[0]:
+        # prefer Train if available; else first sheet
+        pick = sh_train or list(book.keys())[0]
+        df = book[pick]
+        # only numeric columns needed for plotting
+        ok_cols = [c for c in feature_cols if c in df.columns]
+        if not ok_cols:
+            st.warning("No matching feature columns found for plotting.")
         else:
+            fig = make_index_tracks_plotly(df, ok_cols, height=640, width=1000)
+            st.plotly_chart(fig, use_container_width=True, theme=None)
+    with tabs[1]:
+        pick = sh_train or list(book.keys())[0]
+        df = book[pick]
+        st.dataframe(
+            df.describe().T.rename(columns={
+                "mean":"Mean","std":"Std","min":"Min","max":"Max"
+            })[["Min","Max","Mean","Std"]].round(4),
+            use_container_width=True
+        )
+# =========================================================
+# DEVELOPMENT PAGE
+# =========================================================
+if ss.app_step == "dev":
+    render_dev_helper()
+    with st.sidebar:
+        st.header("Model Development Data")
+        def _on_dev_upload():
+            file = st.session_state.get("dev_upload")
+            if file is not None:
+                ss.dev_bytes = file.getvalue()
+                ss.dev_book  = parse_excel(ss.dev_bytes)
+                ss.dev_previewed = False
+                ss.dev_ran = False
+        st.file_uploader("Replace data (Excel)", type=["xlsx","xls"], key="dev_upload",
+                         on_change=_on_dev_upload, help="Limit 200MB per file • XLSX, XLS")
+        if ss.dev_bytes and ss.dev_book:
+            # Small status line under upload
+            any_sheet = next(iter(ss.dev_book.values()))
+            st.caption(f"Data loaded: {getattr(st.session_state.get('dev_upload'), 'name', 'file')} • "
+                       f"{any_sheet.shape[0]} rows × {any_sheet.shape[1]} cols")
+        preview_clicked = st.button("Preview data", disabled=not bool(ss.dev_book))
+        run_clicked = st.button("Run Model", disabled=not bool(ss.dev_book))
+        proceed_clicked = st.button("Proceed to Prediction ▶", disabled=not ss.get("dev_ran", False))
+    # Modal preview (does NOT clear the uploaded file)
+    if preview_clicked and ss.dev_book:
+        with st.modal("Preview data"):
+            st.write("Use the tabs below to inspect the uploaded data before running the model.")
+            preview_modal_dev(ss.dev_book, FEATURES)
+            if st.button("Close", type="primary"):
+                ss.dev_previewed = True
+                st.rerun()
+    # Run model
+    if run_clicked and ss.dev_book:
+        # pick sheets
+        book = ss.dev_book
+        low2orig = {k.lower(): k for k in book.keys()}
+        sh_train = None; sh_test=None
+        for nm in ["train","training","training2"]:
+            if nm in low2orig: sh_train = low2orig[nm]; break
+        for nm in ["test","testing","testing2"]:
+            if nm in low2orig: sh_test  = low2orig[nm]; break
+        if sh_train is None or sh_test is None:
+            st.error("Workbook must include sheets named *Train/Training* and *Test/Testing* (any one of those).")
         else:
+            df_tr = book[sh_train].copy()
+            df_te = book[sh_test].copy()
+            if ensure_cols(df_tr, FEATURES+[TARGET]) and ensure_cols(df_te, FEATURES+[TARGET]):
+                # predict
                 df_tr["UCS_Pred"] = model.predict(df_tr[FEATURES])
                 df_te["UCS_Pred"] = model.predict(df_te[FEATURES])
                 ss.results["Train"] = df_tr
                 ss.results["Test"]  = df_te
                 ss.results["metrics_train"] = {
                     "R2": r2_score(df_tr[TARGET], df_tr["UCS_Pred"]),
                     "RMSE": rmse(df_tr[TARGET], df_tr["UCS_Pred"]),
+                    "MAE": mean_absolute_error(df_tr[TARGET], df_tr["UCS_Pred"])
                 }
                 ss.results["metrics_test"]  = {
                     "R2": r2_score(df_te[TARGET], df_te["UCS_Pred"]),
                     "RMSE": rmse(df_te[TARGET], df_te["UCS_Pred"]),
+                    "MAE": mean_absolute_error(df_te[TARGET], df_te["UCS_Pred"])
                 }
                 tr_min = df_tr[FEATURES].min().to_dict()
                 tr_max = df_tr[FEATURES].max().to_dict()
                 ss.train_ranges = {f:(float(tr_min[f]), float(tr_max[f])) for f in FEATURES}
                 ss.dev_ran = True
+                st.rerun()
+    # Results (if available)
+    if ss.results.get("Train") is not None or ss.results.get("Test") is not None:
+        tab1, tab2 = st.tabs(["Training", "Testing"])
+        if ss.results.get("Train") is not None:
+            with tab1:
+                df = ss.results["Train"]; m = ss.results["metrics_train"]
+                c1,c2,c3 = st.columns(3)
                 c1.metric("R²", f"{m['R2']:.4f}")
                 c2.metric("RMSE", f"{m['RMSE']:.4f}")
                 c3.metric("MAE", f"{m['MAE']:.4f}")
+                left, right = st.columns([0.58, 0.42])
+                with left:
+                    st.plotly_chart(make_cross_plotly(df[TARGET], df["UCS_Pred"], height=440, width=640),
+                                    use_container_width=True, theme=None)
+                with right:
+                    st.plotly_chart(make_depth_track_plotly(df, include_actual=True, height=640, width=360),
+                                    use_container_width=True, theme=None)
+        if ss.results.get("Test") is not None:
+            with tab2:
+                df = ss.results["Test"]; m = ss.results["metrics_test"]
+                c1,c2,c3 = st.columns(3)
                 c1.metric("R²", f"{m['R2']:.4f}")
                 c2.metric("RMSE", f"{m['RMSE']:.4f}")
                 c3.metric("MAE", f"{m['MAE']:.4f}")
+                left, right = st.columns([0.58, 0.42])
+                with left:
+                    st.plotly_chart(make_cross_plotly(df[TARGET], df["UCS_Pred"], height=440, width=640),
+                                    use_container_width=True, theme=None)
+                with right:
+                    st.plotly_chart(make_depth_track_plotly(df, include_actual=True, height=640, width=360),
+                                    use_container_width=True, theme=None)
+# =========================================================
+# PREDICTION PAGE
+# =========================================================
+if ss.app_step == "dev" and st.sidebar.button("→ Open Prediction in main area", key="force_pred"):
+    ss.app_step = "predict"; st.rerun()
+if ss.app_step == "predict":
+    st.subheader("Prediction")
+    st.write("Upload a new dataset to generate UCS predictions and evaluate performance on unseen data.")
+    with st.sidebar:
+        st.header("Prediction (Validation)")
+        def _on_val_upload():
+            file = st.session_state.get("val_upload")
+            if file is not None:
+                ss.val_bytes = file.getvalue()
+                ss.val_book  = parse_excel(ss.val_bytes)
+        st.file_uploader("Upload Validation Excel", type=["xlsx","xls"], key="val_upload", on_change=_on_val_upload)
+        predict_clicked = st.button("Predict", type="primary", use_container_width=True)
+        st.button("⬅ Back", on_click=lambda: ss.update(app_step="dev"))
+    if predict_clicked and ss.val_book:
+        vname = list(ss.val_book.keys())[0]
+        df_val = ss.val_book[vname].copy()
+        if not ensure_cols(df_val, FEATURES):
+            st.stop()
+        df_val["UCS_Pred"] = model.predict(df_val[FEATURES])
+        ss.results["Validate"] = df_val
+        ranges = ss.train_ranges
+        oor_table = None; oor_pct = 0.0
+        if ranges:
+            viol = {f: (df_val[f] < ranges[f][0]) | (df_val[f] > ranges[f][1]) for f in FEATURES}
+            any_viol = pd.DataFrame(viol).any(axis=1); oor_pct = float(any_viol.mean()*100.0)
+            if any_viol.any():
+                offenders = df_val.loc[any_viol, FEATURES].copy()
+                offenders["Violations"] = pd.DataFrame(viol).loc[any_viol].apply(
+                    lambda r: ", ".join([c for c,v in r.items() if v]), axis=1)
+                offenders.index = offenders.index + 1; oor_table = offenders
+        metrics_val = None
+        if TARGET in df_val.columns:
+            metrics_val = {
+                "R2": r2_score(df_val[TARGET], df_val["UCS_Pred"]),
+                "RMSE": rmse(df_val[TARGET], df_val["UCS_Pred"]),
+                "MAE": mean_absolute_error(df_val[TARGET], df_val["UCS_Pred"])
+            }
+        ss.results["metrics_val"] = metrics_val
+        ss.results["summary_val"] = {
+            "n_points": len(df_val),
+            "pred_min": float(df_val["UCS_Pred"].min()),
+            "pred_max": float(df_val["UCS_Pred"].max()),
+            "oor_pct": oor_pct
+        }
+        ss.results["oor_table"] = oor_table
+        st.experimental_rerun()
+    if ss.results.get("Validate") is not None:
+        st.subheader("Validation Results")
         sv = ss.results["summary_val"]; oor_table = ss.results.get("oor_table")
         c1,c2,c3,c4 = st.columns(4)
+        c1.metric("# points", f"{sv['n_points']}"); c2.metric("Pred min", f"{sv['pred_min']:.2f}")
+        c3.metric("Pred max", f"{sv['pred_max']:.2f}"); c4.metric("OOR %", f"{sv['oor_pct']:.1f}%")
+        left,right = st.columns([0.58, 0.42])
         with left:
             if TARGET in ss.results["Validate"].columns:
                 st.plotly_chart(
+                    make_cross_plotly(ss.results["Validate"][TARGET], ss.results["Validate"]["UCS_Pred"], height=440, width=640),
+                    use_container_width=True, theme=None
                 )
             else:
+                st.info("Actual UCS values are not available in the validation data.")
         with right:
             st.plotly_chart(
+                make_depth_track_plotly(ss.results["Validate"], include_actual=(TARGET in ss.results["Validate"].columns),
+                                        height=640, width=360),
+                use_container_width=True, theme=None
             )
         if oor_table is not None:
+            st.warning("Some validation rows contain inputs **outside** the training min–max. Review the table below.")
             st.dataframe(oor_table, use_container_width=True)
+# =========================================================
 # Footer
+# =========================================================
 st.markdown("---")
 st.markdown(
     "<div style='text-align:center; color:#6b7280;'>"