UCS2014 committed on
Commit
82f0ae2
·
verified ·
1 Parent(s): 3da05e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +698 -240
app.py CHANGED
@@ -1,21 +1,19 @@
1
- # app.py — ST_GeoMech_SMW (final, simplified & robust)
2
-
3
  import io, json, os, base64, math
4
  from pathlib import Path
5
- from datetime import datetime
6
-
7
  import streamlit as st
8
  import pandas as pd
9
  import numpy as np
10
  import joblib
 
11
 
12
- # plotting
13
  import matplotlib
14
  matplotlib.use("Agg")
15
  import matplotlib.pyplot as plt
16
  from matplotlib.ticker import FuncFormatter
17
- import plotly.graph_objects as go
18
 
 
19
  from sklearn.metrics import mean_squared_error, mean_absolute_error
20
 
21
  # =========================
@@ -24,15 +22,31 @@ from sklearn.metrics import mean_squared_error, mean_absolute_error
24
  APP_NAME = "ST_GeoMech_SMW"
25
  TAGLINE = "Real-Time Upper/Lower Mud Weight (MW) Limits For Safe Drilling"
26
 
27
- MODELS_DIR = Path("models")
28
- BO_MODEL_PATH = MODELS_DIR / "bo_model.joblib"
29
- BD_MODEL_PATH = MODELS_DIR / "bd_model.joblib"
30
- BO_META_PATH = MODELS_DIR / "bo_meta.json"
31
- BD_META_PATH = MODELS_DIR / "bd_meta.json"
 
 
 
 
32
 
 
 
33
  PRED_BO = "BO_Pred"
34
  PRED_BD = "BD_Pred"
35
- X_UNITS = "MW (pcf)" # your metas use pcf units
 
 
 
 
 
 
 
 
 
 
36
 
37
  COLORS = {
38
  "pred_bo": "#1f77b4", # blue
@@ -42,21 +56,23 @@ COLORS = {
42
  "ref": "#5a5a5a"
43
  }
44
 
 
 
45
  # Plot sizing
46
- CROSS_W = 360
47
- CROSS_H = 360
48
- TRACK_H = 900
49
- TRACK_W = 480
50
  FONT_SZ = 13
51
  BOLD_FONT = "Arial Black, Arial, sans-serif"
52
 
53
  # =========================
54
  # Page / CSS
55
  # =========================
56
- st.set_page_config(page_title=APP_NAME, page_icon="🛢️", layout="wide")
57
  st.markdown("""
58
  <style>
59
- .brand-logo { width: 180px; height: auto; object-fit: contain; }
60
  .centered-container { display:flex; flex-direction:column; align-items:center; text-align:center; }
61
  .st-message-box { background:#f0f2f6; color:#333; padding:10px; border-radius:10px; border:1px solid #e6e9ef; }
62
  .st-message-box.st-success { background:#d4edda; color:#155724; border-color:#c3e6cb; }
@@ -64,6 +80,12 @@ st.markdown("""
64
  .st-message-box.st-error { background:#f8d7da; color:#721c24; border-color:#f5c6cb; }
65
  .main .block-container { overflow: unset !important; }
66
  div[data-testid="stVerticalBlock"] { overflow: unset !important; }
 
 
 
 
 
 
67
  </style>
68
  """, unsafe_allow_html=True)
69
 
@@ -72,6 +94,49 @@ TABLE_CENTER_CSS = [
72
  dict(selector="td", props=[("text-align","center")]),
73
  ]
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  # =========================
76
  # Utilities
77
  # =========================
@@ -95,6 +160,9 @@ def parse_excel(data_bytes: bytes):
95
  xl = pd.ExcelFile(bio)
96
  return {sh: xl.parse(sh) for sh in xl.sheet_names}
97
 
 
 
 
98
  def _nice_tick0(xmin: float, step: float = 0.1) -> float:
99
  return step * math.floor(xmin / step) if np.isfinite(xmin) else xmin
100
 
@@ -118,35 +186,45 @@ def _make_X(df: pd.DataFrame, features: list[str]) -> pd.DataFrame:
118
 
119
  # ---- Column name normalization (aliases) ----
120
  def _build_alias_map(canonical_features: list[str], tgt_bo: str, tgt_bd: str) -> dict:
121
- # canonical list comes from metas or model.feature_names_in_
122
  def pick(expected_list, variants):
123
  for v in variants:
124
  if v in expected_list:
125
  return v
126
  return variants[0]
127
 
128
- can_Q = pick(canonical_features, ["Q, gpm", "Flow Rate (gpm)", "Flow Rate, gpm", "Flow Rate , gpm", "Flow Rate,gpm"])
129
- can_SPP = pick(canonical_features, ["SPP(psi)", "SPP (psi)"])
130
- can_TORQUE = pick(canonical_features, ["T (kft.lbf)", "Torque (kft.lbf)", "Torque(kft.lbf)", "TORQUE(kft.lbf)"])
131
  can_WOB = pick(canonical_features, ["WOB (klbf)", "WOB, klbf", "WOB(klbf)", "WOB( klbf)"])
 
 
 
132
  can_ROP = pick(canonical_features, ["ROP (ft/h)", "ROP(ft/h)"])
 
 
 
 
133
 
134
- alias = {
135
- # Flow
136
- "Q, gpm": can_Q, "Flow Rate (gpm)": can_Q, "Flow Rate, gpm": can_Q, "Flow Rate , gpm": can_Q, "Flow Rate,gpm": can_Q,
137
- # SPP
138
- "SPP(psi)": can_SPP, "SPP (psi)": can_SPP,
139
- # Torque
140
- "T (kft.lbf)": can_TORQUE, "Torque (kft.lbf)": can_TORQUE, "Torque(kft.lbf)": can_TORQUE, "TORQUE(kft.lbf)": can_TORQUE,
141
  # WOB
142
  "WOB (klbf)": can_WOB, "WOB, klbf": can_WOB, "WOB(klbf)": can_WOB, "WOB( klbf)": can_WOB,
 
 
 
 
 
 
143
  # ROP
144
  "ROP (ft/h)": can_ROP, "ROP(ft/h)": can_ROP,
145
- # Targets (accept common variants as well)
146
- "BO_Actual": tgt_bo, "Breakout MW": tgt_bo, "BOMW": tgt_bo, "BO MW": tgt_bo,
147
- "BD_Actual": tgt_bd, "Breakdown MW": tgt_bd, "BDMW": tgt_bd, "BD MW": tgt_bd,
 
 
 
 
148
  }
149
- return alias
150
 
151
  DEPTH_CANDIDATES = ["Depth", "Depth (ft)", "Depth, ft", "Depth(ft)", "DEPTH, ft"]
152
 
@@ -157,42 +235,244 @@ def _normalize_columns(df: pd.DataFrame, canonical_features: list[str], tgt_bo:
157
  actual = {k: v for k, v in alias.items() if k in out.columns and k != v}
158
  return out.rename(columns=actual)
159
 
160
- def _depth_series(df):
161
- depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
162
- if depth_col is not None:
163
- y = pd.to_numeric(df[depth_col], errors="coerce"); ylab = depth_col
164
- rng = [float(y.max()), float(y.min())] # reversed
165
- else:
166
- y = pd.Series(np.arange(1, len(df) + 1)); ylab = "Point Index"
167
- rng = [float(y.max()), float(y.min())]
168
- return y, ylab, rng
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
- def _x_range_for_tracks(df, cols):
171
- x_series = pd.concat([pd.to_numeric(df[c], errors="coerce") for c in cols if c in df], ignore_index=True)
172
- x_lo, x_hi = float(x_series.min()), float(x_series.max())
173
- pad = 0.03 * (x_hi - x_lo if x_hi > x_lo else 1.0)
174
- xmin, xmax = x_lo - pad, x_hi + pad
175
- tick0 = _nice_tick0(xmin, step=max((xmax - xmin)/10.0, 0.1))
176
- return xmin, xmax, tick0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
 
178
  # =========================
179
- # Plots
180
  # =========================
181
  def cross_plot_static(actual, pred, xlabel, ylabel, color="#1f77b4"):
182
  a = pd.Series(actual, dtype=float)
183
  p = pd.Series(pred, dtype=float)
 
184
  lo = float(min(a.min(), p.min()))
185
  hi = float(max(a.max(), p.max()))
186
  pad = 0.03 * (hi - lo if hi > lo else 1.0)
187
  lo2, hi2 = lo - pad, hi + pad
188
  ticks = np.linspace(lo2, hi2, 5)
 
189
  dpi = 110
190
  fig, ax = plt.subplots(figsize=(CROSS_W/dpi, CROSS_H/dpi), dpi=dpi, constrained_layout=False)
191
  ax.scatter(a, p, s=14, c=color, alpha=0.9, linewidths=0)
192
  ax.plot([lo2, hi2], [lo2, hi2], linestyle="--", linewidth=1.2, color=COLORS["ref"])
 
193
  ax.set_xlim(lo2, hi2); ax.set_ylim(lo2, hi2)
194
  ax.set_xticks(ticks); ax.set_yticks(ticks)
195
  ax.set_aspect("equal", adjustable="box")
 
196
  fmt = FuncFormatter(lambda x, _: f"{x:.2f}")
197
  ax.xaxis.set_major_formatter(fmt); ax.yaxis.set_major_formatter(fmt)
198
  ax.set_xlabel(xlabel, fontweight="bold", fontsize=10, color="black")
@@ -203,10 +483,32 @@ def cross_plot_static(actual, pred, xlabel, ylabel, color="#1f77b4"):
203
  fig.subplots_adjust(left=0.16, bottom=0.16, right=0.98, top=0.98)
204
  return fig
205
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  def track_plot_single(df, pred_col, actual_col=None, title_suffix=""):
207
  y, ylab, y_range = _depth_series(df)
208
  cols = [pred_col] + ([actual_col] if actual_col and actual_col in df.columns else [])
209
  xmin, xmax, tick0 = _x_range_for_tracks(df, cols)
 
210
  fig = go.Figure()
211
  if pred_col in df.columns:
212
  fig.add_trace(go.Scatter(
@@ -218,11 +520,13 @@ def track_plot_single(df, pred_col, actual_col=None, title_suffix=""):
218
  if actual_col and actual_col in df.columns:
219
  fig.add_trace(go.Scatter(
220
  x=df[actual_col], y=y, mode="lines",
221
- line=dict(color=COLORS["actual_bo"] if actual_col.lower().startswith("bo") else COLORS["actual_bd"],
222
  width=2.0, dash="dot"),
223
  name=f"{actual_col} (actual)",
224
  hovertemplate=f"{actual_col}: "+"%{x:.2f}<br>"+ylab+": %{y}<extra></extra>"
225
  ))
 
 
226
  fig.update_layout(
227
  height=TRACK_H, width=TRACK_W, autosize=False, paper_bgcolor="#fff", plot_bgcolor="#fff",
228
  margin=dict(l=64, r=16, t=36, b=48), hovermode="closest",
@@ -233,7 +537,7 @@ def track_plot_single(df, pred_col, actual_col=None, title_suffix=""):
233
  title=title_suffix
234
  )
235
  fig.update_xaxes(
236
- title_text=X_UNITS, title_font=dict(size=20, family=BOLD_FONT, color="#000"),
237
  tickfont=dict(size=15, family=BOLD_FONT, color="#000"), side="top",
238
  range=[xmin, xmax], ticks="outside", tickformat=",.2f", tickmode="auto", tick0=tick0,
239
  showline=True, linewidth=1.2, linecolor="#444", mirror=True,
@@ -247,23 +551,34 @@ def track_plot_single(df, pred_col, actual_col=None, title_suffix=""):
247
  )
248
  return fig
249
 
250
- def track_plot_combined(df, target_bo, target_bd):
 
251
  y, ylab, y_range = _depth_series(df)
252
- cols = [c for c in [PRED_BO, PRED_BD, target_bo, target_bd] if c in df]
253
  xmin, xmax, tick0 = _x_range_for_tracks(df, cols)
254
  fig = go.Figure()
 
255
  if PRED_BO in df.columns:
256
  fig.add_trace(go.Scatter(x=df[PRED_BO], y=y, mode="lines",
257
- line=dict(color=COLORS["pred_bo"], width=1.8), name=PRED_BO))
258
- if target_bo in df.columns:
259
- fig.add_trace(go.Scatter(x=df[target_bo], y=y, mode="lines",
260
- line=dict(color=COLORS["actual_bo"], width=2.0, dash="dot"), name=f"{target_bo} (actual)"))
 
 
 
 
261
  if PRED_BD in df.columns:
262
  fig.add_trace(go.Scatter(x=df[PRED_BD], y=y, mode="lines",
263
- line=dict(color=COLORS["pred_bd"], width=1.8), name=PRED_BD))
264
- if target_bd in df.columns:
265
- fig.add_trace(go.Scatter(x=df[target_bd], y=y, mode="lines",
266
- line=dict(color=COLORS["actual_bd"], width=2.0, dash="dot"), name=f"{target_bd} (actual)"))
 
 
 
 
 
267
  fig.update_layout(
268
  height=TRACK_H, width=TRACK_W, autosize=False, paper_bgcolor="#fff", plot_bgcolor="#fff",
269
  margin=dict(l=64, r=16, t=36, b=48), hovermode="closest",
@@ -272,15 +587,19 @@ def track_plot_combined(df, target_bo, target_bd):
272
  bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1),
273
  legend_title_text="", title="Combined (Breakout / Breakdown)"
274
  )
275
- fig.update_xaxes(title_text=X_UNITS, side="top", range=[xmin, xmax],
276
- ticks="outside", tickformat=",.2f", tickmode="auto", tick0=tick0,
 
 
277
  showline=True, linewidth=1.2, linecolor="#444", mirror=True,
278
  showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True)
279
- fig.update_yaxes(title_text=ylab, range=y_range, ticks="outside", showline=True,
280
- linewidth=1.2, linecolor="#444", mirror=True,
281
- showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True)
 
282
  return fig
283
 
 
284
  def preview_tracks(df: pd.DataFrame, cols: list[str]):
285
  cols = [c for c in cols if c in df.columns]
286
  n = len(cols)
@@ -288,11 +607,13 @@ def preview_tracks(df: pd.DataFrame, cols: list[str]):
288
  fig, ax = plt.subplots(figsize=(4, 2))
289
  ax.text(0.5, 0.5, "No selected columns", ha="center", va="center"); ax.axis("off")
290
  return fig
 
291
  depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
292
  if depth_col is not None:
293
  idx = pd.to_numeric(df[depth_col], errors="coerce"); y_label = depth_col
294
  else:
295
  idx = pd.Series(np.arange(1, len(df) + 1)); y_label = "Point Index"
 
296
  cmap = plt.get_cmap("tab20")
297
  col_colors = {col: cmap(i % cmap.N) for i, col in enumerate(cols)}
298
  fig, axes = plt.subplots(1, n, figsize=(2.3 * n, 7.0), sharey=True, dpi=100)
@@ -314,53 +635,112 @@ def preview_tracks(df: pd.DataFrame, cols: list[str]):
314
  def _ensure_file(p: Path) -> Path|None:
315
  return p if (p.exists() and p.stat().st_size > 0) else None
316
 
317
- bo_model_path = _ensure_file(BO_MODEL_PATH)
318
- bd_model_path = _ensure_file(BD_MODEL_PATH)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
  if not (bo_model_path and bd_model_path):
320
- st.error("Models not found. Place bo_model.joblib and bd_model.joblib in models/"); st.stop()
 
321
 
322
@st.cache_resource(show_spinner=False)
def _load_all():
    """Load both joblib models and their JSON metas; resolve features/targets.

    Cached with st.cache_resource so deserialization happens once per
    process. Reads the module-level paths bo_model_path / bd_model_path and
    BO_META_PATH / BD_META_PATH. NOTE(review): load_model is defined
    elsewhere in this file — presumably a joblib.load wrapper; confirm.

    Returns:
        (model_bo, model_bd, meta_bo, meta_bd, FEATURES, TARGET_BO, TARGET_BD)
    """
    # models
    model_bo = load_model(str(bo_model_path))
    model_bd = load_model(str(bd_model_path))
    # metas: best-effort JSON read — any missing file or parse error yields {}
    def _load_meta(p: Path):
        if not p or not p.exists(): return {}
        try:
            return json.loads(p.read_text(encoding="utf-8"))
        except Exception:
            return {}
    meta_bo = _load_meta(BO_META_PATH)
    meta_bd = _load_meta(BD_META_PATH)

    # Features: prefer model.feature_names_in_; else from bo meta; else bd meta
    FEATURES = []
    if hasattr(model_bo, "feature_names_in_"):
        FEATURES = [str(x) for x in model_bo.feature_names_in_]
    if not FEATURES and meta_bo.get("features"):
        FEATURES = [str(x) for x in meta_bo["features"]]
    if not FEATURES and meta_bd.get("features"):
        FEATURES = [str(x) for x in meta_bd["features"]]

    # Targets from metas (your metas specify BO_Actual and BD_Actual)
    TARGET_BO = str(meta_bo.get("target", "BO_Actual"))
    TARGET_BD = str(meta_bd.get("target", "BD_Actual"))

    return model_bo, model_bd, meta_bo, meta_bd, FEATURES, TARGET_BO, TARGET_BD
 
 
 
 
 
351
 
352
- model_bo, model_bd, meta_bo, meta_bd, FEATURES, TARGET_BO, TARGET_BD = _load_all()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
 
354
- # Session constants
355
- st.session_state["FEATURES"] = FEATURES
356
- st.session_state["TARGET_BO"] = TARGET_BO
357
- st.session_state["TARGET_BD"] = TARGET_BD
 
 
 
 
 
 
 
358
 
359
  # =========================
360
- # App chrome
361
  # =========================
362
  st.sidebar.markdown(f"""
363
  <div class="centered-container">
 
364
  <div style='font-weight:800;font-size:1.2rem;'>{APP_NAME}</div>
365
  <div style='color:#667085;'>{TAGLINE}</div>
366
  </div>
@@ -387,13 +767,17 @@ def sticky_header(title, message):
387
  # =========================
388
  # INTRO
389
  # =========================
390
- if "app_step" not in st.session_state:
391
- st.session_state.app_step = "dev"
392
-
393
  if st.session_state.app_step == "intro":
394
  st.header("Welcome!")
395
- st.markdown("This software estimates **Breakout** and **Breakdown** mud-weight limits from drilling data.")
396
- if st.button("Start ▶", type="primary"):
 
 
 
 
 
 
 
397
  st.session_state.app_step = "dev"; st.rerun()
398
 
399
  # =========================
@@ -401,199 +785,272 @@ if st.session_state.app_step == "intro":
401
  # =========================
402
  if st.session_state.app_step == "dev":
403
  st.sidebar.header("Case Building")
404
- up = st.sidebar.file_uploader("Upload Excel (Train/Test sheets)", type=["xlsx","xls"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
  if st.sidebar.button("Proceed to Validation ▶", use_container_width=True): st.session_state.app_step="validate"; st.rerun()
406
  if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
407
 
408
- sticky_header("Case Building", "Workbook must include **Train** and **Test** sheets. Accepts header aliases; Depth is optional and not used as input.")
409
-
410
- if up is not None:
411
- book = parse_excel(up.getvalue())
412
- if not book:
413
- st.error("Could not read Excel file.")
414
- st.stop()
415
-
416
- # Pick sheet names
417
- def find_sheet(names):
418
- low2orig = {k.lower(): k for k in book.keys()}
419
- for nm in names:
420
- if nm.lower() in low2orig: return low2orig[nm.lower()]
421
- return None
422
- sh_train = find_sheet(["Train","Training","training2","train","training"])
423
- sh_test = find_sheet(["Test","Testing","testing2","test","testing"])
424
 
 
 
 
 
425
  if sh_train is None or sh_test is None:
426
- st.error("Workbook must include Train/Training/training2 **and** Test/Testing/testing2 sheets.")
427
  st.stop()
428
 
429
- # normalize headers with aliasing; targets from metas
430
- def prepare(df_raw, stage):
431
- df = _normalize_columns(df_raw, FEATURES, TARGET_BO, TARGET_BD)
432
- # ensure required inputs exist
433
- missing = [c for c in FEATURES if c not in df.columns]
434
- if missing:
435
- st.error(f"{stage}: Missing required input columns: {missing}\nFound: {list(df.columns)}\nExpected: {FEATURES}")
436
- st.stop()
437
- return df
438
-
439
- tr = prepare(book[sh_train].copy(), "Training")
440
- te = prepare(book[sh_test].copy(), "Testing")
441
-
442
- # Predict
443
- Xtr = _make_X(tr, FEATURES)
444
- Xte = _make_X(te, FEATURES)
445
  tr[PRED_BO] = model_bo.predict(Xtr)
446
  tr[PRED_BD] = model_bd.predict(Xtr)
447
  te[PRED_BO] = model_bo.predict(Xte)
448
  te[PRED_BD] = model_bd.predict(Xte)
449
 
450
- # Metrics (requires BO_Actual / BD_Actual present)
451
- def metrics_block(df, tgt, pred):
452
- return {
453
- "R": pearson_r(df[tgt], df[pred]) if tgt in df else float("nan"),
454
- "RMSE": rmse(df[tgt], df[pred]) if tgt in df else float("nan"),
455
- "MAE": mean_absolute_error(df[tgt], df[pred]) if tgt in df else float("nan"),
456
- }
457
 
458
- m_train_bo = metrics_block(tr, TARGET_BO, PRED_BO)
459
- m_train_bd = metrics_block(tr, TARGET_BD, PRED_BD)
460
- m_test_bo = metrics_block(te, TARGET_BO, PRED_BO)
461
- m_test_bd = metrics_block(te, TARGET_BD, PRED_BD)
462
 
463
- st.subheader("Training")
464
  c1,c2,c3 = st.columns(3)
465
- c1.metric("R (BO)", f"{m_train_bo['R']:.3f}")
466
- c2.metric("RMSE (BO)", f"{m_train_bo['RMSE']:.2f}")
467
- c3.metric("MAE (BO)", f"{m_train_bo['MAE']:.2f}")
468
- c1,c2,c3 = st.columns(3)
469
- c1.metric("R (BD)", f"{m_train_bd['R']:.3f}")
470
- c2.metric("RMSE (BD)", f"{m_train_bd['RMSE']:.2f}")
471
- c3.metric("MAE (BD)", f"{m_train_bd['MAE']:.2f}")
472
-
 
 
 
473
  t1, t2, t3 = st.tabs(["Breakout", "Breakdown", "Combined"])
474
  with t1:
475
- st.plotly_chart(track_plot_single(tr, PRED_BO, actual_col=TARGET_BO, title_suffix="Breakout"), use_container_width=False, config={"displayModeBar": False})
476
- st.pyplot(cross_plot_static(tr[TARGET_BO], tr[PRED_BO], f"Actual {TARGET_BO}", f"Predicted {TARGET_BO}", COLORS["pred_bo"]))
477
  with t2:
478
- st.plotly_chart(track_plot_single(tr, PRED_BD, actual_col=TARGET_BD, title_suffix="Breakdown"), use_container_width=False, config={"displayModeBar": False})
479
- st.pyplot(cross_plot_static(tr[TARGET_BD], tr[PRED_BD], f"Actual {TARGET_BD}", f"Predicted {TARGET_BD}", COLORS["pred_bd"]))
480
  with t3:
481
- st.plotly_chart(track_plot_combined(tr, TARGET_BO, TARGET_BD), use_container_width=False, config={"displayModeBar": False})
482
-
483
- st.subheader("Testing")
484
- c1,c2,c3 = st.columns(3)
485
- c1.metric("R (BO)", f"{m_test_bo['R']:.3f}")
486
- c2.metric("RMSE (BO)", f"{m_test_bo['RMSE']:.2f}")
487
- c3.metric("MAE (BO)", f"{m_test_bo['MAE']:.2f}")
488
- c1,c2,c3 = st.columns(3)
489
- c1.metric("R (BD)", f"{m_test_bd['R']:.3f}")
490
- c2.metric("RMSE (BD)", f"{m_test_bd['RMSE']:.2f}")
491
- c3.metric("MAE (BD)", f"{m_test_bd['MAE']:.2f}")
492
-
493
- t1, t2, t3 = st.tabs(["Breakout ", "Breakdown ", "Combined "])
494
- with t1:
495
- st.plotly_chart(track_plot_single(te, PRED_BO, actual_col=TARGET_BO, title_suffix="Breakout"), use_container_width=False, config={"displayModeBar": False})
496
- st.pyplot(cross_plot_static(te[TARGET_BO], te[PRED_BO], f"Actual {TARGET_BO}", f"Predicted {TARGET_BO}", COLORS["pred_bo"]))
497
- with t2:
498
- st.plotly_chart(track_plot_single(te, PRED_BD, actual_col=TARGET_BD, title_suffix="Breakdown"), use_container_width=False, config={"displayModeBar": False})
499
- st.pyplot(cross_plot_static(te[TARGET_BD], te[PRED_BD], f"Actual {TARGET_BD}", f"Predicted {TARGET_BD}", COLORS["pred_bd"]))
500
- with t3:
501
- st.plotly_chart(track_plot_combined(te, TARGET_BO, TARGET_BD), use_container_width=False, config={"displayModeBar": False})
502
 
503
  # =========================
504
- # VALIDATION
505
  # =========================
506
  if st.session_state.app_step == "validate":
507
  st.sidebar.header("Validate the Models")
508
- up = st.sidebar.file_uploader("Upload Validation Excel (has actual BO/BD)", type=["xlsx","xls"])
 
 
 
 
 
 
 
 
509
  if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
510
  if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
511
 
512
- sticky_header("Validation", "Upload a dataset with the same **5 feature** columns and **BO_Actual / BD_Actual** targets.")
513
-
514
- if up is not None:
515
- book = parse_excel(up.getvalue())
516
- name = list(book.keys())[0]
517
- df = _normalize_columns(book[name].copy(), FEATURES, TARGET_BO, TARGET_BD)
518
-
519
- missing = [c for c in FEATURES if c not in df.columns]
520
- if missing:
521
- st.error(f"Missing input columns: {missing}\nFound: {list(df.columns)}\nExpected: {FEATURES}")
522
- st.stop()
523
-
524
- df[PRED_BO] = model_bo.predict(_make_X(df, FEATURES))
525
- df[PRED_BD] = model_bd.predict(_make_X(df, FEATURES))
526
-
527
- m_bo = {"R": pearson_r(df[TARGET_BO], df[PRED_BO]),
528
- "RMSE": rmse(df[TARGET_BO], df[PRED_BO]),
529
- "MAE": mean_absolute_error(df[TARGET_BO], df[PRED_BO])}
530
- m_bd = {"R": pearson_r(df[TARGET_BD], df[PRED_BD]),
531
- "RMSE": rmse(df[TARGET_BD], df[PRED_BD]),
532
- "MAE": mean_absolute_error(df[TARGET_BD], df[PRED_BD])}
533
-
 
 
 
 
 
 
 
 
 
 
534
  c1,c2,c3 = st.columns(3)
535
  c1.metric("R (BO)", f"{m_bo['R']:.3f}"); c2.metric("RMSE (BO)", f"{m_bo['RMSE']:.2f}"); c3.metric("MAE (BO)", f"{m_bo['MAE']:.2f}")
536
  c1,c2,c3 = st.columns(3)
537
  c1.metric("R (BD)", f"{m_bd['R']:.3f}"); c2.metric("RMSE (BD)", f"{m_bd['RMSE']:.2f}"); c3.metric("MAE (BD)", f"{m_bd['MAE']:.2f}")
 
538
 
539
  t1, t2, t3 = st.tabs(["Breakout", "Breakdown", "Combined"])
540
  with t1:
541
- st.plotly_chart(track_plot_single(df, PRED_BO, actual_col=TARGET_BO, title_suffix="Breakout"),
542
- use_container_width=False, config={"displayModeBar": False})
543
- st.pyplot(cross_plot_static(df[TARGET_BO], df[PRED_BO], f"Actual {TARGET_BO}", f"Predicted {TARGET_BO}", COLORS["pred_bo"]))
 
544
  with t2:
545
- st.plotly_chart(track_plot_single(df, PRED_BD, actual_col=TARGET_BD, title_suffix="Breakdown"),
546
- use_container_width=False, config={"displayModeBar": False})
547
- st.pyplot(cross_plot_static(df[TARGET_BD], df[PRED_BD], f"Actual {TARGET_BD}", f"Predicted {TARGET_BD}", COLORS["pred_bd"]))
 
548
  with t3:
549
- st.plotly_chart(track_plot_combined(df, TARGET_BO, TARGET_BD), use_container_width=False, config={"displayModeBar": False})
 
 
 
 
 
 
 
550
 
551
  # =========================
552
  # PREDICTION (no actuals)
553
  # =========================
554
  if st.session_state.app_step == "predict":
555
  st.sidebar.header("Prediction (No Actual BO/BD)")
556
- up = st.sidebar.file_uploader("Upload Prediction Excel (features only)", type=["xlsx","xls"])
 
 
 
 
 
 
 
 
557
  if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
558
 
559
- sticky_header("Prediction", "Upload a dataset with **only the 5 feature columns**.")
560
 
561
- if up is not None:
562
- book = parse_excel(up.getvalue()); name = list(book.keys())[0]
563
- df = _normalize_columns(book[name].copy(), FEATURES, TARGET_BO, TARGET_BD)
564
- missing = [c for c in FEATURES if c not in df.columns]
565
- if missing:
566
- st.error(f"Missing input columns: {missing}\nFound: {list(df.columns)}\nExpected: {FEATURES}")
567
- st.stop()
568
 
569
- df[PRED_BO] = model_bo.predict(_make_X(df, FEATURES))
570
- df[PRED_BD] = model_bd.predict(_make_X(df, FEATURES))
 
 
 
 
 
 
 
 
 
 
 
 
 
571
 
572
  col_left, col_right = st.columns([2,3], gap="large")
573
  with col_left:
574
- sv = {
575
- "n":len(df),
576
- "bo_min":float(df[PRED_BO].min()), "bo_max":float(df[PRED_BO].max()),
577
- "bd_min":float(df[PRED_BD].min()), "bd_max":float(df[PRED_BD].max()),
578
- "bo_mean":float(df[PRED_BO].mean()), "bo_std":float(df[PRED_BO].std(ddof=0)),
579
- "bd_mean":float(df[PRED_BD].mean()), "bd_std":float(df[PRED_BD].std(ddof=0)),
580
- }
581
  table = pd.DataFrame({
582
- "Metric": ["# points","BO min","BO max","BO mean","BO std","BD min","BD max","BD mean","BD std"],
583
  "Value": [sv["n"], round(sv["bo_min"],2), round(sv["bo_max"],2), round(sv["bo_mean"],2), round(sv["bo_std"],2),
584
- round(sv["bd_min"],2), round(sv["bd_max"],2), round(sv["bd_mean"],2), round(sv["bd_std"],2)]
585
  })
586
  st.markdown('<div class="st-message-box st-success">Predictions ready ✓</div>', unsafe_allow_html=True)
587
  df_centered_rounded(table, hide_index=True)
 
588
  with col_right:
589
  t1, t2 = st.tabs(["Breakout", "Breakdown"])
590
  with t1:
591
  st.plotly_chart(track_plot_single(df, PRED_BO, actual_col=None, title_suffix="Breakout"),
592
- use_container_width=False, config={"displayModeBar": False})
593
  with t2:
594
  st.plotly_chart(track_plot_single(df, PRED_BD, actual_col=None, title_suffix="Breakdown"),
595
- use_container_width=False, config={"displayModeBar": False})
596
- st.plotly_chart(track_plot_combined(df, TARGET_BO, TARGET_BD), use_container_width=False, config={"displayModeBar": False})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
597
 
598
  # =========================
599
  # Footer
@@ -602,6 +1059,7 @@ st.markdown("""
602
  <br><br><br>
603
  <hr>
604
  <div style='text-align:center;color:#6b7280;font-size:1.0em;'>
605
- © 2025 Smart Thinking AI-Solutions Team. All rights reserved.
 
606
  </div>
607
  """, unsafe_allow_html=True)
 
1
+ # app.py — ST_GeoMech_SMW (pcf version, robust IO + aliases)
 
2
  import io, json, os, base64, math
3
  from pathlib import Path
 
 
4
  import streamlit as st
5
  import pandas as pd
6
  import numpy as np
7
  import joblib
8
+ from datetime import datetime
9
 
10
+ # Matplotlib (static)
11
  import matplotlib
12
  matplotlib.use("Agg")
13
  import matplotlib.pyplot as plt
14
  from matplotlib.ticker import FuncFormatter
 
15
 
16
+ import plotly.graph_objects as go
17
  from sklearn.metrics import mean_squared_error, mean_absolute_error
18
 
19
  # =========================
 
22
  APP_NAME = "ST_GeoMech_SMW"
23
  TAGLINE = "Real-Time Upper/Lower Mud Weight (MW) Limits For Safe Drilling"
24
 
25
+ # Defaults (can be overridden by metas or model.feature_names_in_)
26
+ FEATURES_DEFAULT = [
27
+ "WOB (klbf)",
28
+ "Torque (kft.lbf)",
29
+ "SPP (psi)",
30
+ "RPM (1/min)",
31
+ "ROP (ft/h)",
32
+ "Flow Rate (gpm)",
33
+ ]
34
 
35
+ TARGET_BO_DEFAULT = "Breakout MW"
36
+ TARGET_BD_DEFAULT = "Breakdown MW"
37
  PRED_BO = "BO_Pred"
38
  PRED_BD = "BD_Pred"
39
+
40
+ # X-axis title for MW tracks/cross-plots (unit taken from metas, fallback to pcf)
41
+ X_UNITS = None
42
+
43
+ # Model/meta discovery (supports uploads with spaces like "bo_model (1).joblib")
44
+ MODELS_DIR = Path("models")
45
+ ALT_MODELS_DIR = Path("/mnt/data")
46
+ BO_MODEL_CANDIDATES = ["bo_model.joblib", "bo_model (1).joblib", "BO.joblib"]
47
+ BD_MODEL_CANDIDATES = ["bd_model.joblib", "BD.joblib"]
48
+ BO_META_CANDIDATES = ["bo_meta.json", "bo_meta (1).json"]
49
+ BD_META_CANDIDATES = ["bd_meta.json"]
50
 
51
  COLORS = {
52
  "pred_bo": "#1f77b4", # blue
 
56
  "ref": "#5a5a5a"
57
  }
58
 
59
+ STRICT_VERSION_CHECK = False
60
+
61
  # Plot sizing
62
+ CROSS_W = 350
63
+ CROSS_H = 350
64
+ TRACK_H = 1000
65
+ TRACK_W = 500
66
  FONT_SZ = 13
67
  BOLD_FONT = "Arial Black, Arial, sans-serif"
68
 
69
  # =========================
70
  # Page / CSS
71
  # =========================
72
+ st.set_page_config(page_title=APP_NAME, page_icon="logo.png", layout="wide")
73
  st.markdown("""
74
  <style>
75
+ .brand-logo { width: 200px; height: auto; object-fit: contain; }
76
  .centered-container { display:flex; flex-direction:column; align-items:center; text-align:center; }
77
  .st-message-box { background:#f0f2f6; color:#333; padding:10px; border-radius:10px; border:1px solid #e6e9ef; }
78
  .st-message-box.st-success { background:#d4edda; color:#155724; border-color:#c3e6cb; }
 
80
  .st-message-box.st-error { background:#f8d7da; color:#721c24; border-color:#f5c6cb; }
81
  .main .block-container { overflow: unset !important; }
82
  div[data-testid="stVerticalBlock"] { overflow: unset !important; }
83
+ div[data-testid="stExpander"] > details > summary {
84
+ position: sticky; top: 0; z-index: 10; background: #fff; border-bottom: 1px solid #eee;
85
+ }
86
+ div[data-testid="stExpander"] div[data-baseweb="tab-list"] {
87
+ position: sticky; top: 42px; z-index: 9; background: #fff; padding-top: 6px;
88
+ }
89
  </style>
90
  """, unsafe_allow_html=True)
91
 
 
94
  dict(selector="td", props=[("text-align","center")]),
95
  ]
96
 
97
+ # =========================
98
+ # Password gate
99
+ # =========================
100
def inline_logo(path="logo.png") -> str:
    """Return the logo file as a base64 data URI, or "" if unavailable."""
    try:
        logo = Path(path)
        if not logo.exists():
            return ""
        encoded = base64.b64encode(logo.read_bytes()).decode('ascii')
        return f"data:image/png;base64,{encoded}"
    except Exception:
        # Best-effort: a missing or unreadable logo must never break the app.
        return ""
107
+
108
def add_password_gate() -> None:
    """Gate the whole app behind a shared access key.

    Reads APP_PASSWORD from Streamlit secrets (falling back to the
    environment when no secrets file exists), renders a sidebar unlock
    form, and halts the script with st.stop() until the correct key has
    been entered once for this session (tracked via st.session_state).
    """
    import hmac  # local import: used only for the constant-time comparison

    try:
        required = st.secrets.get("APP_PASSWORD", "")
    except Exception:
        # st.secrets raises when no secrets are configured; use env fallback.
        required = os.environ.get("APP_PASSWORD", "")

    if not required:
        st.warning("Set APP_PASSWORD in Secrets (or environment) and restart.")
        st.stop()

    if st.session_state.get("auth_ok", False):
        return  # already unlocked this session

    st.sidebar.markdown(f"""
<div class="centered-container">
<img src="{inline_logo('logo.png')}" class="brand-logo">
<div style='font-weight:800;font-size:1.2rem; margin-top:10px;'>{APP_NAME}</div>
<div style='color:#667085;'>Smart Thinking • Secure Access</div>
</div>
""", unsafe_allow_html=True
    )
    pwd = st.sidebar.text_input("Access key", type="password", placeholder="••••••••")
    if st.sidebar.button("Unlock", type="primary"):
        # compare_digest prevents leaking key length/prefix via timing.
        if hmac.compare_digest(str(pwd), str(required)):
            st.session_state.auth_ok = True
            st.rerun()
        else:
            st.error("Incorrect key.")
    st.stop()
137
+
138
+ add_password_gate()
139
+
140
  # =========================
141
  # Utilities
142
  # =========================
 
160
  xl = pd.ExcelFile(bio)
161
  return {sh: xl.parse(sh) for sh in xl.sheet_names}
162
 
163
def read_book_bytes(b: bytes):
    """Parse an uploaded workbook's raw bytes; empty/None input yields {}."""
    if not b:
        return {}
    return parse_excel(b)
165
+
166
  def _nice_tick0(xmin: float, step: float = 0.1) -> float:
167
  return step * math.floor(xmin / step) if np.isfinite(xmin) else xmin
168
 
 
186
 
187
  # ---- Column name normalization (aliases) ----
188
  def _build_alias_map(canonical_features: list[str], tgt_bo: str, tgt_bd: str) -> dict:
 
189
  def pick(expected_list, variants):
190
  for v in variants:
191
  if v in expected_list:
192
  return v
193
  return variants[0]
194
 
195
+ # Canonical picks from what the model actually expects
 
 
196
  can_WOB = pick(canonical_features, ["WOB (klbf)", "WOB, klbf", "WOB(klbf)", "WOB( klbf)"])
197
+ can_TORQUE = pick(canonical_features, ["Torque (kft.lbf)", "Torque(kft.lbf)", "TORQUE(kft.lbf)", "T (kft.lbf)"]) # include "T (kft.lbf)"
198
+ can_SPP = pick(canonical_features, ["SPP (psi)", "SPP(psi)"])
199
+ can_RPM = pick(canonical_features, ["RPM (1/min)", "RPM(1/min)"])
200
  can_ROP = pick(canonical_features, ["ROP (ft/h)", "ROP(ft/h)"])
201
+ can_FR = pick(canonical_features, [
202
+ "Flow Rate (gpm)", "Flow Rate, gpm", "Flow Rate,gpm", "Flow Rate , gpm",
203
+ "Q, gpm" # include meta/training alias
204
+ ])
205
 
206
+ # Depth canonical = whatever is in training FEATURES (Depth or Depth (ft), etc.)
207
+ can_DEPTH = pick(canonical_features, ["Depth", "Depth (ft)", "Depth, ft", "Depth(ft)", "DEPTH, ft"])
208
+
209
+ return {
 
 
 
210
  # WOB
211
  "WOB (klbf)": can_WOB, "WOB, klbf": can_WOB, "WOB(klbf)": can_WOB, "WOB( klbf)": can_WOB,
212
+ # Torque / T
213
+ "Torque (kft.lbf)": can_TORQUE, "Torque(kft.lbf)": can_TORQUE, "TORQUE(kft.lbf)": can_TORQUE, "T (kft.lbf)": can_TORQUE,
214
+ # SPP
215
+ "SPP (psi)": can_SPP, "SPP(psi)": can_SPP,
216
+ # RPM
217
+ "RPM (1/min)": can_RPM, "RPM(1/min)": can_RPM,
218
  # ROP
219
  "ROP (ft/h)": can_ROP, "ROP(ft/h)": can_ROP,
220
+ # Flow / Q
221
+ "Flow Rate (gpm)": can_FR, "Flow Rate, gpm": can_FR, "Flow Rate,gpm": can_FR, "Flow Rate , gpm": can_FR, "Q, gpm": can_FR,
222
+ # Depth
223
+ "Depth": can_DEPTH, "Depth (ft)": can_DEPTH, "Depth, ft": can_DEPTH, "Depth(ft)": can_DEPTH, "DEPTH, ft": can_DEPTH,
224
+ # Targets (accept extra common names)
225
+ "Breakout MW": tgt_bo, "BOMW": tgt_bo, "BO MW": tgt_bo, "BO_Actual": tgt_bo,
226
+ "Breakdown MW": tgt_bd, "BDMW": tgt_bd, "BD MW": tgt_bd, "BD_Actual": tgt_bd,
227
  }
 
228
 
229
# Accepted spellings for the depth column across uploaded workbooks.
DEPTH_CANDIDATES = ["Depth", "Depth (ft)", "Depth, ft", "Depth(ft)", "DEPTH, ft"]
230
 
 
235
  actual = {k: v for k, v in alias.items() if k in out.columns and k != v}
236
  return out.rename(columns=actual)
237
 
238
+ def _coerce_depth(df: pd.DataFrame, canon_depth: str | None) -> pd.DataFrame:
239
+ """If the model expects a Depth-like column, rename any variant to it."""
240
+ if not canon_depth:
241
+ return df
242
+ if canon_depth in df.columns:
243
+ return df
244
+ for c in DEPTH_CANDIDATES:
245
+ if c in df.columns:
246
+ return df.rename(columns={c: canon_depth})
247
+ return df
248
+
249
def _prepare_table(raw_df: pd.DataFrame, stage: str, features: list[str],
                   tgt_bo: str, tgt_bd: str, canon_depth: str | None) -> pd.DataFrame:
    """Normalize headers, coerce the depth name, and stop the app if features are missing."""
    table = _coerce_depth(_normalize_columns(raw_df, features, tgt_bo, tgt_bd), canon_depth)
    absent = [f for f in features if f not in table.columns]
    if absent:
        # Surface a precise diagnostic (missing vs found vs expected) and halt this run.
        st.error(
            f"{stage}: Missing required column(s): {absent}\n\n"
            f"Found columns: {list(table.columns)}\n\n"
            f"Expected features: {features}"
        )
        st.stop()
    return table
263
+
264
def ensure_cols(df: pd.DataFrame, cols: list[str]) -> bool:
    """Return True when every column in *cols* exists in *df*; otherwise show an error."""
    absent = [c for c in cols if c not in df.columns]
    if not absent:
        return True
    st.error(f"Missing columns: {absent}\nFound: {list(df.columns)}")
    return False
270
+
271
def find_sheet(book, names):
    """Case-insensitively find the first sheet in *book* matching any of *names*; None if absent."""
    lowered = {key.lower(): key for key in book}
    for wanted in names:
        original = lowered.get(wanted.lower())
        if original is not None:
            return original
    return None
276
+
277
+ # === Excel export helpers =================================================
278
+ def _excel_engine() -> str:
279
+ try:
280
+ import xlsxwriter # noqa: F401
281
+ return "xlsxwriter"
282
+ except Exception:
283
+ return "openpyxl"
284
+
285
+ def _excel_safe_name(name: str) -> str:
286
+ bad = '[]:*?/\\'
287
+ safe = ''.join('_' if ch in bad else ch for ch in str(name))
288
+ return safe[:31]
289
+
290
+ def _round_numeric(df: pd.DataFrame, ndigits: int = 3) -> pd.DataFrame:
291
+ out = df.copy()
292
+ for c in out.columns:
293
+ if pd.api.types.is_float_dtype(out[c]) or pd.api.types.is_integer_dtype(out[c]):
294
+ out[c] = pd.to_numeric(out[c], errors="coerce").round(ndigits)
295
+ return out
296
 
297
+ def _summary_table(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
298
+ cols = [c for c in cols if c in df.columns]
299
+ if not cols: return pd.DataFrame()
300
+ tbl = (df[cols]
301
+ .agg(['min','max','mean','std'])
302
+ .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
303
+ .reset_index(names="Field"))
304
+ return _round_numeric(tbl, 3)
305
+
306
+ def _train_ranges_df(ranges: dict[str, tuple[float, float]]) -> pd.DataFrame:
307
+ if not ranges:
308
+ return pd.DataFrame()
309
+ df = pd.DataFrame(ranges).T.reset_index()
310
+ df.columns = ["Feature", "Min", "Max"]
311
+ return _round_numeric(df, 3)
312
+
313
def _excel_autofit(writer, sheet_name: str, df: pd.DataFrame, min_w: int = 8, max_w: int = 40):
    """Best-effort column auto-width + frozen header row (xlsxwriter engine only)."""
    try:
        import xlsxwriter  # noqa: F401
    except Exception:
        # openpyxl engine: silently skip the cosmetic sizing.
        return
    worksheet = writer.sheets[sheet_name]
    for idx, column in enumerate(df.columns):
        as_text = df[column].astype(str)
        widest = max([len(str(column))] + as_text.map(len).tolist())
        # Clamp to [min_w, max_w] with a 2-char breathing margin.
        worksheet.set_column(idx, idx, max(min_w, min(widest + 2, max_w)))
    worksheet.freeze_panes(1, 0)
324
+
325
+ def _add_sheet(sheets: dict, order: list, name: str, df: pd.DataFrame, ndigits: int):
326
+ if df is None or df.empty: return
327
+ sheets[name] = _round_numeric(df, ndigits)
328
+ order.append(name)
329
+
330
def _available_sections() -> list[str]:
    """List the export-sheet names that can be built from results in session state."""
    res = st.session_state.get("results", {})
    sections: list[str] = []
    if "Train" in res:
        sections += ["Training", "Training_Metrics_BO", "Training_Metrics_BD", "Training_Summary"]
    if "Test" in res:
        sections += ["Testing", "Testing_Metrics_BO", "Testing_Metrics_BD", "Testing_Summary"]
    if "Validate" in res:
        sections += ["Validation", "Validation_Metrics_BO", "Validation_Metrics_BD",
                     "Validation_Summary", "Validation_OOR"]
    if "PredictOnly" in res:
        sections += ["Prediction", "Prediction_Summary"]
    if st.session_state.get("train_ranges"):
        sections.append("Training_Ranges")
    sections.append("Info")  # always offered
    return sections
340
+
341
def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: bool = True) -> tuple[bytes|None, str|None, list[str]]:
    """Assemble the result tables named in *selected* into an in-memory .xlsx workbook.

    Reads everything from ``st.session_state`` ("results", "FEATURES", targets,
    "train_ranges"). Returns ``(workbook_bytes, timestamped_filename, sheet_names)``,
    or ``(None, None, [])`` when there are no results or nothing was selected.
    """
    res = st.session_state.get("results", {})
    if not res: return None, None, []

    # Sheets are collected here; `order` preserves the logical sheet order.
    sheets: dict[str, pd.DataFrame] = {}
    order: list[str] = []

    # --- Training results ---
    if "Training" in selected and "Train" in res:
        _add_sheet(sheets, order, "Training", res["Train"], ndigits)
    if "Training_Metrics_BO" in selected and res.get("m_train_bo"):
        _add_sheet(sheets, order, "Training_Metrics_BO", pd.DataFrame([res["m_train_bo"]]), ndigits)
    if "Training_Metrics_BD" in selected and res.get("m_train_bd"):
        _add_sheet(sheets, order, "Training_Metrics_BD", pd.DataFrame([res["m_train_bd"]]), ndigits)
    if "Training_Summary" in selected and "Train" in res:
        # Summarize features plus whichever actual/predicted columns exist.
        tr_cols = st.session_state["FEATURES"] + [c for c in [st.session_state["TARGET_BO"], st.session_state["TARGET_BD"], PRED_BO, PRED_BD] if c in res["Train"].columns]
        _add_sheet(sheets, order, "Training_Summary", _summary_table(res["Train"], tr_cols), ndigits)

    # --- Testing results ---
    if "Testing" in selected and "Test" in res:
        _add_sheet(sheets, order, "Testing", res["Test"], ndigits)
    if "Testing_Metrics_BO" in selected and res.get("m_test_bo"):
        _add_sheet(sheets, order, "Testing_Metrics_BO", pd.DataFrame([res["m_test_bo"]]), ndigits)
    if "Testing_Metrics_BD" in selected and res.get("m_test_bd"):
        _add_sheet(sheets, order, "Testing_Metrics_BD", pd.DataFrame([res["m_test_bd"]]), ndigits)
    if "Testing_Summary" in selected and "Test" in res:
        te_cols = st.session_state["FEATURES"] + [c for c in [st.session_state["TARGET_BO"], st.session_state["TARGET_BD"], PRED_BO, PRED_BD] if c in res["Test"].columns]
        _add_sheet(sheets, order, "Testing_Summary", _summary_table(res["Test"], te_cols), ndigits)

    # --- Validation results ---
    if "Validation" in selected and "Validate" in res:
        _add_sheet(sheets, order, "Validation", res["Validate"], ndigits)
    if "Validation_Metrics_BO" in selected and res.get("m_val_bo"):
        _add_sheet(sheets, order, "Validation_Metrics_BO", pd.DataFrame([res["m_val_bo"]]), ndigits)
    if "Validation_Metrics_BD" in selected and res.get("m_val_bd"):
        _add_sheet(sheets, order, "Validation_Metrics_BD", pd.DataFrame([res["m_val_bd"]]), ndigits)
    if "Validation_Summary" in selected and res.get("sv_val"):
        _add_sheet(sheets, order, "Validation_Summary", pd.DataFrame([res["sv_val"]]), ndigits)
    if "Validation_OOR" in selected and isinstance(res.get("oor_tbl"), pd.DataFrame) and not res["oor_tbl"].empty:
        # Out-of-range rows flagged against the training feature ranges.
        _add_sheet(sheets, order, "Validation_OOR", res["oor_tbl"].reset_index(drop=True), ndigits)

    # --- Prediction-only results ---
    if "Prediction" in selected and "PredictOnly" in res:
        _add_sheet(sheets, order, "Prediction", res["PredictOnly"], ndigits)
    if "Prediction_Summary" in selected and res.get("sv_pred"):
        _add_sheet(sheets, order, "Prediction_Summary", pd.DataFrame([res["sv_pred"]]), ndigits)

    # --- Training feature ranges ---
    if "Training_Ranges" in selected and st.session_state.get("train_ranges"):
        rr = _train_ranges_df(st.session_state["train_ranges"])
        _add_sheet(sheets, order, "Training_Ranges", rr, ndigits)

    # --- App/run metadata sheet ---
    if "Info" in selected:
        info = pd.DataFrame([
            {"Key": "AppName", "Value": APP_NAME},
            {"Key": "Tagline", "Value": TAGLINE},
            {"Key": "Targets", "Value": f'{st.session_state["TARGET_BO"]}, {st.session_state["TARGET_BD"]}'},
            {"Key": "PredColumns","Value": f'{PRED_BO}, {PRED_BD}'},
            {"Key": "Features", "Value": ", ".join(st.session_state["FEATURES"])},
            {"Key": "ExportedAt", "Value": datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
            {"Key": "Units", "Value": st.session_state.get("X_UNITS","MW (pcf)")},
        ])
        _add_sheet(sheets, order, "Info", info, ndigits)

    if not order: return None, None, []

    # Serialize all collected sheets with whichever Excel engine is available.
    bio = io.BytesIO()
    engine = _excel_engine()
    with pd.ExcelWriter(bio, engine=engine) as writer:
        for name in order:
            df = sheets[name]
            sheet = _excel_safe_name(name)
            df.to_excel(writer, sheet_name=sheet, index=False)
            if do_autofit:
                _excel_autofit(writer, sheet, df)
    bio.seek(0)
    fname = f"MW_Export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
    return bio.getvalue(), fname, order
414
+
415
def render_export_button(phase_key: str) -> None:
    """Render the "Export to Excel" section: a sheet multiselect plus a download button.

    *phase_key* keeps widget keys unique per app phase (dev/validate/predict).
    Renders nothing when no results exist in session state yet.
    """
    res = st.session_state.get("results", {})
    if not res: return
    st.divider()
    st.markdown("### Export to Excel")

    options = _available_sections()
    selected_sheets = st.multiselect(
        "Sheets to include",
        options=options,
        default=[],
        placeholder="Choose option(s)",
        key=f"sheets_{phase_key}",
    )

    if not selected_sheets:
        # Keep a disabled button visible so the layout stays stable until a choice is made.
        st.caption("Select one or more sheets above to enable the export.")
        st.download_button(
            label="⬇️ Export Excel",
            data=b"",
            file_name="MW_Export.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            disabled=True,
            key=f"download_{phase_key}",
        )
        return

    data, fname, names = build_export_workbook(selected=selected_sheets, ndigits=3, do_autofit=True)
    if names:
        st.caption("Will include: " + ", ".join(names))
    st.download_button(
        "⬇️ Export Excel",
        data=(data or b""),
        file_name=(fname or "MW_Export.xlsx"),
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        disabled=(data is None),
        key=f"download_{phase_key}",
    )
453
 
454
  # =========================
455
+ # Cross plots (Matplotlib)
456
  # =========================
457
  def cross_plot_static(actual, pred, xlabel, ylabel, color="#1f77b4"):
458
  a = pd.Series(actual, dtype=float)
459
  p = pd.Series(pred, dtype=float)
460
+
461
  lo = float(min(a.min(), p.min()))
462
  hi = float(max(a.max(), p.max()))
463
  pad = 0.03 * (hi - lo if hi > lo else 1.0)
464
  lo2, hi2 = lo - pad, hi + pad
465
  ticks = np.linspace(lo2, hi2, 5)
466
+
467
  dpi = 110
468
  fig, ax = plt.subplots(figsize=(CROSS_W/dpi, CROSS_H/dpi), dpi=dpi, constrained_layout=False)
469
  ax.scatter(a, p, s=14, c=color, alpha=0.9, linewidths=0)
470
  ax.plot([lo2, hi2], [lo2, hi2], linestyle="--", linewidth=1.2, color=COLORS["ref"])
471
+
472
  ax.set_xlim(lo2, hi2); ax.set_ylim(lo2, hi2)
473
  ax.set_xticks(ticks); ax.set_yticks(ticks)
474
  ax.set_aspect("equal", adjustable="box")
475
+
476
  fmt = FuncFormatter(lambda x, _: f"{x:.2f}")
477
  ax.xaxis.set_major_formatter(fmt); ax.yaxis.set_major_formatter(fmt)
478
  ax.set_xlabel(xlabel, fontweight="bold", fontsize=10, color="black")
 
483
  fig.subplots_adjust(left=0.16, bottom=0.16, right=0.98, top=0.98)
484
  return fig
485
 
486
+ # =========================
487
+ # Track plots (Plotly)
488
+ # =========================
489
+ def _depth_series(df):
490
+ depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
491
+ if depth_col is not None:
492
+ y = pd.to_numeric(df[depth_col], errors="coerce"); ylab = depth_col
493
+ rng = [float(y.max()), float(y.min())] # reversed
494
+ else:
495
+ y = pd.Series(np.arange(1, len(df) + 1)); ylab = "Point Index"
496
+ rng = [float(y.max()), float(y.min())]
497
+ return y, ylab, rng
498
+
499
def _x_range_for_tracks(df, cols):
    """Common padded x-range (and nice tick origin) across the track columns in *cols*."""
    values = pd.concat(
        [pd.to_numeric(df[c], errors="coerce") for c in cols if c in df],
        ignore_index=True,
    )
    lo, hi = float(values.min()), float(values.max())
    span = hi - lo if hi > lo else 1.0
    pad = 0.03 * span  # 3% breathing room on each side
    xmin, xmax = lo - pad, hi + pad
    tick0 = _nice_tick0(xmin, step=max((xmax - xmin) / 10.0, 0.1))
    return xmin, xmax, tick0
506
+
507
  def track_plot_single(df, pred_col, actual_col=None, title_suffix=""):
508
  y, ylab, y_range = _depth_series(df)
509
  cols = [pred_col] + ([actual_col] if actual_col and actual_col in df.columns else [])
510
  xmin, xmax, tick0 = _x_range_for_tracks(df, cols)
511
+
512
  fig = go.Figure()
513
  if pred_col in df.columns:
514
  fig.add_trace(go.Scatter(
 
520
  if actual_col and actual_col in df.columns:
521
  fig.add_trace(go.Scatter(
522
  x=df[actual_col], y=y, mode="lines",
523
+ line=dict(color=COLORS["actual_bo"] if actual_col==st.session_state["TARGET_BO"] else COLORS["actual_bd"],
524
  width=2.0, dash="dot"),
525
  name=f"{actual_col} (actual)",
526
  hovertemplate=f"{actual_col}: "+"%{x:.2f}<br>"+ylab+": %{y}<extra></extra>"
527
  ))
528
+
529
+ title_text = st.session_state.get("X_UNITS", "MW (pcf)")
530
  fig.update_layout(
531
  height=TRACK_H, width=TRACK_W, autosize=False, paper_bgcolor="#fff", plot_bgcolor="#fff",
532
  margin=dict(l=64, r=16, t=36, b=48), hovermode="closest",
 
537
  title=title_suffix
538
  )
539
  fig.update_xaxes(
540
+ title_text=title_text, title_font=dict(size=20, family=BOLD_FONT, color="#000"),
541
  tickfont=dict(size=15, family=BOLD_FONT, color="#000"), side="top",
542
  range=[xmin, xmax], ticks="outside", tickformat=",.2f", tickmode="auto", tick0=tick0,
543
  showline=True, linewidth=1.2, linecolor="#444", mirror=True,
 
551
  )
552
  return fig
553
 
554
+ def track_plot_combined(df):
555
+ """Overlay BO & BD predictions (+ actuals if present) on same depth axis."""
556
  y, ylab, y_range = _depth_series(df)
557
+ cols = [c for c in [PRED_BO, PRED_BD, st.session_state["TARGET_BO"], st.session_state["TARGET_BD"]] if c in df]
558
  xmin, xmax, tick0 = _x_range_for_tracks(df, cols)
559
  fig = go.Figure()
560
+
561
  if PRED_BO in df.columns:
562
  fig.add_trace(go.Scatter(x=df[PRED_BO], y=y, mode="lines",
563
+ line=dict(color=COLORS["pred_bo"], width=1.8), name=PRED_BO,
564
+ hovertemplate=f"{PRED_BO}: "+"%{x:.2f}<br>"+ylab+": %{y}<extra></extra>"))
565
+ if st.session_state["TARGET_BO"] in df.columns:
566
+ col = st.session_state["TARGET_BO"]
567
+ fig.add_trace(go.Scatter(x=df[col], y=y, mode="lines",
568
+ line=dict(color=COLORS["actual_bo"], width=2.0, dash="dot"), name=f"{col} (actual)",
569
+ hovertemplate=f"{col}: "+"%{x:.2f}<br>"+ylab+": %{y}<extra></extra>"))
570
+
571
  if PRED_BD in df.columns:
572
  fig.add_trace(go.Scatter(x=df[PRED_BD], y=y, mode="lines",
573
+ line=dict(color=COLORS["pred_bd"], width=1.8), name=PRED_BD,
574
+ hovertemplate=f"{PRED_BD}: "+"%{x:.2f}<br>"+ylab+": %{y}<extra></extra>"))
575
+ if st.session_state["TARGET_BD"] in df.columns:
576
+ col = st.session_state["TARGET_BD"]
577
+ fig.add_trace(go.Scatter(x=df[col], y=y, mode="lines",
578
+ line=dict(color=COLORS["actual_bd"], width=2.0, dash="dot"), name=f"{col} (actual)",
579
+ hovertemplate=f"{col}: "+"%{x:.2f}<br>"+ylab+": %{y}<extra></extra>"))
580
+
581
+ title_text = st.session_state.get("X_UNITS", "MW (pcf)")
582
  fig.update_layout(
583
  height=TRACK_H, width=TRACK_W, autosize=False, paper_bgcolor="#fff", plot_bgcolor="#fff",
584
  margin=dict(l=64, r=16, t=36, b=48), hovermode="closest",
 
587
  bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1),
588
  legend_title_text="", title="Combined (Breakout / Breakdown)"
589
  )
590
+ fig.update_xaxes(title_text=title_text, title_font=dict(size=20, family=BOLD_FONT, color="#000"),
591
+ tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
592
+ side="top", range=[xmin, xmax], ticks="outside",
593
+ tickformat=",.2f", tickmode="auto", tick0=tick0,
594
  showline=True, linewidth=1.2, linecolor="#444", mirror=True,
595
  showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True)
596
+ fig.update_yaxes(title_text=ylab, title_font=dict(size=20, family=BOLD_FONT, color="#000"),
597
+ tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
598
+ range=y_range, ticks="outside", showline=True, linewidth=1.2, linecolor="#444",
599
+ mirror=True, showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True)
600
  return fig
601
 
602
+ # ---------- Preview (matplotlib) ----------
603
  def preview_tracks(df: pd.DataFrame, cols: list[str]):
604
  cols = [c for c in cols if c in df.columns]
605
  n = len(cols)
 
607
  fig, ax = plt.subplots(figsize=(4, 2))
608
  ax.text(0.5, 0.5, "No selected columns", ha="center", va="center"); ax.axis("off")
609
  return fig
610
+
611
  depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
612
  if depth_col is not None:
613
  idx = pd.to_numeric(df[depth_col], errors="coerce"); y_label = depth_col
614
  else:
615
  idx = pd.Series(np.arange(1, len(df) + 1)); y_label = "Point Index"
616
+
617
  cmap = plt.get_cmap("tab20")
618
  col_colors = {col: cmap(i % cmap.N) for i, col in enumerate(cols)}
619
  fig, axes = plt.subplots(1, n, figsize=(2.3 * n, 7.0), sharey=True, dpi=100)
 
635
  def _ensure_file(p: Path) -> Path|None:
636
  return p if (p.exists() and p.stat().st_size > 0) else None
637
 
638
def _first_existing(*paths: Path) -> Path|None:
    """First candidate path that exists with non-zero size, else None."""
    return next((p for p in paths if _ensure_file(p)), None)
642
+
643
def _find_in_dirs(names: list[str], *dirs: Path) -> Path|None:
    """Search each directory (in order) for the first existing, non-empty file named in *names*."""
    for directory in dirs:
        for filename in names:
            candidate = directory / filename
            if _ensure_file(candidate):
                return candidate
    return None
649
+
650
+ def _load_meta(p: Path) -> dict:
651
+ if not p or not p.exists(): return {}
652
+ try:
653
+ return json.loads(p.read_text(encoding="utf-8"))
654
+ except Exception:
655
+ return {}
656
+
657
+ bo_model_path = _find_in_dirs(BO_MODEL_CANDIDATES, MODELS_DIR, ALT_MODELS_DIR)
658
+ bd_model_path = _find_in_dirs(BD_MODEL_CANDIDATES, MODELS_DIR, ALT_MODELS_DIR)
659
+ bo_meta_path = _find_in_dirs(BO_META_CANDIDATES, MODELS_DIR, ALT_MODELS_DIR)
660
+ bd_meta_path = _find_in_dirs(BD_META_CANDIDATES, MODELS_DIR, ALT_MODELS_DIR)
661
+
662
  if not (bo_model_path and bd_model_path):
663
+ st.error("Models not found. Place bo_model.joblib and bd_model.joblib in models/ or upload to /mnt/data/.")
664
+ st.stop()
665
 
666
+ try:
 
 
667
  model_bo = load_model(str(bo_model_path))
668
  model_bd = load_model(str(bd_model_path))
669
+ except Exception as e:
670
+ st.error(f"Failed to load models: {e}"); st.stop()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
671
 
672
+ meta_bo = _load_meta(bo_meta_path)
673
+ meta_bd = _load_meta(bd_meta_path)
 
674
 
675
+ # ---------------------------
676
+ # Resolve FEATURES & targets
677
+ # ---------------------------
678
+ FEATURES = FEATURES_DEFAULT[:] # fallback
679
+ TARGET_BO = TARGET_BO_DEFAULT
680
+ TARGET_BD = TARGET_BD_DEFAULT
681
 
682
+ # Prefer the model's trained feature names (golden source)
683
+ try:
684
+ if hasattr(model_bo, "feature_names_in_"):
685
+ FEATURES = [str(x) for x in model_bo.feature_names_in_]
686
+ except Exception:
687
+ pass
688
+
689
+ # If still not set, fall back to BO meta, then BD meta, then defaults
690
+ if not FEATURES and meta_bo.get("features"):
691
+ FEATURES = [str(x) for x in meta_bo["features"]]
692
+ elif not FEATURES and meta_bd.get("features"):
693
+ FEATURES = [str(x) for x in meta_bd["features"]]
694
+
695
+ # Targets (from metas if provided)
696
+ if meta_bo.get("target"): TARGET_BO = str(meta_bo["target"])
697
+ if meta_bd.get("target"): TARGET_BD = str(meta_bd["target"])
698
+
699
+ # Units from metas; fallback to pcf (not ppg) + normalize spelling
700
+ X_UNITS = meta_bo.get("units") or meta_bd.get("units") or "MW (pcf)"
701
+ if isinstance(X_UNITS, str) and X_UNITS.strip().lower() in {"pcf", "mw (pcf)", "mw pcf"}:
702
+ X_UNITS = "MW (pcf)"
703
+
704
+ # Canonical Depth name the model expects (if any)
705
+ CANON_DEPTH = next((c for c in FEATURES if str(c).strip().lower().startswith("depth")), None)
706
+
707
+ # Session constants for easy access elsewhere
708
+ st.session_state["FEATURES"] = FEATURES
709
+ st.session_state["TARGET_BO"] = TARGET_BO
710
+ st.session_state["TARGET_BD"] = TARGET_BD
711
+ st.session_state["CANON_DEPTH"] = CANON_DEPTH
712
+ st.session_state["X_UNITS"] = X_UNITS
713
+
714
+ # Optional strict version banner
715
+ if STRICT_VERSION_CHECK:
716
+ import numpy as _np, sklearn as _skl
717
+ msgs=[]
718
+ for nm, meta in [("BO", meta_bo), ("BD", meta_bd)]:
719
+ v = meta.get("versions", {})
720
+ if v.get("numpy") and v["numpy"] != _np.__version__:
721
+ msgs.append(f"[{nm}] NumPy {v['numpy']} expected, running {_np.__version__}")
722
+ if v.get("scikit_learn") and v["scikit_learn"] != _skl.__version__:
723
+ msgs.append(f"[{nm}] scikit-learn {v['scikit_learn']} expected, running {_skl.__version__}")
724
+ if msgs: st.warning("Environment mismatch: " + " | ".join(msgs))
725
 
726
+ # =========================
727
+ # Session state
728
+ # =========================
729
+ st.session_state.setdefault("app_step", "intro")
730
+ st.session_state.setdefault("results", {})
731
+ st.session_state.setdefault("train_ranges", None)
732
+ st.session_state.setdefault("dev_file_name","")
733
+ st.session_state.setdefault("dev_file_bytes",b"")
734
+ st.session_state.setdefault("dev_file_loaded",False)
735
+ st.session_state.setdefault("dev_preview",False)
736
+ st.session_state.setdefault("show_preview_modal", False)
737
 
738
  # =========================
739
+ # Sidebar branding
740
  # =========================
741
  st.sidebar.markdown(f"""
742
  <div class="centered-container">
743
+ <img src="{inline_logo('logo.png')}" class="brand-logo">
744
  <div style='font-weight:800;font-size:1.2rem;'>{APP_NAME}</div>
745
  <div style='color:#667085;'>{TAGLINE}</div>
746
  </div>
 
767
  # =========================
768
  # INTRO
769
  # =========================
 
 
 
770
  if st.session_state.app_step == "intro":
771
  st.header("Welcome!")
772
+ st.markdown("This software is developed by *Smart Thinking AI-Solutions Team* to estimate **Breakout** and **Breakdown** mud weight limits from drilling data.")
773
+ st.subheader("How It Works")
774
+ st.markdown(
775
+ "1) **Upload your data** and preview.\n"
776
+ "2) Click **Run Model** to compute metrics and plots (Train/Test).\n"
777
+ "3) Proceed to **Validation** (with actual BO/BD) or **Prediction** (no actuals).\n"
778
+ "4) Use the **Combined** tab to see both limits on one track."
779
+ )
780
+ if st.button("Start Showcase", type="primary"):
781
  st.session_state.app_step = "dev"; st.rerun()
782
 
783
  # =========================
 
785
  # =========================
786
  if st.session_state.app_step == "dev":
787
  st.sidebar.header("Case Building")
788
+ up = st.sidebar.file_uploader("Upload Your Data File", type=["xlsx","xls"])
789
+ if up is not None:
790
+ st.session_state.dev_file_bytes = up.getvalue()
791
+ st.session_state.dev_file_name = up.name
792
+ st.session_state.dev_file_loaded = True
793
+ st.session_state.dev_preview = False
794
+ if st.session_state.dev_file_loaded:
795
+ tmp = read_book_bytes(st.session_state.dev_file_bytes)
796
+ if tmp:
797
+ df0 = next(iter(tmp.values()))
798
+ st.sidebar.caption(f"**Data loaded:** {st.session_state.dev_file_name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
799
+
800
+ if st.sidebar.button("Preview data", use_container_width=True, disabled=not st.session_state.dev_file_loaded):
801
+ st.session_state.show_preview_modal = True
802
+ st.session_state.dev_preview = True
803
+
804
+ run = st.sidebar.button("Run Model", type="primary", use_container_width=True)
805
  if st.sidebar.button("Proceed to Validation ▶", use_container_width=True): st.session_state.app_step="validate"; st.rerun()
806
  if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
807
 
808
+ if st.session_state.dev_file_loaded and st.session_state.dev_preview:
809
+ sticky_header("Case Building", "Previewed ✓ — now click **Run Model**.")
810
+ elif st.session_state.dev_file_loaded:
811
+ sticky_header("Case Building", "📄 **Preview uploaded data** using the sidebar button, then click **Run Model**.")
812
+ else:
813
+ sticky_header("Case Building", "**Upload your data to build a case, then run the model to review development performance.**")
 
 
 
 
 
 
 
 
 
 
814
 
815
+ if run and st.session_state.dev_file_bytes:
816
+ book = read_book_bytes(st.session_state.dev_file_bytes)
817
+ sh_train = find_sheet(book, ["Train","Training","training2","train","training"])
818
+ sh_test = find_sheet(book, ["Test","Testing","testing2","test","testing"])
819
  if sh_train is None or sh_test is None:
820
+ st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training/training2 and Test/Testing/testing2 sheets.</div>', unsafe_allow_html=True)
821
  st.stop()
822
 
823
+ # Normalize + ensure cols (robust depth handling)
824
+ tr = _prepare_table(book[sh_train].copy(), "Training", st.session_state["FEATURES"], st.session_state["TARGET_BO"], st.session_state["TARGET_BD"], st.session_state["CANON_DEPTH"])
825
+ te = _prepare_table(book[sh_test].copy(), "Testing", st.session_state["FEATURES"], st.session_state["TARGET_BO"], st.session_state["TARGET_BD"], st.session_state["CANON_DEPTH"])
826
+
827
+ # Predict with exact training feature order
828
+ Xtr = _make_X(tr, st.session_state["FEATURES"]) # only features
829
+ Xte = _make_X(te, st.session_state["FEATURES"])
 
 
 
 
 
 
 
 
 
830
  tr[PRED_BO] = model_bo.predict(Xtr)
831
  tr[PRED_BD] = model_bd.predict(Xtr)
832
  te[PRED_BO] = model_bo.predict(Xte)
833
  te[PRED_BD] = model_bd.predict(Xte)
834
 
835
+ st.session_state.results["Train"]=tr; st.session_state.results["Test"]=te
836
+ st.session_state.results["m_train_bo"]={"R": pearson_r(tr[st.session_state["TARGET_BO"]], tr[PRED_BO]), "RMSE": rmse(tr[st.session_state["TARGET_BO"]], tr[PRED_BO]), "MAE": mean_absolute_error(tr[st.session_state["TARGET_BO"]], tr[PRED_BO])}
837
+ st.session_state.results["m_train_bd"]={"R": pearson_r(tr[st.session_state["TARGET_BD"]], tr[PRED_BD]), "RMSE": rmse(tr[st.session_state["TARGET_BD"]], tr[PRED_BD]), "MAE": mean_absolute_error(tr[st.session_state["TARGET_BD"]], tr[PRED_BD])}
838
+ st.session_state.results["m_test_bo"] ={"R": pearson_r(te[st.session_state["TARGET_BO"]], te[PRED_BO]), "RMSE": rmse(te[st.session_state["TARGET_BO"]], te[PRED_BO]), "MAE": mean_absolute_error(te[st.session_state["TARGET_BO"]], te[PRED_BO])}
839
+ st.session_state.results["m_test_bd"] ={"R": pearson_r(te[st.session_state["TARGET_BD"]], te[PRED_BD]), "RMSE": rmse(te[st.session_state["TARGET_BD"]], te[PRED_BD]), "MAE": mean_absolute_error(te[st.session_state["TARGET_BD"]], te[PRED_BD])}
 
 
840
 
841
+ tr_min = tr[st.session_state["FEATURES"]].min().to_dict(); tr_max = tr[st.session_state["FEATURES"]].max().to_dict()
842
+ st.session_state.train_ranges = {f:(float(tr_min[f]), float(tr_max[f])) for f in st.session_state["FEATURES"]}
843
+ st.markdown('<div class="st-message-box st-success">Case has been built and results are displayed below.</div>', unsafe_allow_html=True)
 
844
 
845
+ def _metrics_block(lbl, m):
846
  c1,c2,c3 = st.columns(3)
847
+ c1.metric(f"R ({lbl})", f"{m['R']:.3f}")
848
+ c2.metric(f"RMSE ({lbl})", f"{m['RMSE']:.2f}")
849
+ c3.metric(f"MAE ({lbl})", f"{m['MAE']:.2f}")
850
+
851
+ def _dev_block(df, mbo, mbd):
852
+ _metrics_block("BO", mbo); _metrics_block("BD", mbd)
853
+ st.markdown("""
854
+ <div style='text-align:left;font-size:0.8em;color:#6b7280;margin-top:-16px;margin-bottom:8px;'>
855
+ <strong>R:</strong> Pearson correlation • <strong>RMSE</strong> / <strong>MAE</strong> in MW units (pcf)
856
+ </div>
857
+ """, unsafe_allow_html=True)
858
  t1, t2, t3 = st.tabs(["Breakout", "Breakdown", "Combined"])
859
  with t1:
860
+ st.plotly_chart(track_plot_single(df, PRED_BO, actual_col=st.session_state["TARGET_BO"], title_suffix="Breakout"), use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
861
+ st.pyplot(cross_plot_static(df[st.session_state["TARGET_BO"]], df[PRED_BO], xlabel=f"Actual {st.session_state['TARGET_BO']}", ylabel=f"Predicted {st.session_state['TARGET_BO']}", color=COLORS["pred_bo"]), use_container_width=False)
862
  with t2:
863
+ st.plotly_chart(track_plot_single(df, PRED_BD, actual_col=st.session_state["TARGET_BD"], title_suffix="Breakdown"), use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
864
+ st.pyplot(cross_plot_static(df[st.session_state["TARGET_BD"]], df[PRED_BD], xlabel=f"Actual {st.session_state['TARGET_BD']}", ylabel=f"Predicted {st.session_state['TARGET_BD']}", color=COLORS["pred_bd"]), use_container_width=False)
865
  with t3:
866
+ st.plotly_chart(track_plot_combined(df), use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
867
+
868
+ if "Train" in st.session_state.results or "Test" in st.session_state.results:
869
+ tab1, tab2 = st.tabs(["Training", "Testing"])
870
+ if "Train" in st.session_state.results:
871
+ with tab1:
872
+ _dev_block(st.session_state.results["Train"], st.session_state.results["m_train_bo"], st.session_state.results["m_train_bd"])
873
+ if "Test" in st.session_state.results:
874
+ with tab2:
875
+ _dev_block(st.session_state.results["Test"], st.session_state.results["m_test_bo"], st.session_state.results["m_test_bd"])
876
+ render_export_button(phase_key="dev")
 
 
 
 
 
 
 
 
 
 
877
 
878
# =========================
# VALIDATION (with actual BO/BD)
# =========================
if st.session_state.app_step == "validate":
    # --- Sidebar: upload, preview, run, and navigation ---------------------
    st.sidebar.header("Validate the Models")
    up = st.sidebar.file_uploader("Upload Validation Excel", type=["xlsx","xls"])
    # Parse the workbook once here; the go-branch below reuses it instead of
    # re-reading the uploaded bytes a second time (original parsed twice).
    book = None
    if up is not None:
        book = read_book_bytes(up.getvalue())
        if book:
            df0 = next(iter(book.values()))
            st.sidebar.caption(f"**Data loaded:** {up.name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
    if st.sidebar.button("Preview data", use_container_width=True, disabled=(up is None)):
        st.session_state.show_preview_modal = True
    go_btn = st.sidebar.button("Predict & Validate", type="primary", use_container_width=True)
    if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
    if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()

    sticky_header("Validate the Models", "Upload a dataset with the same **features** and **BO/BD MW** to evaluate performance.")

    if go_btn and up is not None:
        if book is None:  # defensive: uploader present but workbook not parsed yet
            book = read_book_bytes(up.getvalue())
        # Prefer a conventionally named validation sheet; fall back to sheet #1.
        name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
        df = _prepare_table(book[name].copy(), "Validation", st.session_state["FEATURES"],
                            st.session_state["TARGET_BO"], st.session_state["TARGET_BD"],
                            st.session_state["CANON_DEPTH"])

        # Predict breakout / breakdown MW with the loaded models.
        df[PRED_BO] = model_bo.predict(_make_X(df, st.session_state["FEATURES"]))
        df[PRED_BD] = model_bd.predict(_make_X(df, st.session_state["FEATURES"]))
        st.session_state.results["Validate"] = df

        # Out-of-range (OOR) check against the training min–max ranges.
        ranges = st.session_state.train_ranges; oor_pct = 0.0; tbl = None
        if ranges:
            # Build the per-feature violation mask ONCE and reuse it for both the
            # row-level flag and the "Violations" column (the original rebuilt this
            # O(rows × features) frame a second time for the column).
            viol = pd.DataFrame({f: (df[f] < ranges[f][0]) | (df[f] > ranges[f][1])
                                 for f in st.session_state["FEATURES"]})
            any_viol = viol.any(axis=1)
            oor_pct = float(any_viol.mean() * 100.0)
            if any_viol.any():
                tbl = df.loc[any_viol, st.session_state["FEATURES"]].copy()
                for c in st.session_state["FEATURES"]:
                    if pd.api.types.is_numeric_dtype(tbl[c]): tbl[c] = tbl[c].round(2)
                # Comma-joined list of which features violated, per offending row.
                tbl["Violations"] = viol.loc[any_viol].apply(
                    lambda r: ", ".join([c for c, v in r.items() if v]), axis=1
                )

        # Persist metrics + summary in session state so they survive reruns.
        st.session_state.results["m_val_bo"] = {"R": pearson_r(df[st.session_state["TARGET_BO"]], df[PRED_BO]),
                                                "RMSE": rmse(df[st.session_state["TARGET_BO"]], df[PRED_BO]),
                                                "MAE": mean_absolute_error(df[st.session_state["TARGET_BO"]], df[PRED_BO])}
        st.session_state.results["m_val_bd"] = {"R": pearson_r(df[st.session_state["TARGET_BD"]], df[PRED_BD]),
                                                "RMSE": rmse(df[st.session_state["TARGET_BD"]], df[PRED_BD]),
                                                "MAE": mean_absolute_error(df[st.session_state["TARGET_BD"]], df[PRED_BD])}
        st.session_state.results["sv_val"] = {"n": len(df),
                                              "bo_min": float(df[PRED_BO].min()), "bo_max": float(df[PRED_BO].max()),
                                              "bd_min": float(df[PRED_BD].min()), "bd_max": float(df[PRED_BD].max()),
                                              "oor": oor_pct}
        st.session_state.results["oor_tbl"] = tbl

    # --- Render validation results (persisted across reruns) ----------------
    if "Validate" in st.session_state.results:
        df = st.session_state.results["Validate"]
        m_bo, m_bd = st.session_state.results["m_val_bo"], st.session_state.results["m_val_bd"]
        c1,c2,c3 = st.columns(3)
        c1.metric("R (BO)", f"{m_bo['R']:.3f}"); c2.metric("RMSE (BO)", f"{m_bo['RMSE']:.2f}"); c3.metric("MAE (BO)", f"{m_bo['MAE']:.2f}")
        c1,c2,c3 = st.columns(3)
        c1.metric("R (BD)", f"{m_bd['R']:.3f}"); c2.metric("RMSE (BD)", f"{m_bd['RMSE']:.2f}"); c3.metric("MAE (BD)", f"{m_bd['MAE']:.2f}")
        st.markdown("<div style='text-align:left;font-size:0.8em;color:#6b7280;margin-top:-16px;margin-bottom:8px;'>R = Pearson correlation</div>", unsafe_allow_html=True)

        t1, t2, t3 = st.tabs(["Breakout", "Breakdown", "Combined"])
        with t1:
            st.plotly_chart(track_plot_single(df, PRED_BO, actual_col=st.session_state["TARGET_BO"], title_suffix="Breakout"),
                            use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
            st.pyplot(cross_plot_static(df[st.session_state["TARGET_BO"]], df[PRED_BO], f"Actual {st.session_state['TARGET_BO']}", f"Predicted {st.session_state['TARGET_BO']}", COLORS["pred_bo"]),
                      use_container_width=False)
        with t2:
            st.plotly_chart(track_plot_single(df, PRED_BD, actual_col=st.session_state["TARGET_BD"], title_suffix="Breakdown"),
                            use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
            st.pyplot(cross_plot_static(df[st.session_state["TARGET_BD"]], df[PRED_BD], f"Actual {st.session_state['TARGET_BD']}", f"Predicted {st.session_state['TARGET_BD']}", COLORS["pred_bd"]),
                      use_container_width=False)
        with t3:
            st.plotly_chart(track_plot_combined(df), use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})

        render_export_button(phase_key="validate")

        sv = st.session_state.results["sv_val"]
        if sv["oor"] > 0: st.markdown('<div class="st-message-box st-warning">Some inputs fall outside **training min–max** ranges.</div>', unsafe_allow_html=True)
        if st.session_state.results["oor_tbl"] is not None:
            st.write("*Out-of-range rows (vs. Training min–max):*"); df_centered_rounded(st.session_state.results["oor_tbl"])
953
 
954
# =========================
# PREDICTION (no actuals)
# =========================
if st.session_state.app_step == "predict":
    # Sidebar: upload a features-only workbook, optional preview, run, go back.
    st.sidebar.header("Prediction (No Actual BO/BD)")
    up = st.sidebar.file_uploader("Upload Prediction Excel", type=["xlsx","xls"])
    if up is not None:
        # Parsed only for the sidebar caption; the go-branch re-reads below.
        book = read_book_bytes(up.getvalue())
        if book:
            df0 = next(iter(book.values()))
            st.sidebar.caption(f"**Data loaded:** {up.name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
    if st.sidebar.button("Preview data", use_container_width=True, disabled=(up is None)):
        st.session_state.show_preview_modal = True
    go_btn = st.sidebar.button("Predict", type="primary", use_container_width=True)
    if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()

    sticky_header("Prediction", "Upload a dataset with **feature columns only** (no BO/BD actuals).")

    if go_btn and up is not None:
        # First sheet of the workbook is taken as the prediction table.
        book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
        df = _prepare_table(book[name].copy(), "Prediction", st.session_state["FEATURES"], st.session_state["TARGET_BO"], st.session_state["TARGET_BD"], st.session_state["CANON_DEPTH"])

        # Predict breakout / breakdown MW limits with the loaded models.
        df[PRED_BO] = model_bo.predict(_make_X(df, st.session_state["FEATURES"]))
        df[PRED_BD] = model_bd.predict(_make_X(df, st.session_state["FEATURES"]))
        st.session_state.results["PredictOnly"]=df

        # % of rows with any feature outside training min–max (0 if no ranges stored).
        ranges = st.session_state.train_ranges; oor_pct = 0.0
        if ranges:
            any_viol = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in st.session_state["FEATURES"]}).any(axis=1)
            oor_pct = float(any_viol.mean()*100.0)
        # Summary stats persisted for rendering across Streamlit reruns.
        # NOTE(review): assumed outside the `if ranges:` block (mirrors how sv_val
        # is stored in the validation step) — confirm against original indentation.
        st.session_state.results["sv_pred"]={
            "n":len(df),
            "bo_min":float(df[PRED_BO].min()), "bo_max":float(df[PRED_BO].max()),
            "bd_min":float(df[PRED_BD].min()), "bd_max":float(df[PRED_BD].max()),
            "bo_mean":float(df[PRED_BO].mean()), "bo_std":float(df[PRED_BO].std(ddof=0)),
            "bd_mean":float(df[PRED_BD].mean()), "bd_std":float(df[PRED_BD].std(ddof=0)),
            "oor":oor_pct
        }

    # Render persisted prediction results: summary table left, plots right.
    if "PredictOnly" in st.session_state.results:
        df = st.session_state.results["PredictOnly"]; sv = st.session_state.results["sv_pred"]

        col_left, col_right = st.columns([2,3], gap="large")
        with col_left:
            table = pd.DataFrame({
                "Metric": ["# points","BO min","BO max","BO mean","BO std","BD min","BD max","BD mean","BD std","OOR %"],
                "Value": [sv["n"], round(sv["bo_min"],2), round(sv["bo_max"],2), round(sv["bo_mean"],2), round(sv["bo_std"],2),
                          round(sv["bd_min"],2), round(sv["bd_max"],2), round(sv["bd_mean"],2), round(sv["bd_std"],2), f'{sv["oor"]:.1f}%']
            })
            st.markdown('<div class="st-message-box st-success">Predictions ready ✓</div>', unsafe_allow_html=True)
            df_centered_rounded(table, hide_index=True)
            st.caption("**★ OOR** = % of rows whose input features fall outside the training min–max range.")
        with col_right:
            t1, t2 = st.tabs(["Breakout", "Breakdown"])
            with t1:
                st.plotly_chart(track_plot_single(df, PRED_BO, actual_col=None, title_suffix="Breakout"),
                                use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
            with t2:
                st.plotly_chart(track_plot_single(df, PRED_BD, actual_col=None, title_suffix="Breakdown"),
                                use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
            # Combined BO+BD track below the tabs.
            # NOTE(review): placement inferred from the diff — confirm whether this
            # sits under col_right (as here) or inside the Breakdown tab.
            st.plotly_chart(track_plot_combined(df), use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})

        render_export_button(phase_key="predict")
1017
+
1018
# =========================
# Preview modal
# =========================
if st.session_state.show_preview_modal:
    # Choose the workbook for the current phase: case building ("dev") uses bytes
    # cached in session state; validate/predict reuse the sidebar uploader from
    # this very run. `'up' in locals()` guards against phases where the uploader
    # widget was never created (at module top level, locals() is globals()).
    book_to_preview = {}
    if st.session_state.app_step == "dev":
        book_to_preview = read_book_bytes(st.session_state.dev_file_bytes)
    elif st.session_state.app_step in ["validate", "predict"] and 'up' in locals() and up is not None:
        book_to_preview = read_book_bytes(up.getvalue())

    with st.expander("Preview data", expanded=True):
        if not book_to_preview:
            st.markdown('<div class="st-message-box">No data loaded yet.</div>', unsafe_allow_html=True)
        else:
            # One tab per sheet; each sheet shows track plots and a stats summary.
            names = list(book_to_preview.keys())
            tabs = st.tabs(names)
            for t, name in zip(tabs, names):
                with t:
                    df = _prepare_table(book_to_preview[name], "Preview", st.session_state["FEATURES"], st.session_state["TARGET_BO"], st.session_state["TARGET_BD"], st.session_state["CANON_DEPTH"])
                    t1, t2 = st.tabs(["Tracks", "Summary"])
                    with t1:
                        st.pyplot(preview_tracks(df, st.session_state["FEATURES"]), use_container_width=True)
                    with t2:
                        feat_present = [c for c in st.session_state["FEATURES"] if c in df.columns]
                        if not feat_present:
                            st.info("No feature columns found to summarize.")
                        else:
                            # Min/Max/Mean/Std per feature, one row per feature.
                            tbl = (
                                df[feat_present]
                                .agg(['min','max','mean','std'])
                                .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
                                .reset_index(names="Feature")
                            )
                            df_centered_rounded(tbl)

    # One-shot flag: clear it so the preview does not re-open on the next rerun.
    st.session_state.show_preview_modal = False
1054
 
1055
  # =========================
1056
  # Footer
 
1059
  <br><br><br>
1060
  <hr>
1061
  <div style='text-align:center;color:#6b7280;font-size:1.0em;'>
1062
+ © 2025 Smart Thinking AI-Solutions Team. All rights reserved.<br>
1063
+ Website: <a href="https://smartthinking.com.sa" target="_blank" rel="noopener noreferrer">smartthinking.com.sa</a>
1064
  </div>
1065
  """, unsafe_allow_html=True)