UCS2014 committed on
Commit
5bb7f71
·
verified ·
1 Parent(s): c5591aa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +330 -291
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py — ST_Sonic_Ts (Shear Slowness Ts)
2
 
3
  import io, json, os, base64, math
4
  from pathlib import Path
@@ -8,7 +8,7 @@ import numpy as np
8
  import joblib
9
  from datetime import datetime
10
 
11
- # Matplotlib (static plots)
12
  import matplotlib
13
  matplotlib.use("Agg")
14
  import matplotlib.pyplot as plt
@@ -18,13 +18,13 @@ import plotly.graph_objects as go
18
  from sklearn.metrics import mean_squared_error, mean_absolute_error
19
 
20
  # =========================
21
- # Constants (Ts variant)
22
  # =========================
23
- APP_NAME = "ST_Log_Sonic (Ts)"
24
- TAGLINE = "Real-Time Shear Slowness (Ts) Prediction"
25
 
26
- # Defaults (overridden by ts_meta.json if present)
27
- FEATURES = [
28
  "WOB (klbf)",
29
  "Torque (kft.lbf)",
30
  "SPP (psi)",
@@ -32,18 +32,31 @@ FEATURES = [
32
  "ROP (ft/h)",
33
  "Flow Rate (gpm)",
34
  ]
35
- TARGET = "Ts (us/ft_Actual)"
36
- PRED_COL = "Ts_Pred"
 
 
 
 
37
 
38
  MODELS_DIR = Path("models")
39
- DEFAULT_MODEL = MODELS_DIR / "ts_model.joblib"
 
 
 
 
40
  MODEL_FALLBACKS = [MODELS_DIR / "model.joblib", MODELS_DIR / "model.pkl"]
41
- COLORS = {"pred": "#1f77b4", "actual": "#f2b702", "ref": "#5a5a5a"}
 
 
 
 
 
 
42
 
43
- # Optional env banner from meta
44
  STRICT_VERSION_CHECK = False
45
 
46
- # ---- Plot sizing ----
47
  CROSS_W = 350
48
  CROSS_H = 350
49
  TRACK_H = 1000
@@ -58,11 +71,11 @@ st.set_page_config(page_title=APP_NAME, page_icon="logo.png", layout="wide")
58
  st.markdown("""
59
  <style>
60
  .brand-logo { width: 200px; height: auto; object-fit: contain; }
61
- .centered-container { display: flex; flex-direction: column; align-items: center; text-align: center; }
62
- .st-message-box { background-color: #f0f2f6; color: #333; padding: 10px; border-radius: 10px; border: 1px solid #e6e9ef; }
63
- .st-message-box.st-success { background-color: #d4edda; color: #155724; border-color: #c3e6cb; }
64
- .st-message-box.st-warning { background-color: #fff3cd; color: #856404; border-color: #ffeeba; }
65
- .st-message-box.st-error { background-color: #f8d7da; color: #721c24; border-color: #f5c6cb; }
66
  .main .block-container { overflow: unset !important; }
67
  div[data-testid="stVerticalBlock"] { overflow: unset !important; }
68
  div[data-testid="stExpander"] > details > summary {
@@ -75,8 +88,8 @@ st.markdown("""
75
  """, unsafe_allow_html=True)
76
 
77
  TABLE_CENTER_CSS = [
78
- dict(selector="th", props=[("text-align", "center")]),
79
- dict(selector="td", props=[("text-align", "center")]),
80
  ]
81
 
82
  # =========================
@@ -105,8 +118,8 @@ def add_password_gate() -> None:
105
 
106
  st.sidebar.markdown(f"""
107
  <div class="centered-container">
108
- <img src="{inline_logo('logo.png')}" style="width: 200px; height: auto; object-fit: contain;">
109
- <div style='font-weight:800;font-size:1.2rem; margin-top: 10px;'>{APP_NAME}</div>
110
  <div style='color:#667085;'>Smart Thinking • Secure Access</div>
111
  </div>
112
  """, unsafe_allow_html=True
@@ -148,12 +161,29 @@ def parse_excel(data_bytes: bytes):
148
  def read_book_bytes(b: bytes):
149
  return parse_excel(b) if b else {}
150
 
151
- # ---- Canonical feature aliasing ------------------------------------------
152
- def _build_alias_map(canonical_features: list[str], target_name: str) -> dict:
153
- """
154
- Map common header variants -> the *canonical* names in canonical_features.
155
- Whatever appears in canonical_features (from ts_meta.json) wins.
156
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  def pick(expected_list, variants):
158
  for v in variants:
159
  if v in expected_list:
@@ -165,36 +195,28 @@ def _build_alias_map(canonical_features: list[str], target_name: str) -> dict:
165
  can_SPP = pick(canonical_features, ["SPP (psi)", "SPP(psi)"])
166
  can_RPM = pick(canonical_features, ["RPM (1/min)", "RPM(1/min)"])
167
  can_ROP = pick(canonical_features, ["ROP (ft/h)", "ROP(ft/h)"])
168
- can_FR = pick(canonical_features, ["Flow Rate (gpm)", "Flow Rate, gpm", "Flow Rate,gpm", "Flow Rate , gpm", "Fow Rate, gpm", "Fow Rate, gpm "])
169
  can_DEPTH = "Depth (ft)"
170
 
171
  alias = {
172
- # Features
173
  "WOB (klbf)": can_WOB, "WOB, klbf": can_WOB, "WOB(klbf)": can_WOB, "WOB( klbf)": can_WOB,
174
  "Torque (kft.lbf)": can_TORQUE, "Torque(kft.lbf)": can_TORQUE, "TORQUE(kft.lbf)": can_TORQUE,
175
  "SPP (psi)": can_SPP, "SPP(psi)": can_SPP,
176
  "RPM (1/min)": can_RPM, "RPM(1/min)": can_RPM,
177
  "ROP (ft/h)": can_ROP, "ROP(ft/h)": can_ROP,
178
  "Flow Rate (gpm)": can_FR, "Flow Rate, gpm": can_FR, "Flow Rate,gpm": can_FR, "Flow Rate , gpm": can_FR,
179
- "Fow Rate, gpm": can_FR, "Fow Rate, gpm ": can_FR,
180
-
181
- # Depth (plot only)
182
  "Depth (ft)": can_DEPTH, "Depth, ft": can_DEPTH, "Depth(ft)": can_DEPTH, "DEPTH, ft": can_DEPTH,
183
 
184
- # Target family
185
- "Ts (us/ft_Actual)": target_name,
186
- "Ts,us/ft_Actual": target_name,
187
- "Ts, us/ft_Actual": target_name,
188
- "Ts": target_name,
189
- "TS_Actual": target_name,
190
- "Ts (us/ft)_Actual": target_name,
191
  }
192
  return alias
193
 
194
- def _normalize_columns(df: pd.DataFrame, canonical_features: list[str], target_name: str) -> pd.DataFrame:
195
  out = df.copy()
196
  out.columns = [str(c).strip().replace(" ,", ",").replace(", ", ", ").replace(" ", " ") for c in out.columns]
197
- alias = _build_alias_map(canonical_features, target_name)
198
  actual = {k: v for k, v in alias.items() if k in out.columns and k != v}
199
  return out.rename(columns=actual)
200
 
@@ -211,31 +233,6 @@ def find_sheet(book, names):
211
  if nm.lower() in low2orig: return low2orig[nm.lower()]
212
  return None
213
 
214
- def _nice_tick0(xmin: float, step: float = 0.1) -> float:
215
- return step * math.floor(xmin / step) if np.isfinite(xmin) else xmin
216
-
217
- def df_centered_rounded(df: pd.DataFrame, hide_index=True):
218
- out = df.copy()
219
- numcols = out.select_dtypes(include=[np.number]).columns
220
- styler = (
221
- out.style
222
- .format({c: "{:.2f}" for c in numcols})
223
- .set_properties(**{"text-align": "center"})
224
- .set_table_styles(TABLE_CENTER_CSS)
225
- )
226
- st.dataframe(styler, use_container_width=True, hide_index=hide_index)
227
-
228
- # ---------- Build X exactly as trained ----------
229
- def _make_X(df: pd.DataFrame, features: list[str]) -> pd.DataFrame:
230
- """
231
- Reindex columns to the exact training feature order and coerce to numeric.
232
- Prevents scikit-learn 'feature names should match' errors.
233
- """
234
- X = df.reindex(columns=features, copy=False)
235
- for c in X.columns:
236
- X[c] = pd.to_numeric(X[c], errors="coerce")
237
- return X
238
-
239
  # === Excel export helpers =================================================
240
  def _excel_engine() -> str:
241
  try:
@@ -258,8 +255,7 @@ def _round_numeric(df: pd.DataFrame, ndigits: int = 3) -> pd.DataFrame:
258
 
259
  def _summary_table(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
260
  cols = [c for c in cols if c in df.columns]
261
- if not cols:
262
- return pd.DataFrame()
263
  tbl = (df[cols]
264
  .agg(['min','max','mean','std'])
265
  .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
@@ -293,9 +289,9 @@ def _add_sheet(sheets: dict, order: list, name: str, df: pd.DataFrame, ndigits:
293
  def _available_sections() -> list[str]:
294
  res = st.session_state.get("results", {})
295
  sections = []
296
- if "Train" in res: sections += ["Training","Training_Metrics","Training_Summary"]
297
- if "Test" in res: sections += ["Testing","Testing_Metrics","Testing_Summary"]
298
- if "Validate" in res: sections += ["Validation","Validation_Metrics","Validation_Summary","Validation_OOR"]
299
  if "PredictOnly" in res: sections += ["Prediction","Prediction_Summary"]
300
  if st.session_state.get("train_ranges"): sections += ["Training_Ranges"]
301
  sections += ["Info"]
@@ -310,24 +306,30 @@ def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: boo
310
 
311
  if "Training" in selected and "Train" in res:
312
  _add_sheet(sheets, order, "Training", res["Train"], ndigits)
313
- if "Training_Metrics" in selected and res.get("m_train"):
314
- _add_sheet(sheets, order, "Training_Metrics", pd.DataFrame([res["m_train"]]), ndigits)
 
 
315
  if "Training_Summary" in selected and "Train" in res:
316
- tr_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in res["Train"].columns]
317
  _add_sheet(sheets, order, "Training_Summary", _summary_table(res["Train"], tr_cols), ndigits)
318
 
319
  if "Testing" in selected and "Test" in res:
320
  _add_sheet(sheets, order, "Testing", res["Test"], ndigits)
321
- if "Testing_Metrics" in selected and res.get("m_test"):
322
- _add_sheet(sheets, order, "Testing_Metrics", pd.DataFrame([res["m_test"]]), ndigits)
 
 
323
  if "Testing_Summary" in selected and "Test" in res:
324
- te_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in res["Test"].columns]
325
  _add_sheet(sheets, order, "Testing_Summary", _summary_table(res["Test"], te_cols), ndigits)
326
 
327
  if "Validation" in selected and "Validate" in res:
328
  _add_sheet(sheets, order, "Validation", res["Validate"], ndigits)
329
- if "Validation_Metrics" in selected and res.get("m_val"):
330
- _add_sheet(sheets, order, "Validation_Metrics", pd.DataFrame([res["m_val"]]), ndigits)
 
 
331
  if "Validation_Summary" in selected and res.get("sv_val"):
332
  _add_sheet(sheets, order, "Validation_Summary", pd.DataFrame([res["sv_val"]]), ndigits)
333
  if "Validation_OOR" in selected and isinstance(res.get("oor_tbl"), pd.DataFrame) and not res["oor_tbl"].empty:
@@ -346,9 +348,9 @@ def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: boo
346
  info = pd.DataFrame([
347
  {"Key": "AppName", "Value": APP_NAME},
348
  {"Key": "Tagline", "Value": TAGLINE},
349
- {"Key": "Target", "Value": TARGET},
350
- {"Key": "PredColumn", "Value": PRED_COL},
351
- {"Key": "Features", "Value": ", ".join(FEATURES)},
352
  {"Key": "ExportedAt", "Value": datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
353
  ])
354
  _add_sheet(sheets, order, "Info", info, ndigits)
@@ -365,10 +367,9 @@ def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: boo
365
  if do_autofit:
366
  _excel_autofit(writer, sheet, df)
367
  bio.seek(0)
368
- fname = f"TS_Export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
369
  return bio.getvalue(), fname, order
370
 
371
- # --------- SIMPLE export UI ----------
372
  def render_export_button(phase_key: str) -> None:
373
  res = st.session_state.get("results", {})
374
  if not res: return
@@ -381,7 +382,6 @@ def render_export_button(phase_key: str) -> None:
381
  options=options,
382
  default=[],
383
  placeholder="Choose option(s)",
384
- help="Pick the sheets you want to include in the Excel export.",
385
  key=f"sheets_{phase_key}",
386
  )
387
 
@@ -390,7 +390,7 @@ def render_export_button(phase_key: str) -> None:
390
  st.download_button(
391
  label="⬇️ Export Excel",
392
  data=b"",
393
- file_name="TS_Export.xlsx",
394
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
395
  disabled=True,
396
  key=f"download_{phase_key}",
@@ -403,16 +403,16 @@ def render_export_button(phase_key: str) -> None:
403
  st.download_button(
404
  "⬇️ Export Excel",
405
  data=(data or b""),
406
- file_name=(fname or "TS_Export.xlsx"),
407
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
408
  disabled=(data is None),
409
  key=f"download_{phase_key}",
410
  )
411
 
412
  # =========================
413
- # Cross plot (Matplotlib)
414
  # =========================
415
- def cross_plot_static(actual, pred, xlabel="Actual Ts (µs/ft)", ylabel="Predicted Ts (µs/ft)"):
416
  a = pd.Series(actual, dtype=float)
417
  p = pd.Series(pred, dtype=float)
418
 
@@ -420,204 +420,227 @@ def cross_plot_static(actual, pred, xlabel="Actual Ts (µs/ft)", ylabel="Predict
420
  hi = float(max(a.max(), p.max()))
421
  pad = 0.03 * (hi - lo if hi > lo else 1.0)
422
  lo2, hi2 = lo - pad, hi + pad
423
-
424
  ticks = np.linspace(lo2, hi2, 5)
425
 
426
  dpi = 110
427
- fig, ax = plt.subplots(figsize=(CROSS_W / dpi, CROSS_H / dpi), dpi=dpi, constrained_layout=False)
428
-
429
- ax.scatter(a, p, s=14, c=COLORS["pred"], alpha=0.9, linewidths=0)
430
  ax.plot([lo2, hi2], [lo2, hi2], linestyle="--", linewidth=1.2, color=COLORS["ref"])
431
 
432
- ax.set_xlim(lo2, hi2)
433
- ax.set_ylim(lo2, hi2)
434
  ax.set_xticks(ticks); ax.set_yticks(ticks)
435
  ax.set_aspect("equal", adjustable="box")
436
 
437
  fmt = FuncFormatter(lambda x, _: f"{x:.2f}")
438
  ax.xaxis.set_major_formatter(fmt); ax.yaxis.set_major_formatter(fmt)
439
-
440
  ax.set_xlabel(xlabel, fontweight="bold", fontsize=10, color="black")
441
  ax.set_ylabel(ylabel, fontweight="bold", fontsize=10, color="black")
442
  ax.tick_params(labelsize=6, colors="black")
443
-
444
  ax.grid(True, linestyle=":", alpha=0.3)
445
- for spine in ax.spines.values():
446
- spine.set_linewidth(1.1); spine.set_color("#444")
447
-
448
  fig.subplots_adjust(left=0.16, bottom=0.16, right=0.98, top=0.98)
449
  return fig
450
 
451
  # =========================
452
- # Track plot (Plotly)
453
  # =========================
454
- def track_plot(df, include_actual=True):
455
  depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
456
  if depth_col is not None:
457
- y = pd.Series(df[depth_col]).astype(float); ylab = depth_col
458
- y_range = [float(y.max()), float(y.min())]
459
  else:
460
  y = pd.Series(np.arange(1, len(df) + 1)); ylab = "Point Index"
461
- y_range = [float(y.max()), float(y.min())]
 
462
 
463
- x_series = pd.Series(df.get(PRED_COL, pd.Series(dtype=float))).astype(float)
464
- if include_actual and TARGET in df.columns:
465
- x_series = pd.concat([x_series, pd.Series(df[TARGET]).astype(float)], ignore_index=True)
466
  x_lo, x_hi = float(x_series.min()), float(x_series.max())
467
- x_pad = 0.03 * (x_hi - x_lo if x_hi > x_lo else 1.0)
468
- xmin, xmax = x_lo - x_pad, x_hi + x_pad
469
- tick0 = _nice_tick0(xmin, step=max((xmax - xmin) / 10.0, 0.1))
 
 
 
 
 
 
470
 
471
  fig = go.Figure()
472
- if PRED_COL in df.columns:
473
  fig.add_trace(go.Scatter(
474
- x=df[PRED_COL], y=y, mode="lines",
475
- line=dict(color=COLORS["pred"], width=1.8),
476
- name=PRED_COL,
477
- hovertemplate=f"{PRED_COL}: "+"%{x:.0f}<br>"+ylab+": %{y}<extra></extra>"
478
  ))
479
- if include_actual and TARGET in df.columns:
480
  fig.add_trace(go.Scatter(
481
- x=df[TARGET], y=y, mode="lines",
482
- line=dict(color=COLORS["actual"], width=2.0, dash="dot"),
483
- name=f"{TARGET} (actual)",
484
- hovertemplate=f"{TARGET}: "+"%{x:.0f}<br>"+ylab+": %{y}<extra></extra>"
 
485
  ))
486
 
487
  fig.update_layout(
488
- height=TRACK_H, width=TRACK_W, autosize=False,
489
- paper_bgcolor="#fff", plot_bgcolor="#fff",
490
  margin=dict(l=64, r=16, t=36, b=48), hovermode="closest",
491
  font=dict(size=FONT_SZ, color="#000"),
492
  legend=dict(x=0.98, y=0.05, xanchor="right", yanchor="bottom",
493
  bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1),
494
- legend_title_text=""
 
495
  )
496
  fig.update_xaxes(
497
- title_text="Ts (μs/ft)",
498
- title_font=dict(size=20, family=BOLD_FONT, color="#000"),
499
- tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
500
- side="top", range=[xmin, xmax],
501
- ticks="outside", tickformat=",.0f", tickmode="auto", tick0=tick0,
502
  showline=True, linewidth=1.2, linecolor="#444", mirror=True,
503
  showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
504
  )
505
  fig.update_yaxes(
506
- title_text=ylab,
507
- title_font=dict(size=20, family=BOLD_FONT, color="#000"),
508
  tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
509
- range=y_range, ticks="outside",
510
- showline=True, linewidth=1.2, linecolor="#444", mirror=True,
511
  showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
512
  )
513
  return fig
514
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
515
  # ---------- Preview (matplotlib) ----------
516
  def preview_tracks(df: pd.DataFrame, cols: list[str]):
517
- """
518
- Quick-look multi-track preview:
519
- - one subplot per selected column
520
- - distinct stable colors per column
521
- - shared & reversed Y-axis (Depth downwards)
522
- """
523
  cols = [c for c in cols if c in df.columns]
524
  n = len(cols)
525
  if n == 0:
526
  fig, ax = plt.subplots(figsize=(4, 2))
527
- ax.text(0.5, 0.5, "No selected columns", ha="center", va="center")
528
- ax.axis("off")
529
  return fig
530
 
531
- # Depth or fallback to index
532
  depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
533
  if depth_col is not None:
534
- idx = pd.to_numeric(df[depth_col], errors="coerce")
535
- y_label = depth_col
536
  else:
537
- idx = pd.Series(np.arange(1, len(df) + 1))
538
- y_label = "Point Index"
539
 
540
- y_min, y_max = float(idx.min()), float(idx.max())
541
-
542
- # Stable qualitative palette
543
  cmap = plt.get_cmap("tab20")
544
  col_colors = {col: cmap(i % cmap.N) for i, col in enumerate(cols)}
545
-
546
  fig, axes = plt.subplots(1, n, figsize=(2.3 * n, 7.0), sharey=True, dpi=100)
547
- if n == 1:
548
- axes = [axes]
549
-
550
  for i, (ax, col) in enumerate(zip(axes, cols)):
551
  x = pd.to_numeric(df[col], errors="coerce")
552
  ax.plot(x, idx, '-', lw=1.8, color=col_colors[col])
553
- ax.set_xlabel(col)
554
- ax.xaxis.set_label_position('top')
555
- ax.xaxis.tick_top()
556
- ax.set_ylim(y_max, y_min) # reversed Y (Depth down)
557
  ax.grid(True, linestyle=":", alpha=0.3)
558
-
559
- if i == 0:
560
- ax.set_ylabel(y_label)
561
- else:
562
- ax.tick_params(labelleft=False)
563
- ax.set_ylabel("")
564
-
565
  fig.tight_layout()
566
  return fig
567
 
568
  # =========================
569
- # Load model + meta
570
  # =========================
571
- def ensure_model() -> Path|None:
572
- for p in [DEFAULT_MODEL, *MODEL_FALLBACKS]:
573
- if p.exists() and p.stat().st_size > 0: return p
574
- url = os.environ.get("MODEL_URL", "")
575
- if not url: return None
576
- try:
577
- import requests
578
- DEFAULT_MODEL.parent.mkdir(parents=True, exist_ok=True)
579
- with requests.get(url, stream=True, timeout=30) as r:
580
- r.raise_for_status()
581
- with open(DEFAULT_MODEL, "wb") as f:
582
- for chunk in r.iter_content(1<<20):
583
- if chunk: f.write(chunk)
584
- return DEFAULT_MODEL
585
- except Exception:
586
- return None
587
 
588
- mpath = ensure_model()
589
- if not mpath:
590
- st.error("Model not found. Upload models/ts_model.joblib (or set MODEL_URL).")
591
- st.stop()
592
  try:
593
- model = load_model(str(mpath))
 
594
  except Exception as e:
595
- st.error(f"Failed to load model: {e}")
596
- st.stop()
597
 
598
- # Load meta (prefer Ts-specific)
599
- meta = {}
600
- meta_candidates = [MODELS_DIR / "ts_meta.json", MODELS_DIR / "meta.json", MODELS_DIR / "ym_meta.json"]
601
- meta_path = next((p for p in meta_candidates if p.exists()), None)
602
- if meta_path:
 
 
 
603
  try:
604
- meta = json.loads(meta_path.read_text(encoding="utf-8"))
605
- FEATURES = meta.get("features", FEATURES)
606
- TARGET = meta.get("target", TARGET)
607
- PRED_COL = meta.get("pred_col", PRED_COL)
608
- except Exception as e:
609
- st.warning(f"Could not parse meta file ({meta_path.name}): {e}")
610
-
611
- # Optional: version banner
612
- if STRICT_VERSION_CHECK and meta.get("versions"):
 
 
 
 
 
 
 
 
 
 
 
 
613
  import numpy as _np, sklearn as _skl
614
- mv = meta["versions"]; msg=[]
615
- if mv.get("numpy") and mv["numpy"] != _np.__version__:
616
- msg.append(f"NumPy {mv['numpy']} expected, running {_np.__version__}")
617
- if mv.get("scikit_learn") and mv["scikit_learn"] != _skl.__version__:
618
- msg.append(f"scikit-learn {mv['scikit_learn']} expected, running {_skl.__version__}")
619
- if msg:
620
- st.warning("Environment mismatch: " + " | ".join(msg))
 
621
 
622
  # =========================
623
  # Session state
@@ -636,7 +659,7 @@ st.session_state.setdefault("show_preview_modal", False)
636
  # =========================
637
  st.sidebar.markdown(f"""
638
  <div class="centered-container">
639
- <img src="{inline_logo('logo.png')}" style="width: 200px; height: auto; object-fit: contain;">
640
  <div style='font-weight:800;font-size:1.2rem;'>{APP_NAME}</div>
641
  <div style='color:#667085;'>{TAGLINE}</div>
642
  </div>
@@ -665,12 +688,13 @@ def sticky_header(title, message):
665
  # =========================
666
  if st.session_state.app_step == "intro":
667
  st.header("Welcome!")
668
- st.markdown("This software is developed by *Smart Thinking AI-Solutions Team* to estimate **Shear Slowness (Ts)** from drilling data.")
669
  st.subheader("How It Works")
670
  st.markdown(
671
- "1) **Upload your data to build the case and preview the model performance.** \n"
672
- "2) Click **Run Model** to compute metrics and plots. \n"
673
- "3) **Proceed to Validation** (with actual Ts) or **Proceed to Prediction** (no Ts)."
 
674
  )
675
  if st.button("Start Showcase", type="primary"):
676
  st.session_state.app_step = "dev"; st.rerun()
@@ -715,62 +739,67 @@ if st.session_state.app_step == "dev":
715
  st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training/training2 and Test/Testing/testing2 sheets.</div>', unsafe_allow_html=True)
716
  st.stop()
717
 
718
- tr = _normalize_columns(book[sh_train].copy(), FEATURES, TARGET)
719
- te = _normalize_columns(book[sh_test].copy(), FEATURES, TARGET)
720
-
721
- if not (ensure_cols(tr, FEATURES+[TARGET]) and ensure_cols(te, FEATURES+[TARGET])):
722
- st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True)
723
- st.stop()
724
 
725
- # Predict with exactly the training feature order
726
- tr[PRED_COL] = model.predict(_make_X(tr, FEATURES))
727
- te[PRED_COL] = model.predict(_make_X(te, FEATURES))
 
 
728
 
729
  st.session_state.results["Train"]=tr; st.session_state.results["Test"]=te
730
- st.session_state.results["m_train"]={
731
- "R": pearson_r(tr[TARGET], tr[PRED_COL]),
732
- "RMSE": rmse(tr[TARGET], tr[PRED_COL]),
733
- "MAE": mean_absolute_error(tr[TARGET], tr[PRED_COL])
734
- }
735
- st.session_state.results["m_test"]={
736
- "R": pearson_r(te[TARGET], te[PRED_COL]),
737
- "RMSE": rmse(te[TARGET], te[PRED_COL]),
738
- "MAE": mean_absolute_error(te[TARGET], te[PRED_COL])
739
- }
740
 
741
  tr_min = tr[FEATURES].min().to_dict(); tr_max = tr[FEATURES].max().to_dict()
742
  st.session_state.train_ranges = {f:(float(tr_min[f]), float(tr_max[f])) for f in FEATURES}
743
  st.markdown('<div class="st-message-box st-success">Case has been built and results are displayed below.</div>', unsafe_allow_html=True)
744
 
745
- def _dev_block(df, m):
746
  c1,c2,c3 = st.columns(3)
747
- c1.metric("R", f"{m['R']:.3f}"); c2.metric("RMSE", f"{m['RMSE']:.2f}"); c3.metric("MAE", f"{m['MAE']:.2f}")
 
 
 
 
 
748
  st.markdown("""
749
- <div style='text-align: left; font-size: 0.8em; color: #6b7280; margin-top: -16px; margin-bottom: 8px;'>
750
- <strong>R:</strong> Pearson Correlation Coefficient<br>
751
- <strong>RMSE:</strong> Root Mean Square Error<br>
752
- <strong>MAE:</strong> Mean Absolute Error
753
  </div>
754
  """, unsafe_allow_html=True)
755
- col_track, col_cross = st.columns([2, 3], gap="large")
756
- with col_track:
757
- st.plotly_chart(track_plot(df, include_actual=True), use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
758
- with col_cross:
759
- st.pyplot(cross_plot_static(df[TARGET], df[PRED_COL]), use_container_width=False)
 
 
 
 
760
 
761
  if "Train" in st.session_state.results or "Test" in st.session_state.results:
762
  tab1, tab2 = st.tabs(["Training", "Testing"])
763
  if "Train" in st.session_state.results:
764
- with tab1: _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
 
765
  if "Test" in st.session_state.results:
766
- with tab2: _dev_block(st.session_state.results["Test"], st.session_state.results["m_test"])
 
767
  render_export_button(phase_key="dev")
768
 
769
  # =========================
770
- # VALIDATION (with actual Ts)
771
  # =========================
772
  if st.session_state.app_step == "validate":
773
- st.sidebar.header("Validate the Model")
774
  up = st.sidebar.file_uploader("Upload Validation Excel", type=["xlsx","xls"])
775
  if up is not None:
776
  book = read_book_bytes(up.getvalue())
@@ -783,15 +812,18 @@ if st.session_state.app_step == "validate":
783
  if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
784
  if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
785
 
786
- sticky_header("Validate the Model", "Upload a dataset with the same **features** and **Ts** to evaluate performance.")
787
 
788
  if go_btn and up is not None:
789
  book = read_book_bytes(up.getvalue())
790
  name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
791
- df = _normalize_columns(book[name].copy(), FEATURES, TARGET)
792
- if not ensure_cols(df, FEATURES+[TARGET]):
 
793
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
794
- df[PRED_COL] = model.predict(_make_X(df, FEATURES))
 
 
795
  st.session_state.results["Validate"]=df
796
 
797
  ranges = st.session_state.train_ranges; oor_pct = 0.0; tbl=None
@@ -805,48 +837,48 @@ if st.session_state.app_step == "validate":
805
  tbl["Violations"] = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).loc[any_viol].apply(
806
  lambda r:", ".join([c for c,v in r.items() if v]), axis=1
807
  )
808
- st.session_state.results["m_val"]={
809
- "R": pearson_r(df[TARGET], df[PRED_COL]),
810
- "RMSE": rmse(df[TARGET], df[PRED_COL]),
811
- "MAE": mean_absolute_error(df[TARGET], df[PRED_COL])
812
- }
813
- st.session_state.results["sv_val"]={"n":len(df), "pred_min":float(df[PRED_COL].min()), "pred_max":float(df[PRED_COL].max()), "oor":oor_pct}
814
  st.session_state.results["oor_tbl"]=tbl
815
 
816
  if "Validate" in st.session_state.results:
817
- m = st.session_state.results["m_val"]
 
818
  c1,c2,c3 = st.columns(3)
819
- c1.metric("R", f"{m['R']:.3f}"); c2.metric("RMSE", f"{m['RMSE']:.2f}"); c3.metric("MAE", f"{m['MAE']:.2f}")
820
- st.markdown("""
821
- <div style='text-align: left; font-size: 0.8em; color: #6b7280; margin-top: -16px; margin-bottom: 8px;'>
822
- <strong>R:</strong> Pearson Correlation Coefficient<br>
823
- <strong>RMSE:</strong> Root Mean Square Error<br>
824
- <strong>MAE:</strong> Mean Absolute Error
825
- </div>
826
- """, unsafe_allow_html=True)
827
 
828
- col_track, col_cross = st.columns([2, 3], gap="large")
829
- with col_track:
830
- st.plotly_chart(track_plot(st.session_state.results["Validate"], include_actual=True),
831
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
832
- with col_cross:
833
- st.pyplot(cross_plot_static(st.session_state.results["Validate"][TARGET],
834
- st.session_state.results["Validate"][PRED_COL]),
835
  use_container_width=False)
 
 
 
 
 
 
 
836
 
837
  render_export_button(phase_key="validate")
838
 
839
  sv = st.session_state.results["sv_val"]
840
  if sv["oor"] > 0: st.markdown('<div class="st-message-box st-warning">Some inputs fall outside **training min–max** ranges.</div>', unsafe_allow_html=True)
841
  if st.session_state.results["oor_tbl"] is not None:
842
- st.write("*Out-of-range rows (vs. Training min–max):*")
843
- df_centered_rounded(st.session_state.results["oor_tbl"])
844
 
845
  # =========================
846
- # PREDICTION (no actual Ts)
847
  # =========================
848
  if st.session_state.app_step == "predict":
849
- st.sidebar.header("Prediction (No Actual Ts)")
850
  up = st.sidebar.file_uploader("Upload Prediction Excel", type=["xlsx","xls"])
851
  if up is not None:
852
  book = read_book_bytes(up.getvalue())
@@ -858,14 +890,15 @@ if st.session_state.app_step == "predict":
858
  go_btn = st.sidebar.button("Predict", type="primary", use_container_width=True)
859
  if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
860
 
861
- sticky_header("Prediction", "Upload a dataset with the feature columns (no **Ts**).")
862
 
863
  if go_btn and up is not None:
864
  book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
865
- df = _normalize_columns(book[name].copy(), FEATURES, TARGET)
866
  if not ensure_cols(df, FEATURES):
867
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
868
- df[PRED_COL] = model.predict(_make_X(df, FEATURES))
 
869
  st.session_state.results["PredictOnly"]=df
870
 
871
  ranges = st.session_state.train_ranges; oor_pct = 0.0
@@ -874,10 +907,10 @@ if st.session_state.app_step == "predict":
874
  oor_pct = float(any_viol.mean()*100.0)
875
  st.session_state.results["sv_pred"]={
876
  "n":len(df),
877
- "pred_min":float(df[PRED_COL].min()),
878
- "pred_max":float(df[PRED_COL].max()),
879
- "pred_mean":float(df[PRED_COL].mean()),
880
- "pred_std":float(df[PRED_COL].std(ddof=0)),
881
  "oor":oor_pct
882
  }
883
 
@@ -887,16 +920,22 @@ if st.session_state.app_step == "predict":
887
  col_left, col_right = st.columns([2,3], gap="large")
888
  with col_left:
889
  table = pd.DataFrame({
890
- "Metric": ["# points","Pred min","Pred max","Pred mean","Pred std","OOR %"],
891
- "Value": [sv["n"], round(sv["pred_min"],3), round(sv["pred_max"],3),
892
- round(sv["pred_mean"],3), round(sv["pred_std"],3), f'{sv["oor"]:.1f}%']
893
  })
894
  st.markdown('<div class="st-message-box st-success">Predictions ready ✓</div>', unsafe_allow_html=True)
895
  df_centered_rounded(table, hide_index=True)
896
  st.caption("**★ OOR** = % of rows whose input features fall outside the training min–max range.")
897
  with col_right:
898
- st.plotly_chart(track_plot(df, include_actual=False),
899
- use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
 
 
 
 
 
 
900
 
901
  render_export_button(phase_key="predict")
902
 
@@ -918,7 +957,7 @@ if st.session_state.show_preview_modal:
918
  tabs = st.tabs(names)
919
  for t, name in zip(tabs, names):
920
  with t:
921
- df = _normalize_columns(book_to_preview[name], FEATURES, TARGET)
922
  t1, t2 = st.tabs(["Tracks", "Summary"])
923
  with t1:
924
  st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)
 
1
+ # app.py — ST_GeoMech_SMW
2
 
3
  import io, json, os, base64, math
4
  from pathlib import Path
 
8
  import joblib
9
  from datetime import datetime
10
 
11
+ # Matplotlib (static)
12
  import matplotlib
13
  matplotlib.use("Agg")
14
  import matplotlib.pyplot as plt
 
18
  from sklearn.metrics import mean_squared_error, mean_absolute_error
19
 
20
  # =========================
21
+ # Constants / Config
22
  # =========================
23
+ APP_NAME = "ST_GeoMech_SMW"
24
+ TAGLINE = "Real-Time Upper/Lower Mud Weight (MW) Limits For Safe Drilling"
25
 
26
+ # Defaults (can be overridden by meta files)
27
+ FEATURES_DEFAULT = [
28
  "WOB (klbf)",
29
  "Torque (kft.lbf)",
30
  "SPP (psi)",
 
32
  "ROP (ft/h)",
33
  "Flow Rate (gpm)",
34
  ]
35
+
36
+ TARGET_BO_DEFAULT = "Breakout MW"
37
+ TARGET_BD_DEFAULT = "Breakdown MW"
38
+ PRED_BO = "BO_Pred"
39
+ PRED_BD = "BD_Pred"
40
+ X_UNITS = "MW (ppg)" # x-axis title for MW tracks/cross-plots
41
 
42
  MODELS_DIR = Path("models")
43
+ BO_MODEL_PATH = MODELS_DIR / "bo_model.joblib"
44
+ BD_MODEL_PATH = MODELS_DIR / "bd_model.joblib"
45
+ BO_META_PATH = MODELS_DIR / "bo_meta.json"
46
+ BD_META_PATH = MODELS_DIR / "bd_meta.json"
47
+
48
  MODEL_FALLBACKS = [MODELS_DIR / "model.joblib", MODELS_DIR / "model.pkl"]
49
+ COLORS = {
50
+ "pred_bo": "#1f77b4", # blue
51
+ "pred_bd": "#d62728", # red
52
+ "actual_bo": "#f2b702",# amber
53
+ "actual_bd": "#2ca02c",# green
54
+ "ref": "#5a5a5a"
55
+ }
56
 
 
57
  STRICT_VERSION_CHECK = False
58
 
59
+ # Plot sizing
60
  CROSS_W = 350
61
  CROSS_H = 350
62
  TRACK_H = 1000
 
71
  st.markdown("""
72
  <style>
73
  .brand-logo { width: 200px; height: auto; object-fit: contain; }
74
+ .centered-container { display:flex; flex-direction:column; align-items:center; text-align:center; }
75
+ .st-message-box { background:#f0f2f6; color:#333; padding:10px; border-radius:10px; border:1px solid #e6e9ef; }
76
+ .st-message-box.st-success { background:#d4edda; color:#155724; border-color:#c3e6cb; }
77
+ .st-message-box.st-warning { background:#fff3cd; color:#856404; border-color:#ffeeba; }
78
+ .st-message-box.st-error { background:#f8d7da; color:#721c24; border-color:#f5c6cb; }
79
  .main .block-container { overflow: unset !important; }
80
  div[data-testid="stVerticalBlock"] { overflow: unset !important; }
81
  div[data-testid="stExpander"] > details > summary {
 
88
  """, unsafe_allow_html=True)
89
 
90
  TABLE_CENTER_CSS = [
91
+ dict(selector="th", props=[("text-align","center")]),
92
+ dict(selector="td", props=[("text-align","center")]),
93
  ]
94
 
95
  # =========================
 
118
 
119
  st.sidebar.markdown(f"""
120
  <div class="centered-container">
121
+ <img src="{inline_logo('logo.png')}" class="brand-logo">
122
+ <div style='font-weight:800;font-size:1.2rem; margin-top:10px;'>{APP_NAME}</div>
123
  <div style='color:#667085;'>Smart Thinking • Secure Access</div>
124
  </div>
125
  """, unsafe_allow_html=True
 
161
  def read_book_bytes(b: bytes):
162
  return parse_excel(b) if b else {}
163
 
164
+ def _nice_tick0(xmin: float, step: float = 0.1) -> float:
165
+ return step * math.floor(xmin / step) if np.isfinite(xmin) else xmin
166
+
167
def df_centered_rounded(df: pd.DataFrame, hide_index=True, ndigits=2):
    """Render *df* in Streamlit with centered cells and numeric columns
    formatted to *ndigits* decimal places (input frame is not modified)."""
    view = df.copy()
    number_fmt = f"{{:.{ndigits}f}}"
    number_cols = view.select_dtypes(include=[np.number]).columns
    styled = view.style.format({col: number_fmt for col in number_cols})
    styled = styled.set_properties(**{"text-align": "center"})
    styled = styled.set_table_styles(TABLE_CENTER_CSS)
    st.dataframe(styled, use_container_width=True, hide_index=hide_index)
177
+
178
+ # ---------- Build X exactly as trained ----------
179
+ def _make_X(df: pd.DataFrame, features: list[str]) -> pd.DataFrame:
180
+ X = df.reindex(columns=features, copy=False)
181
+ for c in X.columns:
182
+ X[c] = pd.to_numeric(X[c], errors="coerce")
183
+ return X
184
+
185
+ # ---- Column name normalization (aliases) ----
186
+ def _build_alias_map(canonical_features: list[str], tgt_bo: str, tgt_bd: str) -> dict:
187
  def pick(expected_list, variants):
188
  for v in variants:
189
  if v in expected_list:
 
195
  can_SPP = pick(canonical_features, ["SPP (psi)", "SPP(psi)"])
196
  can_RPM = pick(canonical_features, ["RPM (1/min)", "RPM(1/min)"])
197
  can_ROP = pick(canonical_features, ["ROP (ft/h)", "ROP(ft/h)"])
198
+ can_FR = pick(canonical_features, ["Flow Rate (gpm)", "Flow Rate, gpm", "Flow Rate,gpm", "Flow Rate , gpm"])
199
  can_DEPTH = "Depth (ft)"
200
 
201
  alias = {
 
202
  "WOB (klbf)": can_WOB, "WOB, klbf": can_WOB, "WOB(klbf)": can_WOB, "WOB( klbf)": can_WOB,
203
  "Torque (kft.lbf)": can_TORQUE, "Torque(kft.lbf)": can_TORQUE, "TORQUE(kft.lbf)": can_TORQUE,
204
  "SPP (psi)": can_SPP, "SPP(psi)": can_SPP,
205
  "RPM (1/min)": can_RPM, "RPM(1/min)": can_RPM,
206
  "ROP (ft/h)": can_ROP, "ROP(ft/h)": can_ROP,
207
  "Flow Rate (gpm)": can_FR, "Flow Rate, gpm": can_FR, "Flow Rate,gpm": can_FR, "Flow Rate , gpm": can_FR,
 
 
 
208
  "Depth (ft)": can_DEPTH, "Depth, ft": can_DEPTH, "Depth(ft)": can_DEPTH, "DEPTH, ft": can_DEPTH,
209
 
210
+ # Targets aliases
211
+ "Breakout MW": tgt_bo, "BOMW": tgt_bo, "BO MW": tgt_bo,
212
+ "Breakdown MW": tgt_bd, "BDMW": tgt_bd, "BD MW": tgt_bd,
 
 
 
 
213
  }
214
  return alias
215
 
216
def _normalize_columns(df: pd.DataFrame, canonical_features: list[str], tgt_bo: str, tgt_bd: str) -> pd.DataFrame:
    """Rename known header variants (spacing/punctuation differences, target
    aliases) to the canonical feature/target names the models expect.
    Returns a renamed copy; *df* is not modified."""
    out = df.copy()
    # Light whitespace cleanup on the raw header strings.
    # NOTE(review): the replace() targets below look collapsed by the diff
    # renderer — as written the last two are no-ops; presumably the originals
    # were (",  " -> ", ") and ("  " -> " "). Confirm against the repository.
    out.columns = [str(c).strip().replace(" ,", ",").replace(", ", ", ").replace(" ", " ") for c in out.columns]
    alias = _build_alias_map(canonical_features, tgt_bo, tgt_bd)
    # Only rename headers that are present and actually differ from the target name.
    actual = {k: v for k, v in alias.items() if k in out.columns and k != v}
    return out.rename(columns=actual)
222
 
 
233
  if nm.lower() in low2orig: return low2orig[nm.lower()]
234
  return None
235
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  # === Excel export helpers =================================================
237
  def _excel_engine() -> str:
238
  try:
 
255
 
256
  def _summary_table(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
257
  cols = [c for c in cols if c in df.columns]
258
+ if not cols: return pd.DataFrame()
 
259
  tbl = (df[cols]
260
  .agg(['min','max','mean','std'])
261
  .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
 
289
  def _available_sections() -> list[str]:
290
  res = st.session_state.get("results", {})
291
  sections = []
292
+ if "Train" in res: sections += ["Training","Training_Metrics_BO","Training_Metrics_BD","Training_Summary"]
293
+ if "Test" in res: sections += ["Testing","Testing_Metrics_BO","Testing_Metrics_BD","Testing_Summary"]
294
+ if "Validate" in res: sections += ["Validation","Validation_Metrics_BO","Validation_Metrics_BD","Validation_Summary","Validation_OOR"]
295
  if "PredictOnly" in res: sections += ["Prediction","Prediction_Summary"]
296
  if st.session_state.get("train_ranges"): sections += ["Training_Ranges"]
297
  sections += ["Info"]
 
306
 
307
  if "Training" in selected and "Train" in res:
308
  _add_sheet(sheets, order, "Training", res["Train"], ndigits)
309
+ if "Training_Metrics_BO" in selected and res.get("m_train_bo"):
310
+ _add_sheet(sheets, order, "Training_Metrics_BO", pd.DataFrame([res["m_train_bo"]]), ndigits)
311
+ if "Training_Metrics_BD" in selected and res.get("m_train_bd"):
312
+ _add_sheet(sheets, order, "Training_Metrics_BD", pd.DataFrame([res["m_train_bd"]]), ndigits)
313
  if "Training_Summary" in selected and "Train" in res:
314
+ tr_cols = st.session_state["FEATURES"] + [c for c in [st.session_state["TARGET_BO"], st.session_state["TARGET_BD"], PRED_BO, PRED_BD] if c in res["Train"].columns]
315
  _add_sheet(sheets, order, "Training_Summary", _summary_table(res["Train"], tr_cols), ndigits)
316
 
317
  if "Testing" in selected and "Test" in res:
318
  _add_sheet(sheets, order, "Testing", res["Test"], ndigits)
319
+ if "Testing_Metrics_BO" in selected and res.get("m_test_bo"):
320
+ _add_sheet(sheets, order, "Testing_Metrics_BO", pd.DataFrame([res["m_test_bo"]]), ndigits)
321
+ if "Testing_Metrics_BD" in selected and res.get("m_test_bd"):
322
+ _add_sheet(sheets, order, "Testing_Metrics_BD", pd.DataFrame([res["m_test_bd"]]), ndigits)
323
  if "Testing_Summary" in selected and "Test" in res:
324
+ te_cols = st.session_state["FEATURES"] + [c for c in [st.session_state["TARGET_BO"], st.session_state["TARGET_BD"], PRED_BO, PRED_BD] if c in res["Test"].columns]
325
  _add_sheet(sheets, order, "Testing_Summary", _summary_table(res["Test"], te_cols), ndigits)
326
 
327
  if "Validation" in selected and "Validate" in res:
328
  _add_sheet(sheets, order, "Validation", res["Validate"], ndigits)
329
+ if "Validation_Metrics_BO" in selected and res.get("m_val_bo"):
330
+ _add_sheet(sheets, order, "Validation_Metrics_BO", pd.DataFrame([res["m_val_bo"]]), ndigits)
331
+ if "Validation_Metrics_BD" in selected and res.get("m_val_bd"):
332
+ _add_sheet(sheets, order, "Validation_Metrics_BD", pd.DataFrame([res["m_val_bd"]]), ndigits)
333
  if "Validation_Summary" in selected and res.get("sv_val"):
334
  _add_sheet(sheets, order, "Validation_Summary", pd.DataFrame([res["sv_val"]]), ndigits)
335
  if "Validation_OOR" in selected and isinstance(res.get("oor_tbl"), pd.DataFrame) and not res["oor_tbl"].empty:
 
348
  info = pd.DataFrame([
349
  {"Key": "AppName", "Value": APP_NAME},
350
  {"Key": "Tagline", "Value": TAGLINE},
351
+ {"Key": "Targets", "Value": f'{st.session_state["TARGET_BO"]}, {st.session_state["TARGET_BD"]}'},
352
+ {"Key": "PredColumns","Value": f'{PRED_BO}, {PRED_BD}'},
353
+ {"Key": "Features", "Value": ", ".join(st.session_state["FEATURES"])},
354
  {"Key": "ExportedAt", "Value": datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
355
  ])
356
  _add_sheet(sheets, order, "Info", info, ndigits)
 
367
  if do_autofit:
368
  _excel_autofit(writer, sheet, df)
369
  bio.seek(0)
370
+ fname = f"MW_Export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
371
  return bio.getvalue(), fname, order
372
 
 
373
  def render_export_button(phase_key: str) -> None:
374
  res = st.session_state.get("results", {})
375
  if not res: return
 
382
  options=options,
383
  default=[],
384
  placeholder="Choose option(s)",
 
385
  key=f"sheets_{phase_key}",
386
  )
387
 
 
390
  st.download_button(
391
  label="⬇️ Export Excel",
392
  data=b"",
393
+ file_name="MW_Export.xlsx",
394
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
395
  disabled=True,
396
  key=f"download_{phase_key}",
 
403
  st.download_button(
404
  "⬇️ Export Excel",
405
  data=(data or b""),
406
+ file_name=(fname or "MW_Export.xlsx"),
407
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
408
  disabled=(data is None),
409
  key=f"download_{phase_key}",
410
  )
411
 
412
  # =========================
413
+ # Cross plots (Matplotlib)
414
  # =========================
415
+ def cross_plot_static(actual, pred, xlabel, ylabel, color="#1f77b4"):
416
  a = pd.Series(actual, dtype=float)
417
  p = pd.Series(pred, dtype=float)
418
 
 
420
  hi = float(max(a.max(), p.max()))
421
  pad = 0.03 * (hi - lo if hi > lo else 1.0)
422
  lo2, hi2 = lo - pad, hi + pad
 
423
  ticks = np.linspace(lo2, hi2, 5)
424
 
425
  dpi = 110
426
+ fig, ax = plt.subplots(figsize=(CROSS_W/dpi, CROSS_H/dpi), dpi=dpi, constrained_layout=False)
427
+ ax.scatter(a, p, s=14, c=color, alpha=0.9, linewidths=0)
 
428
  ax.plot([lo2, hi2], [lo2, hi2], linestyle="--", linewidth=1.2, color=COLORS["ref"])
429
 
430
+ ax.set_xlim(lo2, hi2); ax.set_ylim(lo2, hi2)
 
431
  ax.set_xticks(ticks); ax.set_yticks(ticks)
432
  ax.set_aspect("equal", adjustable="box")
433
 
434
  fmt = FuncFormatter(lambda x, _: f"{x:.2f}")
435
  ax.xaxis.set_major_formatter(fmt); ax.yaxis.set_major_formatter(fmt)
 
436
  ax.set_xlabel(xlabel, fontweight="bold", fontsize=10, color="black")
437
  ax.set_ylabel(ylabel, fontweight="bold", fontsize=10, color="black")
438
  ax.tick_params(labelsize=6, colors="black")
 
439
  ax.grid(True, linestyle=":", alpha=0.3)
440
+ for s in ax.spines.values(): s.set_linewidth(1.1); s.set_color("#444")
 
 
441
  fig.subplots_adjust(left=0.16, bottom=0.16, right=0.98, top=0.98)
442
  return fig
443
 
444
  # =========================
445
+ # Track plots (Plotly)
446
  # =========================
447
+ def _depth_series(df):
448
  depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
449
  if depth_col is not None:
450
+ y = pd.to_numeric(df[depth_col], errors="coerce"); ylab = depth_col
451
+ rng = [float(y.max()), float(y.min())] # reversed
452
  else:
453
  y = pd.Series(np.arange(1, len(df) + 1)); ylab = "Point Index"
454
+ rng = [float(y.max()), float(y.min())]
455
+ return y, ylab, rng
456
 
457
def _x_range_for_tracks(df, cols):
    """Padded x-range shared by all requested MW columns, plus a 'nice'
    first tick for the axis (via _nice_tick0)."""
    present = [pd.to_numeric(df[c], errors="coerce") for c in cols if c in df]
    merged = pd.concat(present, ignore_index=True)
    lo = float(merged.min())
    hi = float(merged.max())
    # 3% padding either side; degenerate (flat) data gets a unit span.
    span = hi - lo if hi > lo else 1.0
    pad = 0.03 * span
    xmin = lo - pad
    xmax = hi + pad
    tick0 = _nice_tick0(xmin, step=max((xmax - xmin) / 10.0, 0.1))
    return xmin, xmax, tick0
464
+
465
def track_plot_single(df, pred_col, actual_col=None, title_suffix=""):
    """Depth-track plot for one MW limit: predicted line plus an optional
    dotted 'actual' overlay.

    Parameters
    ----------
    df : DataFrame holding the prediction column (and optionally the actual).
    pred_col : PRED_BO or PRED_BD — also selects the line colour.
    actual_col : actual-target column name; drawn dotted when present in df.
    title_suffix : figure title (e.g. "Breakout").
    """
    y, ylab, y_range = _depth_series(df)
    # The x-range is computed over predicted + actual together so both series
    # share one scale.
    cols = [pred_col] + ([actual_col] if actual_col and actual_col in df.columns else [])
    xmin, xmax, tick0 = _x_range_for_tracks(df, cols)

    fig = go.Figure()
    if pred_col in df.columns:
        fig.add_trace(go.Scatter(
            x=df[pred_col], y=y, mode="lines",
            # BO predictions use the blue entry, BD the red one (see COLORS).
            line=dict(color=COLORS["pred_bo"] if pred_col==PRED_BO else COLORS["pred_bd"], width=1.8),
            name=pred_col,
            hovertemplate=f"{pred_col}: "+"%{x:.2f}<br>"+ylab+": %{y}<extra></extra>"
        ))
    if actual_col and actual_col in df.columns:
        fig.add_trace(go.Scatter(
            x=df[actual_col], y=y, mode="lines",
            line=dict(color=COLORS["actual_bo"] if actual_col==st.session_state["TARGET_BO"] else COLORS["actual_bd"],
                      width=2.0, dash="dot"),
            name=f"{actual_col} (actual)",
            hovertemplate=f"{actual_col}: "+"%{x:.2f}<br>"+ylab+": %{y}<extra></extra>"
        ))

    fig.update_layout(
        height=TRACK_H, width=TRACK_W, autosize=False, paper_bgcolor="#fff", plot_bgcolor="#fff",
        margin=dict(l=64, r=16, t=36, b=48), hovermode="closest",
        font=dict(size=FONT_SZ, color="#000"),
        legend=dict(x=0.98, y=0.05, xanchor="right", yanchor="bottom",
                    bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1),
        legend_title_text="",
        title=title_suffix
    )
    # x-axis on top, like a conventional well-log track.
    fig.update_xaxes(
        title_text=X_UNITS, title_font=dict(size=20, family=BOLD_FONT, color="#000"),
        tickfont=dict(size=15, family=BOLD_FONT, color="#000"), side="top",
        range=[xmin, xmax], ticks="outside", tickformat=",.2f", tickmode="auto", tick0=tick0,
        showline=True, linewidth=1.2, linecolor="#444", mirror=True,
        showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
    )
    # y_range arrives reversed from _depth_series so depth increases downward.
    fig.update_yaxes(
        title_text=ylab, title_font=dict(size=20, family=BOLD_FONT, color="#000"),
        tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
        range=y_range, ticks="outside", showline=True, linewidth=1.2, linecolor="#444", mirror=True,
        showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
    )
    return fig
510
 
511
def track_plot_combined(df):
    """Overlay BO & BD predictions (+ actuals if present) on same depth axis."""
    y, ylab, y_range = _depth_series(df)
    # Only columns actually present contribute to the shared x-range.
    cols = [c for c in [PRED_BO, PRED_BD, st.session_state["TARGET_BO"], st.session_state["TARGET_BD"]] if c in df]
    xmin, xmax, tick0 = _x_range_for_tracks(df, cols)
    fig = go.Figure()

    # Breakout pair: solid predicted line, dotted actual (when available).
    if PRED_BO in df.columns:
        fig.add_trace(go.Scatter(x=df[PRED_BO], y=y, mode="lines",
                                 line=dict(color=COLORS["pred_bo"], width=1.8), name=PRED_BO,
                                 hovertemplate=f"{PRED_BO}: "+"%{x:.2f}<br>"+ylab+": %{y}<extra></extra>"))
    if st.session_state["TARGET_BO"] in df.columns:
        col = st.session_state["TARGET_BO"]
        fig.add_trace(go.Scatter(x=df[col], y=y, mode="lines",
                                 line=dict(color=COLORS["actual_bo"], width=2.0, dash="dot"), name=f"{col} (actual)",
                                 hovertemplate=f"{col}: "+"%{x:.2f}<br>"+ylab+": %{y}<extra></extra>"))

    # Breakdown pair: same pattern with the BD colour pair.
    if PRED_BD in df.columns:
        fig.add_trace(go.Scatter(x=df[PRED_BD], y=y, mode="lines",
                                 line=dict(color=COLORS["pred_bd"], width=1.8), name=PRED_BD,
                                 hovertemplate=f"{PRED_BD}: "+"%{x:.2f}<br>"+ylab+": %{y}<extra></extra>"))
    if st.session_state["TARGET_BD"] in df.columns:
        col = st.session_state["TARGET_BD"]
        fig.add_trace(go.Scatter(x=df[col], y=y, mode="lines",
                                 line=dict(color=COLORS["actual_bd"], width=2.0, dash="dot"), name=f"{col} (actual)",
                                 hovertemplate=f"{col}: "+"%{x:.2f}<br>"+ylab+": %{y}<extra></extra>"))

    fig.update_layout(
        height=TRACK_H, width=TRACK_W, autosize=False, paper_bgcolor="#fff", plot_bgcolor="#fff",
        margin=dict(l=64, r=16, t=36, b=48), hovermode="closest",
        font=dict(size=FONT_SZ, color="#000"),
        legend=dict(x=0.98, y=0.05, xanchor="right", yanchor="bottom",
                    bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1),
        legend_title_text="", title="Combined (Breakout / Breakdown)"
    )
    # x-axis on top (well-log convention); y reversed so depth grows downward.
    fig.update_xaxes(title_text=X_UNITS, title_font=dict(size=20, family=BOLD_FONT, color="#000"),
                     tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
                     side="top", range=[xmin, xmax], ticks="outside",
                     tickformat=",.2f", tickmode="auto", tick0=tick0,
                     showline=True, linewidth=1.2, linecolor="#444", mirror=True,
                     showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True)
    fig.update_yaxes(title_text=ylab, title_font=dict(size=20, family=BOLD_FONT, color="#000"),
                     tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
                     range=y_range, ticks="outside", showline=True, linewidth=1.2, linecolor="#444",
                     mirror=True, showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True)
    return fig
557
+
558
  # ---------- Preview (matplotlib) ----------
559
def preview_tracks(df: pd.DataFrame, cols: list[str]):
    """Static (matplotlib) multi-track preview of the selected columns,
    plotted against a depth column when one exists, else the point index."""
    selected = [c for c in cols if c in df.columns]
    count = len(selected)
    if count == 0:
        fig, ax = plt.subplots(figsize=(4, 2))
        ax.text(0.5, 0.5, "No selected columns", ha="center", va="center")
        ax.axis("off")
        return fig

    depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
    if depth_col is None:
        y_vals = pd.Series(np.arange(1, len(df) + 1))
        y_label = "Point Index"
    else:
        y_vals = pd.to_numeric(df[depth_col], errors="coerce")
        y_label = depth_col

    palette = plt.get_cmap("tab20")
    color_of = {name: palette(k % palette.N) for k, name in enumerate(selected)}

    fig, axes = plt.subplots(1, count, figsize=(2.3 * count, 7.0), sharey=True, dpi=100)
    if count == 1:
        axes = [axes]

    for k, (ax, name) in enumerate(zip(axes, selected)):
        series = pd.to_numeric(df[name], errors="coerce")
        ax.plot(series, y_vals, '-', lw=1.8, color=color_of[name])
        ax.set_xlabel(name)
        ax.xaxis.set_label_position('top')
        ax.xaxis.tick_top()
        # Reversed limits: depth increases downward, log style.
        ax.set_ylim(float(y_vals.max()), float(y_vals.min()))
        ax.grid(True, linestyle=":", alpha=0.3)
        if k == 0:
            ax.set_ylabel(y_label)
        else:
            ax.tick_params(labelleft=False)
            ax.set_ylabel("")

    fig.tight_layout()
    return fig
587
 
588
  # =========================
589
+ # Load models + metas
590
  # =========================
591
+ def _ensure_file(p: Path) -> Path|None:
592
+ return p if (p.exists() and p.stat().st_size > 0) else None
593
+
594
+ bo_model_path = _ensure_file(BO_MODEL_PATH)
595
+ bd_model_path = _ensure_file(BD_MODEL_PATH)
596
+ if not (bo_model_path and bd_model_path):
597
+ st.error("Models not found. Place bo_model.joblib and bd_model.joblib in models/"); st.stop()
 
 
 
 
 
 
 
 
 
598
 
 
 
 
 
599
  try:
600
+ model_bo = load_model(str(bo_model_path))
601
+ model_bd = load_model(str(bd_model_path))
602
  except Exception as e:
603
+ st.error(f"Failed to load models: {e}"); st.stop()
 
604
 
605
+ # Defaults
606
+ FEATURES = FEATURES_DEFAULT[:]
607
+ TARGET_BO = TARGET_BO_DEFAULT
608
+ TARGET_BD = TARGET_BD_DEFAULT
609
+
610
+ # Meta overrides
611
+ def _load_meta(p: Path):
612
+ if not p.exists(): return {}
613
  try:
614
+ return json.loads(p.read_text(encoding="utf-8"))
615
+ except Exception:
616
+ return {}
617
+
618
+ meta_bo = _load_meta(BO_META_PATH)
619
+ meta_bd = _load_meta(BD_META_PATH)
620
+
621
+ # Use BO meta as primary feature source (or BD if BO missing metas)
622
+ if meta_bo.get("features"): FEATURES = meta_bo["features"]
623
+ elif meta_bd.get("features"): FEATURES = meta_bd["features"]
624
+
625
+ if meta_bo.get("target"): TARGET_BO = meta_bo["target"]
626
+ if meta_bd.get("target"): TARGET_BD = meta_bd["target"]
627
+
628
+ # Session constants for easy access elsewhere
629
+ st.session_state["FEATURES"] = FEATURES
630
+ st.session_state["TARGET_BO"] = TARGET_BO
631
+ st.session_state["TARGET_BD"] = TARGET_BD
632
+
633
+ # Optional: strict version banner
634
+ if STRICT_VERSION_CHECK:
635
  import numpy as _np, sklearn as _skl
636
+ msgs=[]
637
+ for nm, meta in [("BO", meta_bo), ("BD", meta_bd)]:
638
+ v = meta.get("versions", {})
639
+ if v.get("numpy") and v["numpy"] != _np.__version__:
640
+ msgs.append(f"[{nm}] NumPy {v['numpy']} expected, running {_np.__version__}")
641
+ if v.get("scikit_learn") and v["scikit_learn"] != _skl.__version__:
642
+ msgs.append(f"[{nm}] scikit-learn {v['scikit_learn']} expected, running {_skl.__version__}")
643
+ if msgs: st.warning("Environment mismatch: " + " | ".join(msgs))
644
 
645
  # =========================
646
  # Session state
 
659
  # =========================
660
  st.sidebar.markdown(f"""
661
  <div class="centered-container">
662
+ <img src="{inline_logo('logo.png')}" class="brand-logo">
663
  <div style='font-weight:800;font-size:1.2rem;'>{APP_NAME}</div>
664
  <div style='color:#667085;'>{TAGLINE}</div>
665
  </div>
 
688
  # =========================
689
  if st.session_state.app_step == "intro":
690
  st.header("Welcome!")
691
+ st.markdown("This software is developed by *Smart Thinking AI-Solutions Team* to estimate **Breakout** and **Breakdown** mud weight limits from drilling data.")
692
  st.subheader("How It Works")
693
  st.markdown(
694
+ "1) **Upload your data** and preview.\n"
695
+ "2) Click **Run Model** to compute metrics and plots (Train/Test).\n"
696
+ "3) Proceed to **Validation** (with actual BO/BD) or **Prediction** (no actuals).\n"
697
+ "4) Use the **Combined** tab to see both limits on one track."
698
  )
699
  if st.button("Start Showcase", type="primary"):
700
  st.session_state.app_step = "dev"; st.rerun()
 
739
  st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training/training2 and Test/Testing/testing2 sheets.</div>', unsafe_allow_html=True)
740
  st.stop()
741
 
742
+ # Normalize + ensure cols
743
+ tr = _normalize_columns(book[sh_train].copy(), FEATURES, TARGET_BO, TARGET_BD)
744
+ te = _normalize_columns(book[sh_test].copy(), FEATURES, TARGET_BO, TARGET_BD)
745
+ need = FEATURES + [TARGET_BO, TARGET_BD]
746
+ if not (ensure_cols(tr, need) and ensure_cols(te, need)):
747
+ st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
748
 
749
+ # Predict with exact training feature order
750
+ tr[PRED_BO] = model_bo.predict(_make_X(tr, FEATURES))
751
+ tr[PRED_BD] = model_bd.predict(_make_X(tr, FEATURES))
752
+ te[PRED_BO] = model_bo.predict(_make_X(te, FEATURES))
753
+ te[PRED_BD] = model_bd.predict(_make_X(te, FEATURES))
754
 
755
  st.session_state.results["Train"]=tr; st.session_state.results["Test"]=te
756
+ st.session_state.results["m_train_bo"]={"R": pearson_r(tr[TARGET_BO], tr[PRED_BO]), "RMSE": rmse(tr[TARGET_BO], tr[PRED_BO]), "MAE": mean_absolute_error(tr[TARGET_BO], tr[PRED_BO])}
757
+ st.session_state.results["m_train_bd"]={"R": pearson_r(tr[TARGET_BD], tr[PRED_BD]), "RMSE": rmse(tr[TARGET_BD], tr[PRED_BD]), "MAE": mean_absolute_error(tr[TARGET_BD], tr[PRED_BD])}
758
+ st.session_state.results["m_test_bo"] ={"R": pearson_r(te[TARGET_BO], te[PRED_BO]), "RMSE": rmse(te[TARGET_BO], te[PRED_BO]), "MAE": mean_absolute_error(te[TARGET_BO], te[PRED_BO])}
759
+ st.session_state.results["m_test_bd"] ={"R": pearson_r(te[TARGET_BD], te[PRED_BD]), "RMSE": rmse(te[TARGET_BD], te[PRED_BD]), "MAE": mean_absolute_error(te[TARGET_BD], te[PRED_BD])}
 
 
 
 
 
 
760
 
761
  tr_min = tr[FEATURES].min().to_dict(); tr_max = tr[FEATURES].max().to_dict()
762
  st.session_state.train_ranges = {f:(float(tr_min[f]), float(tr_max[f])) for f in FEATURES}
763
  st.markdown('<div class="st-message-box st-success">Case has been built and results are displayed below.</div>', unsafe_allow_html=True)
764
 
765
+ def _metrics_block(lbl, m):
766
  c1,c2,c3 = st.columns(3)
767
+ c1.metric(f"R ({lbl})", f"{m['R']:.3f}")
768
+ c2.metric(f"RMSE ({lbl})", f"{m['RMSE']:.2f}")
769
+ c3.metric(f"MAE ({lbl})", f"{m['MAE']:.2f}")
770
+
771
    def _dev_block(df, mbo, mbd):
        """Results panel for one split (Train or Test): BO & BD metric rows,
        then Breakout / Breakdown / Combined tabs with track plots and
        actual-vs-predicted cross plots.

        df  : split DataFrame holding actual targets and PRED_BO / PRED_BD.
        mbo : Breakout metrics dict with keys "R", "RMSE", "MAE".
        mbd : Breakdown metrics dict with the same keys.
        """
        _metrics_block("BO", mbo); _metrics_block("BD", mbd)
        # Small grey caption explaining the metric abbreviations.
        st.markdown("""
        <div style='text-align:left;font-size:0.8em;color:#6b7280;margin-top:-16px;margin-bottom:8px;'>
        <strong>R:</strong> Pearson correlation <strong>RMSE</strong> / <strong>MAE</strong> in MW units
        </div>
        """, unsafe_allow_html=True)
        t1, t2, t3 = st.tabs(["Breakout", "Breakdown", "Combined"])
        with t1:
            # Breakout: predicted-vs-depth track + actual-vs-predicted cross plot.
            st.plotly_chart(track_plot_single(df, PRED_BO, actual_col=TARGET_BO, title_suffix="Breakout"), use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
            st.pyplot(cross_plot_static(df[TARGET_BO], df[PRED_BO], xlabel=f"Actual {TARGET_BO}", ylabel=f"Predicted {TARGET_BO}", color=COLORS["pred_bo"]), use_container_width=False)
        with t2:
            # Breakdown: same pair of views with the BD model/colour.
            st.plotly_chart(track_plot_single(df, PRED_BD, actual_col=TARGET_BD, title_suffix="Breakdown"), use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
            st.pyplot(cross_plot_static(df[TARGET_BD], df[PRED_BD], xlabel=f"Actual {TARGET_BD}", ylabel=f"Predicted {TARGET_BD}", color=COLORS["pred_bd"]), use_container_width=False)
        with t3:
            # Both MW limits overlaid on a single depth track.
            st.plotly_chart(track_plot_combined(df), use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
787
 
788
  if "Train" in st.session_state.results or "Test" in st.session_state.results:
789
  tab1, tab2 = st.tabs(["Training", "Testing"])
790
  if "Train" in st.session_state.results:
791
+ with tab1:
792
+ _dev_block(st.session_state.results["Train"], st.session_state.results["m_train_bo"], st.session_state.results["m_train_bd"])
793
  if "Test" in st.session_state.results:
794
+ with tab2:
795
+ _dev_block(st.session_state.results["Test"], st.session_state.results["m_test_bo"], st.session_state.results["m_test_bd"])
796
  render_export_button(phase_key="dev")
797
 
798
  # =========================
799
+ # VALIDATION (with actual BO/BD)
800
  # =========================
801
  if st.session_state.app_step == "validate":
802
+ st.sidebar.header("Validate the Models")
803
  up = st.sidebar.file_uploader("Upload Validation Excel", type=["xlsx","xls"])
804
  if up is not None:
805
  book = read_book_bytes(up.getvalue())
 
812
  if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
813
  if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
814
 
815
+ sticky_header("Validate the Models", "Upload a dataset with the same **features** and **BO/BD MW** to evaluate performance.")
816
 
817
  if go_btn and up is not None:
818
  book = read_book_bytes(up.getvalue())
819
  name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
820
+ df = _normalize_columns(book[name].copy(), FEATURES, TARGET_BO, TARGET_BD)
821
+ need = FEATURES + [TARGET_BO, TARGET_BD]
822
+ if not ensure_cols(df, need):
823
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
824
+
825
+ df[PRED_BO] = model_bo.predict(_make_X(df, FEATURES))
826
+ df[PRED_BD] = model_bd.predict(_make_X(df, FEATURES))
827
  st.session_state.results["Validate"]=df
828
 
829
  ranges = st.session_state.train_ranges; oor_pct = 0.0; tbl=None
 
837
  tbl["Violations"] = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).loc[any_viol].apply(
838
  lambda r:", ".join([c for c,v in r.items() if v]), axis=1
839
  )
840
+
841
+ st.session_state.results["m_val_bo"]={"R": pearson_r(df[TARGET_BO], df[PRED_BO]), "RMSE": rmse(df[TARGET_BO], df[PRED_BO]), "MAE": mean_absolute_error(df[TARGET_BO], df[PRED_BO])}
842
+ st.session_state.results["m_val_bd"]={"R": pearson_r(df[TARGET_BD], df[PRED_BD]), "RMSE": rmse(df[TARGET_BD], df[PRED_BD]), "MAE": mean_absolute_error(df[TARGET_BD], df[PRED_BD])}
843
+ st.session_state.results["sv_val"]={"n":len(df), "bo_min":float(df[PRED_BO].min()), "bo_max":float(df[PRED_BO].max()),
844
+ "bd_min":float(df[PRED_BD].min()), "bd_max":float(df[PRED_BD].max()), "oor":oor_pct}
 
845
  st.session_state.results["oor_tbl"]=tbl
846
 
847
  if "Validate" in st.session_state.results:
848
+ df = st.session_state.results["Validate"]
849
+ m_bo, m_bd = st.session_state.results["m_val_bo"], st.session_state.results["m_val_bd"]
850
  c1,c2,c3 = st.columns(3)
851
+ c1.metric("R (BO)", f"{m_bo['R']:.3f}"); c2.metric("RMSE (BO)", f"{m_bo['RMSE']:.2f}"); c3.metric("MAE (BO)", f"{m_bo['MAE']:.2f}")
852
+ c1,c2,c3 = st.columns(3)
853
+ c1.metric("R (BD)", f"{m_bd['R']:.3f}"); c2.metric("RMSE (BD)", f"{m_bd['RMSE']:.2f}"); c3.metric("MAE (BD)", f"{m_bd['MAE']:.2f}")
854
+ st.markdown("<div style='text-align:left;font-size:0.8em;color:#6b7280;margin-top:-16px;margin-bottom:8px;'>R = Pearson correlation</div>", unsafe_allow_html=True)
 
 
 
 
855
 
856
+ t1, t2, t3 = st.tabs(["Breakout", "Breakdown", "Combined"])
857
+ with t1:
858
+ st.plotly_chart(track_plot_single(df, PRED_BO, actual_col=TARGET_BO, title_suffix="Breakout"),
859
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
860
+ st.pyplot(cross_plot_static(df[TARGET_BO], df[PRED_BO], f"Actual {TARGET_BO}", f"Predicted {TARGET_BO}", COLORS["pred_bo"]),
 
 
861
  use_container_width=False)
862
+ with t2:
863
+ st.plotly_chart(track_plot_single(df, PRED_BD, actual_col=TARGET_BD, title_suffix="Breakdown"),
864
+ use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
865
+ st.pyplot(cross_plot_static(df[TARGET_BD], df[PRED_BD], f"Actual {TARGET_BD}", f"Predicted {TARGET_BD}", COLORS["pred_bd"]),
866
+ use_container_width=False)
867
+ with t3:
868
+ st.plotly_chart(track_plot_combined(df), use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
869
 
870
  render_export_button(phase_key="validate")
871
 
872
  sv = st.session_state.results["sv_val"]
873
  if sv["oor"] > 0: st.markdown('<div class="st-message-box st-warning">Some inputs fall outside **training min–max** ranges.</div>', unsafe_allow_html=True)
874
  if st.session_state.results["oor_tbl"] is not None:
875
+ st.write("*Out-of-range rows (vs. Training min–max):*"); df_centered_rounded(st.session_state.results["oor_tbl"])
 
876
 
877
  # =========================
878
+ # PREDICTION (no actuals)
879
  # =========================
880
  if st.session_state.app_step == "predict":
881
+ st.sidebar.header("Prediction (No Actual BO/BD)")
882
  up = st.sidebar.file_uploader("Upload Prediction Excel", type=["xlsx","xls"])
883
  if up is not None:
884
  book = read_book_bytes(up.getvalue())
 
890
  go_btn = st.sidebar.button("Predict", type="primary", use_container_width=True)
891
  if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
892
 
893
+ sticky_header("Prediction", "Upload a dataset with **feature columns only** (no BO/BD actuals).")
894
 
895
  if go_btn and up is not None:
896
  book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
897
+ df = _normalize_columns(book[name].copy(), FEATURES, TARGET_BO, TARGET_BD)
898
  if not ensure_cols(df, FEATURES):
899
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
900
+ df[PRED_BO] = model_bo.predict(_make_X(df, FEATURES))
901
+ df[PRED_BD] = model_bd.predict(_make_X(df, FEATURES))
902
  st.session_state.results["PredictOnly"]=df
903
 
904
  ranges = st.session_state.train_ranges; oor_pct = 0.0
 
907
  oor_pct = float(any_viol.mean()*100.0)
908
  st.session_state.results["sv_pred"]={
909
  "n":len(df),
910
+ "bo_min":float(df[PRED_BO].min()), "bo_max":float(df[PRED_BO].max()),
911
+ "bd_min":float(df[PRED_BD].min()), "bd_max":float(df[PRED_BD].max()),
912
+ "bo_mean":float(df[PRED_BO].mean()), "bo_std":float(df[PRED_BO].std(ddof=0)),
913
+ "bd_mean":float(df[PRED_BD].mean()), "bd_std":float(df[PRED_BD].std(ddof=0)),
914
  "oor":oor_pct
915
  }
916
 
 
920
  col_left, col_right = st.columns([2,3], gap="large")
921
  with col_left:
922
  table = pd.DataFrame({
923
+ "Metric": ["# points","BO min","BO max","BO mean","BO std","BD min","BD max","BD mean","BD std","OOR %"],
924
+ "Value": [sv["n"], round(sv["bo_min"],2), round(sv["bo_max"],2), round(sv["bo_mean"],2), round(sv["bo_std"],2),
925
+ round(sv["bd_min"],2), round(sv["bd_max"],2), round(sv["bd_mean"],2), round(sv["bd_std"],2), f'{sv["oor"]:.1f}%']
926
  })
927
  st.markdown('<div class="st-message-box st-success">Predictions ready ✓</div>', unsafe_allow_html=True)
928
  df_centered_rounded(table, hide_index=True)
929
  st.caption("**★ OOR** = % of rows whose input features fall outside the training min–max range.")
930
  with col_right:
931
+ t1, t2 = st.tabs(["Breakout", "Breakdown"])
932
+ with t1:
933
+ st.plotly_chart(track_plot_single(df, PRED_BO, actual_col=None, title_suffix="Breakout"),
934
+ use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
935
+ with t2:
936
+ st.plotly_chart(track_plot_single(df, PRED_BD, actual_col=None, title_suffix="Breakdown"),
937
+ use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
938
+ st.plotly_chart(track_plot_combined(df), use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
939
 
940
  render_export_button(phase_key="predict")
941
 
 
957
  tabs = st.tabs(names)
958
  for t, name in zip(tabs, names):
959
  with t:
960
+ df = _normalize_columns(book_to_preview[name], FEATURES, TARGET_BO, TARGET_BD)
961
  t1, t2 = st.tabs(["Tracks", "Summary"])
962
  with t1:
963
  st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)