UCS2014 commited on
Commit
b4ad42d
·
verified ·
1 Parent(s): 1500dd7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +134 -276
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py — ST_Sonic_Ts (mirrors your Ym GUI, adapted for Shear Slowness Ts)
2
  import io, json, os, base64, math
3
  from pathlib import Path
4
  import streamlit as st
@@ -7,7 +7,7 @@ import numpy as np
7
  import joblib
8
  from datetime import datetime
9
 
10
- # Matplotlib for PREVIEW modal and the CROSS-PLOT (static)
11
  import matplotlib
12
  matplotlib.use("Agg")
13
  import matplotlib.pyplot as plt
@@ -19,14 +19,12 @@ from sklearn.metrics import mean_squared_error, mean_absolute_error
19
  # =========================
20
  # Constants (Ts variant)
21
  # =========================
22
- APP_NAME = "ST_Log_Sonic(Ts)"
23
  TAGLINE = "Real-Time Shear Slowness (Ts) Prediction"
24
 
25
- # Keep your drilling features the same (we'll normalize header variants below)
26
  FEATURES = ["WOB(klbf)", "TORQUE(kft.lbf)", "SPP(psi)", "RPM(1/min)", "ROP(ft/h)", "Flow Rate, gpm"]
27
-
28
- # Target and pred column for Ts
29
- TARGET = "Ts" # we'll map your file's "Ts,us/ft_Actual" → "Ts"
30
  PRED_COL = "Ts_Pred"
31
 
32
  MODELS_DIR = Path("models")
@@ -34,8 +32,10 @@ DEFAULT_MODEL = MODELS_DIR / "ts_model.joblib"
34
  MODEL_FALLBACKS = [MODELS_DIR / "model.joblib", MODELS_DIR / "model.pkl"]
35
  COLORS = {"pred": "#1f77b4", "actual": "#f2b702", "ref": "#5a5a5a"}
36
 
 
 
37
 
38
- # ---- Plot sizing controls ----
39
  CROSS_W = 350
40
  CROSS_H = 350
41
  TRACK_H = 1000
@@ -47,7 +47,6 @@ BOLD_FONT = "Arial Black, Arial, sans-serif"
47
  # Page / CSS
48
  # =========================
49
  st.set_page_config(page_title=APP_NAME, page_icon="logo.png", layout="wide")
50
-
51
  st.markdown("""
52
  <style>
53
  .brand-logo { width: 200px; height: auto; object-fit: contain; }
@@ -55,55 +54,26 @@ st.markdown("""
55
  .sidebar-header .text h1 { font-size: 1.05rem; margin:0; line-height:1.1; }
56
  .sidebar-header .text .tag { font-size: .85rem; color:#6b7280; margin:2px 0 0; }
57
  .centered-container { display: flex; flex-direction: column; align-items: center; text-align: center; }
 
 
 
 
 
 
 
 
 
 
 
 
58
  </style>
59
  """, unsafe_allow_html=True)
60
 
61
- # Sticky helpers
62
- st.markdown("""
63
- <style>
64
- .main .block-container { overflow: unset !important; }
65
- div[data-testid="stVerticalBlock"] { overflow: unset !important; }
66
- </style>
67
- """, unsafe_allow_html=True)
68
-
69
- # Hide uploader helper text
70
- st.markdown("""
71
- <style>
72
- section[data-testid="stFileUploader"] div[data-testid="stMarkdownContainer"]{display:none !important;}
73
- section[data-testid="stFileUploader"] [data-testid="stFileUploaderDropzone"] > div:first-child{display:none !important;}
74
- section[data-testid="stFileUploader"] [data-testid="stFileUploaderInstructions"]{display:none !important;}
75
- section[data-testid="stFileUploader"] p, section[data-testid="stFileUploader"] small{display:none !important;}
76
- </style>
77
- """, unsafe_allow_html=True)
78
-
79
- # Make the Preview expander title & tabs sticky (pinned to the top)
80
- st.markdown("""
81
- <style>
82
- div[data-testid="stExpander"] > details > summary {
83
- position: sticky; top: 0; z-index: 10; background: #fff; border-bottom: 1px solid #eee;
84
- }
85
- div[data-testid="stExpander"] div[data-baseweb="tab-list"] {
86
- position: sticky; top: 42px; z-index: 9; background: #fff; padding-top: 6px;
87
- }
88
- </style>
89
- """, unsafe_allow_html=True)
90
-
91
- # Center text in all pandas Styler tables
92
  TABLE_CENTER_CSS = [
93
  dict(selector="th", props=[("text-align", "center")]),
94
  dict(selector="td", props=[("text-align", "center")]),
95
  ]
96
 
97
- # Message box CSS
98
- st.markdown("""
99
- <style>
100
- .st-message-box { background-color: #f0f2f6; color: #333; padding: 10px; border-radius: 10px; border: 1px solid #e6e9ef; }
101
- .st-message-box.st-success { background-color: #d4edda; color: #155724; border-color: #c3e6cb; }
102
- .st-message-box.st-warning { background-color: #fff3cd; color: #856404; border-color: #ffeeba; }
103
- .st-message-box.st-error { background-color: #f8d7da; color: #721c24; border-color: #f5c6cb; }
104
- </style>
105
- """, unsafe_allow_html=True)
106
-
107
  # =========================
108
  # Password gate
109
  # =========================
@@ -157,7 +127,6 @@ def pearson_r(y_true, y_pred) -> float:
157
  a = np.asarray(y_true, dtype=float)
158
  p = np.asarray(y_pred, dtype=float)
159
  if a.size < 2: return float("nan")
160
- # Guard constant series
161
  if np.all(a == a[0]) or np.all(p == p[0]): return float("nan")
162
  return float(np.corrcoef(a, p)[0, 1])
163
 
@@ -171,17 +140,57 @@ def parse_excel(data_bytes: bytes):
171
  xl = pd.ExcelFile(bio)
172
  return {sh: xl.parse(sh) for sh in xl.sheet_names}
173
 
174
- def read_book_bytes(b: bytes): return parse_excel(b) if b else {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
- def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
177
  out = df.copy()
178
- out.columns = [c.strip() for c in out.columns]
179
- # Fix flow-rate typo variants
180
- out = out.rename(columns={
181
- "Fow Rate, gpm": "Flow Rate, gpm",
182
- "Fow Rate, gpm ": "Flow Rate, gpm"
183
- })
184
- return out
185
 
186
  def ensure_cols(df: pd.DataFrame, cols: list[str]) -> bool:
187
  miss = [c for c in cols if c not in df.columns]
@@ -197,7 +206,6 @@ def find_sheet(book, names):
197
  return None
198
 
199
  def _nice_tick0(xmin: float, step: float = 0.1) -> float:
200
- # Rounded start tick for continuous Ts scales (unit-agnostic)
201
  return step * math.floor(xmin / step) if np.isfinite(xmin) else xmin
202
 
203
  def df_centered_rounded(df: pd.DataFrame, hide_index=True):
@@ -219,51 +227,6 @@ def _excel_engine() -> str:
219
  except Exception:
220
  return "openpyxl"
221
 
222
- def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
223
- out = df.copy()
224
-
225
- # Trim and collapse inner spaces like ", " → ", " consistently
226
- out.columns = [str(c).strip().replace(" ,", ",").replace(", ", ", ").replace(" ", " ") for c in out.columns]
227
-
228
- # Canonical rename map: map what you have → what the app expects
229
- rename_map = {
230
- # Drilling features
231
- "WOB, klbf": "WOB(klbf)",
232
- "WOB (klbf)": "WOB(klbf)",
233
- "WOB( klbf)": "WOB(klbf)",
234
- "WOB(klbf)": "WOB(klbf)",
235
-
236
- "Torque(kft.lbf)": "TORQUE(kft.lbf)",
237
- "TORQUE(kft.lbf)": "TORQUE(kft.lbf)",
238
-
239
- "SPP(psi)": "SPP(psi)",
240
- "RPM(1/min)": "RPM(1/min)",
241
- "ROP(ft/h)": "ROP(ft/h)",
242
- "Flow Rate, gpm": "Flow Rate, gpm",
243
-
244
- # Target column variants coming from your file
245
- "Ts,us/ft_Actual": "Ts",
246
- "Ts, us/ft_Actual": "Ts",
247
- "TS_Actual": "Ts",
248
- "Ts (us/ft)_Actual":"Ts",
249
- }
250
-
251
- # Also keep Depth variants tidy (used for Y axis only)
252
- depth_variants = {
253
- "Depth, ft": "Depth, ft",
254
- "Depth(ft)": "Depth, ft",
255
- "DEPTH, ft": "Depth, ft",
256
- }
257
-
258
- # Build final mapping
259
- final_map = {**depth_variants}
260
- # Only add keys that actually exist to avoid KeyErrors
261
- final_map.update({k: v for k, v in rename_map.items() if k in out.columns})
262
-
263
- out = out.rename(columns=final_map)
264
- return out
265
-
266
-
267
  def _excel_safe_name(name: str) -> str:
268
  bad = '[]:*?/\\'
269
  safe = ''.join('_' if ch in bad else ch for ch in str(name))
@@ -294,7 +257,6 @@ def _train_ranges_df(ranges: dict[str, tuple[float, float]]) -> pd.DataFrame:
294
  return _round_numeric(df, 3)
295
 
296
  def _excel_autofit(writer, sheet_name: str, df: pd.DataFrame, min_w: int = 8, max_w: int = 40):
297
- """Auto-fit columns when using xlsxwriter."""
298
  try:
299
  import xlsxwriter # noqa: F401
300
  except Exception:
@@ -307,13 +269,11 @@ def _excel_autofit(writer, sheet_name: str, df: pd.DataFrame, min_w: int = 8, ma
307
  ws.freeze_panes(1, 0)
308
 
309
  def _add_sheet(sheets: dict, order: list, name: str, df: pd.DataFrame, ndigits: int):
310
- if df is None or df.empty:
311
- return
312
  sheets[name] = _round_numeric(df, ndigits)
313
  order.append(name)
314
 
315
  def _available_sections() -> list[str]:
316
- """Compute which sections exist (offered in the export dropdown)."""
317
  res = st.session_state.get("results", {})
318
  sections = []
319
  if "Train" in res: sections += ["Training","Training_Metrics","Training_Summary"]
@@ -325,15 +285,12 @@ def _available_sections() -> list[str]:
325
  return sections
326
 
327
  def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: bool = True) -> tuple[bytes|None, str|None, list[str]]:
328
- """Builds an in-memory Excel workbook for selected sheets; fixed rounding to 3 decimals."""
329
  res = st.session_state.get("results", {})
330
- if not res:
331
- return None, None, []
332
 
333
  sheets: dict[str, pd.DataFrame] = {}
334
  order: list[str] = []
335
 
336
- # Training
337
  if "Training" in selected and "Train" in res:
338
  _add_sheet(sheets, order, "Training", res["Train"], ndigits)
339
  if "Training_Metrics" in selected and res.get("m_train"):
@@ -342,7 +299,6 @@ def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: boo
342
  tr_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in res["Train"].columns]
343
  _add_sheet(sheets, order, "Training_Summary", _summary_table(res["Train"], tr_cols), ndigits)
344
 
345
- # Testing
346
  if "Testing" in selected and "Test" in res:
347
  _add_sheet(sheets, order, "Testing", res["Test"], ndigits)
348
  if "Testing_Metrics" in selected and res.get("m_test"):
@@ -351,7 +307,6 @@ def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: boo
351
  te_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in res["Test"].columns]
352
  _add_sheet(sheets, order, "Testing_Summary", _summary_table(res["Test"], te_cols), ndigits)
353
 
354
- # Validation
355
  if "Validation" in selected and "Validate" in res:
356
  _add_sheet(sheets, order, "Validation", res["Validate"], ndigits)
357
  if "Validation_Metrics" in selected and res.get("m_val"):
@@ -361,18 +316,15 @@ def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: boo
361
  if "Validation_OOR" in selected and isinstance(res.get("oor_tbl"), pd.DataFrame) and not res["oor_tbl"].empty:
362
  _add_sheet(sheets, order, "Validation_OOR", res["oor_tbl"].reset_index(drop=True), ndigits)
363
 
364
- # Prediction
365
  if "Prediction" in selected and "PredictOnly" in res:
366
  _add_sheet(sheets, order, "Prediction", res["PredictOnly"], ndigits)
367
  if "Prediction_Summary" in selected and res.get("sv_pred"):
368
  _add_sheet(sheets, order, "Prediction_Summary", pd.DataFrame([res["sv_pred"]]), ndigits)
369
 
370
- # Training ranges
371
  if "Training_Ranges" in selected and st.session_state.get("train_ranges"):
372
  rr = _train_ranges_df(st.session_state["train_ranges"])
373
  _add_sheet(sheets, order, "Training_Ranges", rr, ndigits)
374
 
375
- # Info
376
  if "Info" in selected:
377
  info = pd.DataFrame([
378
  {"Key": "AppName", "Value": APP_NAME},
@@ -384,8 +336,7 @@ def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: boo
384
  ])
385
  _add_sheet(sheets, order, "Info", info, ndigits)
386
 
387
- if not order:
388
- return None, None, []
389
 
390
  bio = io.BytesIO()
391
  engine = _excel_engine()
@@ -397,24 +348,17 @@ def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: boo
397
  if do_autofit:
398
  _excel_autofit(writer, sheet, df)
399
  bio.seek(0)
400
-
401
  fname = f"TS_Export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
402
  return bio.getvalue(), fname, order
403
 
404
- # --------- SIMPLE export UI (dropdown checklist, starts empty) ----------
405
  def render_export_button(phase_key: str) -> None:
406
- """
407
- Export UI — one multiselect dropdown that starts EMPTY.
408
- The download button is disabled until at least one sheet is selected.
409
- """
410
  res = st.session_state.get("results", {})
411
- if not res:
412
- return
413
-
414
  st.divider()
415
  st.markdown("### Export to Excel")
416
 
417
- options = _available_sections() # only what exists right now
418
  selected_sheets = st.multiselect(
419
  "Sheets to include",
420
  options=options,
@@ -439,7 +383,6 @@ def render_export_button(phase_key: str) -> None:
439
  data, fname, names = build_export_workbook(selected=selected_sheets, ndigits=3, do_autofit=True)
440
  if names:
441
  st.caption("Will include: " + ", ".join(names))
442
-
443
  st.download_button(
444
  "⬇️ Export Excel",
445
  data=(data or b""),
@@ -450,7 +393,7 @@ def render_export_button(phase_key: str) -> None:
450
  )
451
 
452
  # =========================
453
- # Cross plot (Matplotlib) — auto-scaled for Ts
454
  # =========================
455
  def cross_plot_static(actual, pred, xlabel="Actual Ts (µs/ft)", ylabel="Predicted Ts (µs/ft)"):
456
  a = pd.Series(actual, dtype=float)
@@ -471,14 +414,11 @@ def cross_plot_static(actual, pred, xlabel="Actual Ts (µs/ft)", ylabel="Predict
471
 
472
  ax.set_xlim(lo2, hi2)
473
  ax.set_ylim(lo2, hi2)
474
- ax.set_xticks(ticks)
475
- ax.set_yticks(ticks)
476
  ax.set_aspect("equal", adjustable="box")
477
 
478
- # Generic numeric formatting (2 decimals) in plot; export uses 3 decimals
479
  fmt = FuncFormatter(lambda x, _: f"{x:.2f}")
480
- ax.xaxis.set_major_formatter(fmt)
481
- ax.yaxis.set_major_formatter(fmt)
482
 
483
  ax.set_xlabel(xlabel, fontweight="bold", fontsize=10, color="black")
484
  ax.set_ylabel(ylabel, fontweight="bold", fontsize=10, color="black")
@@ -486,8 +426,7 @@ def cross_plot_static(actual, pred, xlabel="Actual Ts (µs/ft)", ylabel="Predict
486
 
487
  ax.grid(True, linestyle=":", alpha=0.3)
488
  for spine in ax.spines.values():
489
- spine.set_linewidth(1.1)
490
- spine.set_color("#444")
491
 
492
  fig.subplots_adjust(left=0.16, bottom=0.16, right=0.98, top=0.98)
493
  return fig
@@ -496,18 +435,14 @@ def cross_plot_static(actual, pred, xlabel="Actual Ts (µs/ft)", ylabel="Predict
496
  # Track plot (Plotly)
497
  # =========================
498
  def track_plot(df, include_actual=True):
499
- # Depth (or index) on Y
500
  depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
501
  if depth_col is not None:
502
- y = pd.Series(df[depth_col]).astype(float)
503
- ylab = depth_col
504
- y_range = [float(y.max()), float(y.min())] # reverse
505
  else:
506
- y = pd.Series(np.arange(1, len(df) + 1))
507
- ylab = "Point Index"
508
  y_range = [float(y.max()), float(y.min())]
509
 
510
- # X range from prediction/actual
511
  x_series = pd.Series(df.get(PRED_COL, pd.Series(dtype=float))).astype(float)
512
  if include_actual and TARGET in df.columns:
513
  x_series = pd.concat([x_series, pd.Series(df[TARGET]).astype(float)], ignore_index=True)
@@ -533,9 +468,7 @@ def track_plot(df, include_actual=True):
533
  ))
534
 
535
  fig.update_layout(
536
- height=TRACK_H,
537
- width=TRACK_W,
538
- autosize=False,
539
  paper_bgcolor="#fff", plot_bgcolor="#fff",
540
  margin=dict(l=64, r=16, t=36, b=48), hovermode="closest",
541
  font=dict(size=FONT_SZ, color="#000"),
@@ -543,34 +476,26 @@ def track_plot(df, include_actual=True):
543
  bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1),
544
  legend_title_text=""
545
  )
546
-
547
- # X axis with NO decimals (Ts is in µs/ft; typically integer-like)
548
  fig.update_xaxes(
549
- title_text="Ts (μs/ft)",
550
- title_font=dict(size=20, family=BOLD_FONT, color="#000"),
551
- tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
552
- side="top",
553
- range=[xmin, xmax],
554
- ticks="outside",
555
- tickformat=",.0f", # ← integers; change to ",.1f" if you want one decimal
556
- tickmode="auto",
557
- tick0=tick0,
558
- showline=True, linewidth=1.2, linecolor="#444", mirror=True,
559
- showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
560
- )
561
-
562
  fig.update_yaxes(
563
  title_text=ylab,
564
  title_font=dict(size=20, family=BOLD_FONT, color="#000"),
565
  tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
566
- range=y_range,
567
- ticks="outside",
568
  showline=True, linewidth=1.2, linecolor="#444", mirror=True,
569
  showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
570
  )
571
  return fig
572
 
573
- # ---------- Preview modal (matplotlib) ----------
574
  def preview_tracks(df: pd.DataFrame, cols: list[str]):
575
  cols = [c for c in cols if c in df.columns]
576
  n = len(cols)
@@ -582,52 +507,22 @@ def preview_tracks(df: pd.DataFrame, cols: list[str]):
582
  if n == 1: axes = [axes]
583
  depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
584
  if depth_col is not None:
585
- idx = pd.to_numeric(df[depth_col], errors="coerce")
586
  else:
587
- idx = pd.Series(np.arange(1, len(df) + 1))
588
- for ax, col in zip(axes, cols):
589
- ax.plot(pd.to_numeric(df[col], errors="coerce"), idx, '-', lw=1.4, color="#333")
590
- ax.set_xlabel(col); ax.xaxis.set_label_position('top'); ax.xaxis.tick_top(); ax.invert_yaxis()
 
591
  ax.grid(True, linestyle=":", alpha=0.3)
592
- for s in ax.spines.values(): s.set_visible(True)
593
- axes[0].set_ylabel(depth_col if depth_col else "Point Index")
 
 
594
  return fig
595
 
596
- # Modal wrapper
597
- try:
598
- dialog = st.dialog
599
- except AttributeError:
600
- def dialog(title):
601
- def deco(fn):
602
- def wrapper(*args, **kwargs):
603
- with st.expander(title, expanded=True):
604
- return fn(*args, **kwargs)
605
- return wrapper
606
- return deco
607
-
608
- def preview_modal(book: dict[str, pd.DataFrame]):
609
- if not book:
610
- st.info("No data loaded yet."); return
611
- names = list(book.keys())
612
- tabs = st.tabs(names)
613
- for t, name in zip(tabs, names):
614
- with t:
615
- df = _normalize_columns(book[name])
616
- t1, t2 = st.tabs(["Tracks", "Summary"])
617
- with t1:
618
- st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)
619
- with t2:
620
- present = [c for c in FEATURES if c in df.columns]
621
- if present:
622
- tbl = (df[present]
623
- .agg(['min','max','mean','std'])
624
- .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"}))
625
- df_centered_rounded(tbl.reset_index(names="Feature"))
626
- else:
627
- st.info("No expected feature columns found to summarize.")
628
-
629
  # =========================
630
- # Load model
631
  # =========================
632
  def ensure_model() -> Path|None:
633
  for p in [DEFAULT_MODEL, *MODEL_FALLBACKS]:
@@ -656,30 +551,23 @@ except Exception as e:
656
  st.error(f"Failed to load model: {e}")
657
  st.stop()
658
 
659
- # ---------- Load meta (optional) ----------
660
  meta = {}
661
- # Prefer a Ts-specific meta, fall back to a generic one if present
662
- meta_candidates = [
663
- MODELS_DIR / "ts_meta.json", # ← add this
664
- MODELS_DIR / "meta.json",
665
- MODELS_DIR / "ym_meta.json",
666
- ]
667
  meta_path = next((p for p in meta_candidates if p.exists()), None)
668
  if meta_path:
669
  try:
670
  meta = json.loads(meta_path.read_text(encoding="utf-8"))
671
  FEATURES = meta.get("features", FEATURES)
672
  TARGET = meta.get("target", TARGET)
673
- # If meta provides a custom pred column, respect it
674
  PRED_COL = meta.get("pred_col", PRED_COL)
675
  except Exception as e:
676
  st.warning(f"Could not parse meta file ({meta_path.name}): {e}")
677
 
678
- # Optional: version mismatch warning
679
- import numpy as _np, sklearn as _skl
680
- mv = meta.get("versions", {})
681
- if mv:
682
- msg = []
683
  if mv.get("numpy") and mv["numpy"] != _np.__version__:
684
  msg.append(f"NumPy {mv['numpy']} expected, running {_np.__version__}")
685
  if mv.get("scikit_learn") and mv["scikit_learn"] != _skl.__version__:
@@ -700,7 +588,7 @@ st.session_state.setdefault("dev_preview",False)
700
  st.session_state.setdefault("show_preview_modal", False)
701
 
702
  # =========================
703
- # Branding in Sidebar
704
  # =========================
705
  st.sidebar.markdown(f"""
706
  <div class="centered-container">
@@ -711,9 +599,6 @@ st.sidebar.markdown(f"""
711
  """, unsafe_allow_html=True
712
  )
713
 
714
- # =========================
715
- # Reusable Sticky Header Function
716
- # =========================
717
  def sticky_header(title, message):
718
  st.markdown(
719
  f"""
@@ -785,13 +670,16 @@ if st.session_state.app_step == "dev":
785
  if sh_train is None or sh_test is None:
786
  st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training/training2 and Test/Testing/testing2 sheets.</div>', unsafe_allow_html=True)
787
  st.stop()
788
- tr = _normalize_columns(book[sh_train].copy())
789
- te = _normalize_columns(book[sh_test].copy())
 
 
790
 
791
  if not (ensure_cols(tr, FEATURES+[TARGET]) and ensure_cols(te, FEATURES+[TARGET])):
792
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True)
793
  st.stop()
794
 
 
795
  tr[PRED_COL] = model.predict(tr[FEATURES])
796
  te[PRED_COL] = model.predict(te[FEATURES])
797
 
@@ -813,10 +701,7 @@ if st.session_state.app_step == "dev":
813
 
814
  def _dev_block(df, m):
815
  c1,c2,c3 = st.columns(3)
816
- c1.metric("R", f"{m['R']:.2f}")
817
- c2.metric("RMSE", f"{m['RMSE']:.2f}")
818
- c3.metric("MAE", f"{m['MAE']:.2f}")
819
-
820
  st.markdown("""
821
  <div style='text-align: left; font-size: 0.8em; color: #6b7280; margin-top: -16px; margin-bottom: 8px;'>
822
  <strong>R:</strong> Pearson Correlation Coefficient<br>
@@ -824,27 +709,18 @@ if st.session_state.app_step == "dev":
824
  <strong>MAE:</strong> Mean Absolute Error
825
  </div>
826
  """, unsafe_allow_html=True)
827
-
828
  col_track, col_cross = st.columns([2, 3], gap="large")
829
  with col_track:
830
- st.plotly_chart(
831
- track_plot(df, include_actual=True),
832
- use_container_width=False,
833
- config={"displayModeBar": False, "scrollZoom": True}
834
- )
835
  with col_cross:
836
  st.pyplot(cross_plot_static(df[TARGET], df[PRED_COL]), use_container_width=False)
837
 
838
  if "Train" in st.session_state.results or "Test" in st.session_state.results:
839
  tab1, tab2 = st.tabs(["Training", "Testing"])
840
  if "Train" in st.session_state.results:
841
- with tab1:
842
- _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
843
  if "Test" in st.session_state.results:
844
- with tab2:
845
- _dev_block(st.session_state.results["Test"], st.session_state.results["m_test"])
846
-
847
- # Export UI for this phase (dropdown checklist starts empty)
848
  render_export_button(phase_key="dev")
849
 
850
  # =========================
@@ -869,7 +745,7 @@ if st.session_state.app_step == "validate":
869
  if go_btn and up is not None:
870
  book = read_book_bytes(up.getvalue())
871
  name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
872
- df = _normalize_columns(book[name].copy())
873
  if not ensure_cols(df, FEATURES+[TARGET]):
874
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
875
  df[PRED_COL] = model.predict(df[FEATURES])
@@ -895,10 +771,7 @@ if st.session_state.app_step == "validate":
895
  if "Validate" in st.session_state.results:
896
  m = st.session_state.results["m_val"]
897
  c1,c2,c3 = st.columns(3)
898
- c1.metric("R", f"{m['R']:.2f}")
899
- c2.metric("RMSE", f"{m['RMSE']:.2f}")
900
- c3.metric("MAE", f"{m['MAE']:.2f}")
901
-
902
  st.markdown("""
903
  <div style='text-align: left; font-size: 0.8em; color: #6b7280; margin-top: -16px; margin-bottom: 8px;'>
904
  <strong>R:</strong> Pearson Correlation Coefficient<br>
@@ -909,19 +782,13 @@ if st.session_state.app_step == "validate":
909
 
910
  col_track, col_cross = st.columns([2, 3], gap="large")
911
  with col_track:
912
- st.plotly_chart(
913
- track_plot(st.session_state.results["Validate"], include_actual=True),
914
- use_container_width=False,
915
- config={"displayModeBar": False, "scrollZoom": True}
916
- )
917
  with col_cross:
918
- st.pyplot(
919
- cross_plot_static(st.session_state.results["Validate"][TARGET],
920
- st.session_state.results["Validate"][PRED_COL]),
921
- use_container_width=False
922
- )
923
 
924
- # Export UI for this phase (dropdown checklist starts empty)
925
  render_export_button(phase_key="validate")
926
 
927
  sv = st.session_state.results["sv_val"]
@@ -950,7 +817,7 @@ if st.session_state.app_step == "predict":
950
 
951
  if go_btn and up is not None:
952
  book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
953
- df = _normalize_columns(book[name].copy())
954
  if not ensure_cols(df, FEATURES):
955
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
956
  df[PRED_COL] = model.predict(df[FEATURES])
@@ -976,31 +843,22 @@ if st.session_state.app_step == "predict":
976
  with col_left:
977
  table = pd.DataFrame({
978
  "Metric": ["# points","Pred min","Pred max","Pred mean","Pred std","OOR %"],
979
- "Value": [sv["n"],
980
- round(sv["pred_min"],3),
981
- round(sv["pred_max"],3),
982
- round(sv["pred_mean"],3),
983
- round(sv["pred_std"],3),
984
- f'{sv["oor"]:.1f}%']
985
  })
986
  st.markdown('<div class="st-message-box st-success">Predictions ready ✓</div>', unsafe_allow_html=True)
987
  df_centered_rounded(table, hide_index=True)
988
  st.caption("**★ OOR** = % of rows whose input features fall outside the training min–max range.")
989
  with col_right:
990
- st.plotly_chart(
991
- track_plot(df, include_actual=False),
992
- use_container_width=False,
993
- config={"displayModeBar": False, "scrollZoom": True}
994
- )
995
 
996
- # Export UI for this phase (dropdown checklist starts empty)
997
  render_export_button(phase_key="predict")
998
 
999
  # =========================
1000
- # Run preview modal after all other elements
1001
  # =========================
1002
  if st.session_state.show_preview_modal:
1003
- # Select the correct workbook bytes for this step
1004
  book_to_preview = {}
1005
  if st.session_state.app_step == "dev":
1006
  book_to_preview = read_book_bytes(st.session_state.dev_file_bytes)
@@ -1015,7 +873,7 @@ if st.session_state.show_preview_modal:
1015
  tabs = st.tabs(names)
1016
  for t, name in zip(tabs, names):
1017
  with t:
1018
- df = _normalize_columns(book_to_preview[name])
1019
  t1, t2 = st.tabs(["Tracks", "Summary"])
1020
  with t1:
1021
  st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)
 
1
+ # app.py — ST_Sonic_Ts (Shear Slowness Ts)
2
  import io, json, os, base64, math
3
  from pathlib import Path
4
  import streamlit as st
 
7
  import joblib
8
  from datetime import datetime
9
 
10
+ # Matplotlib (static plots)
11
  import matplotlib
12
  matplotlib.use("Agg")
13
  import matplotlib.pyplot as plt
 
19
  # =========================
20
  # Constants (Ts variant)
21
  # =========================
22
+ APP_NAME = "ST_Log_Sonic (Ts)"
23
  TAGLINE = "Real-Time Shear Slowness (Ts) Prediction"
24
 
25
+ # Defaults (will be overridden by meta if present)
26
  FEATURES = ["WOB(klbf)", "TORQUE(kft.lbf)", "SPP(psi)", "RPM(1/min)", "ROP(ft/h)", "Flow Rate, gpm"]
27
+ TARGET = "Ts"
 
 
28
  PRED_COL = "Ts_Pred"
29
 
30
  MODELS_DIR = Path("models")
 
32
  MODEL_FALLBACKS = [MODELS_DIR / "model.joblib", MODELS_DIR / "model.pkl"]
33
  COLORS = {"pred": "#1f77b4", "actual": "#f2b702", "ref": "#5a5a5a"}
34
 
35
+ # Toggle to show strict version banner from meta
36
+ STRICT_VERSION_CHECK = False
37
 
38
+ # ---- Plot sizing ----
39
  CROSS_W = 350
40
  CROSS_H = 350
41
  TRACK_H = 1000
 
47
  # Page / CSS
48
  # =========================
49
  st.set_page_config(page_title=APP_NAME, page_icon="logo.png", layout="wide")
 
50
  st.markdown("""
51
  <style>
52
  .brand-logo { width: 200px; height: auto; object-fit: contain; }
 
54
  .sidebar-header .text h1 { font-size: 1.05rem; margin:0; line-height:1.1; }
55
  .sidebar-header .text .tag { font-size: .85rem; color:#6b7280; margin:2px 0 0; }
56
  .centered-container { display: flex; flex-direction: column; align-items: center; text-align: center; }
57
+ .st-message-box { background-color: #f0f2f6; color: #333; padding: 10px; border-radius: 10px; border: 1px solid #e6e9ef; }
58
+ .st-message-box.st-success { background-color: #d4edda; color: #155724; border-color: #c3e6cb; }
59
+ .st-message-box.st-warning { background-color: #fff3cd; color: #856404; border-color: #ffeeba; }
60
+ .st-message-box.st-error { background-color: #f8d7da; color: #721c24; border-color: #f5c6cb; }
61
+ .main .block-container { overflow: unset !important; }
62
+ div[data-testid="stVerticalBlock"] { overflow: unset !important; }
63
+ div[data-testid="stExpander"] > details > summary {
64
+ position: sticky; top: 0; z-index: 10; background: #fff; border-bottom: 1px solid #eee;
65
+ }
66
+ div[data-testid="stExpander"] div[data-baseweb="tab-list"] {
67
+ position: sticky; top: 42px; z-index: 9; background: #fff; padding-top: 6px;
68
+ }
69
  </style>
70
  """, unsafe_allow_html=True)
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  TABLE_CENTER_CSS = [
73
  dict(selector="th", props=[("text-align", "center")]),
74
  dict(selector="td", props=[("text-align", "center")]),
75
  ]
76
 
 
 
 
 
 
 
 
 
 
 
77
  # =========================
78
  # Password gate
79
  # =========================
 
127
  a = np.asarray(y_true, dtype=float)
128
  p = np.asarray(y_pred, dtype=float)
129
  if a.size < 2: return float("nan")
 
130
  if np.all(a == a[0]) or np.all(p == p[0]): return float("nan")
131
  return float(np.corrcoef(a, p)[0, 1])
132
 
 
140
  xl = pd.ExcelFile(bio)
141
  return {sh: xl.parse(sh) for sh in xl.sheet_names}
142
 
143
def read_book_bytes(b: bytes):
    """Parse uploaded Excel bytes into {sheet_name: DataFrame}; empty dict for no bytes."""
    if not b:
        return {}
    return parse_excel(b)
145
+
146
+ # ---- Canonical feature aliasing ------------------------------------------
147
+ def _build_alias_map(canonical_features: list[str], target_name: str) -> dict:
148
+ """
149
+ Returns a dict of common header variants -> canonical names as per the model's FEATURES.
150
+ We choose the canonical for each family by checking which string exists in canonical_features.
151
+ """
152
+ def pick(expected_list, family_variants):
153
+ # pick the first variant that exists in expected_list, else fall back to first in family_variants
154
+ for v in family_variants:
155
+ if v in expected_list:
156
+ return v
157
+ return family_variants[0]
158
+
159
+ can_WOB = pick(canonical_features, ["WOB, klbf","WOB(klbf)","WOB (klbf)"])
160
+ can_TORQUE = pick(canonical_features, ["Torque(kft.lbf)","TORQUE(kft.lbf)"])
161
+ can_SPP = pick(canonical_features, ["SPP(psi)"])
162
+ can_RPM = pick(canonical_features, ["RPM(1/min)","RPM (1/min)"])
163
+ can_ROP = pick(canonical_features, ["ROP(ft/h)","ROP (ft/h)"])
164
+ can_FR = pick(canonical_features, ["Flow Rate, gpm","Flow Rate , gpm","Flow Rate,gpm"])
165
+
166
+ alias = {
167
+ # WOB
168
+ "WOB, klbf": can_WOB, "WOB(klbf)": can_WOB, "WOB (klbf)": can_WOB, "WOB( klbf)": can_WOB, "WOB , klbf": can_WOB,
169
+ # Torque
170
+ "Torque(kft.lbf)": can_TORQUE, "TORQUE(kft.lbf)": can_TORQUE,
171
+ # SPP
172
+ "SPP(psi)": can_SPP,
173
+ # RPM
174
+ "RPM(1/min)": can_RPM, "RPM (1/min)": can_RPM,
175
+ # ROP
176
+ "ROP(ft/h)": can_ROP, "ROP (ft/h)": can_ROP,
177
+ # Flow
178
+ "Flow Rate, gpm": can_FR, "Fow Rate, gpm": can_FR, "Fow Rate, gpm ": can_FR, "Flow Rate , gpm": can_FR, "Flow Rate,gpm": can_FR,
179
+ # Depth (for plotting only)
180
+ "Depth, ft": "Depth, ft", "Depth(ft)": "Depth, ft", "DEPTH, ft": "Depth, ft",
181
+ # Ts targets (map all to the chosen TARGET)
182
+ "Ts": target_name, "Ts,us/ft_Actual": target_name, "Ts, us/ft_Actual": target_name,
183
+ "TS_Actual": target_name, "Ts (us/ft)_Actual": target_name
184
+ }
185
+ return alias
186
 
187
def _normalize_columns(df: pd.DataFrame, canonical_features: list[str], target_name: str) -> pd.DataFrame:
    """
    Return a copy of *df* with headers renamed to the model's canonical names.

    Each header is stringified, surrounding whitespace is stripped, and " ,"
    is tightened to "," before alias lookup.  Only columns whose cleaned name
    appears in the alias map (and actually changes) are renamed; everything
    else is left untouched.
    """
    out = df.copy()
    # NOTE(review): the original also chained `.replace(", ", ", ")` and
    # `.replace(" ", " ")`, both of which replace a string with itself (no-ops)
    # — presumably a typo for collapsing doubled spaces. Dropped here; the
    # observable behavior is unchanged. TODO confirm original intent.
    out.columns = [str(c).strip().replace(" ,", ",") for c in out.columns]
    alias = _build_alias_map(canonical_features, target_name)
    # Only rename keys that actually exist and would actually change.
    renames = {old: new for old, new in alias.items() if old in out.columns and old != new}
    return out.rename(columns=renames)
 
 
194
 
195
  def ensure_cols(df: pd.DataFrame, cols: list[str]) -> bool:
196
  miss = [c for c in cols if c not in df.columns]
 
206
  return None
207
 
208
  def _nice_tick0(xmin: float, step: float = 0.1) -> float:
 
209
  return step * math.floor(xmin / step) if np.isfinite(xmin) else xmin
210
 
211
  def df_centered_rounded(df: pd.DataFrame, hide_index=True):
 
227
  except Exception:
228
  return "openpyxl"
229
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  def _excel_safe_name(name: str) -> str:
231
  bad = '[]:*?/\\'
232
  safe = ''.join('_' if ch in bad else ch for ch in str(name))
 
257
  return _round_numeric(df, 3)
258
 
259
  def _excel_autofit(writer, sheet_name: str, df: pd.DataFrame, min_w: int = 8, max_w: int = 40):
 
260
  try:
261
  import xlsxwriter # noqa: F401
262
  except Exception:
 
269
  ws.freeze_panes(1, 0)
270
 
271
def _add_sheet(sheets: dict, order: list, name: str, df: pd.DataFrame, ndigits: int):
    """Register *df* (rounded to *ndigits*) under *name* and record insertion order; None/empty frames are skipped."""
    if df is None:
        return
    if df.empty:
        return
    sheets[name] = _round_numeric(df, ndigits)
    order.append(name)
275
 
276
  def _available_sections() -> list[str]:
 
277
  res = st.session_state.get("results", {})
278
  sections = []
279
  if "Train" in res: sections += ["Training","Training_Metrics","Training_Summary"]
 
285
  return sections
286
 
287
  def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: bool = True) -> tuple[bytes|None, str|None, list[str]]:
 
288
  res = st.session_state.get("results", {})
289
+ if not res: return None, None, []
 
290
 
291
  sheets: dict[str, pd.DataFrame] = {}
292
  order: list[str] = []
293
 
 
294
  if "Training" in selected and "Train" in res:
295
  _add_sheet(sheets, order, "Training", res["Train"], ndigits)
296
  if "Training_Metrics" in selected and res.get("m_train"):
 
299
  tr_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in res["Train"].columns]
300
  _add_sheet(sheets, order, "Training_Summary", _summary_table(res["Train"], tr_cols), ndigits)
301
 
 
302
  if "Testing" in selected and "Test" in res:
303
  _add_sheet(sheets, order, "Testing", res["Test"], ndigits)
304
  if "Testing_Metrics" in selected and res.get("m_test"):
 
307
  te_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in res["Test"].columns]
308
  _add_sheet(sheets, order, "Testing_Summary", _summary_table(res["Test"], te_cols), ndigits)
309
 
 
310
  if "Validation" in selected and "Validate" in res:
311
  _add_sheet(sheets, order, "Validation", res["Validate"], ndigits)
312
  if "Validation_Metrics" in selected and res.get("m_val"):
 
316
  if "Validation_OOR" in selected and isinstance(res.get("oor_tbl"), pd.DataFrame) and not res["oor_tbl"].empty:
317
  _add_sheet(sheets, order, "Validation_OOR", res["oor_tbl"].reset_index(drop=True), ndigits)
318
 
 
319
  if "Prediction" in selected and "PredictOnly" in res:
320
  _add_sheet(sheets, order, "Prediction", res["PredictOnly"], ndigits)
321
  if "Prediction_Summary" in selected and res.get("sv_pred"):
322
  _add_sheet(sheets, order, "Prediction_Summary", pd.DataFrame([res["sv_pred"]]), ndigits)
323
 
 
324
  if "Training_Ranges" in selected and st.session_state.get("train_ranges"):
325
  rr = _train_ranges_df(st.session_state["train_ranges"])
326
  _add_sheet(sheets, order, "Training_Ranges", rr, ndigits)
327
 
 
328
  if "Info" in selected:
329
  info = pd.DataFrame([
330
  {"Key": "AppName", "Value": APP_NAME},
 
336
  ])
337
  _add_sheet(sheets, order, "Info", info, ndigits)
338
 
339
+ if not order: return None, None, []
 
340
 
341
  bio = io.BytesIO()
342
  engine = _excel_engine()
 
348
  if do_autofit:
349
  _excel_autofit(writer, sheet, df)
350
  bio.seek(0)
 
351
  fname = f"TS_Export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
352
  return bio.getvalue(), fname, order
353
 
354
+ # --------- SIMPLE export UI ----------
355
  def render_export_button(phase_key: str) -> None:
 
 
 
 
356
  res = st.session_state.get("results", {})
357
+ if not res: return
 
 
358
  st.divider()
359
  st.markdown("### Export to Excel")
360
 
361
+ options = _available_sections()
362
  selected_sheets = st.multiselect(
363
  "Sheets to include",
364
  options=options,
 
383
  data, fname, names = build_export_workbook(selected=selected_sheets, ndigits=3, do_autofit=True)
384
  if names:
385
  st.caption("Will include: " + ", ".join(names))
 
386
  st.download_button(
387
  "⬇️ Export Excel",
388
  data=(data or b""),
 
393
  )
394
 
395
  # =========================
396
+ # Cross plot (Matplotlib)
397
  # =========================
398
  def cross_plot_static(actual, pred, xlabel="Actual Ts (µs/ft)", ylabel="Predicted Ts (µs/ft)"):
399
  a = pd.Series(actual, dtype=float)
 
414
 
415
  ax.set_xlim(lo2, hi2)
416
  ax.set_ylim(lo2, hi2)
417
+ ax.set_xticks(ticks); ax.set_yticks(ticks)
 
418
  ax.set_aspect("equal", adjustable="box")
419
 
 
420
  fmt = FuncFormatter(lambda x, _: f"{x:.2f}")
421
+ ax.xaxis.set_major_formatter(fmt); ax.yaxis.set_major_formatter(fmt)
 
422
 
423
  ax.set_xlabel(xlabel, fontweight="bold", fontsize=10, color="black")
424
  ax.set_ylabel(ylabel, fontweight="bold", fontsize=10, color="black")
 
426
 
427
  ax.grid(True, linestyle=":", alpha=0.3)
428
  for spine in ax.spines.values():
429
+ spine.set_linewidth(1.1); spine.set_color("#444")
 
430
 
431
  fig.subplots_adjust(left=0.16, bottom=0.16, right=0.98, top=0.98)
432
  return fig
 
435
  # Track plot (Plotly)
436
  # =========================
437
  def track_plot(df, include_actual=True):
 
438
  depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
439
  if depth_col is not None:
440
+ y = pd.Series(df[depth_col]).astype(float); ylab = depth_col
441
+ y_range = [float(y.max()), float(y.min())]
 
442
  else:
443
+ y = pd.Series(np.arange(1, len(df) + 1)); ylab = "Point Index"
 
444
  y_range = [float(y.max()), float(y.min())]
445
 
 
446
  x_series = pd.Series(df.get(PRED_COL, pd.Series(dtype=float))).astype(float)
447
  if include_actual and TARGET in df.columns:
448
  x_series = pd.concat([x_series, pd.Series(df[TARGET]).astype(float)], ignore_index=True)
 
468
  ))
469
 
470
  fig.update_layout(
471
+ height=TRACK_H, width=TRACK_W, autosize=False,
 
 
472
  paper_bgcolor="#fff", plot_bgcolor="#fff",
473
  margin=dict(l=64, r=16, t=36, b=48), hovermode="closest",
474
  font=dict(size=FONT_SZ, color="#000"),
 
476
  bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1),
477
  legend_title_text=""
478
  )
 
 
479
  fig.update_xaxes(
480
+ title_text="Ts (μs/ft)",
481
+ title_font=dict(size=20, family=BOLD_FONT, color="#000"),
482
+ tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
483
+ side="top", range=[xmin, xmax],
484
+ ticks="outside", tickformat=",.0f", tickmode="auto", tick0=tick0,
485
+ showline=True, linewidth=1.2, linecolor="#444", mirror=True,
486
+ showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
487
+ )
 
 
 
 
 
488
  fig.update_yaxes(
489
  title_text=ylab,
490
  title_font=dict(size=20, family=BOLD_FONT, color="#000"),
491
  tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
492
+ range=y_range, ticks="outside",
 
493
  showline=True, linewidth=1.2, linecolor="#444", mirror=True,
494
  showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
495
  )
496
  return fig
497
 
498
+ # ---------- Preview (matplotlib) ----------
499
  def preview_tracks(df: pd.DataFrame, cols: list[str]):
500
  cols = [c for c in cols if c in df.columns]
501
  n = len(cols)
 
507
  if n == 1: axes = [axes]
508
  depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
509
  if depth_col is not None:
510
+ idx = pd.to_numeric(df[depth_col], errors="coerce"); y_label = depth_col
511
  else:
512
+ idx = pd.Series(np.arange(1, len(df) + 1)); y_label = "Point Index"
513
+ for i, (ax, col) in enumerate(zip(axes, cols)):
514
+ ax.plot(pd.to_numeric(df[col], errors="coerce"), idx, '-', lw=1.6, color="#333")
515
+ ax.set_xlabel(col); ax.xaxis.set_label_position('top'); ax.xaxis.tick_top()
516
+ ax.set_ylim(float(idx.max()), float(idx.min()))
517
  ax.grid(True, linestyle=":", alpha=0.3)
518
+ if i == 0: ax.set_ylabel(y_label)
519
+ else:
520
+ ax.tick_params(labelleft=False); ax.set_ylabel("")
521
+ fig.tight_layout()
522
  return fig
523
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
524
  # =========================
525
+ # Load model + meta
526
  # =========================
527
  def ensure_model() -> Path|None:
528
  for p in [DEFAULT_MODEL, *MODEL_FALLBACKS]:
 
551
  st.error(f"Failed to load model: {e}")
552
  st.stop()
553
 
554
+ # Prefer Ts meta
555
  meta = {}
556
+ meta_candidates = [MODELS_DIR / "ts_meta.json", MODELS_DIR / "meta.json", MODELS_DIR / "ym_meta.json"]
 
 
 
 
 
557
  meta_path = next((p for p in meta_candidates if p.exists()), None)
558
  if meta_path:
559
  try:
560
  meta = json.loads(meta_path.read_text(encoding="utf-8"))
561
  FEATURES = meta.get("features", FEATURES)
562
  TARGET = meta.get("target", TARGET)
 
563
  PRED_COL = meta.get("pred_col", PRED_COL)
564
  except Exception as e:
565
  st.warning(f"Could not parse meta file ({meta_path.name}): {e}")
566
 
567
+ # Optional: version banner (silenced by default)
568
+ if STRICT_VERSION_CHECK and meta.get("versions"):
569
+ import numpy as _np, sklearn as _skl
570
+ mv = meta["versions"]; msg=[]
 
571
  if mv.get("numpy") and mv["numpy"] != _np.__version__:
572
  msg.append(f"NumPy {mv['numpy']} expected, running {_np.__version__}")
573
  if mv.get("scikit_learn") and mv["scikit_learn"] != _skl.__version__:
 
588
  st.session_state.setdefault("show_preview_modal", False)
589
 
590
  # =========================
591
+ # Sidebar branding
592
  # =========================
593
  st.sidebar.markdown(f"""
594
  <div class="centered-container">
 
599
  """, unsafe_allow_html=True
600
  )
601
 
 
 
 
602
  def sticky_header(title, message):
603
  st.markdown(
604
  f"""
 
670
  if sh_train is None or sh_test is None:
671
  st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training/training2 and Test/Testing/testing2 sheets.</div>', unsafe_allow_html=True)
672
  st.stop()
673
+
674
+ # Use meta FEATURES as canonical when normalizing
675
+ tr = _normalize_columns(book[sh_train].copy(), FEATURES, TARGET)
676
+ te = _normalize_columns(book[sh_test].copy(), FEATURES, TARGET)
677
 
678
  if not (ensure_cols(tr, FEATURES+[TARGET]) and ensure_cols(te, FEATURES+[TARGET])):
679
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True)
680
  st.stop()
681
 
682
+ # Predict with exactly the columns the model was trained on
683
  tr[PRED_COL] = model.predict(tr[FEATURES])
684
  te[PRED_COL] = model.predict(te[FEATURES])
685
 
 
701
 
702
  def _dev_block(df, m):
703
  c1,c2,c3 = st.columns(3)
704
+ c1.metric("R", f"{m['R']:.2f}"); c2.metric("RMSE", f"{m['RMSE']:.2f}"); c3.metric("MAE", f"{m['MAE']:.2f}")
 
 
 
705
  st.markdown("""
706
  <div style='text-align: left; font-size: 0.8em; color: #6b7280; margin-top: -16px; margin-bottom: 8px;'>
707
  <strong>R:</strong> Pearson Correlation Coefficient<br>
 
709
  <strong>MAE:</strong> Mean Absolute Error
710
  </div>
711
  """, unsafe_allow_html=True)
 
712
  col_track, col_cross = st.columns([2, 3], gap="large")
713
  with col_track:
714
+ st.plotly_chart(track_plot(df, include_actual=True), use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
 
 
 
 
715
  with col_cross:
716
  st.pyplot(cross_plot_static(df[TARGET], df[PRED_COL]), use_container_width=False)
717
 
718
  if "Train" in st.session_state.results or "Test" in st.session_state.results:
719
  tab1, tab2 = st.tabs(["Training", "Testing"])
720
  if "Train" in st.session_state.results:
721
+ with tab1: _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
 
722
  if "Test" in st.session_state.results:
723
+ with tab2: _dev_block(st.session_state.results["Test"], st.session_state.results["m_test"])
 
 
 
724
  render_export_button(phase_key="dev")
725
 
726
  # =========================
 
745
  if go_btn and up is not None:
746
  book = read_book_bytes(up.getvalue())
747
  name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
748
+ df = _normalize_columns(book[name].copy(), FEATURES, TARGET)
749
  if not ensure_cols(df, FEATURES+[TARGET]):
750
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
751
  df[PRED_COL] = model.predict(df[FEATURES])
 
771
  if "Validate" in st.session_state.results:
772
  m = st.session_state.results["m_val"]
773
  c1,c2,c3 = st.columns(3)
774
+ c1.metric("R", f"{m['R']:.2f}"); c2.metric("RMSE", f"{m['RMSE']:.2f}"); c3.metric("MAE", f"{m['MAE']:.2f}")
 
 
 
775
  st.markdown("""
776
  <div style='text-align: left; font-size: 0.8em; color: #6b7280; margin-top: -16px; margin-bottom: 8px;'>
777
  <strong>R:</strong> Pearson Correlation Coefficient<br>
 
782
 
783
  col_track, col_cross = st.columns([2, 3], gap="large")
784
  with col_track:
785
+ st.plotly_chart(track_plot(st.session_state.results["Validate"], include_actual=True),
786
+ use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
 
 
 
787
  with col_cross:
788
+ st.pyplot(cross_plot_static(st.session_state.results["Validate"][TARGET],
789
+ st.session_state.results["Validate"][PRED_COL]),
790
+ use_container_width=False)
 
 
791
 
 
792
  render_export_button(phase_key="validate")
793
 
794
  sv = st.session_state.results["sv_val"]
 
817
 
818
  if go_btn and up is not None:
819
  book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
820
+ df = _normalize_columns(book[name].copy(), FEATURES, TARGET)
821
  if not ensure_cols(df, FEATURES):
822
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
823
  df[PRED_COL] = model.predict(df[FEATURES])
 
843
  with col_left:
844
  table = pd.DataFrame({
845
  "Metric": ["# points","Pred min","Pred max","Pred mean","Pred std","OOR %"],
846
+ "Value": [sv["n"], round(sv["pred_min"],3), round(sv["pred_max"],3),
847
+ round(sv["pred_mean"],3), round(sv["pred_std"],3), f'{sv["oor"]:.1f}%']
 
 
 
 
848
  })
849
  st.markdown('<div class="st-message-box st-success">Predictions ready ✓</div>', unsafe_allow_html=True)
850
  df_centered_rounded(table, hide_index=True)
851
  st.caption("**★ OOR** = % of rows whose input features fall outside the training min–max range.")
852
  with col_right:
853
+ st.plotly_chart(track_plot(df, include_actual=False),
854
+ use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
 
 
 
855
 
 
856
  render_export_button(phase_key="predict")
857
 
858
  # =========================
859
+ # Preview modal
860
  # =========================
861
  if st.session_state.show_preview_modal:
 
862
  book_to_preview = {}
863
  if st.session_state.app_step == "dev":
864
  book_to_preview = read_book_bytes(st.session_state.dev_file_bytes)
 
873
  tabs = st.tabs(names)
874
  for t, name in zip(tabs, names):
875
  with t:
876
+ df = _normalize_columns(book_to_preview[name], FEATURES, TARGET)
877
  t1, t2 = st.tabs(["Tracks", "Summary"])
878
  with t1:
879
  st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)