UCS2014 commited on
Commit
5f36e3f
·
verified ·
1 Parent(s): 83bd4f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +272 -348
app.py CHANGED
@@ -1,4 +1,5 @@
1
- # app_rhob.py — ST_Log_RHOB (Formation Bulk Density, g/cc) — MAPE version
 
2
 
3
  import io, json, os, base64, math
4
  from pathlib import Path
@@ -7,8 +8,11 @@ import pandas as pd
7
  import numpy as np
8
  import joblib
9
  from datetime import datetime
 
 
 
10
 
11
- # Matplotlib (static plots)
12
  import matplotlib
13
  matplotlib.use("Agg")
14
  import matplotlib.pyplot as plt
@@ -17,31 +21,47 @@ from matplotlib.ticker import FuncFormatter
17
  import plotly.graph_objects as go
18
  from sklearn.metrics import mean_squared_error
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  # =========================
21
- # Constants (RHOB variant)
22
  # =========================
23
- APP_NAME = "ST_Log_RHOB"
24
- TAGLINE = "Real-Time Formation Bulk Density (RHOB) Prediction"
25
 
26
- # Defaults (overridden by rhob_meta.json if present)
27
- FEATURES = [
28
- "WOB (klbf)",
29
- "Torque (kft.lbf)",
30
- "SPP (psi)",
31
- "RPM (1/min)",
32
- "ROP (ft/h)",
33
- "Flow Rate (gpm)",
34
- ]
35
- TARGET = "RHOB" # canonical target name
36
- PRED_COL = "RHOB_Pred"
37
 
38
  MODELS_DIR = Path("models")
39
- DEFAULT_MODEL = MODELS_DIR / "rhob_model.joblib"
 
 
 
 
40
  MODEL_FALLBACKS = [MODELS_DIR / "model.joblib", MODELS_DIR / "model.pkl"]
41
- COLORS = {"pred": "#1f77b4", "actual": "#f2b702", "ref": "#5a5a5a"}
42
 
43
- # Optional env banner from meta
44
- STRICT_VERSION_CHECK = False
45
 
46
  # ---- Plot sizing ----
47
  CROSS_W = 350
@@ -58,11 +78,11 @@ st.set_page_config(page_title=APP_NAME, page_icon="logo.png", layout="wide")
58
  st.markdown("""
59
  <style>
60
  .brand-logo { width: 200px; height: auto; object-fit: contain; }
61
- .centered-container { display: flex; flex-direction: column; align-items: center; text-align: center; }
62
- .st-message-box { background-color: #f0f2f6; color: #333; padding: 10px; border-radius: 10px; border: 1px solid #e6e9ef; }
63
- .st-message-box.st-success { background-color: #d4edda; color: #155724; border-color: #c3e6cb; }
64
- .st-message-box.st-warning { background-color: #fff3cd; color: #856404; border-color: #ffeeba; }
65
- .st-message-box.st-error { background-color: #f8d7da; color: #721c24; border-color: #f5c6cb; }
66
  .main .block-container { overflow: unset !important; }
67
  div[data-testid="stVerticalBlock"] { overflow: unset !important; }
68
  div[data-testid="stExpander"] > details > summary {
@@ -95,22 +115,18 @@ def add_password_gate() -> None:
95
  required = st.secrets.get("APP_PASSWORD", "")
96
  except Exception:
97
  required = os.environ.get("APP_PASSWORD", "")
98
-
99
  if not required:
100
  st.warning("Set APP_PASSWORD in Secrets (or environment) and restart.")
101
  st.stop()
102
-
103
  if st.session_state.get("auth_ok", False):
104
  return
105
-
106
  st.sidebar.markdown(f"""
107
  <div class="centered-container">
108
- <img src="{inline_logo('logo.png')}" style="width: 200px; height: auto; object-fit: contain;">
109
  <div style='font-weight:800;font-size:1.2rem; margin-top: 10px;'>{APP_NAME}</div>
110
  <div style='color:#667085;'>Smart Thinking • Secure Access</div>
111
  </div>
112
- """, unsafe_allow_html=True
113
- )
114
  pwd = st.sidebar.text_input("Access key", type="password", placeholder="••••••••")
115
  if st.sidebar.button("Unlock", type="primary"):
116
  if pwd == required:
@@ -128,6 +144,12 @@ add_password_gate()
128
  def rmse(y_true, y_pred) -> float:
129
  return float(np.sqrt(mean_squared_error(y_true, y_pred)))
130
 
 
 
 
 
 
 
131
  def pearson_r(y_true, y_pred) -> float:
132
  a = np.asarray(y_true, dtype=float)
133
  p = np.asarray(y_pred, dtype=float)
@@ -135,18 +157,6 @@ def pearson_r(y_true, y_pred) -> float:
135
  if np.all(a == a[0]) or np.all(p == p[0]): return float("nan")
136
  return float(np.corrcoef(a, p)[0, 1])
137
 
138
- def mape(y_true, y_pred) -> float:
139
- """
140
- Mean Absolute Percentage Error in PERCENT.
141
- Ignores rows where true==0 or non-finite.
142
- """
143
- a = np.asarray(y_true, dtype=float)
144
- p = np.asarray(y_pred, dtype=float)
145
- mask = np.isfinite(a) & np.isfinite(p) & (a != 0)
146
- if not np.any(mask):
147
- return float("nan")
148
- return float(np.mean(np.abs((p[mask] - a[mask]) / a[mask])) * 100.0)
149
-
150
  @st.cache_resource(show_spinner=False)
151
  def load_model(model_path: str):
152
  return joblib.load(model_path)
@@ -160,62 +170,35 @@ def parse_excel(data_bytes: bytes):
160
  def read_book_bytes(b: bytes):
161
  return parse_excel(b) if b else {}
162
 
163
- # ---- Canonical feature aliasing ------------------------------------------
164
- def _build_alias_map(canonical_features: list[str], target_name: str) -> dict:
165
- """
166
- Map common header variants -> the *canonical* names in canonical_features.
167
- Whatever appears in canonical_features (from rhob_meta.json) wins.
168
- """
169
- def pick(expected_list, variants):
170
- for v in variants:
171
- if v in expected_list:
172
- return v
173
- return variants[0]
174
-
175
- can_WOB = pick(canonical_features, ["WOB (klbf)", "WOB, klbf", "WOB(klbf)", "WOB( klbf)"])
176
- can_TORQUE = pick(canonical_features, ["Torque (kft.lbf)", "Torque(kft.lbf)", "TORQUE(kft.lbf)"])
177
- can_SPP = pick(canonical_features, ["SPP (psi)", "SPP(psi)"])
178
- can_RPM = pick(canonical_features, ["RPM (1/min)", "RPM(1/min)"])
179
- can_ROP = pick(canonical_features, ["ROP (ft/h)", "ROP(ft/h)"])
180
- can_FR = pick(canonical_features, [
181
- "Flow Rate (gpm)","Flow Rate, gpm","Flow Rate,gpm","Flow Rate , gpm","Fow Rate, gpm","Fow Rate, gpm "
182
- ])
183
- can_DEPTH = "Depth (ft)"
184
-
185
- alias = {
186
- # Features
187
- "WOB (klbf)": can_WOB, "WOB, klbf": can_WOB, "WOB(klbf)": can_WOB, "WOB( klbf)": can_WOB,
188
- "Torque (kft.lbf)": can_TORQUE, "Torque(kft.lbf)": can_TORQUE, "TORQUE(kft.lbf)": can_TORQUE,
189
- "SPP (psi)": can_SPP, "SPP(psi)": can_SPP,
190
- "RPM (1/min)": can_RPM, "RPM(1/min)": can_RPM,
191
- "ROP (ft/h)": can_ROP, "ROP(ft/h)": can_ROP,
192
- "Flow Rate (gpm)": can_FR, "Flow Rate, gpm": can_FR, "Flow Rate,gpm": can_FR, "Flow Rate , gpm": can_FR,
193
- "Fow Rate, gpm": can_FR, "Fow Rate, gpm ": can_FR,
194
-
195
- # Depth (plot only)
196
- "Depth (ft)": can_DEPTH, "Depth, ft": can_DEPTH, "Depth(ft)": can_DEPTH, "DEPTH, ft": can_DEPTH,
197
- }
198
-
199
- # ---- Target family (RHOB) ----
200
- target_variants = [
201
- "RHOB", "RHOB (g/cc)", "RHOB (g/cm3)", "RHOB (g/cm³)",
202
- "RHOB_Actual", "RHOB_Actual (g/cc)", "RHOB_Actual (g/cm3)", "RHOB_Actual(g/cc)", "RHOB_Actual(g/cm3)",
203
- "RhoB", "RhoB (g/cc)", "RhoB (g/cm3)",
204
- "RhoB_Actual", "RhoB_Actual (g/cc)", "RhoB_Actual (g/cm3)"
205
- ]
206
- for t in target_variants:
207
- alias[t] = target_name
208
-
209
- return alias
210
-
211
-
212
-
213
- def _normalize_columns(df: pd.DataFrame, canonical_features: list[str], target_name: str) -> pd.DataFrame:
214
  out = df.copy()
215
- out.columns = [str(c).strip().replace(" ,", ",").replace(", ", ", ").replace(" ", " ") for c in out.columns]
216
- alias = _build_alias_map(canonical_features, target_name)
217
- actual = {k: v for k, v in alias.items() if k in out.columns and k != v}
218
- return out.rename(columns=actual)
 
 
 
 
 
 
 
 
 
 
 
219
 
220
  def ensure_cols(df: pd.DataFrame, cols: list[str]) -> bool:
221
  miss = [c for c in cols if c not in df.columns]
@@ -224,70 +207,52 @@ def ensure_cols(df: pd.DataFrame, cols: list[str]) -> bool:
224
  return False
225
  return True
226
 
227
- def find_sheet(book, names):
228
- low2orig = {k.lower(): k for k in book.keys()}
229
- for nm in names:
230
- if nm.lower() in low2orig: return low2orig[nm.lower()]
231
- return None
232
-
233
  def _nice_tick0(xmin: float, step: float = 0.1) -> float:
234
  return step * math.floor(xmin / step) if np.isfinite(xmin) else xmin
235
 
236
- def df_centered_rounded(df: pd.DataFrame, hide_index=True):
 
237
  out = df.copy()
238
- numcols = out.select_dtypes(include=[np.number]).columns
239
- styler = (
240
- out.style
241
- .format({c: "{:.2f}" for c in numcols})
242
- .set_properties(**{"text-align": "center"})
243
- .set_table_styles(TABLE_CENTER_CSS)
244
- )
245
- st.dataframe(styler, use_container_width=True, hide_index=hide_index)
 
 
 
 
 
 
 
 
 
 
246
 
247
  # ---------- Build X exactly as trained ----------
248
  def _make_X(df: pd.DataFrame, features: list[str]) -> pd.DataFrame:
249
- """
250
- Reindex columns to the exact training feature order and coerce to numeric.
251
- Prevents scikit-learn 'feature names should match' errors.
252
- """
253
  X = df.reindex(columns=features, copy=False)
254
  for c in X.columns:
255
  X[c] = pd.to_numeric(X[c], errors="coerce")
256
  return X
257
 
258
- # === Excel export helpers =================================================
259
- def _excel_engine() -> str:
260
- try:
261
- import xlsxwriter # noqa: F401
262
- return "xlsxwriter"
263
- except Exception:
264
- return "openpyxl"
265
-
266
- def _excel_safe_name(name: str) -> str:
267
- bad = '[]:*?/\\'
268
- safe = ''.join('_' if ch in bad else ch for ch in str(name))
269
- return safe[:31]
270
-
271
- def _round_numeric(df: pd.DataFrame, ndigits: int = 3) -> pd.DataFrame:
272
- out = df.copy()
273
- for c in out.columns:
274
- if pd.api.types.is_float_dtype(out[c]) or pd.api.types.is_integer_dtype(out[c]):
275
- out[c] = pd.to_numeric(out[c], errors="coerce").round(ndigits)
276
- return out
277
-
278
  def _summary_table(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
279
  cols = [c for c in cols if c in df.columns]
280
- if not cols:
281
- return pd.DataFrame()
282
  tbl = (df[cols]
283
- .agg(['min','max','mean','std'])
284
- .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
285
- .reset_index(names="Field"))
286
  return _round_numeric(tbl, 3)
287
 
288
  def _train_ranges_df(ranges: dict[str, tuple[float, float]]) -> pd.DataFrame:
289
- if not ranges:
290
- return pd.DataFrame()
291
  df = pd.DataFrame(ranges).T.reset_index()
292
  df.columns = ["Feature", "Min", "Max"]
293
  return _round_numeric(df, 3)
@@ -304,18 +269,13 @@ def _excel_autofit(writer, sheet_name: str, df: pd.DataFrame, min_w: int = 8, ma
304
  ws.set_column(i, i, max(min_w, min(max_len + 2, max_w)))
305
  ws.freeze_panes(1, 0)
306
 
307
- def _add_sheet(sheets: dict, order: list, name: str, df: pd.DataFrame, ndigits: int):
308
- if df is None or df.empty: return
309
- sheets[name] = _round_numeric(df, ndigits)
310
- order.append(name)
311
-
312
  def _available_sections() -> list[str]:
313
  res = st.session_state.get("results", {})
314
  sections = []
315
- if "Train" in res: sections += ["Training","Training_Metrics","Training_Summary"]
316
- if "Test" in res: sections += ["Testing","Testing_Metrics","Testing_Summary"]
317
- if "Validate" in res: sections += ["Validation","Validation_Metrics","Validation_Summary","Validation_OOR"]
318
- if "PredictOnly" in res: sections += ["Prediction","Prediction_Summary"]
319
  if st.session_state.get("train_ranges"): sections += ["Training_Ranges"]
320
  sections += ["Info"]
321
  return sections
@@ -323,43 +283,36 @@ def _available_sections() -> list[str]:
323
  def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: bool = True) -> tuple[bytes|None, str|None, list[str]]:
324
  res = st.session_state.get("results", {})
325
  if not res: return None, None, []
326
-
327
  sheets: dict[str, pd.DataFrame] = {}
328
  order: list[str] = []
329
 
330
- if "Training" in selected and "Train" in res:
331
- _add_sheet(sheets, order, "Training", res["Train"], ndigits)
332
- if "Training_Metrics" in selected and res.get("m_train"):
333
- _add_sheet(sheets, order, "Training_Metrics", pd.DataFrame([res["m_train"]]), ndigits)
 
 
334
  if "Training_Summary" in selected and "Train" in res:
335
  tr_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in res["Train"].columns]
336
- _add_sheet(sheets, order, "Training_Summary", _summary_table(res["Train"], tr_cols), ndigits)
337
 
338
- if "Testing" in selected and "Test" in res:
339
- _add_sheet(sheets, order, "Testing", res["Test"], ndigits)
340
- if "Testing_Metrics" in selected and res.get("m_test"):
341
- _add_sheet(sheets, order, "Testing_Metrics", pd.DataFrame([res["m_test"]]), ndigits)
342
  if "Testing_Summary" in selected and "Test" in res:
343
  te_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in res["Test"].columns]
344
- _add_sheet(sheets, order, "Testing_Summary", _summary_table(res["Test"], te_cols), ndigits)
345
-
346
- if "Validation" in selected and "Validate" in res:
347
- _add_sheet(sheets, order, "Validation", res["Validate"], ndigits)
348
- if "Validation_Metrics" in selected and res.get("m_val"):
349
- _add_sheet(sheets, order, "Validation_Metrics", pd.DataFrame([res["m_val"]]), ndigits)
350
- if "Validation_Summary" in selected and res.get("sv_val"):
351
- _add_sheet(sheets, order, "Validation_Summary", pd.DataFrame([res["sv_val"]]), ndigits)
352
  if "Validation_OOR" in selected and isinstance(res.get("oor_tbl"), pd.DataFrame) and not res["oor_tbl"].empty:
353
- _add_sheet(sheets, order, "Validation_OOR", res["oor_tbl"].reset_index(drop=True), ndigits)
354
 
355
- if "Prediction" in selected and "PredictOnly" in res:
356
- _add_sheet(sheets, order, "Prediction", res["PredictOnly"], ndigits)
357
- if "Prediction_Summary" in selected and res.get("sv_pred"):
358
- _add_sheet(sheets, order, "Prediction_Summary", pd.DataFrame([res["sv_pred"]]), ndigits)
359
 
360
  if "Training_Ranges" in selected and st.session_state.get("train_ranges"):
361
- rr = _train_ranges_df(st.session_state["train_ranges"])
362
- _add_sheet(sheets, order, "Training_Ranges", rr, ndigits)
363
 
364
  if "Info" in selected:
365
  info = pd.DataFrame([
@@ -370,7 +323,7 @@ def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: boo
370
  {"Key": "Features", "Value": ", ".join(FEATURES)},
371
  {"Key": "ExportedAt", "Value": datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
372
  ])
373
- _add_sheet(sheets, order, "Info", info, ndigits)
374
 
375
  if not order: return None, None, []
376
 
@@ -378,86 +331,64 @@ def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: boo
378
  engine = _excel_engine()
379
  with pd.ExcelWriter(bio, engine=engine) as writer:
380
  for name in order:
381
- df = sheets[name]
382
- sheet = _excel_safe_name(name)
383
  df.to_excel(writer, sheet_name=sheet, index=False)
384
- if do_autofit:
385
- _excel_autofit(writer, sheet, df)
386
  bio.seek(0)
387
- fname = f"RHOB_Export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
388
  return bio.getvalue(), fname, order
389
 
390
- # --------- SIMPLE export UI ----------
391
  def render_export_button(phase_key: str) -> None:
392
  res = st.session_state.get("results", {})
393
  if not res: return
394
  st.divider()
395
  st.markdown("### Export to Excel")
396
-
397
  options = _available_sections()
398
  selected_sheets = st.multiselect(
399
  "Sheets to include",
400
  options=options,
401
  default=[],
402
  placeholder="Choose option(s)",
403
- help="Pick the sheets you want to include in the Excel export.",
404
  key=f"sheets_{phase_key}",
405
  )
406
-
407
  if not selected_sheets:
408
- st.caption("Select one or more sheets above to enable the export.")
409
- st.download_button(
410
- label="⬇️ Export Excel",
411
- data=b"",
412
- file_name="RHOB_Export.xlsx",
413
- mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
414
- disabled=True,
415
- key=f"download_{phase_key}",
416
- )
417
  return
418
-
419
  data, fname, names = build_export_workbook(selected=selected_sheets, ndigits=3, do_autofit=True)
420
- if names:
421
- st.caption("Will include: " + ", ".join(names))
422
- st.download_button(
423
- "⬇️ Export Excel",
424
- data=(data or b""),
425
- file_name=(fname or "RHOB_Export.xlsx"),
426
- mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
427
- disabled=(data is None),
428
- key=f"download_{phase_key}",
429
- )
430
 
431
  # =========================
432
  # Cross plot (Matplotlib)
433
  # =========================
434
- def cross_plot_static(actual, pred, xlabel="Actual RHOB (g/cc)", ylabel="Predicted RHOB (g/cc)"):
435
  a = pd.Series(actual, dtype=float)
436
- p = pd.Series(pred, dtype=float)
437
-
438
- lo = float(min(a.min(), p.min()))
439
- hi = float(max(a.max(), p.max()))
440
  pad = 0.03 * (hi - lo if hi > lo else 1.0)
441
  lo2, hi2 = lo - pad, hi + pad
442
-
443
  ticks = np.linspace(lo2, hi2, 5)
444
 
445
  dpi = 110
446
  fig, ax = plt.subplots(figsize=(CROSS_W / dpi, CROSS_H / dpi), dpi=dpi, constrained_layout=False)
447
-
448
  ax.scatter(a, p, s=14, c=COLORS["pred"], alpha=0.9, linewidths=0)
449
  ax.plot([lo2, hi2], [lo2, hi2], linestyle="--", linewidth=1.2, color=COLORS["ref"])
450
 
451
- ax.set_xlim(lo2, hi2)
452
- ax.set_ylim(lo2, hi2)
453
- ax.set_xticks(ticks); ax.set_yticks(ticks)
454
  ax.set_aspect("equal", adjustable="box")
455
 
456
  fmt = FuncFormatter(lambda x, _: f"{x:.2f}")
457
  ax.xaxis.set_major_formatter(fmt); ax.yaxis.set_major_formatter(fmt)
458
 
459
- ax.set_xlabel(xlabel, fontweight="bold", fontsize=10, color="black")
460
- ax.set_ylabel(ylabel, fontweight="bold", fontsize=10, color="black")
461
  ax.tick_params(labelsize=6, colors="black")
462
 
463
  ax.grid(True, linestyle=":", alpha=0.3)
@@ -473,15 +404,16 @@ def cross_plot_static(actual, pred, xlabel="Actual RHOB (g/cc)", ylabel="Predict
473
  def track_plot(df, include_actual=True):
474
  depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
475
  if depth_col is not None:
476
- y = pd.Series(df[depth_col]).astype(float); ylab = depth_col
477
- y_range = [float(y.max()), float(y.min())]
478
  else:
479
  y = pd.Series(np.arange(1, len(df) + 1)); ylab = "Point Index"
480
  y_range = [float(y.max()), float(y.min())]
481
 
482
  x_series = pd.Series(df.get(PRED_COL, pd.Series(dtype=float))).astype(float)
483
- if include_actual and TARGET in df.columns:
484
- x_series = pd.concat([x_series, pd.Series(df[TARGET]).astype(float)], ignore_index=True)
 
485
  x_lo, x_hi = float(x_series.min()), float(x_series.max())
486
  x_pad = 0.03 * (x_hi - x_lo if x_hi > x_lo else 1.0)
487
  xmin, xmax = x_lo - x_pad, x_hi + x_pad
@@ -493,14 +425,15 @@ def track_plot(df, include_actual=True):
493
  x=df[PRED_COL], y=y, mode="lines",
494
  line=dict(color=COLORS["pred"], width=1.8),
495
  name=PRED_COL,
496
- hovertemplate=f"{PRED_COL}: "+"%{x:.3f}<br>"+ylab+": %{y}<extra></extra>"
497
  ))
498
- if include_actual and TARGET in df.columns:
 
499
  fig.add_trace(go.Scatter(
500
- x=df[TARGET], y=y, mode="lines",
501
  line=dict(color=COLORS["actual"], width=2.0, dash="dot"),
502
- name=f"{TARGET} (actual)",
503
- hovertemplate=f"{TARGET}: "+"%{x:.3f}<br>"+ylab+": %{y}<extra></extra>"
504
  ))
505
 
506
  fig.update_layout(
@@ -512,26 +445,15 @@ def track_plot(df, include_actual=True):
512
  bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1),
513
  legend_title_text=""
514
  )
515
-
516
  fig.update_xaxes(
517
- title_text="RHOB (g/cc)",
518
  title_font=dict(size=20, family=BOLD_FONT, color="#000"),
519
  tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
520
- side="top",
521
- range=[xmin, xmax],
522
- ticks="outside",
523
- tickformat=".2f",
524
- tickmode="auto",
525
- tick0=tick0,
526
- showline=True,
527
- linewidth=1.2,
528
- linecolor="#444",
529
- mirror=True,
530
- showgrid=True,
531
- gridcolor="rgba(0,0,0,0.12)",
532
- automargin=True,
533
  )
534
-
535
  fig.update_yaxes(
536
  title_text=ylab,
537
  title_font=dict(size=20, family=BOLD_FONT, color="#000"),
@@ -542,14 +464,8 @@ def track_plot(df, include_actual=True):
542
  )
543
  return fig
544
 
545
- # ---------- Preview (matplotlib) ----------
546
  def preview_tracks(df: pd.DataFrame, cols: list[str]):
547
- """
548
- Quick-look multi-track preview:
549
- - one subplot per selected column
550
- - distinct stable colors per column
551
- - shared & reversed Y-axis (Depth downwards)
552
- """
553
  cols = [c for c in cols if c in df.columns]
554
  n = len(cols)
555
  if n == 0:
@@ -558,38 +474,36 @@ def preview_tracks(df: pd.DataFrame, cols: list[str]):
558
  ax.axis("off")
559
  return fig
560
 
561
- # Depth or fallback to index
562
  depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
563
  if depth_col is not None:
564
  idx = pd.to_numeric(df[depth_col], errors="coerce")
565
  y_label = depth_col
 
566
  else:
567
  idx = pd.Series(np.arange(1, len(df) + 1))
568
  y_label = "Point Index"
 
569
 
570
- y_min, y_max = float(idx.min()), float(idx.max())
571
-
572
- # Stable qualitative palette
573
  cmap = plt.get_cmap("tab20")
574
  col_colors = {col: cmap(i % cmap.N) for i, col in enumerate(cols)}
575
 
576
- fig, axes = plt.subplots(1, n, figsize=(2.3 * n, 7.0), sharey=True, dpi=100)
577
  if n == 1:
578
  axes = [axes]
579
 
580
  for i, (ax, col) in enumerate(zip(axes, cols)):
581
  x = pd.to_numeric(df[col], errors="coerce")
582
- ax.plot(x, idx, '-', lw=1.8, color=col_colors[col])
583
  ax.set_xlabel(col)
584
  ax.xaxis.set_label_position('top')
585
  ax.xaxis.tick_top()
586
- ax.set_ylim(y_max, y_min) # reversed Y (Depth down)
587
  ax.grid(True, linestyle=":", alpha=0.3)
588
 
589
  if i == 0:
590
  ax.set_ylabel(y_label)
591
  else:
592
- ax.tick_params(labelleft=False)
593
  ax.set_ylabel("")
594
 
595
  fig.tight_layout()
@@ -615,9 +529,10 @@ def ensure_model() -> Path|None:
615
  except Exception:
616
  return None
617
 
 
618
  mpath = ensure_model()
619
  if not mpath:
620
- st.error("Model not found. Upload models/rhob_model.joblib (or set MODEL_URL).")
621
  st.stop()
622
  try:
623
  model = load_model(str(mpath))
@@ -625,39 +540,29 @@ except Exception as e:
625
  st.error(f"Failed to load model: {e}")
626
  st.stop()
627
 
628
- # Load meta (prefer RHOB-specific)
629
  meta = {}
630
- meta_candidates = [MODELS_DIR / "rhob_meta.json", MODELS_DIR / "meta.json"]
631
- meta_path = next((p for p in meta_candidates if p.exists()), None)
632
  if meta_path:
633
  try:
634
  meta = json.loads(meta_path.read_text(encoding="utf-8"))
635
- FEATURES = meta.get("features", FEATURES)
636
- TARGET = meta.get("target", TARGET)
637
- PRED_COL = meta.get("pred_col", PRED_COL)
638
- # if training ranges were saved in meta, seed them so OOR works before any dev step
639
- if isinstance(meta.get("train_ranges"), dict) and "train_ranges" not in st.session_state:
640
- st.session_state["train_ranges"] = meta["train_ranges"]
 
641
  except Exception as e:
642
  st.warning(f"Could not parse meta file ({meta_path.name}): {e}")
643
 
644
- # Optional: version banner
645
- if STRICT_VERSION_CHECK and meta.get("versions"):
646
- import numpy as _np, sklearn as _skl
647
- mv = meta["versions"]; msg=[]
648
- if mv.get("numpy") and mv["numpy"] != _np.__version__:
649
- msg.append(f"NumPy {mv['numpy']} expected, running {_np.__version__}")
650
- if mv.get("scikit_learn") and mv["scikit_learn"] != _skl.__version__:
651
- msg.append(f"scikit-learn {mv['scikit_learn']} expected, running {_skl.__version__}")
652
- if msg:
653
- st.warning("Environment mismatch: " + " | ".join(msg))
654
-
655
  # =========================
656
  # Session state
657
  # =========================
658
  st.session_state.setdefault("app_step", "intro")
659
  st.session_state.setdefault("results", {})
660
- st.session_state.setdefault("train_ranges", st.session_state.get("train_ranges", None))
661
  st.session_state.setdefault("dev_file_name","")
662
  st.session_state.setdefault("dev_file_bytes",b"")
663
  st.session_state.setdefault("dev_file_loaded",False)
@@ -669,12 +574,11 @@ st.session_state.setdefault("show_preview_modal", False)
669
  # =========================
670
  st.sidebar.markdown(f"""
671
  <div class="centered-container">
672
- <img src="{inline_logo('logo.png')}" style="width: 200px; height: auto; object-fit: contain;">
673
  <div style='font-weight:800;font-size:1.2rem;'>{APP_NAME}</div>
674
  <div style='color:#667085;'>{TAGLINE}</div>
675
  </div>
676
- """, unsafe_allow_html=True
677
- )
678
 
679
  def sticky_header(title, message):
680
  st.markdown(
@@ -698,12 +602,12 @@ def sticky_header(title, message):
698
  # =========================
699
  if st.session_state.app_step == "intro":
700
  st.header("Welcome!")
701
- st.markdown("This software is developed by *Smart Thinking AI-Solutions Team* to estimate **RHOB (Formation Bulk Density)** from drilling data.")
702
  st.subheader("How It Works")
703
  st.markdown(
704
- "1) **Upload your data to build the case and preview the model performance.** \n"
705
- "2) Click **Run Model** to compute metrics and plots. \n"
706
- "3) **Proceed to Validation** (with actual RHOB) or **Proceed to Prediction** (no RHOB)."
707
  )
708
  if st.button("Start Showcase", type="primary"):
709
  st.session_state.app_step = "dev"; st.rerun()
@@ -713,7 +617,7 @@ if st.session_state.app_step == "intro":
713
  # =========================
714
  if st.session_state.app_step == "dev":
715
  st.sidebar.header("Case Building")
716
- up = st.sidebar.file_uploader("Upload Your Data File", type=["xlsx","xls"])
717
  if up is not None:
718
  st.session_state.dev_file_bytes = up.getvalue()
719
  st.session_state.dev_file_name = up.name
@@ -738,37 +642,49 @@ if st.session_state.app_step == "dev":
738
  elif st.session_state.dev_file_loaded:
739
  sticky_header("Case Building", "📄 **Preview uploaded data** using the sidebar button, then click **Run Model**.")
740
  else:
741
- sticky_header("Case Building", "**Upload your data to build a case, then run the model to review development performance.**")
742
 
743
  if run and st.session_state.dev_file_bytes:
744
  book = read_book_bytes(st.session_state.dev_file_bytes)
 
 
 
 
 
 
745
  sh_train = find_sheet(book, ["Train","Training","training2","train","training"])
746
  sh_test = find_sheet(book, ["Test","Testing","testing2","test","testing"])
747
  if sh_train is None or sh_test is None:
748
- st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training/training2 and Test/Testing/testing2 sheets.</div>', unsafe_allow_html=True)
749
  st.stop()
750
 
751
- tr = _normalize_columns(book[sh_train].copy(), FEATURES, TARGET)
752
- te = _normalize_columns(book[sh_test].copy(), FEATURES, TARGET)
 
 
 
753
 
754
- if not (ensure_cols(tr, FEATURES+[TARGET]) and ensure_cols(te, FEATURES+[TARGET])):
755
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True)
756
  st.stop()
757
 
758
- # Predict with exactly the training feature order
759
- tr[PRED_COL] = model.predict(_make_X(tr, FEATURES))
760
- te[PRED_COL] = model.predict(_make_X(te, FEATURES))
761
-
762
- st.session_state.results["Train"]=tr; st.session_state.results["Test"]=te
763
- st.session_state.results["m_train"]={
764
- "R": pearson_r(tr[TARGET], tr[PRED_COL]),
765
- "RMSE": rmse(tr[TARGET], tr[PRED_COL]),
766
- "MAPE": mape(tr[TARGET], tr[PRED_COL])
 
 
 
767
  }
768
- st.session_state.results["m_test"]={
769
- "R": pearson_r(te[TARGET], te[PRED_COL]),
770
- "RMSE": rmse(te[TARGET], te[PRED_COL]),
771
- "MAPE": mape(te[TARGET], te[PRED_COL])
772
  }
773
 
774
  tr_min = tr[FEATURES].min().to_dict(); tr_max = tr[FEATURES].max().to_dict()
@@ -777,9 +693,9 @@ if st.session_state.app_step == "dev":
777
 
778
  def _dev_block(df, m):
779
  c1,c2,c3 = st.columns(3)
780
- c1.metric("R", f"{m['R']:.3f}")
781
- c2.metric("RMSE", f"{m['RMSE']:.3f}")
782
- c3.metric("MAPE (%)", f"{m['MAPE']:.2f}")
783
  st.markdown("""
784
  <div style='text-align: left; font-size: 0.8em; color: #6b7280; margin-top: -16px; margin-bottom: 8px;'>
785
  <strong>R:</strong> Pearson Correlation Coefficient<br>
@@ -789,20 +705,22 @@ if st.session_state.app_step == "dev":
789
  """, unsafe_allow_html=True)
790
  col_track, col_cross = st.columns([2, 3], gap="large")
791
  with col_track:
792
- st.plotly_chart(track_plot(df, include_actual=True), use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
 
793
  with col_cross:
794
- st.pyplot(cross_plot_static(df[TARGET], df[PRED_COL]), use_container_width=False)
 
795
 
796
  if "Train" in st.session_state.results or "Test" in st.session_state.results:
797
  tab1, tab2 = st.tabs(["Training", "Testing"])
798
- if "Train" in st.session_state.results:
799
  with tab1: _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
800
  if "Test" in st.session_state.results:
801
- with tab2: _dev_block(st.session_state.results["Test"], st.session_state.results["m_test"])
802
  render_export_button(phase_key="dev")
803
 
804
  # =========================
805
- # VALIDATION (with actual RHOB)
806
  # =========================
807
  if st.session_state.app_step == "validate":
808
  st.sidebar.header("Validate the Model")
@@ -818,42 +736,48 @@ if st.session_state.app_step == "validate":
818
  if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
819
  if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
820
 
821
- sticky_header("Validate the Model", "Upload a dataset with the same **features** and **RHOB** to evaluate performance.")
822
 
823
  if go_btn and up is not None:
824
  book = read_book_bytes(up.getvalue())
825
- name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
826
- df = _normalize_columns(book[name].copy(), FEATURES, TARGET)
827
- if not ensure_cols(df, FEATURES+[TARGET]):
 
 
 
 
828
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
829
- df[PRED_COL] = model.predict(_make_X(df, FEATURES))
830
- st.session_state.results["Validate"]=df
831
 
 
 
 
 
 
832
  ranges = st.session_state.train_ranges; oor_pct = 0.0; tbl=None
833
  if ranges:
834
- any_viol = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).any(axis=1)
835
- oor_pct = float(any_viol.mean()*100.0)
836
  if any_viol.any():
837
  tbl = df.loc[any_viol, FEATURES].copy()
838
  for c in FEATURES:
839
  if pd.api.types.is_numeric_dtype(tbl[c]): tbl[c] = tbl[c].round(3)
840
- tbl["Violations"] = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).loc[any_viol].apply(
841
  lambda r:", ".join([c for c,v in r.items() if v]), axis=1
842
  )
843
- st.session_state.results["m_val"]={
844
- "R": pearson_r(df[TARGET], df[PRED_COL]),
845
- "RMSE": rmse(df[TARGET], df[PRED_COL]),
846
- "MAPE": mape(df[TARGET], df[PRED_COL])
 
847
  }
848
- st.session_state.results["sv_val"]={"n":len(df), "pred_min":float(df[PRED_COL].min()), "pred_max":float(df[PRED_COL].max()), "oor":oor_pct}
849
- st.session_state.results["oor_tbl"]=tbl
850
 
851
  if "Validate" in st.session_state.results:
852
  m = st.session_state.results["m_val"]
853
  c1,c2,c3 = st.columns(3)
854
- c1.metric("R", f"{m['R']:.3f}")
855
- c2.metric("RMSE", f"{m['RMSE']:.3f}")
856
- c3.metric("MAPE (%)", f"{m['MAPE']:.2f}")
857
  st.markdown("""
858
  <div style='text-align: left; font-size: 0.8em; color: #6b7280; margin-top: -16px; margin-bottom: 8px;'>
859
  <strong>R:</strong> Pearson Correlation Coefficient<br>
@@ -867,7 +791,8 @@ if st.session_state.app_step == "validate":
867
  st.plotly_chart(track_plot(st.session_state.results["Validate"], include_actual=True),
868
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
869
  with col_cross:
870
- st.pyplot(cross_plot_static(st.session_state.results["Validate"][TARGET],
 
871
  st.session_state.results["Validate"][PRED_COL]),
872
  use_container_width=False)
873
 
@@ -880,10 +805,10 @@ if st.session_state.app_step == "validate":
880
  df_centered_rounded(st.session_state.results["oor_tbl"])
881
 
882
  # =========================
883
- # PREDICTION (no actual RHOB)
884
  # =========================
885
  if st.session_state.app_step == "predict":
886
- st.sidebar.header("Prediction (No Actual RHOB)")
887
  up = st.sidebar.file_uploader("Upload Prediction Excel", type=["xlsx","xls"])
888
  if up is not None:
889
  book = read_book_bytes(up.getvalue())
@@ -895,21 +820,22 @@ if st.session_state.app_step == "predict":
895
  go_btn = st.sidebar.button("Predict", type="primary", use_container_width=True)
896
  if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
897
 
898
- sticky_header("Prediction", "Upload a dataset with the feature columns (no **RHOB**).")
899
 
900
  if go_btn and up is not None:
901
  book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
902
- df = _normalize_columns(book[name].copy(), FEATURES, TARGET)
903
- if not ensure_cols(df, FEATURES):
904
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
905
- df[PRED_COL] = model.predict(_make_X(df, FEATURES))
906
- st.session_state.results["PredictOnly"]=df
 
907
 
908
  ranges = st.session_state.train_ranges; oor_pct = 0.0
909
  if ranges:
910
- any_viol = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).any(axis=1)
911
- oor_pct = float(any_viol.mean()*100.0)
912
- st.session_state.results["sv_pred"]={
913
  "n":len(df),
914
  "pred_min":float(df[PRED_COL].min()),
915
  "pred_max":float(df[PRED_COL].max()),
@@ -920,7 +846,6 @@ if st.session_state.app_step == "predict":
920
 
921
  if "PredictOnly" in st.session_state.results:
922
  df = st.session_state.results["PredictOnly"]; sv = st.session_state.results["sv_pred"]
923
-
924
  col_left, col_right = st.columns([2,3], gap="large")
925
  with col_left:
926
  table = pd.DataFrame({
@@ -934,7 +859,6 @@ if st.session_state.app_step == "predict":
934
  with col_right:
935
  st.plotly_chart(track_plot(df, include_actual=False),
936
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
937
-
938
  render_export_button(phase_key="predict")
939
 
940
  # =========================
@@ -955,7 +879,7 @@ if st.session_state.show_preview_modal:
955
  tabs = st.tabs(names)
956
  for t, name in zip(tabs, names):
957
  with t:
958
- df = _normalize_columns(book_to_preview[name], FEATURES, TARGET)
959
  t1, t2 = st.tabs(["Tracks", "Summary"])
960
  with t1:
961
  st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)
 
1
+ # app.py — ST_Min_Horizontal_Stress (σ_hmin)
2
+ # Mirrors RHOB/TS apps: same flow, preview tracks, export picker, password gate
3
 
4
  import io, json, os, base64, math
5
  from pathlib import Path
 
8
  import numpy as np
9
  import joblib
10
  from datetime import datetime
11
+ import os
12
+ from functools import lru_cache
13
+ from huggingface_hub import hf_hub_download
14
 
15
+ # Matplotlib for static previews & cross-plot
16
  import matplotlib
17
  matplotlib.use("Agg")
18
  import matplotlib.pyplot as plt
 
21
  import plotly.graph_objects as go
22
  from sklearn.metrics import mean_squared_error
23
 
24
# ---- Optional remote model source: Hugging Face Hub ----
REPO_ID = "Smart-Thinking/minstress-model"  # <-- update this
FILENAME = "minstress_model.joblib"

@lru_cache
def load_model_from_hub():
    """Download the σ_hmin model artifact from the HF Hub and unpickle it.

    Renamed from ``load_model``: the original name was shadowed by the
    local-path ``@st.cache_resource def load_model(model_path)`` defined
    further down this file, which silently replaced this loader.  Reads
    ``HF_TOKEN`` from the environment for private repos.
    """
    # if private, we'll add a token in Step 5 and pass it here
    token = os.environ.get("HF_TOKEN", None)
    path = hf_hub_download(
        repo_id=REPO_ID,
        filename=FILENAME,
        repo_type="model",
        token=token,
    )
    return joblib.load(path)

# get your model once and reuse it
# NOTE(review): `model` is reassigned later from the local models/ path;
# this Hub copy only serves as the initial model object — confirm intent.
model = load_model_from_hub()
41
  # =========================
42
+ # Constants / Defaults
43
  # =========================
44
+ APP_NAME = "ST_Min_Horizontal_Stress"
45
+ TAGLINE = "Real-Time Minimum Horizontal Stress Prediction"
46
 
47
+ # Defaults — can be overridden by meta JSON
48
+ FEATURES = ["WOB (klbf)", "Torque (kft.lbf)", "SPP (psi)", "RPM (1/min)", "ROP (ft/h)", "Flow Rate (gpm)"]
49
+ TARGET = "σhmin (MPa)" # actual column in sheets
50
+ PRED_COL = "σhmin_Pred" # prediction column name to create
51
+ ACTUAL_COL = None # sometimes target is also the actual; you can set a dedicated actual column
52
+ TRANSFORM = "none" # "none" | "log10" | "ln"
53
+ UNITS = "MPa" # only for labels
 
 
 
 
54
 
55
  MODELS_DIR = Path("models")
56
+ DEFAULT_MODEL = MODELS_DIR / "minstress_model.joblib"
57
+ META_CANDIDATES = [
58
+ MODELS_DIR / "minstress_meta.json",
59
+ MODELS_DIR / "meta.json"
60
+ ]
61
  MODEL_FALLBACKS = [MODELS_DIR / "model.joblib", MODELS_DIR / "model.pkl"]
 
62
 
63
+ # Colors
64
+ COLORS = {"pred": "#1f77b4", "actual": "#f2b702", "ref": "#5a5a5a"}
65
 
66
  # ---- Plot sizing ----
67
  CROSS_W = 350
 
78
  st.markdown("""
79
  <style>
80
  .brand-logo { width: 200px; height: auto; object-fit: contain; }
81
+ .centered-container { display:flex; flex-direction:column; align-items:center; text-align:center; }
82
+ .st-message-box { background:#f0f2f6; color:#333; padding:10px; border-radius:10px; border:1px solid #e6e9ef; }
83
+ .st-message-box.st-success { background:#d4edda; color:#155724; border-color:#c3e6cb; }
84
+ .st-message-box.st-warning { background:#fff3cd; color:#856404; border-color:#ffeeba; }
85
+ .st-message-box.st-error { background:#f8d7da; color:#721c24; border-color:#f5c6cb; }
86
  .main .block-container { overflow: unset !important; }
87
  div[data-testid="stVerticalBlock"] { overflow: unset !important; }
88
  div[data-testid="stExpander"] > details > summary {
 
115
  required = st.secrets.get("APP_PASSWORD", "")
116
  except Exception:
117
  required = os.environ.get("APP_PASSWORD", "")
 
118
  if not required:
119
  st.warning("Set APP_PASSWORD in Secrets (or environment) and restart.")
120
  st.stop()
 
121
  if st.session_state.get("auth_ok", False):
122
  return
 
123
  st.sidebar.markdown(f"""
124
  <div class="centered-container">
125
+ <img src="{inline_logo('logo.png')}" class="brand-logo">
126
  <div style='font-weight:800;font-size:1.2rem; margin-top: 10px;'>{APP_NAME}</div>
127
  <div style='color:#667085;'>Smart Thinking • Secure Access</div>
128
  </div>
129
+ """, unsafe_allow_html=True)
 
130
  pwd = st.sidebar.text_input("Access key", type="password", placeholder="••••••••")
131
  if st.sidebar.button("Unlock", type="primary"):
132
  if pwd == required:
 
144
def rmse(y_true, y_pred) -> float:
    """Root-mean-square error between actual and predicted values."""
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    return float(np.sqrt(np.mean(np.square(actual - predicted))))
146
 
147
def mape(y_true, y_pred, eps: float = 1e-9) -> float:
    """Mean absolute percentage error, in percent.

    The denominator is clamped to *eps* so rows with an actual value of
    zero do not produce a division-by-zero.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    denom = np.maximum(np.abs(actual), eps)
    relative_error = np.abs(actual - predicted) / denom
    return float(np.mean(relative_error) * 100.0)
152
+
153
  def pearson_r(y_true, y_pred) -> float:
154
  a = np.asarray(y_true, dtype=float)
155
  p = np.asarray(y_pred, dtype=float)
 
157
  if np.all(a == a[0]) or np.all(p == p[0]): return float("nan")
158
  return float(np.corrcoef(a, p)[0, 1])
159
 
 
 
 
 
 
 
 
 
 
 
 
 
160
@st.cache_resource(show_spinner=False)
def load_model(model_path: str):
    """Load and cache (per Streamlit resource cache) a serialized model from a local path.

    NOTE(review): this redefines the Hub-download ``load_model`` declared
    near the top of the file, shadowing it — consider renaming one of them.
    """
    return joblib.load(model_path)
 
170
  def read_book_bytes(b: bytes):
171
  return parse_excel(b) if b else {}
172
 
173
+ def _excel_engine() -> str:
174
+ try:
175
+ import xlsxwriter # noqa: F401
176
+ return "xlsxwriter"
177
+ except Exception:
178
+ return "openpyxl"
179
+
180
+ def _excel_safe_name(name: str) -> str:
181
+ bad = '[]:*?/\\'
182
+ safe = ''.join('_' if ch in bad else ch for ch in str(name))
183
+ return safe[:31]
184
+
185
+ def _round_numeric(df: pd.DataFrame, ndigits: int = 3) -> pd.DataFrame:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  out = df.copy()
187
+ for c in out.columns:
188
+ if pd.api.types.is_float_dtype(out[c]) or pd.api.types.is_integer_dtype(out[c]):
189
+ out[c] = pd.to_numeric(out[c], errors="coerce").round(ndigits)
190
+ return out
191
+
192
def df_centered_rounded(df: pd.DataFrame, hide_index=True):
    """Render *df* in Streamlit with numeric cells formatted to 3 decimals and all cells centered."""
    out = df.copy()
    # only numeric columns receive the fixed-precision formatter
    numcols = out.select_dtypes(include=[np.number]).columns
    styler = (
        out.style
        .format({c: "{:.3f}" for c in numcols})
        .set_properties(**{"text-align": "center"})
        .set_table_styles(TABLE_CENTER_CSS)  # NOTE(review): defined elsewhere in this file, not visible here
    )
    st.dataframe(styler, use_container_width=True, hide_index=hide_index)
202
 
203
  def ensure_cols(df: pd.DataFrame, cols: list[str]) -> bool:
204
  miss = [c for c in cols if c not in df.columns]
 
207
  return False
208
  return True
209
 
 
 
 
 
 
 
210
  def _nice_tick0(xmin: float, step: float = 0.1) -> float:
211
  return step * math.floor(xmin / step) if np.isfinite(xmin) else xmin
212
 
213
+ # ---------- Aliasing / Normalization ----------
214
# ---------- Aliasing / Normalization ----------
def _normalize_columns(df: pd.DataFrame, canonical_features: list[str], target_name: str, aliases: dict|None) -> pd.DataFrame:
    """Clean column headers and rename known aliases to their canonical names.

    *canonical_features* and *target_name* are not referenced in this body;
    they appear to be kept only for call-site signature compatibility.
    NOTE(review): the chained ``.replace(", ", ", ")`` and ``.replace(" ", " ")``
    calls are no-ops as written — they look like double-space cleanups that
    were collapsed during extraction; confirm the intended literals.
    """
    out = df.copy()
    out.columns = [str(c).strip().replace(" ,", ",").replace(", ", ", ").replace(" ", " ") for c in out.columns]
    if not aliases:
        return out
    # build mapping for any alias -> canonical
    mapping = {}
    for can, alist in aliases.items():
        for a in alist:
            if a in out.columns and can != a:
                mapping[a] = can
    # also allow direct canonical name
    return out.rename(columns=mapping)
227
+
228
+ # ---------- Transform helpers ----------
229
+ def _inv_transform(x: np.ndarray, transform: str) -> np.ndarray:
230
+ t = (transform or "none").lower()
231
+ if t in ("log10", "log_10", "log10()"): return np.power(10.0, x)
232
+ if t in ("ln", "log", "loge", "log_e", "natural"): return np.exp(x)
233
+ return x
234
 
235
  # ---------- Build X exactly as trained ----------
236
  def _make_X(df: pd.DataFrame, features: list[str]) -> pd.DataFrame:
 
 
 
 
237
  X = df.reindex(columns=features, copy=False)
238
  for c in X.columns:
239
  X[c] = pd.to_numeric(X[c], errors="coerce")
240
  return X
241
 
242
+ # =========================
243
+ # Export helpers
244
+ # =========================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
def _summary_table(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
    """Build a Min/Max/Mean/Std table (3 dp) for the requested columns present in *df*.

    Columns absent from *df* are silently dropped; an empty DataFrame is
    returned when none remain.
    """
    present = [c for c in cols if c in df.columns]
    if not present:
        return pd.DataFrame()
    stats = df[present].agg(['min', 'max', 'mean', 'std']).T
    stats = stats.rename(columns={"min": "Min", "max": "Max", "mean": "Mean", "std": "Std"})
    tbl = stats.reset_index(names="Field")
    return _round_numeric(tbl, 3)
253
 
254
def _train_ranges_df(ranges: dict[str, tuple[float, float]]) -> pd.DataFrame:
    """Tabulate per-feature training (min, max) ranges as Feature/Min/Max rows, rounded to 3 dp."""
    if not ranges:
        return pd.DataFrame()
    table = pd.DataFrame(ranges).T.reset_index()
    table.columns = ["Feature", "Min", "Max"]
    return _round_numeric(table, 3)
 
269
  ws.set_column(i, i, max(min_w, min(max_len + 2, max_w)))
270
  ws.freeze_panes(1, 0)
271
 
 
 
 
 
 
272
def _available_sections() -> list[str]:
    """List the exportable sheet names, driven by which results exist in session state."""
    res = st.session_state.get("results", {})
    sections = []
    if "Train" in res: sections += ["Training","Training_Metrics","Training_Summary"]
    if "Test" in res: sections += ["Testing","Testing_Metrics","Testing_Summary"]
    if "Validate" in res: sections += ["Validation","Validation_Metrics","Validation_Summary","Validation_OOR"]
    if "PredictOnly" in res: sections += ["Prediction","Prediction_Summary"]
    if st.session_state.get("train_ranges"): sections += ["Training_Ranges"]
    # the Info sheet is always offered
    sections += ["Info"]
    return sections
 
283
  def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: bool = True) -> tuple[bytes|None, str|None, list[str]]:
284
  res = st.session_state.get("results", {})
285
  if not res: return None, None, []
 
286
  sheets: dict[str, pd.DataFrame] = {}
287
  order: list[str] = []
288
 
289
+ def _add(name: str, df: pd.DataFrame):
290
+ if df is None or (isinstance(df, pd.DataFrame) and df.empty): return
291
+ sheets[name] = _round_numeric(df, ndigits); order.append(name)
292
+
293
+ if "Training" in selected and "Train" in res: _add("Training", res["Train"])
294
+ if "Training_Metrics" in selected and res.get("m_train"): _add("Training_Metrics", pd.DataFrame([res["m_train"]]))
295
  if "Training_Summary" in selected and "Train" in res:
296
  tr_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in res["Train"].columns]
297
+ _add("Training_Summary", _summary_table(res["Train"], tr_cols))
298
 
299
+ if "Testing" in selected and "Test" in res: _add("Testing", res["Test"])
300
+ if "Testing_Metrics" in selected and res.get("m_test"): _add("Testing_Metrics", pd.DataFrame([res["m_test"]]))
 
 
301
  if "Testing_Summary" in selected and "Test" in res:
302
  te_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in res["Test"].columns]
303
+ _add("Testing_Summary", _summary_table(res["Test"], te_cols))
304
+
305
+ if "Validation" in selected and "Validate" in res: _add("Validation", res["Validate"])
306
+ if "Validation_Metrics" in selected and res.get("m_val"): _add("Validation_Metrics", pd.DataFrame([res["m_val"]]))
307
+ if "Validation_Summary" in selected and res.get("sv_val"): _add("Validation_Summary", pd.DataFrame([res["sv_val"]]))
 
 
 
308
  if "Validation_OOR" in selected and isinstance(res.get("oor_tbl"), pd.DataFrame) and not res["oor_tbl"].empty:
309
+ _add("Validation_OOR", res["oor_tbl"].reset_index(drop=True))
310
 
311
+ if "Prediction" in selected and "PredictOnly" in res: _add("Prediction", res["PredictOnly"])
312
+ if "Prediction_Summary" in selected and res.get("sv_pred"): _add("Prediction_Summary", pd.DataFrame([res["sv_pred"]]))
 
 
313
 
314
  if "Training_Ranges" in selected and st.session_state.get("train_ranges"):
315
+ _add("Training_Ranges", _train_ranges_df(st.session_state["train_ranges"]))
 
316
 
317
  if "Info" in selected:
318
  info = pd.DataFrame([
 
323
  {"Key": "Features", "Value": ", ".join(FEATURES)},
324
  {"Key": "ExportedAt", "Value": datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
325
  ])
326
+ _add("Info", info)
327
 
328
  if not order: return None, None, []
329
 
 
331
  engine = _excel_engine()
332
  with pd.ExcelWriter(bio, engine=engine) as writer:
333
  for name in order:
334
+ df = sheets[name]; sheet = _excel_safe_name(name)
 
335
  df.to_excel(writer, sheet_name=sheet, index=False)
336
+ if do_autofit: _excel_autofit(writer, sheet, df)
 
337
  bio.seek(0)
338
+ fname = f"MinStress_Export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
339
  return bio.getvalue(), fname, order
340
 
 
341
def render_export_button(phase_key: str) -> None:
    """Render the 'Export to Excel' sheet picker plus download button.

    *phase_key* namespaces the widget keys so this control can appear on
    multiple app steps without Streamlit key collisions.  No-op when there
    are no results in session state yet.
    """
    res = st.session_state.get("results", {})
    if not res: return
    st.divider()
    st.markdown("### Export to Excel")
    options = _available_sections()
    selected_sheets = st.multiselect(
        "Sheets to include",
        options=options,
        default=[],
        placeholder="Choose option(s)",
        help="Pick the sheets you want in the Excel export.",
        key=f"sheets_{phase_key}",
    )
    if not selected_sheets:
        # keep a disabled button visible so the export control is discoverable
        st.caption("Select one or more sheets above to enable export.")
        st.download_button("⬇️ Export Excel", data=b"", file_name="MinStress_Export.xlsx",
                           mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                           disabled=True, key=f"download_{phase_key}")
        return
    data, fname, names = build_export_workbook(selected=selected_sheets, ndigits=3, do_autofit=True)
    if names: st.caption("Will include: " + ", ".join(names))
    st.download_button("⬇️ Export Excel", data=(data or b""), file_name=(fname or "MinStress_Export.xlsx"),
                       mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                       disabled=(data is None), key=f"download_{phase_key}")
 
 
 
 
 
 
366
 
367
  # =========================
368
  # Cross plot (Matplotlib)
369
  # =========================
370
+ def cross_plot_static(actual, pred):
371
  a = pd.Series(actual, dtype=float)
372
+ p = pd.Series(pred, dtype=float)
373
+ lo = float(min(a.min(), p.min())); hi = float(max(a.max(), p.max()))
 
 
374
  pad = 0.03 * (hi - lo if hi > lo else 1.0)
375
  lo2, hi2 = lo - pad, hi + pad
 
376
  ticks = np.linspace(lo2, hi2, 5)
377
 
378
  dpi = 110
379
  fig, ax = plt.subplots(figsize=(CROSS_W / dpi, CROSS_H / dpi), dpi=dpi, constrained_layout=False)
 
380
  ax.scatter(a, p, s=14, c=COLORS["pred"], alpha=0.9, linewidths=0)
381
  ax.plot([lo2, hi2], [lo2, hi2], linestyle="--", linewidth=1.2, color=COLORS["ref"])
382
 
383
+ ax.set_xlim(lo2, hi2); ax.set_ylim(lo2, hi2)
384
+ ax.set_xticks(ticks); ax.set_yticks(ticks)
 
385
  ax.set_aspect("equal", adjustable="box")
386
 
387
  fmt = FuncFormatter(lambda x, _: f"{x:.2f}")
388
  ax.xaxis.set_major_formatter(fmt); ax.yaxis.set_major_formatter(fmt)
389
 
390
+ ax.set_xlabel(f"Actual Min Stress ({UNITS})", fontweight="bold", fontsize=10, color="black")
391
+ ax.set_ylabel(f"Predicted Min Stress ({UNITS})", fontweight="bold", fontsize=10, color="black")
392
  ax.tick_params(labelsize=6, colors="black")
393
 
394
  ax.grid(True, linestyle=":", alpha=0.3)
 
404
  def track_plot(df, include_actual=True):
405
  depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
406
  if depth_col is not None:
407
+ y = pd.to_numeric(df[depth_col], errors="coerce"); ylab = depth_col
408
+ y_range = [float(np.nanmax(y)), float(np.nanmin(y))] # reversed
409
  else:
410
  y = pd.Series(np.arange(1, len(df) + 1)); ylab = "Point Index"
411
  y_range = [float(y.max()), float(y.min())]
412
 
413
  x_series = pd.Series(df.get(PRED_COL, pd.Series(dtype=float))).astype(float)
414
+ if include_actual and (ACTUAL_COL or TARGET) in df.columns:
415
+ act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df.columns) else TARGET
416
+ x_series = pd.concat([x_series, pd.Series(df[act_col]).astype(float)], ignore_index=True)
417
  x_lo, x_hi = float(x_series.min()), float(x_series.max())
418
  x_pad = 0.03 * (x_hi - x_lo if x_hi > x_lo else 1.0)
419
  xmin, xmax = x_lo - x_pad, x_hi + x_pad
 
425
  x=df[PRED_COL], y=y, mode="lines",
426
  line=dict(color=COLORS["pred"], width=1.8),
427
  name=PRED_COL,
428
+ hovertemplate=f"{PRED_COL}: "+"%{x:.2f}<br>"+ylab+": %{y}<extra></extra>"
429
  ))
430
+ if include_actual and ((ACTUAL_COL and ACTUAL_COL in df.columns) or TARGET in df.columns):
431
+ act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df.columns) else TARGET
432
  fig.add_trace(go.Scatter(
433
+ x=df[act_col], y=y, mode="lines",
434
  line=dict(color=COLORS["actual"], width=2.0, dash="dot"),
435
+ name=f"{act_col} (actual)",
436
+ hovertemplate=f"{act_col}: "+"%{x:.2f}<br>"+ylab+": %{y}<extra></extra>"
437
  ))
438
 
439
  fig.update_layout(
 
445
  bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1),
446
  legend_title_text=""
447
  )
 
448
  fig.update_xaxes(
449
+ title_text=f"Min Stress ({UNITS})",
450
  title_font=dict(size=20, family=BOLD_FONT, color="#000"),
451
  tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
452
+ side="top", range=[xmin, xmax],
453
+ ticks="outside", tickformat=",.2f", tickmode="auto", tick0=tick0,
454
+ showline=True, linewidth=1.2, linecolor="#444", mirror=True,
455
+ showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
 
 
 
 
 
 
 
 
 
456
  )
 
457
  fig.update_yaxes(
458
  title_text=ylab,
459
  title_font=dict(size=20, family=BOLD_FONT, color="#000"),
 
464
  )
465
  return fig
466
 
467
+ # ---------- Preview (matplotlib) — colorful tracks, y ticks only on left ----------
468
  def preview_tracks(df: pd.DataFrame, cols: list[str]):
 
 
 
 
 
 
469
  cols = [c for c in cols if c in df.columns]
470
  n = len(cols)
471
  if n == 0:
 
474
  ax.axis("off")
475
  return fig
476
 
 
477
  depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
478
  if depth_col is not None:
479
  idx = pd.to_numeric(df[depth_col], errors="coerce")
480
  y_label = depth_col
481
+ y_min, y_max = float(np.nanmin(idx)), float(np.nanmax(idx))
482
  else:
483
  idx = pd.Series(np.arange(1, len(df) + 1))
484
  y_label = "Point Index"
485
+ y_min, y_max = float(idx.min()), float(idx.max())
486
 
 
 
 
487
  cmap = plt.get_cmap("tab20")
488
  col_colors = {col: cmap(i % cmap.N) for i, col in enumerate(cols)}
489
 
490
+ fig, axes = plt.subplots(1, n, figsize=(2.4 * n, 7.0), sharey=True, dpi=100)
491
  if n == 1:
492
  axes = [axes]
493
 
494
  for i, (ax, col) in enumerate(zip(axes, cols)):
495
  x = pd.to_numeric(df[col], errors="coerce")
496
+ ax.plot(x, idx, '-', lw=1.6, color=col_colors[col])
497
  ax.set_xlabel(col)
498
  ax.xaxis.set_label_position('top')
499
  ax.xaxis.tick_top()
500
+ ax.set_ylim(y_max, y_min) # reversed depth down
501
  ax.grid(True, linestyle=":", alpha=0.3)
502
 
503
  if i == 0:
504
  ax.set_ylabel(y_label)
505
  else:
506
+ ax.tick_params(labelleft=False) # hide ticks on all but left-most
507
  ax.set_ylabel("")
508
 
509
  fig.tight_layout()
 
529
  except Exception:
530
  return None
531
 
532
+ # load model
533
  mpath = ensure_model()
534
  if not mpath:
535
+ st.error("Model not found. Upload models/minstress_model.joblib (or set MODEL_URL).")
536
  st.stop()
537
  try:
538
  model = load_model(str(mpath))
 
540
  st.error(f"Failed to load model: {e}")
541
  st.stop()
542
 
543
+ # meta: features / target / pred_col / aliases / transform / units / actual_col
544
  meta = {}
545
+ meta_path = next((p for p in META_CANDIDATES if p.exists()), None)
546
+ ALIASES = None
547
  if meta_path:
548
  try:
549
  meta = json.loads(meta_path.read_text(encoding="utf-8"))
550
+ FEATURES = meta.get("features", FEATURES)
551
+ TARGET = meta.get("target", TARGET)
552
+ PRED_COL = meta.get("pred_col", PRED_COL)
553
+ ACTUAL_COL = meta.get("actual_col", ACTUAL_COL)
554
+ TRANSFORM = meta.get("transform", TRANSFORM)
555
+ UNITS = meta.get("units", UNITS)
556
+ ALIASES = meta.get("feature_aliases") # {"RPM (1/min)": ["RPM", "RPM(1/min)"], ...}
557
  except Exception as e:
558
  st.warning(f"Could not parse meta file ({meta_path.name}): {e}")
559
 
 
 
 
 
 
 
 
 
 
 
 
560
  # =========================
561
  # Session state
562
  # =========================
563
  st.session_state.setdefault("app_step", "intro")
564
  st.session_state.setdefault("results", {})
565
+ st.session_state.setdefault("train_ranges", None)
566
  st.session_state.setdefault("dev_file_name","")
567
  st.session_state.setdefault("dev_file_bytes",b"")
568
  st.session_state.setdefault("dev_file_loaded",False)
 
574
  # =========================
575
  st.sidebar.markdown(f"""
576
  <div class="centered-container">
577
+ <img src="{inline_logo('logo.png')}" class="brand-logo">
578
  <div style='font-weight:800;font-size:1.2rem;'>{APP_NAME}</div>
579
  <div style='color:#667085;'>{TAGLINE}</div>
580
  </div>
581
+ """, unsafe_allow_html=True)
 
582
 
583
  def sticky_header(title, message):
584
  st.markdown(
 
602
  # =========================
603
  if st.session_state.app_step == "intro":
604
  st.header("Welcome!")
605
+ st.markdown(f"This software is developed by *Smart Thinking AI-Solutions Team* to estimate **Minimum Horizontal Stress** ({UNITS}) from drilling/offset data.")
606
  st.subheader("How It Works")
607
  st.markdown(
608
+ "1) **Upload your data to build the case and preview the model performance.** \n"
609
+ "2) Click **Run Model** to compute metrics and plots. \n"
610
+ "3) **Proceed to Validation** (with actual stress) or **Proceed to Prediction** (no actual)."
611
  )
612
  if st.button("Start Showcase", type="primary"):
613
  st.session_state.app_step = "dev"; st.rerun()
 
617
  # =========================
618
  if st.session_state.app_step == "dev":
619
  st.sidebar.header("Case Building")
620
+ up = st.sidebar.file_uploader("Upload Your Data File (Train/Test sheets)", type=["xlsx","xls"])
621
  if up is not None:
622
  st.session_state.dev_file_bytes = up.getvalue()
623
  st.session_state.dev_file_name = up.name
 
642
  elif st.session_state.dev_file_loaded:
643
  sticky_header("Case Building", "📄 **Preview uploaded data** using the sidebar button, then click **Run Model**.")
644
  else:
645
+ sticky_header("Case Building", "**Upload your data to build a case, then run the model to review performance.**")
646
 
647
  if run and st.session_state.dev_file_bytes:
648
  book = read_book_bytes(st.session_state.dev_file_bytes)
649
+ # Expect Train/Test sheets already prepared (no random split)
650
+ def find_sheet(book, names):
651
+ low2orig = {k.lower(): k for k in book.keys()}
652
+ for nm in names:
653
+ if nm.lower() in low2orig: return low2orig[nm.lower()]
654
+ return None
655
  sh_train = find_sheet(book, ["Train","Training","training2","train","training"])
656
  sh_test = find_sheet(book, ["Test","Testing","testing2","test","testing"])
657
  if sh_train is None or sh_test is None:
658
+ st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
659
  st.stop()
660
 
661
+ tr0 = _normalize_columns(book[sh_train].copy(), FEATURES, TARGET, ALIASES)
662
+ te0 = _normalize_columns(book[sh_test].copy(), FEATURES, TARGET, ALIASES)
663
+
664
+ # Determine actual column: ACTUAL_COL (preferred) else TARGET
665
+ actual_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in tr0.columns) else TARGET
666
 
667
+ if not (ensure_cols(tr0, FEATURES+[actual_col]) and ensure_cols(te0, FEATURES+[actual_col])):
668
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True)
669
  st.stop()
670
 
671
+ # Predict using the trained feature order
672
+ tr = tr0.copy(); te = te0.copy()
673
+ tr[PRED_COL] = _inv_transform(model.predict(_make_X(tr0, FEATURES)), TRANSFORM)
674
+ te[PRED_COL] = _inv_transform(model.predict(_make_X(te0, FEATURES)), TRANSFORM)
675
+
676
+ # Metrics
677
+ st.session_state.results["Train"] = tr
678
+ st.session_state.results["Test"] = te
679
+ st.session_state.results["m_train"] = {
680
+ "R": pearson_r(tr[actual_col], tr[PRED_COL]),
681
+ "RMSE": rmse(tr[actual_col], tr[PRED_COL]),
682
+ "MAPE%": mape(tr[actual_col], tr[PRED_COL]),
683
  }
684
+ st.session_state.results["m_test"] = {
685
+ "R": pearson_r(te[actual_col], te[PRED_COL]),
686
+ "RMSE": rmse(te[actual_col], te[PRED_COL]),
687
+ "MAPE%": mape(te[actual_col], te[PRED_COL]),
688
  }
689
 
690
  tr_min = tr[FEATURES].min().to_dict(); tr_max = tr[FEATURES].max().to_dict()
 
693
 
694
  def _dev_block(df, m):
695
  c1,c2,c3 = st.columns(3)
696
+ c1.metric("R", f"{m['R']:.3f}")
697
+ c2.metric("RMSE", f"{m['RMSE']:.2f}")
698
+ c3.metric("MAPE%", f"{m['MAPE%']:.2f}")
699
  st.markdown("""
700
  <div style='text-align: left; font-size: 0.8em; color: #6b7280; margin-top: -16px; margin-bottom: 8px;'>
701
  <strong>R:</strong> Pearson Correlation Coefficient<br>
 
705
  """, unsafe_allow_html=True)
706
  col_track, col_cross = st.columns([2, 3], gap="large")
707
  with col_track:
708
+ st.plotly_chart(track_plot(df, include_actual=True),
709
+ use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
710
  with col_cross:
711
+ act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df.columns) else TARGET
712
+ st.pyplot(cross_plot_static(df[act_col], df[PRED_COL]), use_container_width=False)
713
 
714
  if "Train" in st.session_state.results or "Test" in st.session_state.results:
715
  tab1, tab2 = st.tabs(["Training", "Testing"])
716
+ if "Train" in st.session_state.results:
717
  with tab1: _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
718
  if "Test" in st.session_state.results:
719
+ with tab2: _dev_block(st.session_state.results["Test"], st.session_state.results["m_test"])
720
  render_export_button(phase_key="dev")
721
 
722
  # =========================
723
+ # VALIDATION (with actual)
724
  # =========================
725
  if st.session_state.app_step == "validate":
726
  st.sidebar.header("Validate the Model")
 
736
  if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
737
  if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
738
 
739
+ sticky_header("Validate the Model", "Upload a dataset with the same **features** and **actual stress** to evaluate performance.")
740
 
741
  if go_btn and up is not None:
742
  book = read_book_bytes(up.getvalue())
743
+ # choose sheet named Validation/Validate/Val or first
744
+ names = list(book.keys())
745
+ name = next((s for s in names if s.lower() in ("validation","validate","validation2","val","val2")), names[0])
746
+ df0 = _normalize_columns(book[name].copy(), FEATURES, TARGET, ALIASES)
747
+
748
+ act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df0.columns) else TARGET
749
+ if not ensure_cols(df0, FEATURES+[act_col]):
750
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
 
 
751
 
752
+ df = df0.copy()
753
+ df[PRED_COL] = _inv_transform(model.predict(_make_X(df0, FEATURES)), TRANSFORM)
754
+ st.session_state.results["Validate"] = df
755
+
756
+ # OOR check vs training ranges
757
  ranges = st.session_state.train_ranges; oor_pct = 0.0; tbl=None
758
  if ranges:
759
+ any_viol = pd.DataFrame({f:(df[f] < ranges[f][0]) | (df[f] > ranges[f][1]) for f in FEATURES}).any(axis=1)
760
+ oor_pct = float(any_viol.mean() * 100.0)
761
  if any_viol.any():
762
  tbl = df.loc[any_viol, FEATURES].copy()
763
  for c in FEATURES:
764
  if pd.api.types.is_numeric_dtype(tbl[c]): tbl[c] = tbl[c].round(3)
765
+ tbl["Violations"] = pd.DataFrame({f:(df[f] < ranges[f][0]) | (df[f] > ranges[f][1]) for f in FEATURES}).loc[any_viol].apply(
766
  lambda r:", ".join([c for c,v in r.items() if v]), axis=1
767
  )
768
+
769
+ st.session_state.results["m_val"] = {
770
+ "R": pearson_r(df[act_col], df[PRED_COL]),
771
+ "RMSE": rmse(df[act_col], df[PRED_COL]),
772
+ "MAPE%": mape(df[act_col], df[PRED_COL]),
773
  }
774
+ st.session_state.results["sv_val"] = {"n":len(df), "pred_min":float(df[PRED_COL].min()), "pred_max":float(df[PRED_COL].max()), "oor":oor_pct}
775
+ st.session_state.results["oor_tbl"] = tbl
776
 
777
  if "Validate" in st.session_state.results:
778
  m = st.session_state.results["m_val"]
779
  c1,c2,c3 = st.columns(3)
780
+ c1.metric("R", f"{m['R']:.3f}"); c2.metric("RMSE", f"{m['RMSE']:.2f}"); c3.metric("MAPE%", f"{m['MAPE%']:.2f}")
 
 
781
  st.markdown("""
782
  <div style='text-align: left; font-size: 0.8em; color: #6b7280; margin-top: -16px; margin-bottom: 8px;'>
783
  <strong>R:</strong> Pearson Correlation Coefficient<br>
 
791
  st.plotly_chart(track_plot(st.session_state.results["Validate"], include_actual=True),
792
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
793
  with col_cross:
794
+ act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in st.session_state.results["Validate"].columns) else TARGET
795
+ st.pyplot(cross_plot_static(st.session_state.results["Validate"][act_col],
796
  st.session_state.results["Validate"][PRED_COL]),
797
  use_container_width=False)
798
 
 
805
  df_centered_rounded(st.session_state.results["oor_tbl"])
806
 
807
  # =========================
808
+ # PREDICTION (no actual)
809
  # =========================
810
  if st.session_state.app_step == "predict":
811
+ st.sidebar.header("Prediction (No Actual)")
812
  up = st.sidebar.file_uploader("Upload Prediction Excel", type=["xlsx","xls"])
813
  if up is not None:
814
  book = read_book_bytes(up.getvalue())
 
820
  go_btn = st.sidebar.button("Predict", type="primary", use_container_width=True)
821
  if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
822
 
823
+ sticky_header("Prediction", "Upload a dataset with the feature columns (no actual column).")
824
 
825
  if go_btn and up is not None:
826
  book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
827
+ df0 = _normalize_columns(book[name].copy(), FEATURES, TARGET, ALIASES)
828
+ if not ensure_cols(df0, FEATURES):
829
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
830
+ df = df0.copy()
831
+ df[PRED_COL] = _inv_transform(model.predict(_make_X(df0, FEATURES)), TRANSFORM)
832
+ st.session_state.results["PredictOnly"] = df
833
 
834
  ranges = st.session_state.train_ranges; oor_pct = 0.0
835
  if ranges:
836
+ any_viol = pd.DataFrame({f:(df[f] < ranges[f][0]) | (df[f] > ranges[f][1]) for f in FEATURES}).any(axis=1)
837
+ oor_pct = float(any_viol.mean() * 100.0)
838
+ st.session_state.results["sv_pred"] = {
839
  "n":len(df),
840
  "pred_min":float(df[PRED_COL].min()),
841
  "pred_max":float(df[PRED_COL].max()),
 
846
 
847
  if "PredictOnly" in st.session_state.results:
848
  df = st.session_state.results["PredictOnly"]; sv = st.session_state.results["sv_pred"]
 
849
  col_left, col_right = st.columns([2,3], gap="large")
850
  with col_left:
851
  table = pd.DataFrame({
 
859
  with col_right:
860
  st.plotly_chart(track_plot(df, include_actual=False),
861
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
 
862
  render_export_button(phase_key="predict")
863
 
864
  # =========================
 
879
  tabs = st.tabs(names)
880
  for t, name in zip(tabs, names):
881
  with t:
882
+ df = _normalize_columns(book_to_preview[name], FEATURES, TARGET, ALIASES)
883
  t1, t2 = st.tabs(["Tracks", "Summary"])
884
  with t1:
885
  st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)