UCS2014 commited on
Commit
08e38ee
·
verified ·
1 Parent(s): fe099d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +230 -152
app.py CHANGED
@@ -1,8 +1,11 @@
1
  # app_FP.py — ST_GeoMech_FP (Fracture Pressure)
2
- # Mirrors the SHmin app's specs & workflow (password gate, top preview panel, UI/metrics/exports).
3
  # Self-contained: trains a fixed, optimized RF pipeline in-app. No external model files.
4
 
5
- import io, os, base64, math
 
 
 
6
  from pathlib import Path
7
  from datetime import datetime
8
 
@@ -28,25 +31,25 @@ from sklearn.impute import SimpleImputer
28
  APP_NAME = "ST_GeoMech_FP"
29
  TAGLINE = "Real-Time Fracture Pressure Prediction"
30
 
31
- # Canonical feature names (match SHmin app)
32
  FEATURES = ["Q (gpm)", "SPP (psi)", "T (kft.lbf)", "WOB (klbf)", "ROP (ft/h)"]
33
 
34
- # Canonical prediction/target labels (we'll auto-resolve target via aliases)
35
  TARGET_CANON = "FracPress_Actual"
36
  PRED_COL = "FracPress_Pred"
37
  UNITS = "Psi"
38
 
39
- # Target aliases (we'll accept any of these if present in sheets)
40
  TARGET_ALIASES = [
41
  "FracPress_Actual", "FracturePressure_Actual", "Fracture Pressure (psi)",
42
  "Frac Pressure (psi)", "FracPressure", "Frac_Pressure", "FracturePressure",
43
  "FP_Actual", "FP (psi)"
44
  ]
45
 
46
- # Model transform (kept for parity with SHmin; unused for RF by default)
47
  TRANSFORM = "none" # "none" | "log10" | "ln"
48
 
49
- # Fixed "best" RF params (robust & fast; you can tweak if you have tuned FP params)
50
  BEST_PARAMS = dict(
51
  n_estimators=400,
52
  max_depth=None,
@@ -59,10 +62,10 @@ BEST_PARAMS = dict(
59
  )
60
 
61
  # Color / layout
62
- COLORS = {"pred": "#1f77b4", "actual": "#f2b702", "ref": "#5a5a5a"}
63
  CROSS_W, CROSS_H = 350, 350
64
  TRACK_H, TRACK_W = 1000, 500
65
- FONT_SZ = 13
66
  BOLD_FONT = "Arial Black, Arial, sans-serif"
67
 
68
  # =========================
@@ -94,14 +97,14 @@ TABLE_CENTER_CSS = [
94
  ]
95
 
96
  # =========================
97
- # Password gate (same as SHmin)
98
  # =========================
99
- def inline_logo(path="logo.png") -> str:
100
  try:
101
  p = Path(path)
102
- if not p.exists(): return ""
103
- import base64 as _b64
104
- return f"data:image/png;base64,{_b64.b64encode(p.read_bytes()).decode('ascii')}"
105
  except Exception:
106
  return ""
107
 
@@ -147,17 +150,19 @@ def mape(y_true, y_pred, eps: float = 1e-9) -> float:
147
  def pearson_r(y_true, y_pred) -> float:
148
  a = np.asarray(y_true, dtype=float)
149
  p = np.asarray(y_pred, dtype=float)
150
- if a.size < 2: return float("nan")
151
- if np.all(a == a[0]) or np.all(p == p[0]): return float("nan")
 
 
152
  return float(np.corrcoef(a, p)[0, 1])
153
 
154
  @st.cache_data(show_spinner=False)
155
- def parse_excel(data_bytes: bytes):
156
  bio = io.BytesIO(data_bytes)
157
  xl = pd.ExcelFile(bio)
158
  return {sh: xl.parse(sh) for sh in xl.sheet_names}
159
 
160
- def read_book_bytes(b: bytes):
161
  return parse_excel(b) if b else {}
162
 
163
  def _excel_engine() -> str:
@@ -179,7 +184,7 @@ def _round_numeric(df: pd.DataFrame, ndigits: int = 3) -> pd.DataFrame:
179
  out[c] = pd.to_numeric(out[c], errors="coerce").round(ndigits)
180
  return out
181
 
182
- def df_centered_rounded(df: pd.DataFrame, hide_index=True):
183
  out = df.copy()
184
  numcols = out.select_dtypes(include=[np.number]).columns
185
  styler = (
@@ -203,8 +208,10 @@ def _nice_tick0(xmin: float, step: float = 0.1) -> float:
203
  # ---------- Transform helpers ----------
204
  def _inv_transform(x: np.ndarray, transform: str) -> np.ndarray:
205
  t = (transform or "none").lower()
206
- if t in ("log10", "log_10", "log10()"): return np.power(10.0, x)
207
- if t in ("ln", "log", "loge", "log_e", "natural"): return np.exp(x)
 
 
208
  return x
209
 
210
  # ---------- Build X exactly as trained ----------
@@ -214,7 +221,7 @@ def _make_X(df: pd.DataFrame, features: list[str]) -> pd.DataFrame:
214
  X[c] = pd.to_numeric(X[c], errors="coerce")
215
  return X
216
 
217
- # ---------- Target resolver ----------
218
  def _resolve_target_col(df: pd.DataFrame) -> str | None:
219
  cols_lower = {c.lower(): c for c in df.columns}
220
  for cand in TARGET_ALIASES:
@@ -227,20 +234,24 @@ def _resolve_target_col(df: pd.DataFrame) -> str | None:
227
  # =========================
228
  def _summary_table(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
229
  cols = [c for c in cols if c in df.columns]
230
- if not cols: return pd.DataFrame()
231
- tbl = (df[cols]
232
- .agg(['min','max','mean','std'])
233
- .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
234
- .reset_index(names="Field"))
 
 
 
235
  return _round_numeric(tbl, 3)
236
 
237
  def _train_ranges_df(ranges: dict[str, tuple[float, float]]) -> pd.DataFrame:
238
- if not ranges: return pd.DataFrame()
 
239
  df = pd.DataFrame(ranges).T.reset_index()
240
  df.columns = ["Feature", "Min", "Max"]
241
  return _round_numeric(df, 3)
242
 
243
- def _excel_autofit(writer, sheet_name: str, df: pd.DataFrame, min_w: int = 8, max_w: int = 40):
244
  try:
245
  import xlsxwriter # noqa: F401
246
  except Exception:
@@ -254,45 +265,64 @@ def _excel_autofit(writer, sheet_name: str, df: pd.DataFrame, min_w: int = 8, ma
254
 
255
  def _available_sections() -> list[str]:
256
  res = st.session_state.get("results", {})
257
- sections = []
258
- if "Train" in res: sections += ["Training","Training_Metrics","Training_Summary"]
259
- if "Test" in res: sections += ["Testing","Testing_Metrics","Testing_Summary"]
260
- if "Validate" in res: sections += ["Validation","Validation_Metrics","Validation_Summary","Validation_OOR"]
261
- if "PredictOnly" in res: sections += ["Prediction","Prediction_Summary"]
262
- if st.session_state.get("train_ranges"): sections += ["Training_Ranges"]
 
 
 
 
 
263
  sections += ["Info"]
264
  return sections
265
 
266
- def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: bool = True) -> tuple[bytes|None, str|None, list[str]]:
267
  res = st.session_state.get("results", {})
268
- if not res: return None, None, []
 
269
  sheets: dict[str, pd.DataFrame] = {}
270
  order: list[str] = []
271
 
272
- def _add(name: str, df: pd.DataFrame):
273
- if df is None or (isinstance(df, pd.DataFrame) and df.empty): return
274
- sheets[name] = _round_numeric(df, ndigits); order.append(name)
275
-
276
- if "Training" in selected and "Train" in res: _add("Training", res["Train"])
277
- if "Training_Metrics" in selected and res.get("m_train"): _add("Training_Metrics", pd.DataFrame([res["m_train"]]))
 
 
 
 
 
278
  if "Training_Summary" in selected and "Train" in res:
279
- tr_cols = FEATURES + [c for c in [PRED_COL, st.session_state.get("tcol_train", TARGET_CANON)] if c in res["Train"].columns]
280
  _add("Training_Summary", _summary_table(res["Train"], tr_cols))
281
 
282
- if "Testing" in selected and "Test" in res: _add("Testing", res["Test"])
283
- if "Testing_Metrics" in selected and res.get("m_test"): _add("Testing_Metrics", pd.DataFrame([res["m_test"]]))
 
 
284
  if "Testing_Summary" in selected and "Test" in res:
285
- te_cols = FEATURES + [c for c in [PRED_COL, st.session_state.get("tcol_test", TARGET_CANON)] if c in res["Test"].columns]
286
  _add("Testing_Summary", _summary_table(res["Test"], te_cols))
287
 
288
- if "Validation" in selected and "Validate" in res: _add("Validation", res["Validate"])
289
- if "Validation_Metrics" in selected and res.get("m_val"): _add("Validation_Metrics", pd.DataFrame([res["m_val"]]))
290
- if "Validation_Summary" in selected and res.get("sv_val"): _add("Validation_Summary", pd.DataFrame([res["sv_val"]]))
 
 
 
 
291
  if "Validation_OOR" in selected and isinstance(res.get("oor_tbl"), pd.DataFrame) and not res["oor_tbl"].empty:
292
  _add("Validation_OOR", res["oor_tbl"].reset_index(drop=True))
293
 
294
- if "Prediction" in selected and "PredictOnly" in res: _add("Prediction", res["PredictOnly"])
295
- if "Prediction_Summary" in selected and res.get("sv_pred"): _add("Prediction_Summary", pd.DataFrame([res["sv_pred"]]))
 
 
296
 
297
  if "Training_Ranges" in selected and st.session_state.get("train_ranges"):
298
  _add("Training_Ranges", _train_ranges_df(st.session_state["train_ranges"]))
@@ -308,51 +338,62 @@ def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: boo
308
  ])
309
  _add("Info", info)
310
 
311
- if not order: return None, None, []
 
312
 
313
  bio = io.BytesIO()
314
  engine = _excel_engine()
315
  with pd.ExcelWriter(bio, engine=engine) as writer:
316
  for name in order:
317
- df = sheets[name]; sheet = _excel_safe_name(name)
 
318
  df.to_excel(writer, sheet_name=sheet, index=False)
319
- if do_autofit: _excel_autofit(writer, sheet, df)
 
320
  bio.seek(0)
321
  fname = f"FracPressure_Export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
322
  return bio.getvalue(), fname, order
323
 
324
  def render_export_button(phase_key: str) -> None:
325
  res = st.session_state.get("results", {})
326
- if not res: return
 
327
  st.divider()
328
  st.markdown("### Export to Excel")
329
  options = _available_sections()
330
  selected_sheets = st.multiselect(
331
  "Sheets to include",
332
- options=options, default=[],
 
333
  placeholder="Choose option(s)",
334
  help="Pick the sheets you want in the Excel export.",
335
  key=f"sheets_{phase_key}",
336
  )
337
  if not selected_sheets:
338
  st.caption("Select one or more sheets above to enable export.")
339
- st.download_button("⬇️ Export Excel", data=b"", file_name="FracPressure_Export.xlsx",
340
- mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
341
- disabled=True, key=f"download_{phase_key}")
 
 
342
  return
343
  data, fname, names = build_export_workbook(selected=selected_sheets, ndigits=3, do_autofit=True)
344
- if names: st.caption("Will include: " + ", ".join(names))
345
- st.download_button("⬇️ Export Excel", data=(data or b""), file_name=(fname or "FracPressure_Export.xlsx"),
346
- mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
347
- disabled=(data is None), key=f"download_{phase_key}")
 
 
 
348
 
349
  # =========================
350
- # Plots (integer x ticks)
351
  # =========================
352
- def cross_plot_static(actual, pred, label="Fracture Pressure"):
353
  a = pd.Series(actual, dtype=float)
354
  p = pd.Series(pred, dtype=float)
355
- lo = float(min(a.min(), p.min())); hi = float(max(a.max(), p.max()))
 
356
  pad = 0.03 * (hi - lo if hi > lo else 1.0)
357
  lo2, hi2 = lo - pad, hi + pad
358
  ticks = np.linspace(lo2, hi2, 5)
@@ -379,13 +420,15 @@ def cross_plot_static(actual, pred, label="Fracture Pressure"):
379
  fig.subplots_adjust(left=0.16, bottom=0.16, right=0.98, top=0.98)
380
  return fig
381
 
382
- def track_plot(df: pd.DataFrame, actual_col: str | None, include_actual=True):
383
- depth_col = next((c for c in df.columns if 'depth' in str(c).lower() or 'tvd' in str(c).lower()), None)
384
  if depth_col is not None:
385
- y = pd.to_numeric(df[depth_col], errors="coerce"); ylab = depth_col
 
386
  y_range = [float(np.nanmax(y)), float(np.nanmin(y))] # reversed
387
  else:
388
- y = pd.Series(np.arange(1, len(df) + 1)); ylab = "Point Index"
 
389
  y_range = [float(y.max()), float(y.min())]
390
 
391
  x_series = pd.Series(df.get(PRED_COL, pd.Series(dtype=float))).astype(float)
@@ -403,14 +446,14 @@ def track_plot(df: pd.DataFrame, actual_col: str | None, include_actual=True):
403
  x=df[PRED_COL], y=y, mode="lines",
404
  line=dict(color=COLORS["pred"], width=1.8),
405
  name=PRED_COL,
406
- hovertemplate=f"{PRED_COL}: "+ "%{x:.0f}<br>" + ylab + ": %{y}<extra></extra>"
407
  ))
408
  if include_actual and actual_col and actual_col in df.columns:
409
  fig.add_trace(go.Scatter(
410
  x=df[actual_col], y=y, mode="lines",
411
  line=dict(color=COLORS["actual"], width=2.0, dash="dot"),
412
  name=f"{actual_col} (actual)",
413
- hovertemplate=f"{actual_col}: "+ "%{x:.0f}<br>" + ylab + ": %{y}<extra></extra>"
414
  ))
415
 
416
  fig.update_layout(
@@ -450,7 +493,7 @@ def preview_tracks(df: pd.DataFrame, cols: list[str]):
450
  ax.axis("off")
451
  return fig
452
 
453
- depth_col = next((c for c in df.columns if 'depth' in str(c).lower() or 'tvd' in str(c).lower()), None)
454
  if depth_col is not None:
455
  idx = pd.to_numeric(df[depth_col], errors="coerce")
456
  y_label = depth_col
@@ -498,9 +541,9 @@ def build_pipeline() -> Pipeline:
498
  st.session_state.setdefault("app_step", "intro")
499
  st.session_state.setdefault("results", {})
500
  st.session_state.setdefault("train_ranges", None)
501
- st.session_state.setdefault("dev_file_name","")
502
- st.session_state.setdefault("dev_file_bytes",b"")
503
- st.session_state.setdefault("dev_file_loaded",False)
504
  st.session_state.setdefault("fitted_model", None)
505
 
506
  # Persistent top-of-page preview panel
@@ -518,7 +561,7 @@ st.sidebar.markdown(f"""
518
  </div>
519
  """, unsafe_allow_html=True)
520
 
521
- def sticky_header(title, message):
522
  st.markdown(
523
  f"""
524
  <style>
@@ -535,7 +578,7 @@ def sticky_header(title, message):
535
  unsafe_allow_html=True
536
  )
537
 
538
- def render_preview_panel():
539
  """Top-of-page preview panel (same behavior as SHmin)."""
540
  if not st.session_state.get("show_preview_panel"):
541
  return
@@ -566,9 +609,9 @@ def render_preview_panel():
566
  else:
567
  tbl = (
568
  df[feat_present]
569
- .agg(['min','max','mean','std'])
570
- .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
571
- .reset_index(names="Feature")
572
  )
573
  df_centered_rounded(tbl)
574
  with tabs[-1]:
@@ -582,7 +625,9 @@ def render_preview_panel():
582
  # =========================
583
  if st.session_state.app_step == "intro":
584
  st.header("Welcome!")
585
- st.markdown(f"This software is developed by *Smart Thinking AI-Solutions Team* to estimate **Fracture Pressure** ({UNITS}) from drilling/offset data.")
 
 
586
  st.subheader("How It Works")
587
  st.markdown(
588
  "1) **Upload your data file** and click **Run Model** to fit the baked-in pipeline. \n"
@@ -590,26 +635,28 @@ if st.session_state.app_step == "intro":
590
  "3) **Predict** on wells without actual."
591
  )
592
  if st.button("Start Showcase", type="primary"):
593
- st.session_state.app_step = "dev"; st.rerun()
 
594
 
595
  # =========================
596
  # CASE BUILDING (Train/Test)
597
  # =========================
598
- def _find_sheet(book, names):
599
  low2orig = {k.lower(): k for k in book.keys()}
600
  for nm in names:
601
- if nm.lower() in low2orig: return low2orig[nm.lower()]
 
602
  return None
603
 
604
  if st.session_state.app_step == "dev":
605
  st.sidebar.header("Case Building")
606
- up = st.sidebar.file_uploader("Upload Your Data File", type=["xlsx","xls"])
607
  if up is not None:
608
  st.session_state.dev_file_bytes = up.getvalue()
609
  st.session_state.dev_file_name = up.name
610
  st.session_state.dev_file_loaded = True
611
  st.session_state.fitted_model = None
612
- # show preview panel
613
  st.session_state.preview_book = read_book_bytes(st.session_state.dev_file_bytes) if st.session_state.dev_file_bytes else {}
614
  st.session_state.show_preview_panel = True
615
  st.rerun()
@@ -621,8 +668,10 @@ if st.session_state.app_step == "dev":
621
  st.sidebar.caption(f"**Data loaded:** {st.session_state.dev_file_name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
622
 
623
  run = st.sidebar.button("Run Model", type="primary", use_container_width=True)
624
- if st.sidebar.button("Proceed to Validation ▶", use_container_width=True): st.session_state.app_step="validate"; st.rerun()
625
- if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
 
 
626
 
627
  if st.session_state.dev_file_loaded and st.session_state.show_preview_panel:
628
  sticky_header("Case Building", "Previewed ✓ — now click **Run Model**.")
@@ -635,8 +684,8 @@ if st.session_state.app_step == "dev":
635
 
636
  if run and st.session_state.dev_file_bytes:
637
  book = read_book_bytes(st.session_state.dev_file_bytes)
638
- sh_train = _find_sheet(book, ["Train","Training","training2","train","training"])
639
- sh_test = _find_sheet(book, ["Test","Testing","testing2","test","testing"])
640
  if sh_train is None or sh_test is None:
641
  st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
642
  st.stop()
@@ -644,14 +693,14 @@ if st.session_state.app_step == "dev":
644
  tr0 = book[sh_train].copy()
645
  te0 = book[sh_test].copy()
646
 
647
- # Resolve target name per-sheet
648
  tcol_tr = _resolve_target_col(tr0)
649
  tcol_te = _resolve_target_col(te0)
650
  if tcol_tr is None or tcol_te is None:
651
  st.error(f"Missing target column. Expected one of: {TARGET_ALIASES}")
652
  st.stop()
653
 
654
- # Ensure features exist
655
  if not (ensure_cols(tr0, FEATURES) and ensure_cols(te0, FEATURES)):
656
  st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True)
657
  st.stop()
@@ -668,7 +717,7 @@ if st.session_state.app_step == "dev":
668
 
669
  pipe = build_pipeline()
670
  pipe.fit(X_tr, y_tr)
671
- st.session_state.fitted_model = pipe # cache
672
 
673
  # Predictions
674
  tr = tr0.copy(); te = te0.copy()
@@ -679,28 +728,29 @@ if st.session_state.app_step == "dev":
679
  st.session_state.results["Train"] = tr
680
  st.session_state.results["Test"] = te
681
  st.session_state.results["m_train"] = {
682
- "R": pearson_r(tr[tcol_tr], tr[PRED_COL]),
683
- "RMSE": rmse(tr[tcol_tr], tr[PRED_COL]),
684
  "MAPE%": mape(tr[tcol_tr], tr[PRED_COL]),
685
  }
686
  st.session_state.results["m_test"] = {
687
- "R": pearson_r(te[tcol_te], te[PRED_COL]),
688
- "RMSE": rmse(te[tcol_te], te[PRED_COL]),
689
  "MAPE%": mape(te[tcol_te], te[PRED_COL]),
690
  }
691
 
692
- # Persist which target names we used (for export/plots)
693
  st.session_state["tcol_train"] = tcol_tr
694
  st.session_state["tcol_test"] = tcol_te
695
 
696
  # Training min–max ranges
697
- tr_min = tr[FEATURES].min().to_dict(); tr_max = tr[FEATURES].max().to_dict()
698
- st.session_state.train_ranges = {f:(float(tr_min[f]), float(tr_max[f])) for f in FEATURES}
 
699
 
700
  st.markdown('<div class="st-message-box st-success">Case has been built and results are displayed below.</div>', unsafe_allow_html=True)
701
 
702
  def _dev_block(df: pd.DataFrame, actual_col: str, m: dict):
703
- c1,c2,c3 = st.columns(3)
704
  c1.metric("R", f"{m['R']:.3f}")
705
  c2.metric("RMSE", f"{m['RMSE']:.2f}")
706
  c3.metric("MAPE%", f"{m['MAPE%']:.2f}")
@@ -713,17 +763,25 @@ if st.session_state.app_step == "dev":
713
  """, unsafe_allow_html=True)
714
  col_track, col_cross = st.columns([2, 3], gap="large")
715
  with col_track:
716
- st.plotly_chart(track_plot(df, actual_col, include_actual=True),
717
- use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
 
 
 
718
  with col_cross:
719
- st.pyplot(cross_plot_static(df[actual_col], df[PRED_COL], label="Fracture Pressure"), use_container_width=False)
 
 
 
720
 
721
  if "Train" in st.session_state.results or "Test" in st.session_state.results:
722
  tab1, tab2 = st.tabs(["Training", "Testing"])
723
  if "Train" in st.session_state.results:
724
- with tab1: _dev_block(st.session_state.results["Train"], st.session_state.get("tcol_train", TARGET_CANON), st.session_state.results["m_train"])
 
725
  if "Test" in st.session_state.results:
726
- with tab2: _dev_block(st.session_state.results["Test"], st.session_state.get("tcol_test", TARGET_CANON), st.session_state.results["m_test"])
 
727
  render_export_button(phase_key="dev")
728
 
729
  # =========================
@@ -731,33 +789,36 @@ if st.session_state.app_step == "dev":
731
  # =========================
732
  if st.session_state.app_step == "validate":
733
  st.sidebar.header("Validate the Model")
734
- up = st.sidebar.file_uploader("Upload Validation Excel", type=["xlsx","xls"])
735
  if up is not None:
736
  book = read_book_bytes(up.getvalue())
737
  if book:
738
  df0 = next(iter(book.values()))
739
  st.sidebar.caption(f"**Data loaded:** {up.name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
740
 
741
- # preview panel on top
742
  if st.sidebar.button("Preview data", use_container_width=True, disabled=(up is None)):
743
  st.session_state.preview_book = read_book_bytes(up.getvalue()) if up is not None else {}
744
  st.session_state.show_preview_panel = True
745
  st.rerun()
746
 
747
  go_btn = st.sidebar.button("Predict & Validate", type="primary", use_container_width=True)
748
- if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
749
- if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
 
 
750
 
751
  sticky_header("Validate the Model", "Upload a dataset with the same **features** and an **actual fracture pressure** column.")
752
  render_preview_panel()
753
 
754
  if go_btn and up is not None:
755
  if st.session_state.fitted_model is None:
756
- st.error("Please train the model first in Case Building."); st.stop()
 
757
 
758
  book = read_book_bytes(up.getvalue())
759
  names = list(book.keys())
760
- name = next((s for s in names if s.lower() in ("validation","validate","validation2","val","val2")), names[0])
761
  df0 = book[name].copy()
762
 
763
  tcol = _resolve_target_col(df0)
@@ -765,23 +826,27 @@ if st.session_state.app_step == "validate":
765
  st.error(f"Missing target column. Expected one of: {TARGET_ALIASES}")
766
  st.stop()
767
  if not ensure_cols(df0, FEATURES):
768
- st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True); st.stop()
 
769
 
770
  df = df0.copy()
771
  df[PRED_COL] = _inv_transform(st.session_state.fitted_model.predict(_make_X(df0, FEATURES)), TRANSFORM)
772
  st.session_state.results["Validate"] = df
773
 
774
  # Range checks
775
- ranges = st.session_state.train_ranges; oor_pct = 0.0; tbl=None
 
 
776
  if ranges:
777
- any_viol = pd.DataFrame({f:(df[f] < ranges[f][0]) | (df[f] > ranges[f][1]) for f in FEATURES}).any(axis=1)
778
  oor_pct = float(any_viol.mean() * 100.0)
779
  if any_viol.any():
780
  tbl = df.loc[any_viol, FEATURES].copy()
781
  for c in FEATURES:
782
- if pd.api.types.is_numeric_dtype(tbl[c]): tbl[c] = tbl[c].round(3)
783
- tbl["Violations"] = pd.DataFrame({f:(df[f] < ranges[f][0]) | (df[f] > ranges[f][1]) for f in FEATURES}).loc[any_viol].apply(
784
- lambda r:", ".join([c for c,v in r.items() if v]), axis=1
 
785
  )
786
 
787
  st.session_state.results["m_val"] = {
@@ -789,13 +854,14 @@ if st.session_state.app_step == "validate":
789
  "RMSE": rmse(df[tcol], df[PRED_COL]),
790
  "MAPE%": mape(df[tcol], df[PRED_COL]),
791
  }
792
- st.session_state.results["sv_val"] = {"n":len(df), "pred_min":float(df[PRED_COL].min()), "pred_max":float(df[PRED_COL].max()), "oor":oor_pct}
793
  st.session_state.results["oor_tbl"] = tbl
794
  st.session_state["tcol_val"] = tcol
795
 
796
  if "Validate" in st.session_state.results:
797
- m = st.session_state.results["m_val"]; tcol = st.session_state.get("tcol_val", TARGET_CANON)
798
- c1,c2,c3 = st.columns(3)
 
799
  c1.metric("R", f"{m['R']:.3f}"); c2.metric("RMSE", f"{m['RMSE']:.2f}"); c3.metric("MAPE%", f"{m['MAPE%']:.2f}")
800
  st.markdown("""
801
  <div style='text-align: left; font-size: 0.8em; color: #6b7280; margin-top: -16px; margin-bottom: 8px;'>
@@ -807,18 +873,21 @@ if st.session_state.app_step == "validate":
807
 
808
  col_track, col_cross = st.columns([2, 3], gap="large")
809
  with col_track:
810
- st.plotly_chart(track_plot(st.session_state.results["Validate"], tcol, include_actual=True),
811
- use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
 
 
812
  with col_cross:
813
- st.pyplot(cross_plot_static(st.session_state.results["Validate"][tcol],
814
- st.session_state.results["Validate"][PRED_COL],
815
- label="Fracture Pressure"),
816
- use_container_width=False)
817
 
818
  render_export_button(phase_key="validate")
819
 
820
  sv = st.session_state.results["sv_val"]
821
- if sv["oor"] > 0: st.markdown('<div class="st-message-box st-warning">Some inputs fall outside **training min–max** ranges.</div>', unsafe_allow_html=True)
 
822
  if st.session_state.results["oor_tbl"] is not None:
823
  st.write("*Out-of-range rows (vs. Training min–max):*")
824
  df_centered_rounded(st.session_state.results["oor_tbl"])
@@ -828,66 +897,74 @@ if st.session_state.app_step == "validate":
828
  # =========================
829
  if st.session_state.app_step == "predict":
830
  st.sidebar.header("Prediction (No Actual)")
831
- up = st.sidebar.file_uploader("Upload Prediction Excel", type=["xlsx","xls"])
832
  if up is not None:
833
  book = read_book_bytes(up.getvalue())
834
  if book:
835
  df0 = next(iter(book.values()))
836
  st.sidebar.caption(f"**Data loaded:** {up.name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
837
 
838
- # preview panel on top
839
  if st.sidebar.button("Preview data", use_container_width=True, disabled=(up is None)):
840
  st.session_state.preview_book = read_book_bytes(up.getvalue()) if up is not None else {}
841
  st.session_state.show_preview_panel = True
842
  st.rerun()
843
 
844
  go_btn = st.sidebar.button("Predict", type="primary", use_container_width=True)
845
- if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
 
846
 
847
  sticky_header("Prediction", "Upload a dataset with the 5 feature columns (no actual column).")
848
  render_preview_panel()
849
 
850
  if go_btn and up is not None:
851
  if st.session_state.fitted_model is None:
852
- st.error("Please train the model first in Case Building."); st.stop()
 
853
 
854
- book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
 
855
  df0 = book[name].copy()
856
  if not ensure_cols(df0, FEATURES):
857
- st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
 
858
 
859
  df = df0.copy()
860
  df[PRED_COL] = _inv_transform(st.session_state.fitted_model.predict(_make_X(df0, FEATURES)), TRANSFORM)
861
  st.session_state.results["PredictOnly"] = df
862
 
863
- ranges = st.session_state.train_ranges; oor_pct = 0.0
 
864
  if ranges:
865
- any_viol = pd.DataFrame({f:(df[f] < ranges[f][0]) | (df[f] > ranges[f][1]) for f in FEATURES}).any(axis=1)
866
  oor_pct = float(any_viol.mean() * 100.0)
867
  st.session_state.results["sv_pred"] = {
868
- "n":len(df),
869
- "pred_min":float(df[PRED_COL].min()),
870
- "pred_max":float(df[PRED_COL].max()),
871
- "pred_mean":float(df[PRED_COL].mean()),
872
- "pred_std":float(df[PRED_COL].std(ddof=0)),
873
- "oor":oor_pct
874
  }
875
 
876
  if "PredictOnly" in st.session_state.results:
877
- df = st.session_state.results["PredictOnly"]; sv = st.session_state.results["sv_pred"]
878
- col_left, col_right = st.columns([2,3], gap="large")
 
879
  with col_left:
880
  table = pd.DataFrame({
881
- "Metric": ["# points","Pred min","Pred max","Pred mean","Pred std","OOR %"],
882
- "Value": [sv["n"], round(sv["pred_min"],3), round(sv["pred_max"],3),
883
- round(sv["pred_mean"],3), round(sv["pred_std"],3), f'{sv["oor"]:.1f}%']
884
  })
885
  st.markdown('<div class="st-message-box st-success">Predictions ready ✓</div>', unsafe_allow_html=True)
886
  df_centered_rounded(table, hide_index=True)
887
  st.caption("**★ OOR** = % of rows with input features outside the training min–max range.")
888
  with col_right:
889
- st.plotly_chart(track_plot(df, actual_col=None, include_actual=False),
890
- use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
 
 
891
  render_export_button(phase_key="predict")
892
 
893
  # =========================
@@ -897,6 +974,7 @@ st.markdown("""
897
  <br><br><br>
898
  <hr>
899
  <div style='text-align:center;color:#6b7280;font-size:1.0em;'>
900
- © 2025 Smart Thinking AI-Solutions Team. All rights reserved.
 
901
  </div>
902
  """, unsafe_allow_html=True)
 
1
  # app_FP.py — ST_GeoMech_FP (Fracture Pressure)
2
+ # Mirrors the SHmin app's specs & workflow (password gate, preview panel, train/validate/predict, Excel export).
3
  # Self-contained: trains a fixed, optimized RF pipeline in-app. No external model files.
4
 
5
+ import io
6
+ import os
7
+ import base64
8
+ import math
9
  from pathlib import Path
10
  from datetime import datetime
11
 
 
31
  APP_NAME = "ST_GeoMech_FP"
32
  TAGLINE = "Real-Time Fracture Pressure Prediction"
33
 
34
+ # Canonical features (match SHmin app)
35
  FEATURES = ["Q (gpm)", "SPP (psi)", "T (kft.lbf)", "WOB (klbf)", "ROP (ft/h)"]
36
 
37
+ # Canonical prediction/target labels
38
  TARGET_CANON = "FracPress_Actual"
39
  PRED_COL = "FracPress_Pred"
40
  UNITS = "Psi"
41
 
42
+ # Target aliases accepted in input workbooks
43
  TARGET_ALIASES = [
44
  "FracPress_Actual", "FracturePressure_Actual", "Fracture Pressure (psi)",
45
  "Frac Pressure (psi)", "FracPressure", "Frac_Pressure", "FracturePressure",
46
  "FP_Actual", "FP (psi)"
47
  ]
48
 
49
+ # Optional transform (kept for parity; RF is used on raw scale)
50
  TRANSFORM = "none" # "none" | "log10" | "ln"
51
 
52
+ # Fixed "best" RandomForest params
53
  BEST_PARAMS = dict(
54
  n_estimators=400,
55
  max_depth=None,
 
62
  )
63
 
64
  # Color / layout
65
+ COLORS = {"pred": "#1f77b4", "actual": "#f2b702", "ref": "#5a5a5a"}
66
  CROSS_W, CROSS_H = 350, 350
67
  TRACK_H, TRACK_W = 1000, 500
68
+ FONT_SZ = 13
69
  BOLD_FONT = "Arial Black, Arial, sans-serif"
70
 
71
  # =========================
 
97
  ]
98
 
99
  # =========================
100
+ # Password gate (optional)
101
  # =========================
102
def inline_logo(path: str = "logo.png") -> str:
    """Return the logo file as a base64 data-URI, or '' when unavailable."""
    try:
        logo = Path(path)
        if logo.exists():
            encoded = base64.b64encode(logo.read_bytes()).decode("ascii")
            return f"data:image/png;base64,{encoded}"
    except Exception:
        # Any filesystem/permission problem simply means "no logo to show".
        pass
    return ""
110
 
 
150
def pearson_r(y_true, y_pred) -> float:
    """Pearson correlation coefficient between two series.

    Returns NaN for degenerate cases where correlation is undefined:
    fewer than two points, or either series being constant.
    """
    ya = np.asarray(y_true, dtype=float)
    yp = np.asarray(y_pred, dtype=float)
    degenerate = ya.size < 2 or np.all(ya == ya[0]) or np.all(yp == yp[0])
    if degenerate:
        return float("nan")
    return float(np.corrcoef(ya, yp)[0, 1])
158
 
159
@st.cache_data(show_spinner=False)
def parse_excel(data_bytes: bytes) -> dict[str, pd.DataFrame]:
    """Parse an Excel workbook given as raw bytes into {sheet_name: DataFrame}.

    Cached by Streamlit so re-uploads of the same bytes are not re-parsed.
    """
    workbook = pd.ExcelFile(io.BytesIO(data_bytes))
    return {sheet: workbook.parse(sheet) for sheet in workbook.sheet_names}
164
 
165
def read_book_bytes(b: bytes) -> dict[str, pd.DataFrame]:
    """Return parsed sheets for *b*, or an empty dict when no bytes were given."""
    if not b:
        return {}
    return parse_excel(b)
167
 
168
  def _excel_engine() -> str:
 
184
  out[c] = pd.to_numeric(out[c], errors="coerce").round(ndigits)
185
  return out
186
 
187
+ def df_centered_rounded(df: pd.DataFrame, hide_index: bool = True) -> None:
188
  out = df.copy()
189
  numcols = out.select_dtypes(include=[np.number]).columns
190
  styler = (
 
208
  # ---------- Transform helpers ----------
209
  def _inv_transform(x: np.ndarray, transform: str) -> np.ndarray:
210
  t = (transform or "none").lower()
211
+ if t in ("log10", "log_10", "log10()"):
212
+ return np.power(10.0, x)
213
+ if t in ("ln", "log", "loge", "log_e", "natural"):
214
+ return np.exp(x)
215
  return x
216
 
217
  # ---------- Build X exactly as trained ----------
 
221
  X[c] = pd.to_numeric(X[c], errors="coerce")
222
  return X
223
 
224
+ # ---------- Target resolver (use aliases) ----------
225
  def _resolve_target_col(df: pd.DataFrame) -> str | None:
226
  cols_lower = {c.lower(): c for c in df.columns}
227
  for cand in TARGET_ALIASES:
 
234
  # =========================
235
def _summary_table(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
    """Min/Max/Mean/Std summary (rounded to 3 d.p.) for the requested columns.

    Columns missing from *df* are silently dropped; an empty DataFrame is
    returned when none of the requested columns are present.
    """
    present = [c for c in cols if c in df.columns]
    if not present:
        return pd.DataFrame()
    stats = df[present].agg(["min", "max", "mean", "std"]).T
    stats = stats.rename(columns={"min": "Min", "max": "Max", "mean": "Mean", "std": "Std"})
    return _round_numeric(stats.reset_index(names="Field"), 3)
246
 
247
def _train_ranges_df(ranges: dict[str, tuple[float, float]]) -> pd.DataFrame:
    """Tabulate training feature ranges as Feature/Min/Max rows (3 d.p.)."""
    if not ranges:
        return pd.DataFrame()
    rows = [(feat, lo, hi) for feat, (lo, hi) in ranges.items()]
    out = pd.DataFrame(rows, columns=["Feature", "Min", "Max"])
    return _round_numeric(out, 3)
253
 
254
+ def _excel_autofit(writer, sheet_name: str, df: pd.DataFrame, min_w: int = 8, max_w: int = 40) -> None:
255
  try:
256
  import xlsxwriter # noqa: F401
257
  except Exception:
 
265
 
266
def _available_sections() -> list[str]:
    """List the exportable sheet names implied by the results in session state.

    'Info' is always offered; the other groups appear only once the
    corresponding phase (train/test/validate/predict) has produced results.
    """
    res = st.session_state.get("results", {})
    groups = (
        ("Train", ["Training", "Training_Metrics", "Training_Summary"]),
        ("Test", ["Testing", "Testing_Metrics", "Testing_Summary"]),
        ("Validate", ["Validation", "Validation_Metrics", "Validation_Summary", "Validation_OOR"]),
        ("PredictOnly", ["Prediction", "Prediction_Summary"]),
    )
    sections: list[str] = []
    for result_key, names in groups:
        if result_key in res:
            sections.extend(names)
    if st.session_state.get("train_ranges"):
        sections.append("Training_Ranges")
    sections.append("Info")
    return sections
281
 
282
+ def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: bool = True) -> tuple[bytes | None, str | None, list[str]]:
283
  res = st.session_state.get("results", {})
284
+ if not res:
285
+ return None, None, []
286
  sheets: dict[str, pd.DataFrame] = {}
287
  order: list[str] = []
288
 
289
+ def _add(name: str, df: pd.DataFrame) -> None:
290
+ if df is None or (isinstance(df, pd.DataFrame) and df.empty):
291
+ return
292
+ sheets[name] = _round_numeric(df, ndigits)
293
+ order.append(name)
294
+
295
+ # Training / Testing
296
+ if "Training" in selected and "Train" in res:
297
+ _add("Training", res["Train"])
298
+ if "Training_Metrics" in selected and res.get("m_train"):
299
+ _add("Training_Metrics", pd.DataFrame([res["m_train"]]))
300
  if "Training_Summary" in selected and "Train" in res:
301
+ tr_cols = FEATURES + [c for c in [st.session_state.get("tcol_train", TARGET_CANON), PRED_COL] if c in res["Train"].columns]
302
  _add("Training_Summary", _summary_table(res["Train"], tr_cols))
303
 
304
+ if "Testing" in selected and "Test" in res:
305
+ _add("Testing", res["Test"])
306
+ if "Testing_Metrics" in selected and res.get("m_test"):
307
+ _add("Testing_Metrics", pd.DataFrame([res["m_test"]]))
308
  if "Testing_Summary" in selected and "Test" in res:
309
+ te_cols = FEATURES + [c for c in [st.session_state.get("tcol_test", TARGET_CANON), PRED_COL] if c in res["Test"].columns]
310
  _add("Testing_Summary", _summary_table(res["Test"], te_cols))
311
 
312
+ # Validation / Prediction
313
+ if "Validation" in selected and "Validate" in res:
314
+ _add("Validation", res["Validate"])
315
+ if "Validation_Metrics" in selected and res.get("m_val"):
316
+ _add("Validation_Metrics", pd.DataFrame([res["m_val"]]))
317
+ if "Validation_Summary" in selected and res.get("sv_val"):
318
+ _add("Validation_Summary", pd.DataFrame([res["sv_val"]]))
319
  if "Validation_OOR" in selected and isinstance(res.get("oor_tbl"), pd.DataFrame) and not res["oor_tbl"].empty:
320
  _add("Validation_OOR", res["oor_tbl"].reset_index(drop=True))
321
 
322
+ if "Prediction" in selected and "PredictOnly" in res:
323
+ _add("Prediction", res["PredictOnly"])
324
+ if "Prediction_Summary" in selected and res.get("sv_pred"):
325
+ _add("Prediction_Summary", pd.DataFrame([res["sv_pred"]]))
326
 
327
  if "Training_Ranges" in selected and st.session_state.get("train_ranges"):
328
  _add("Training_Ranges", _train_ranges_df(st.session_state["train_ranges"]))
 
338
  ])
339
  _add("Info", info)
340
 
341
+ if not order:
342
+ return None, None, []
343
 
344
  bio = io.BytesIO()
345
  engine = _excel_engine()
346
  with pd.ExcelWriter(bio, engine=engine) as writer:
347
  for name in order:
348
+ df = sheets[name]
349
+ sheet = _excel_safe_name(name)
350
  df.to_excel(writer, sheet_name=sheet, index=False)
351
+ if do_autofit:
352
+ _excel_autofit(writer, sheet, df)
353
  bio.seek(0)
354
  fname = f"FracPressure_Export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
355
  return bio.getvalue(), fname, order
356
 
357
def render_export_button(phase_key: str) -> None:
    """Render the 'Export to Excel' section: sheet multiselect + download button.

    Widget keys are namespaced by *phase_key* so each app phase keeps its own
    selection state. Renders nothing when no results exist yet.
    """
    if not st.session_state.get("results", {}):
        return
    xlsx_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    st.divider()
    st.markdown("### Export to Excel")
    chosen = st.multiselect(
        "Sheets to include",
        options=_available_sections(),
        default=[],
        placeholder="Choose option(s)",
        help="Pick the sheets you want in the Excel export.",
        key=f"sheets_{phase_key}",
    )
    if not chosen:
        # Keep a disabled button visible so the layout doesn't jump around.
        st.caption("Select one or more sheets above to enable export.")
        st.download_button(
            "⬇️ Export Excel", data=b"", file_name="FracPressure_Export.xlsx",
            mime=xlsx_mime,
            disabled=True, key=f"download_{phase_key}",
        )
        return
    payload, fname, included = build_export_workbook(selected=chosen, ndigits=3, do_autofit=True)
    if included:
        st.caption("Will include: " + ", ".join(included))
    st.download_button(
        "⬇️ Export Excel", data=(payload or b""), file_name=(fname or "FracPressure_Export.xlsx"),
        mime=xlsx_mime,
        disabled=(payload is None), key=f"download_{phase_key}",
    )
388
 
389
  # =========================
390
+ # Plots (integer ticks)
391
  # =========================
392
+ def cross_plot_static(actual, pred, label: str = "Fracture Pressure"):
393
  a = pd.Series(actual, dtype=float)
394
  p = pd.Series(pred, dtype=float)
395
+ lo = float(min(a.min(), p.min()))
396
+ hi = float(max(a.max(), p.max()))
397
  pad = 0.03 * (hi - lo if hi > lo else 1.0)
398
  lo2, hi2 = lo - pad, hi + pad
399
  ticks = np.linspace(lo2, hi2, 5)
 
420
  fig.subplots_adjust(left=0.16, bottom=0.16, right=0.98, top=0.98)
421
  return fig
422
 
423
+ def track_plot(df: pd.DataFrame, actual_col: str | None, include_actual: bool = True):
424
+ depth_col = next((c for c in df.columns if ("depth" in str(c).lower()) or ("tvd" in str(c).lower())), None)
425
  if depth_col is not None:
426
+ y = pd.to_numeric(df[depth_col], errors="coerce")
427
+ ylab = depth_col
428
  y_range = [float(np.nanmax(y)), float(np.nanmin(y))] # reversed
429
  else:
430
+ y = pd.Series(np.arange(1, len(df) + 1))
431
+ ylab = "Point Index"
432
  y_range = [float(y.max()), float(y.min())]
433
 
434
  x_series = pd.Series(df.get(PRED_COL, pd.Series(dtype=float))).astype(float)
 
446
  x=df[PRED_COL], y=y, mode="lines",
447
  line=dict(color=COLORS["pred"], width=1.8),
448
  name=PRED_COL,
449
+ hovertemplate=f"{PRED_COL}: "+"%{x:.0f}<br>"+ylab+": %{y}<extra></extra>"
450
  ))
451
  if include_actual and actual_col and actual_col in df.columns:
452
  fig.add_trace(go.Scatter(
453
  x=df[actual_col], y=y, mode="lines",
454
  line=dict(color=COLORS["actual"], width=2.0, dash="dot"),
455
  name=f"{actual_col} (actual)",
456
+ hovertemplate=f"{actual_col}: "+"%{x:.0f}<br>"+ylab+": %{y}<extra></extra>"
457
  ))
458
 
459
  fig.update_layout(
 
493
  ax.axis("off")
494
  return fig
495
 
496
+ depth_col = next((c for c in df.columns if ("depth" in str(c).lower()) or ("tvd" in str(c).lower())), None)
497
  if depth_col is not None:
498
  idx = pd.to_numeric(df[depth_col], errors="coerce")
499
  y_label = depth_col
 
541
  st.session_state.setdefault("app_step", "intro")
542
  st.session_state.setdefault("results", {})
543
  st.session_state.setdefault("train_ranges", None)
544
+ st.session_state.setdefault("dev_file_name", "")
545
+ st.session_state.setdefault("dev_file_bytes", b"")
546
+ st.session_state.setdefault("dev_file_loaded", False)
547
  st.session_state.setdefault("fitted_model", None)
548
 
549
  # Persistent top-of-page preview panel
 
561
  </div>
562
  """, unsafe_allow_html=True)
563
 
564
+ def sticky_header(title: str, message: str) -> None:
565
  st.markdown(
566
  f"""
567
  <style>
 
578
  unsafe_allow_html=True
579
  )
580
 
581
+ def render_preview_panel() -> None:
582
  """Top-of-page preview panel (same behavior as SHmin)."""
583
  if not st.session_state.get("show_preview_panel"):
584
  return
 
609
  else:
610
  tbl = (
611
  df[feat_present]
612
+ .agg(["min", "max", "mean", "std"])
613
+ .T.rename(columns={"min": "Min", "max": "Max", "mean": "Mean", "std": "Std"})
614
+ .reset_index(names="Feature")
615
  )
616
  df_centered_rounded(tbl)
617
  with tabs[-1]:
 
625
  # =========================
626
  if st.session_state.app_step == "intro":
627
  st.header("Welcome!")
628
+ st.markdown(
629
+ f"This software is developed by *Smart Thinking AI-Solutions Team* to estimate **Fracture Pressure** ({UNITS}) from drilling/offset data."
630
+ )
631
  st.subheader("How It Works")
632
  st.markdown(
633
  "1) **Upload your data file** and click **Run Model** to fit the baked-in pipeline. \n"
 
635
  "3) **Predict** on wells without actual."
636
  )
637
  if st.button("Start Showcase", type="primary"):
638
+ st.session_state.app_step = "dev"
639
+ st.rerun()
640
 
641
  # =========================
642
  # CASE BUILDING (Train/Test)
643
  # =========================
644
+ def _find_sheet(book: dict[str, pd.DataFrame], names: list[str]) -> str | None:
645
  low2orig = {k.lower(): k for k in book.keys()}
646
  for nm in names:
647
+ if nm.lower() in low2orig:
648
+ return low2orig[nm.lower()]
649
  return None
650
 
651
  if st.session_state.app_step == "dev":
652
  st.sidebar.header("Case Building")
653
+ up = st.sidebar.file_uploader("Upload Your Data File", type=["xlsx", "xls"])
654
  if up is not None:
655
  st.session_state.dev_file_bytes = up.getvalue()
656
  st.session_state.dev_file_name = up.name
657
  st.session_state.dev_file_loaded = True
658
  st.session_state.fitted_model = None
659
+ # show preview panel immediately
660
  st.session_state.preview_book = read_book_bytes(st.session_state.dev_file_bytes) if st.session_state.dev_file_bytes else {}
661
  st.session_state.show_preview_panel = True
662
  st.rerun()
 
668
  st.sidebar.caption(f"**Data loaded:** {st.session_state.dev_file_name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
669
 
670
  run = st.sidebar.button("Run Model", type="primary", use_container_width=True)
671
+ if st.sidebar.button("Proceed to Validation ▶", use_container_width=True):
672
+ st.session_state.app_step = "validate"; st.rerun()
673
+ if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True):
674
+ st.session_state.app_step = "predict"; st.rerun()
675
 
676
  if st.session_state.dev_file_loaded and st.session_state.show_preview_panel:
677
  sticky_header("Case Building", "Previewed ✓ — now click **Run Model**.")
 
684
 
685
  if run and st.session_state.dev_file_bytes:
686
  book = read_book_bytes(st.session_state.dev_file_bytes)
687
+ sh_train = _find_sheet(book, ["Train", "Training", "training2", "train", "training"])
688
+ sh_test = _find_sheet(book, ["Test", "Testing", "testing2", "test", "testing"])
689
  if sh_train is None or sh_test is None:
690
  st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
691
  st.stop()
 
693
  tr0 = book[sh_train].copy()
694
  te0 = book[sh_test].copy()
695
 
696
+ # Resolve target name per-sheet from aliases
697
  tcol_tr = _resolve_target_col(tr0)
698
  tcol_te = _resolve_target_col(te0)
699
  if tcol_tr is None or tcol_te is None:
700
  st.error(f"Missing target column. Expected one of: {TARGET_ALIASES}")
701
  st.stop()
702
 
703
+ # Ensure feature columns exist
704
  if not (ensure_cols(tr0, FEATURES) and ensure_cols(te0, FEATURES)):
705
  st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True)
706
  st.stop()
 
717
 
718
  pipe = build_pipeline()
719
  pipe.fit(X_tr, y_tr)
720
+ st.session_state.fitted_model = pipe
721
 
722
  # Predictions
723
  tr = tr0.copy(); te = te0.copy()
 
728
  st.session_state.results["Train"] = tr
729
  st.session_state.results["Test"] = te
730
  st.session_state.results["m_train"] = {
731
+ "R": pearson_r(tr[tcol_tr], tr[PRED_COL]),
732
+ "RMSE": rmse(tr[tcol_tr], tr[PRED_COL]),
733
  "MAPE%": mape(tr[tcol_tr], tr[PRED_COL]),
734
  }
735
  st.session_state.results["m_test"] = {
736
+ "R": pearson_r(te[tcol_te], te[PRED_COL]),
737
+ "RMSE": rmse(te[tcol_te], te[PRED_COL]),
738
  "MAPE%": mape(te[tcol_te], te[PRED_COL]),
739
  }
740
 
741
+ # Persist used target names (for export/plots)
742
  st.session_state["tcol_train"] = tcol_tr
743
  st.session_state["tcol_test"] = tcol_te
744
 
745
  # Training min–max ranges
746
+ tr_min = tr[FEATURES].min().to_dict()
747
+ tr_max = tr[FEATURES].max().to_dict()
748
+ st.session_state.train_ranges = {f: (float(tr_min[f]), float(tr_max[f])) for f in FEATURES}
749
 
750
  st.markdown('<div class="st-message-box st-success">Case has been built and results are displayed below.</div>', unsafe_allow_html=True)
751
 
752
  def _dev_block(df: pd.DataFrame, actual_col: str, m: dict):
753
+ c1, c2, c3 = st.columns(3)
754
  c1.metric("R", f"{m['R']:.3f}")
755
  c2.metric("RMSE", f"{m['RMSE']:.2f}")
756
  c3.metric("MAPE%", f"{m['MAPE%']:.2f}")
 
763
  """, unsafe_allow_html=True)
764
  col_track, col_cross = st.columns([2, 3], gap="large")
765
  with col_track:
766
+ st.plotly_chart(
767
+ track_plot(df, actual_col, include_actual=True),
768
+ use_container_width=False,
769
+ config={"displayModeBar": False, "scrollZoom": True}
770
+ )
771
  with col_cross:
772
+ st.pyplot(
773
+ cross_plot_static(df[actual_col], df[PRED_COL], label="Fracture Pressure"),
774
+ use_container_width=False
775
+ )
776
 
777
  if "Train" in st.session_state.results or "Test" in st.session_state.results:
778
  tab1, tab2 = st.tabs(["Training", "Testing"])
779
  if "Train" in st.session_state.results:
780
+ with tab1:
781
+ _dev_block(st.session_state.results["Train"], st.session_state.get("tcol_train", TARGET_CANON), st.session_state.results["m_train"])
782
  if "Test" in st.session_state.results:
783
+ with tab2:
784
+ _dev_block(st.session_state.results["Test"], st.session_state.get("tcol_test", TARGET_CANON), st.session_state.results["m_test"])
785
  render_export_button(phase_key="dev")
786
 
787
  # =========================
 
789
  # =========================
790
  if st.session_state.app_step == "validate":
791
  st.sidebar.header("Validate the Model")
792
+ up = st.sidebar.file_uploader("Upload Validation Excel", type=["xlsx", "xls"])
793
  if up is not None:
794
  book = read_book_bytes(up.getvalue())
795
  if book:
796
  df0 = next(iter(book.values()))
797
  st.sidebar.caption(f"**Data loaded:** {up.name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
798
 
799
+ # Preview button
800
  if st.sidebar.button("Preview data", use_container_width=True, disabled=(up is None)):
801
  st.session_state.preview_book = read_book_bytes(up.getvalue()) if up is not None else {}
802
  st.session_state.show_preview_panel = True
803
  st.rerun()
804
 
805
  go_btn = st.sidebar.button("Predict & Validate", type="primary", use_container_width=True)
806
+ if st.sidebar.button("⬅ Back to Case Building", use_container_width=True):
807
+ st.session_state.app_step = "dev"; st.rerun()
808
+ if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True):
809
+ st.session_state.app_step = "predict"; st.rerun()
810
 
811
  sticky_header("Validate the Model", "Upload a dataset with the same **features** and an **actual fracture pressure** column.")
812
  render_preview_panel()
813
 
814
  if go_btn and up is not None:
815
  if st.session_state.fitted_model is None:
816
+ st.error("Please train the model first in Case Building.")
817
+ st.stop()
818
 
819
  book = read_book_bytes(up.getvalue())
820
  names = list(book.keys())
821
+ name = next((s for s in names if s.lower() in ("validation", "validate", "validation2", "val", "val2")), names[0])
822
  df0 = book[name].copy()
823
 
824
  tcol = _resolve_target_col(df0)
 
826
  st.error(f"Missing target column. Expected one of: {TARGET_ALIASES}")
827
  st.stop()
828
  if not ensure_cols(df0, FEATURES):
829
+ st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True)
830
+ st.stop()
831
 
832
  df = df0.copy()
833
  df[PRED_COL] = _inv_transform(st.session_state.fitted_model.predict(_make_X(df0, FEATURES)), TRANSFORM)
834
  st.session_state.results["Validate"] = df
835
 
836
  # Range checks
837
+ ranges = st.session_state.train_ranges
838
+ oor_pct = 0.0
839
+ tbl = None
840
  if ranges:
841
+ any_viol = pd.DataFrame({f: (df[f] < ranges[f][0]) | (df[f] > ranges[f][1]) for f in FEATURES}).any(axis=1)
842
  oor_pct = float(any_viol.mean() * 100.0)
843
  if any_viol.any():
844
  tbl = df.loc[any_viol, FEATURES].copy()
845
  for c in FEATURES:
846
+ if pd.api.types.is_numeric_dtype(tbl[c]):
847
+ tbl[c] = tbl[c].round(3)
848
+ tbl["Violations"] = pd.DataFrame({f: (df[f] < ranges[f][0]) | (df[f] > ranges[f][1]) for f in FEATURES}).loc[any_viol].apply(
849
+ lambda r: ", ".join([c for c, v in r.items() if v]), axis=1
850
  )
851
 
852
  st.session_state.results["m_val"] = {
 
854
  "RMSE": rmse(df[tcol], df[PRED_COL]),
855
  "MAPE%": mape(df[tcol], df[PRED_COL]),
856
  }
857
+ st.session_state.results["sv_val"] = {"n": len(df), "pred_min": float(df[PRED_COL].min()), "pred_max": float(df[PRED_COL].max()), "oor": oor_pct}
858
  st.session_state.results["oor_tbl"] = tbl
859
  st.session_state["tcol_val"] = tcol
860
 
861
  if "Validate" in st.session_state.results:
862
+ m = st.session_state.results["m_val"]
863
+ tcol = st.session_state.get("tcol_val", TARGET_CANON)
864
+ c1, c2, c3 = st.columns(3)
865
  c1.metric("R", f"{m['R']:.3f}"); c2.metric("RMSE", f"{m['RMSE']:.2f}"); c3.metric("MAPE%", f"{m['MAPE%']:.2f}")
866
  st.markdown("""
867
  <div style='text-align: left; font-size: 0.8em; color: #6b7280; margin-top: -16px; margin-bottom: 8px;'>
 
873
 
874
  col_track, col_cross = st.columns([2, 3], gap="large")
875
  with col_track:
876
+ st.plotly_chart(
877
+ track_plot(st.session_state.results["Validate"], tcol, include_actual=True),
878
+ use_container_width=False, config={"displayModeBar": False, "scrollZoom": True}
879
+ )
880
  with col_cross:
881
+ st.pyplot(
882
+ cross_plot_static(st.session_state.results["Validate"][tcol], st.session_state.results["Validate"][PRED_COL], label="Fracture Pressure"),
883
+ use_container_width=False
884
+ )
885
 
886
  render_export_button(phase_key="validate")
887
 
888
  sv = st.session_state.results["sv_val"]
889
+ if sv["oor"] > 0:
890
+ st.markdown('<div class="st-message-box st-warning">Some inputs fall outside **training min–max** ranges.</div>', unsafe_allow_html=True)
891
  if st.session_state.results["oor_tbl"] is not None:
892
  st.write("*Out-of-range rows (vs. Training min–max):*")
893
  df_centered_rounded(st.session_state.results["oor_tbl"])
 
897
  # =========================
898
  if st.session_state.app_step == "predict":
899
  st.sidebar.header("Prediction (No Actual)")
900
+ up = st.sidebar.file_uploader("Upload Prediction Excel", type=["xlsx", "xls"])
901
  if up is not None:
902
  book = read_book_bytes(up.getvalue())
903
  if book:
904
  df0 = next(iter(book.values()))
905
  st.sidebar.caption(f"**Data loaded:** {up.name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
906
 
907
+ # Preview button
908
  if st.sidebar.button("Preview data", use_container_width=True, disabled=(up is None)):
909
  st.session_state.preview_book = read_book_bytes(up.getvalue()) if up is not None else {}
910
  st.session_state.show_preview_panel = True
911
  st.rerun()
912
 
913
  go_btn = st.sidebar.button("Predict", type="primary", use_container_width=True)
914
+ if st.sidebar.button("⬅ Back to Case Building", use_container_width=True):
915
+ st.session_state.app_step = "dev"; st.rerun()
916
 
917
  sticky_header("Prediction", "Upload a dataset with the 5 feature columns (no actual column).")
918
  render_preview_panel()
919
 
920
  if go_btn and up is not None:
921
  if st.session_state.fitted_model is None:
922
+ st.error("Please train the model first in Case Building.")
923
+ st.stop()
924
 
925
+ book = read_book_bytes(up.getvalue())
926
+ name = list(book.keys())[0]
927
  df0 = book[name].copy()
928
  if not ensure_cols(df0, FEATURES):
929
+ st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True)
930
+ st.stop()
931
 
932
  df = df0.copy()
933
  df[PRED_COL] = _inv_transform(st.session_state.fitted_model.predict(_make_X(df0, FEATURES)), TRANSFORM)
934
  st.session_state.results["PredictOnly"] = df
935
 
936
+ ranges = st.session_state.train_ranges
937
+ oor_pct = 0.0
938
  if ranges:
939
+ any_viol = pd.DataFrame({f: (df[f] < ranges[f][0]) | (df[f] > ranges[f][1]) for f in FEATURES}).any(axis=1)
940
  oor_pct = float(any_viol.mean() * 100.0)
941
  st.session_state.results["sv_pred"] = {
942
+ "n": len(df),
943
+ "pred_min": float(df[PRED_COL].min()),
944
+ "pred_max": float(df[PRED_COL].max()),
945
+ "pred_mean": float(df[PRED_COL].mean()),
946
+ "pred_std": float(df[PRED_COL].std(ddof=0)),
947
+ "oor": oor_pct
948
  }
949
 
950
  if "PredictOnly" in st.session_state.results:
951
+ df = st.session_state.results["PredictOnly"]
952
+ sv = st.session_state.results["sv_pred"]
953
+ col_left, col_right = st.columns([2, 3], gap="large")
954
  with col_left:
955
  table = pd.DataFrame({
956
+ "Metric": ["# points", "Pred min", "Pred max", "Pred mean", "Pred std", "OOR %"],
957
+ "Value": [sv["n"], round(sv["pred_min"], 3), round(sv["pred_max"], 3),
958
+ round(sv["pred_mean"], 3), round(sv["pred_std"], 3), f'{sv["oor"]:.1f}%']
959
  })
960
  st.markdown('<div class="st-message-box st-success">Predictions ready ✓</div>', unsafe_allow_html=True)
961
  df_centered_rounded(table, hide_index=True)
962
  st.caption("**★ OOR** = % of rows with input features outside the training min–max range.")
963
  with col_right:
964
+ st.plotly_chart(
965
+ track_plot(df, actual_col=None, include_actual=False),
966
+ use_container_width=False, config={"displayModeBar": False, "scrollZoom": True}
967
+ )
968
  render_export_button(phase_key="predict")
969
 
970
  # =========================
 
974
  <br><br><br>
975
  <hr>
976
  <div style='text-align:center;color:#6b7280;font-size:1.0em;'>
977
+ © 2025 Smart Thinking AI-Solutions Team. All rights reserved.<br>
978
+ Website: <a href="https://smartthinking.com.sa" target="_blank" rel="noopener noreferrer">smartthinking.com.sa</a>
979
  </div>
980
  """, unsafe_allow_html=True)