UCS2014 committed on
Commit
442b8ae
·
verified ·
1 Parent(s): bd16f79

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +272 -317
app.py CHANGED
@@ -1,4 +1,5 @@
1
- # app.py — ST_GR (Gamma Ray) app adapted from your UCS app, same flow & design
 
2
  import io, json, os, base64, math
3
  from pathlib import Path
4
  import streamlit as st
@@ -7,7 +8,7 @@ import numpy as np
7
  import joblib
8
  from datetime import datetime
9
 
10
- # Matplotlib for PREVIEW modal and for the CROSS-PLOT (static)
11
  import matplotlib
12
  matplotlib.use("Agg")
13
  import matplotlib.pyplot as plt
@@ -24,13 +25,13 @@ TAGLINE = "Real-Time Gamma Ray Prediction"
24
 
25
  FEATURES = ["GPM", "SPP", "RPM", "WOB", "T", "ROP"]
26
 
27
- # Column name of the target you trained on
28
- TARGET = "log(GR)" # if your sheet uses a different name, set it here
29
 
30
- # >>> IMPORTANT: set the transform you used in training <<<
31
- TARGET_TRANSFORM = "log10" # use "log10" if trained on log10(GR); use "ln" if trained on ln(GR)
32
 
33
- # Column that contains real GR in API (for metrics/plots). If absent, the app will invert TARGET.
34
  ACTUAL_COL = "GR"
35
 
36
  MODELS_DIR = Path("models")
@@ -40,10 +41,10 @@ MODEL_FALLBACKS = [MODELS_DIR / "model.joblib", MODELS_DIR / "model.pkl"]
40
  COLORS = {"pred": "#1f77b4", "actual": "#f2b702", "ref": "#5a5a5a"}
41
 
42
  # ---- Plot sizing controls ----
43
- CROSS_W = 350 # px (matplotlib figure size; Streamlit will still scale)
44
  CROSS_H = 350
45
- TRACK_H = 1000 # px (plotly height)
46
- TRACK_W = 500 # px (plotly width)
47
  FONT_SZ = 13
48
  BOLD_FONT = "Arial Black, Arial, sans-serif"
49
 
@@ -52,71 +53,47 @@ BOLD_FONT = "Arial Black, Arial, sans-serif"
52
  # =========================
53
  st.set_page_config(page_title=APP_NAME, page_icon="logo.png", layout="wide")
54
 
55
- # General CSS
56
  st.markdown("""
57
  <style>
58
  .brand-logo { width: 200px; height: auto; object-fit: contain; }
59
- .sidebar-header { display:flex; align-items:center; gap:12px; }
60
- .sidebar-header .text h1 { font-size: 1.05rem; margin:0; line-height:1.1; }
61
- .sidebar-header .text .tag { font-size: .85rem; color:#6b7280; margin:2px 0 0; }
62
  .centered-container { display:flex; flex-direction:column; align-items:center; text-align:center; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  </style>
64
  """, unsafe_allow_html=True)
65
 
66
- # Allow sticky bits (preview expander header & tabs)
67
- st.markdown("""
68
- <style>
69
- .main .block-container { overflow: unset !important; }
70
- div[data-testid="stVerticalBlock"] { overflow: unset !important; }
71
- </style>
72
- """, unsafe_allow_html=True)
73
-
74
- # Hide uploader helper text
75
- st.markdown("""
76
- <style>
77
- section[data-testid="stFileUploader"] div[data-testid="stMarkdownContainer"]{display:none !important;}
78
- section[data-testid="stFileUploader"] [data-testid="stFileUploaderDropzone"] > div:first-child{display:none !important;}
79
- section[data-testid="stFileUploader"] [data-testid="stFileUploaderInstructions"]{display:none !important;}
80
- section[data-testid="stFileUploader"] p, section[data-testid="stFileUploader"] small{display:none !important;}
81
- </style>
82
- """, unsafe_allow_html=True)
83
-
84
- # Sticky Preview expander & its tabs
85
- st.markdown("""
86
- <style>
87
- div[data-testid="stExpander"] > details > summary {
88
- position: sticky; top: 0; z-index: 10; background: #fff; border-bottom: 1px solid #eee;
89
- }
90
- div[data-testid="stExpander"] div[data-baseweb="tab-list"] {
91
- position: sticky; top: 42px; z-index: 9; background: #fff; padding-top: 6px;
92
- }
93
- </style>
94
- """, unsafe_allow_html=True)
95
-
96
- # Center text in all pandas Styler tables (headers + cells)
97
  TABLE_CENTER_CSS = [
98
  dict(selector="th", props=[("text-align", "center")]),
99
  dict(selector="td", props=[("text-align", "center")]),
100
  ]
101
 
102
- # Message box styles
103
- st.markdown("""
104
- <style>
105
- .st-message-box { background:#f0f2f6; color:#333; padding:10px; border-radius:10px; border:1px solid #e6e9ef; }
106
- .st-message-box.st-success { background:#d4edda; color:#155724; border-color:#c3e6cb; }
107
- .st-message-box.st-warning { background:#fff3cd; color:#856404; border-color:#ffeeba; }
108
- .st-message-box.st-error { background:#f8d7da; color:#721c24; border-color:#f5c6cb; }
109
- </style>
110
- """, unsafe_allow_html=True)
111
-
112
  # =========================
113
  # Password gate
114
  # =========================
115
  def inline_logo(path="logo.png") -> str:
116
  try:
117
  p = Path(path)
118
- if not p.exists():
119
- return ""
120
  return f"data:image/png;base64,{base64.b64encode(p.read_bytes()).decode('ascii')}"
121
  except Exception:
122
  return ""
@@ -162,9 +139,7 @@ def rmse(y_true, y_pred) -> float:
162
  def pearson_r(y_true, y_pred) -> float:
163
  a = np.asarray(y_true, dtype=float)
164
  p = np.asarray(y_pred, dtype=float)
165
- # Check for constant arrays
166
- if a.size < 2 or np.all(a == a[0]) or np.all(p == p[0]):
167
- return float("nan")
168
  return float(np.corrcoef(a, p)[0, 1])
169
 
170
  @st.cache_resource(show_spinner=False)
@@ -182,10 +157,10 @@ def read_book_bytes(b: bytes):
182
 
183
  def normalize_df(df: pd.DataFrame) -> pd.DataFrame:
184
  out = df.copy()
185
- out.columns = [str(c).strip() for c in out.columns]
186
  return out
187
 
188
- def ensure_cols(df, cols):
189
  miss = [c for c in cols if c not in df.columns]
190
  if miss:
191
  st.error(f"Missing columns: {miss}\nFound: {list(df.columns)}")
@@ -195,8 +170,7 @@ def ensure_cols(df, cols):
195
  def find_sheet(book, names):
196
  low2orig = {k.lower(): k for k in book.keys()}
197
  for nm in names:
198
- if nm.lower() in low2orig:
199
- return low2orig[nm.lower()]
200
  return None
201
 
202
  def _nice_tick0(xmin: float, step: int = 5) -> float:
@@ -207,9 +181,9 @@ def df_centered_rounded(df: pd.DataFrame, hide_index=True):
207
  numcols = out.select_dtypes(include=[np.number]).columns
208
  styler = (
209
  out.style
210
- .format({c: "{:.2f}" for c in numcols})
211
- .set_properties(**{"text-align": "center"})
212
- .set_table_styles(TABLE_CENTER_CSS)
213
  )
214
  st.dataframe(styler, use_container_width=True, hide_index=hide_index)
215
 
@@ -223,10 +197,7 @@ def inverse_target(x: np.ndarray, transform: str) -> np.ndarray:
223
  return x # "none"
224
 
225
  def to_actual_series(df: pd.DataFrame, target_col: str, actual_col_hint: str, transform: str) -> pd.Series:
226
- """
227
- Return the 'actual GR' series (API).
228
- If an explicit actual column exists, use it; else invert the target.
229
- """
230
  if actual_col_hint and actual_col_hint in df.columns:
231
  return pd.Series(df[actual_col_hint], dtype=float)
232
  if target_col in df.columns:
@@ -235,7 +206,7 @@ def to_actual_series(df: pd.DataFrame, target_col: str, actual_col_hint: str, tr
235
  return pd.Series(df["GR"], dtype=float)
236
  raise ValueError("Cannot find actual GR column or target to invert.")
237
 
238
- # === Excel export helpers =================================================
239
  def _excel_engine() -> str:
240
  try:
241
  import xlsxwriter # noqa: F401
@@ -257,127 +228,157 @@ def _round_numeric(df: pd.DataFrame, ndigits: int = 2) -> pd.DataFrame:
257
 
258
  def _summary_table(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
259
  cols = [c for c in cols if c in df.columns]
260
- if not cols:
261
- return pd.DataFrame()
262
  tbl = (df[cols]
263
  .agg(['min','max','mean','std'])
264
  .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
265
  .reset_index(names="Field"))
266
- return _round_numeric(tbl)
267
 
268
  def _train_ranges_df(ranges: dict[str, tuple[float, float]]) -> pd.DataFrame:
269
- if not ranges:
270
- return pd.DataFrame()
271
  df = pd.DataFrame(ranges).T.reset_index()
272
  df.columns = ["Feature", "Min", "Max"]
273
- return _round_numeric(df)
274
 
275
- def build_export_workbook() -> tuple[bytes|None, str|None, list[str]]:
 
 
 
 
 
 
 
 
 
 
 
 
276
  res = st.session_state.get("results", {})
277
- if not res:
278
- return None, None, []
 
 
 
 
 
 
 
 
 
 
279
 
280
  sheets: dict[str, pd.DataFrame] = {}
281
  order: list[str] = []
282
 
283
  # Training
284
- if "Train" in res:
285
- tr = _round_numeric(res["Train"])
286
- sheets["Training"] = tr; order.append("Training")
287
- m = res.get("m_train", {})
288
- if m:
289
- sheets["Training_Metrics"] = _round_numeric(pd.DataFrame([m])); order.append("Training_Metrics")
290
- tr_cols = FEATURES + [c for c in ["GR_Actual","GR_Pred"] if c in tr.columns]
291
- s = _summary_table(tr, tr_cols)
292
  if not s.empty:
293
  sheets["Training_Summary"] = s; order.append("Training_Summary")
294
 
295
  # Testing
296
- if "Test" in res:
297
- te = _round_numeric(res["Test"])
298
- sheets["Testing"] = te; order.append("Testing")
299
- m = res.get("m_test", {})
300
- if m:
301
- sheets["Testing_Metrics"] = _round_numeric(pd.DataFrame([m])); order.append("Testing_Metrics")
302
- te_cols = FEATURES + [c for c in ["GR_Actual","GR_Pred"] if c in te.columns]
303
- s = _summary_table(te, te_cols)
304
  if not s.empty:
305
  sheets["Testing_Summary"] = s; order.append("Testing_Summary")
306
 
307
  # Validation
308
- if "Validate" in res:
309
- va = _round_numeric(res["Validate"])
310
- sheets["Validation"] = va; order.append("Validation")
311
- m = res.get("m_val", {})
312
- if m:
313
- sheets["Validation_Metrics"] = _round_numeric(pd.DataFrame([m])); order.append("Validation_Metrics")
314
- sv = res.get("sv_val", {})
315
- if sv:
316
- sheets["Validation_Summary"] = _round_numeric(pd.DataFrame([sv])); order.append("Validation_Summary")
317
- oor_tbl = res.get("oor_tbl")
318
- if isinstance(oor_tbl, pd.DataFrame) and not oor_tbl.empty:
319
- sheets["Validation_OOR"] = _round_numeric(oor_tbl.reset_index(drop=True)); order.append("Validation_OOR")
320
-
321
- # Prediction (no actual)
322
- if "PredictOnly" in res:
323
- pr = _round_numeric(res["PredictOnly"])
324
- sheets["Prediction"] = pr; order.append("Prediction")
325
- svp = res.get("sv_pred", {})
326
- if svp:
327
- sheets["Prediction_Summary"] = _round_numeric(pd.DataFrame([svp])); order.append("Prediction_Summary")
328
- oor_tbl_p = res.get("oor_tbl_pred")
329
- if isinstance(oor_tbl_p, pd.DataFrame) and not oor_tbl_p.empty:
330
- sheets["Prediction_OOR"] = _round_numeric(oor_tbl_p.reset_index(drop=True)); order.append("Prediction_OOR")
331
 
332
  # Training ranges
333
- tr_ranges = st.session_state.get("train_ranges")
334
- if tr_ranges:
335
- rr = _train_ranges_df(tr_ranges)
336
- if not rr.empty:
337
- sheets["Training_Ranges"] = rr; order.append("Training_Ranges")
338
-
339
- # Info sheet
340
- info = pd.DataFrame([
341
- {"Key": "AppName", "Value": APP_NAME},
342
- {"Key": "Tagline", "Value": TAGLINE},
343
- {"Key": "Target", "Value": TARGET},
344
- {"Key": "TargetTransform", "Value": TARGET_TRANSFORM},
345
- {"Key": "ActualColumn", "Value": ACTUAL_COL},
346
- {"Key": "Features", "Value": ", ".join(FEATURES)},
347
- {"Key": "ExportedAt", "Value": datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
348
- ])
349
- sheets["Info"] = info; order.append("Info")
350
-
351
- # Write workbook to memory
352
  bio = io.BytesIO()
353
- with pd.ExcelWriter(bio, engine=_excel_engine()) as writer:
 
354
  for name in order:
355
  df = sheets[name]
356
- df.to_excel(writer, sheet_name=_excel_safe_name(name), index=False)
 
 
357
  bio.seek(0)
358
-
359
  fname = f"GR_Export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
360
  return bio.getvalue(), fname, order
361
 
362
- def render_export_button(key: str = "export_main") -> None:
363
- """Bottom-of-page export button (main content area)."""
364
- data, fname, names = build_export_workbook()
365
  st.divider()
366
  st.markdown("### Export to Excel")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
  if names:
368
- st.caption("Includes sheets: " + ", ".join(names))
369
  st.download_button(
370
- label="⬇️ Export Excel",
371
  data=(data or b""),
372
  file_name=(fname or "GR_Export.xlsx"),
373
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
374
  disabled=(data is None),
375
- help="Exports all available results, metrics, summaries, OOR, training ranges, and info.",
376
- key=key,
377
  )
378
 
379
  # =========================
380
- # Cross plot (Matplotlib) — auto limits for GR
381
  # =========================
382
  def _nice_bounds(arr_min, arr_max, n_ticks=6):
383
  if not np.isfinite(arr_min) or not np.isfinite(arr_max):
@@ -403,11 +404,7 @@ def cross_plot_static(actual, pred):
403
  ticks = np.arange(fixed_min, fixed_max + step, step)
404
 
405
  dpi = 110
406
- fig, ax = plt.subplots(
407
- figsize=(CROSS_W / dpi, CROSS_H / dpi),
408
- dpi=dpi,
409
- constrained_layout=False
410
- )
411
 
412
  ax.scatter(a, p, s=14, c=COLORS["pred"], alpha=0.9, linewidths=0)
413
  ax.plot([fixed_min, fixed_max], [fixed_min, fixed_max],
@@ -415,22 +412,19 @@ def cross_plot_static(actual, pred):
415
 
416
  ax.set_xlim(fixed_min, fixed_max)
417
  ax.set_ylim(fixed_min, fixed_max)
418
- ax.set_xticks(ticks)
419
- ax.set_yticks(ticks)
420
  ax.set_aspect("equal", adjustable="box")
421
 
422
  fmt = FuncFormatter(lambda x, _: f"{int(x):,}")
423
- ax.xaxis.set_major_formatter(fmt)
424
- ax.yaxis.set_major_formatter(fmt)
425
 
426
- ax.set_xlabel("Actual GR (API)", fontweight="bold", fontsize=10, color="black")
427
- ax.set_ylabel("Predicted GR (API)", fontweight="bold", fontsize=10, color="black")
428
  ax.tick_params(labelsize=8, colors="black")
429
 
430
  ax.grid(True, linestyle=":", alpha=0.3)
431
  for spine in ax.spines.values():
432
- spine.set_linewidth(1.1)
433
- spine.set_color("#444")
434
 
435
  fig.subplots_adjust(left=0.16, bottom=0.16, right=0.98, top=0.98)
436
  return fig
@@ -439,25 +433,18 @@ def cross_plot_static(actual, pred):
439
  # Track plot (Plotly) — y-axis reversed
440
  # =========================
441
  def track_plot(df, include_actual=True, pred_col="GR_Pred", actual_col="GR"):
442
- # ensure 1D series even if duplicate col names exist
443
  def _col_1d(frame: pd.DataFrame, col: str) -> pd.Series:
444
- if col not in frame.columns:
445
- return pd.Series(dtype=float)
446
  v = frame[col]
447
- if isinstance(v, pd.DataFrame):
448
- v = v.iloc[:, 0]
449
  return pd.Series(v, dtype=float)
450
 
451
- # Depth (or index) for y
452
  depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
453
  if depth_col is not None:
454
- y = pd.Series(df[depth_col]).astype(float)
455
- ylab = depth_col
456
  else:
457
- y = pd.Series(np.arange(1, len(df) + 1), dtype=float)
458
- ylab = "Point Index"
459
 
460
- # X (GR) domain and ticks
461
  x_pred = _col_1d(df, pred_col)
462
  if include_actual and actual_col in df.columns:
463
  x_act = _col_1d(df, actual_col)
@@ -491,10 +478,8 @@ def track_plot(df, include_actual=True, pred_col="GR_Pred", actual_col="GR"):
491
  paper_bgcolor="#fff", plot_bgcolor="#fff",
492
  margin=dict(l=64, r=16, t=36, b=48), hovermode="closest",
493
  font=dict(size=FONT_SZ, color="#000"),
494
- legend=dict(
495
- x=0.98, y=0.05, xanchor="right", yanchor="bottom",
496
- bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1
497
- ),
498
  legend_title_text=""
499
  )
500
  fig.update_xaxes(
@@ -502,11 +487,13 @@ def track_plot(df, include_actual=True, pred_col="GR_Pred", actual_col="GR"):
502
  title_font=dict(size=20, family=BOLD_FONT, color="#000"),
503
  tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
504
  side="top", range=[xmin, xmax],
505
- ticks="outside", tickformat=",.0f", tickmode="auto", tick0=tick0,
 
 
 
506
  showline=True, linewidth=1.2, linecolor="#444", mirror=True,
507
- showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
508
  )
509
- # Reverse y-axis universally (top=shallow, bottom=deep)
510
  fig.update_yaxes(
511
  title_text=ylab,
512
  title_font=dict(size=20, family=BOLD_FONT, color="#000"),
@@ -514,58 +501,54 @@ def track_plot(df, include_actual=True, pred_col="GR_Pred", actual_col="GR"):
514
  autorange="reversed",
515
  ticks="outside",
516
  showline=True, linewidth=1.2, linecolor="#444", mirror=True,
517
- showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
518
  )
519
-
520
  return fig
521
 
522
- # ---------- Preview modal (matplotlib) — y-axis reversed ----------
523
  def preview_tracks(df: pd.DataFrame, cols: list[str]):
524
  cols = [c for c in cols if c in df.columns]
525
  n = len(cols)
526
  if n == 0:
527
  fig, ax = plt.subplots(figsize=(4, 2))
528
- ax.text(0.5, 0.5, "No selected columns", ha="center", va="center"); ax.axis("off")
 
529
  return fig
530
 
531
  depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
532
  if depth_col is not None:
533
- y = pd.Series(df[depth_col], dtype=float)
534
  ylab = depth_col
535
  else:
536
  y = pd.Series(np.arange(1, len(df) + 1), dtype=float)
537
  ylab = "Point Index"
538
 
539
- fig, axes = plt.subplots(1, n, figsize=(2.4 * n, 7.0), dpi=100, sharey=False)
 
 
 
 
540
  if n == 1:
541
  axes = [axes]
542
 
543
- for ax, col in zip(axes, cols):
544
- ax.plot(pd.to_numeric(df[col], errors="coerce"), y, '-', lw=1.4, color="#333")
 
 
 
545
  ax.set_xlabel(col)
546
- ax.xaxis.set_label_position('top')
547
- ax.xaxis.tick_top()
548
- ax.set_ylim(float(y.min()), float(y.max()))
549
- ax.invert_yaxis()
550
  ax.grid(True, linestyle=":", alpha=0.3)
551
- for s in ax.spines.values():
552
- s.set_visible(True)
 
 
 
553
 
554
- axes[0].set_ylabel(ylab)
555
  return fig
556
 
557
- # Modal wrapper (Streamlit compatibility)
558
- try:
559
- dialog = st.dialog
560
- except AttributeError:
561
- def dialog(title):
562
- def deco(fn):
563
- def wrapper(*args, **kwargs):
564
- with st.expander(title, expanded=True):
565
- return fn(*args, **kwargs)
566
- return wrapper
567
- return deco
568
-
569
  # =========================
570
  # Load model + meta
571
  # =========================
@@ -583,8 +566,7 @@ def ensure_model() -> Path|None:
583
  r.raise_for_status()
584
  with open(DEFAULT_MODEL, "wb") as f:
585
  for chunk in r.iter_content(1<<20):
586
- if chunk:
587
- f.write(chunk)
588
  return DEFAULT_MODEL
589
  except Exception:
590
  return None
@@ -623,7 +605,7 @@ st.session_state.setdefault("dev_preview",False)
623
  st.session_state.setdefault("show_preview_modal", False)
624
 
625
  # =========================
626
- # Branding in Sidebar
627
  # =========================
628
  st.sidebar.markdown(f"""
629
  <div class="centered-container">
@@ -634,7 +616,6 @@ st.sidebar.markdown(f"""
634
  """, unsafe_allow_html=True
635
  )
636
 
637
- # Reusable sticky header
638
  def sticky_header(title, message):
639
  st.markdown(
640
  f"""
@@ -657,10 +638,10 @@ def sticky_header(title, message):
657
  # =========================
658
  if st.session_state.app_step == "intro":
659
  st.header("Welcome!")
660
- st.markdown("This software is developed by *Smart Thinking AI-Solutions Team* to estimate Gamma Ray (GR) from input features.")
661
  st.subheader("How It Works")
662
  st.markdown(
663
- "1) **Upload your data to build the case and preview the performance of our model.** \n"
664
  "2) Click **Run Model** to compute metrics and plots. \n"
665
  "3) **Proceed to Validation** (with actual GR) or **Proceed to Prediction** (no GR)."
666
  )
@@ -689,18 +670,15 @@ if st.session_state.app_step == "dev":
689
  st.session_state.dev_preview = True
690
 
691
  run = st.sidebar.button("Run Model", type="primary", use_container_width=True)
692
- if st.sidebar.button("Proceed to Validation ▶", use_container_width=True):
693
- st.session_state.app_step="validate"; st.rerun()
694
- if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True):
695
- st.session_state.app_step="predict"; st.rerun()
696
 
697
- # Sticky helper
698
  if st.session_state.dev_file_loaded and st.session_state.dev_preview:
699
  sticky_header("Case Building", "Previewed ✓ — now click **Run Model**.")
700
  elif st.session_state.dev_file_loaded:
701
- sticky_header("Case Building", "📄 **Preview uploaded data** using the sidebar button, then click **Run Model**.")
702
  else:
703
- sticky_header("Case Building", "**Upload your data to build a case, then run the model to review development performance.**")
704
 
705
  if run and st.session_state.dev_file_bytes:
706
  book = read_book_bytes(st.session_state.dev_file_bytes)
@@ -709,18 +687,20 @@ if st.session_state.app_step == "dev":
709
  if sh_train is None or sh_test is None:
710
  st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
711
  st.stop()
712
- tr = normalize_df(book[sh_train].copy()); te = normalize_df(book[sh_test].copy())
 
 
713
  if not (ensure_cols(tr, FEATURES) and ensure_cols(te, FEATURES)):
714
  st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True)
715
  st.stop()
716
 
717
- # predictions (handle log targets)
718
  tr_pred_raw = model.predict(tr[FEATURES])
719
  te_pred_raw = model.predict(te[FEATURES])
720
  tr["GR_Pred"] = inverse_target(np.asarray(tr_pred_raw, dtype=float), TARGET_TRANSFORM)
721
  te["GR_Pred"] = inverse_target(np.asarray(te_pred_raw, dtype=float), TARGET_TRANSFORM)
722
 
723
- # actual GR (for metrics/plots)
724
  tr["GR_Actual"] = to_actual_series(tr, TARGET, ACTUAL_COL, TARGET_TRANSFORM)
725
  te["GR_Actual"] = to_actual_series(te, TARGET, ACTUAL_COL, TARGET_TRANSFORM)
726
 
@@ -740,23 +720,18 @@ if st.session_state.app_step == "dev":
740
  st.session_state.train_ranges = {f:(float(tr_min[f]), float(tr_max[f])) for f in FEATURES}
741
  st.markdown('<div class="st-message-box st-success">Case has been built and results are displayed below.</div>', unsafe_allow_html=True)
742
 
743
- # -------- Metrics + Plots (3 decimals here) --------
744
  def _dev_block(df, m):
745
  c1, c2, c3 = st.columns(3)
746
  c1.metric("R", f"{m['R']:.3f}")
747
  c2.metric("RMSE", f"{m['RMSE']:.3f}")
748
  c3.metric("MAE", f"{m['MAE']:.3f}")
749
-
750
- st.markdown(
751
- """
752
  <div style='text-align:left;font-size:0.8em;color:#6b7280;margin-top:-16px;margin-bottom:8px;'>
753
  <strong>R:</strong> Pearson Correlation Coefficient<br>
754
  <strong>RMSE:</strong> Root Mean Square Error<br>
755
  <strong>MAE:</strong> Mean Absolute Error
756
  </div>
757
- """,
758
- unsafe_allow_html=True,
759
- )
760
 
761
  col_track, col_cross = st.columns([2, 3], gap="large")
762
  with col_track:
@@ -771,11 +746,10 @@ if st.session_state.app_step == "dev":
771
  if "Train" in st.session_state.results or "Test" in st.session_state.results:
772
  tab1, tab2 = st.tabs(["Training", "Testing"])
773
  if "Train" in st.session_state.results:
774
- with tab1:
775
- _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
776
  if "Test" in st.session_state.results:
777
- with tab2:
778
- _dev_block(st.session_state.results["Test"], st.session_state.results["m_test"])
779
 
780
  # =========================
781
  # VALIDATION (with actual GR)
@@ -791,10 +765,8 @@ if st.session_state.app_step == "validate":
791
  if st.sidebar.button("Preview data", use_container_width=True, disabled=(up is None)):
792
  st.session_state.show_preview_modal = True
793
  go_btn = st.sidebar.button("Predict & Validate", type="primary", use_container_width=True)
794
- if st.sidebar.button("⬅ Back to Case Building", use_container_width=True):
795
- st.session_state.app_step="dev"; st.rerun()
796
- if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True):
797
- st.session_state.app_step="predict"; st.rerun()
798
 
799
  sticky_header("Validate the Model", "Upload a dataset with the same **features** and **GR** to evaluate performance.")
800
 
@@ -803,10 +775,9 @@ if st.session_state.app_step == "validate":
803
  name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
804
  df = normalize_df(book[name].copy())
805
  if not ensure_cols(df, FEATURES):
806
- st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True)
807
- st.stop()
808
 
809
- pred_raw = model.predict(df[FEATURES]) # <-- fixed here
810
  df["GR_Pred"] = inverse_target(np.asarray(pred_raw, dtype=float), TARGET_TRANSFORM)
811
  try:
812
  df["GR_Actual"] = to_actual_series(df, TARGET, ACTUAL_COL, TARGET_TRANSFORM)
@@ -823,9 +794,10 @@ if st.session_state.app_step == "validate":
823
  if any_viol.any():
824
  tbl = df.loc[any_viol, FEATURES].copy()
825
  for c in FEATURES:
826
- if pd.api.types.is_numeric_dtype(tbl[c]):
827
- tbl[c] = tbl[c].round(2)
828
- tbl["Violations"] = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).loc[any_viol].apply(lambda r:", ".join([c for c,v in r.items() if v]), axis=1)
 
829
 
830
  st.session_state.results["m_val"]={
831
  "R": pearson_r(df["GR_Actual"], df["GR_Pred"]),
@@ -838,9 +810,7 @@ if st.session_state.app_step == "validate":
838
  if "Validate" in st.session_state.results:
839
  m = st.session_state.results["m_val"]
840
  c1,c2,c3 = st.columns(3)
841
- c1.metric("R", f"{m['R']:.2f}")
842
- c2.metric("RMSE", f"{m['RMSE']:.2f}")
843
- c3.metric("MAE", f"{m['MAE']:.2f}")
844
  st.markdown("""
845
  <div style='text-align:left;font-size:0.8em;color:#6b7280;margin-top:-16px;margin-bottom:8px;'>
846
  <strong>R:</strong> Pearson Correlation Coefficient<br>
@@ -852,20 +822,18 @@ if st.session_state.app_step == "validate":
852
  col_track, col_cross = st.columns([2, 3], gap="large")
853
  with col_track:
854
  st.plotly_chart(
855
- track_plot(st.session_state.results["Validate"],
856
- include_actual=True, pred_col="GR_Pred", actual_col="GR_Actual"),
857
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True}
858
  )
859
  with col_cross:
860
- st.pyplot(
861
- cross_plot_static(st.session_state.results["Validate"]["GR_Actual"],
862
- st.session_state.results["Validate"]["GR_Pred"]),
863
- use_container_width=False
864
- )
865
 
866
  sv = st.session_state.results["sv_val"]
867
- if sv["oor"] > 0:
868
- st.markdown('<div class="st-message-box st-warning">Some inputs fall outside **training min–max** ranges.</div>', unsafe_allow_html=True)
869
  if st.session_state.results["oor_tbl"] is not None:
870
  st.write("*Out-of-range rows (vs. Training min–max):*")
871
  df_centered_rounded(st.session_state.results["oor_tbl"])
@@ -884,57 +852,51 @@ if st.session_state.app_step == "predict":
884
  if st.sidebar.button("Preview data", use_container_width=True, disabled=(up is None)):
885
  st.session_state.show_preview_modal = True
886
 
887
- if st.sidebar.button("Predict", type="primary", use_container_width=True):
888
- if up is not None:
889
- book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
890
- df = normalize_df(book[name].copy())
891
- if not ensure_cols(df, FEATURES):
892
- st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True)
893
- st.stop()
894
- pred_raw = model.predict(df[FEATURES])
895
- df["GR_Pred"] = inverse_target(np.asarray(pred_raw, dtype=float), TARGET_TRANSFORM)
896
- st.session_state.results["PredictOnly"]=df
897
- ranges = st.session_state.train_ranges
898
- oor_pct = 0.0
899
- oor_tbl = None
900
- if ranges:
901
- any_viol = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).any(axis=1)
902
- oor_pct = float(any_viol.mean()*100.0)
903
- if any_viol.any():
904
- oor_tbl = df.loc[any_viol, FEATURES].copy()
905
- for c in FEATURES:
906
- if pd.api.types.is_numeric_dtype(oor_tbl[c]):
907
- oor_tbl[c] = oor_tbl[c].round(2)
908
- oor_tbl["Violations"] = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).loc[any_viol].apply(lambda r:", ".join([c for c,v in r.items() if v]), axis=1)
909
- st.session_state.results["sv_pred"]={
910
- "n":len(df),
911
- "pred_min":float(df["GR_Pred"].min()),
912
- "pred_max":float(df["GR_Pred"].max()),
913
- "pred_mean":float(df["GR_Pred"].mean()),
914
- "pred_std":float(df["GR_Pred"].std(ddof=0)),
915
- "oor":oor_pct
916
- }
917
- st.session_state.results["oor_tbl_pred"] = oor_tbl
918
-
919
- if st.sidebar.button("⬅ Back to Case Building", use_container_width=True):
920
- st.session_state.app_step="dev"
921
- st.rerun()
922
 
923
  sticky_header("Prediction", "Upload a dataset with the feature columns (no **GR**).")
924
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
925
  if "PredictOnly" in st.session_state.results:
926
- df = st.session_state.results["PredictOnly"]
927
- sv = st.session_state.results["sv_pred"]
928
  col_left, col_right = st.columns([2,3], gap="large")
929
  with col_left:
930
  table = pd.DataFrame({
931
  "Metric": ["# points","Pred min","Pred max","Pred mean","Pred std","OOR %"],
932
- "Value": [sv["n"],
933
- round(sv["pred_min"],2),
934
- round(sv["pred_max"],2),
935
- round(sv["pred_mean"],2),
936
- round(sv["pred_std"],2),
937
- f'{sv["oor"]:.1f}%']
938
  })
939
  st.markdown('<div class="st-message-box st-success">Predictions ready ✓</div>', unsafe_allow_html=True)
940
  df_centered_rounded(table, hide_index=True)
@@ -951,6 +913,8 @@ if st.session_state.app_step == "predict":
951
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True}
952
  )
953
 
 
 
954
  # =========================
955
  # Preview modal (re-usable)
956
  # =========================
@@ -972,32 +936,23 @@ if st.session_state.show_preview_modal:
972
  df = normalize_df(book_to_preview[name])
973
  t1, t2 = st.tabs(["Tracks", "Summary"])
974
  with t1:
975
- if any(c in df.columns for c in FEATURES):
976
- st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)
 
977
  else:
978
- st.info(f"None of the expected feature columns were found in this sheet. "
979
- f"Expected any of: {FEATURES}. Found: {list(df.columns)}")
980
  with t2:
981
  present = [c for c in FEATURES if c in df.columns]
982
  if present:
983
  tbl = (df[present]
984
- .agg(['min','max','mean','std'])
985
- .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"}))
986
- df_centered_rounded(tbl.reset_index(names="Feature"))
 
987
  else:
988
  st.info("No expected feature columns found to summarize.")
989
  st.session_state.show_preview_modal = False
990
 
991
- # === Bottom-of-page Export (per step) =========================================
992
- if st.session_state.app_step in ("dev", "validate", "predict"):
993
- has_results = any(
994
- k in st.session_state.results
995
- for k in ("Train", "Test", "Validate", "PredictOnly")
996
- )
997
- if has_results:
998
- render_export_button(key=f"export_{st.session_state.app_step}")
999
- # ==============================================================================
1000
-
1001
  # =========================
1002
  # Footer
1003
  # =========================
 
1
+ # app.py — ST_Log_GR (Gamma Ray) UI aligned with TS/Tc apps
2
+
3
  import io, json, os, base64, math
4
  from pathlib import Path
5
  import streamlit as st
 
8
  import joblib
9
  from datetime import datetime
10
 
11
+ # Matplotlib (static plots: preview, cross-plot)
12
  import matplotlib
13
  matplotlib.use("Agg")
14
  import matplotlib.pyplot as plt
 
25
 
26
  FEATURES = ["GPM", "SPP", "RPM", "WOB", "T", "ROP"]
27
 
28
+ # Target used during training
29
+ TARGET = "log(GR)" # set to your training target column name if different
30
 
31
+ # Inverse transform used to map predictions/target back to API
32
+ TARGET_TRANSFORM = "log10" # "log10" for log10(GR); "ln" for ln(GR); "none" for raw
33
 
34
+ # Column with actual GR in API units (if present)
35
  ACTUAL_COL = "GR"
36
 
37
  MODELS_DIR = Path("models")
 
41
  COLORS = {"pred": "#1f77b4", "actual": "#f2b702", "ref": "#5a5a5a"}
42
 
43
  # ---- Plot sizing controls ----
44
+ CROSS_W = 350
45
  CROSS_H = 350
46
+ TRACK_H = 1000
47
+ TRACK_W = 500
48
  FONT_SZ = 13
49
  BOLD_FONT = "Arial Black, Arial, sans-serif"
50
 
 
53
  # =========================
54
  st.set_page_config(page_title=APP_NAME, page_icon="logo.png", layout="wide")
55
 
 
56
  st.markdown("""
57
  <style>
58
  .brand-logo { width: 200px; height: auto; object-fit: contain; }
 
 
 
59
  .centered-container { display:flex; flex-direction:column; align-items:center; text-align:center; }
60
+ .main .block-container { overflow: unset !important; }
61
+ div[data-testid="stVerticalBlock"] { overflow: unset !important; }
62
+
63
+ /* Sticky preview expander + its tabs */
64
+ div[data-testid="stExpander"] > details > summary {
65
+ position: sticky; top: 0; z-index: 10; background: #fff; border-bottom: 1px solid #eee;
66
+ }
67
+ div[data-testid="stExpander"] div[data-baseweb="tab-list"] {
68
+ position: sticky; top: 42px; z-index: 9; background: #fff; padding-top: 6px;
69
+ }
70
+
71
+ /* Hide uploader helper text */
72
+ section[data-testid="stFileUploader"] div[data-testid="stMarkdownContainer"]{display:none !important;}
73
+ section[data-testid="stFileUploader"] [data-testid="stFileUploaderDropzone"] > div:first-child{display:none !important;}
74
+ section[data-testid="stFileUploader"] [data-testid="stFileUploaderInstructions"]{display:none !important;}
75
+ section[data-testid="stFileUploader"] p, section[data-testid="stFileUploader"] small{display:none !important;}
76
+
77
+ /* Message boxes */
78
+ .st-message-box { background:#f0f2f6; color:#333; padding:10px; border-radius:10px; border:1px solid #e6e9ef; }
79
+ .st-message-box.st-success { background:#d4edda; color:#155724; border-color:#c3e6cb; }
80
+ .st-message-box.st-warning { background:#fff3cd; color:#856404; border-color:#ffeeba; }
81
+ .st-message-box.st-error { background:#f8d7da; color:#721c24; border-color:#f5c6cb; }
82
  </style>
83
  """, unsafe_allow_html=True)
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  TABLE_CENTER_CSS = [
86
  dict(selector="th", props=[("text-align", "center")]),
87
  dict(selector="td", props=[("text-align", "center")]),
88
  ]
89
 
 
 
 
 
 
 
 
 
 
 
90
  # =========================
91
  # Password gate
92
  # =========================
93
  def inline_logo(path="logo.png") -> str:
94
  try:
95
  p = Path(path)
96
+ if not p.exists(): return ""
 
97
  return f"data:image/png;base64,{base64.b64encode(p.read_bytes()).decode('ascii')}"
98
  except Exception:
99
  return ""
 
139
def pearson_r(y_true, y_pred) -> float:
    """Pearson correlation coefficient of two sequences.

    Returns NaN for degenerate input: fewer than two points, or either
    sequence constant (zero variance makes the coefficient undefined).
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    degenerate = (
        actual.size < 2
        or bool(np.all(actual == actual[0]))
        or bool(np.all(predicted == predicted[0]))
    )
    if degenerate:
        return float("nan")
    return float(np.corrcoef(actual, predicted)[0, 1])
144
 
145
  @st.cache_resource(show_spinner=False)
 
157
 
158
def normalize_df(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with normalized column labels.

    Labels are cast to str, non-breaking spaces (U+00A0) are replaced with
    regular spaces, and surrounding whitespace is stripped.

    FIX: the original called ``.replace(" ", " ")`` — a space-for-space no-op
    (almost certainly a garbled NBSP normalization) that never changed
    anything, so NBSP-polluted Excel headers failed column matching.
    """
    out = df.copy()
    out.columns = [str(c).replace("\u00a0", " ").strip() for c in out.columns]
    return out
162
 
163
+ def ensure_cols(df: pd.DataFrame, cols: list[str]) -> bool:
164
  miss = [c for c in cols if c not in df.columns]
165
  if miss:
166
  st.error(f"Missing columns: {miss}\nFound: {list(df.columns)}")
 
170
def find_sheet(book, names):
    """Case-insensitively resolve the first matching sheet name in *book*.

    *book* maps sheet names to DataFrames; *names* lists candidates in
    priority order. Returns the original-cased sheet key, or None.
    """
    lookup = {key.lower(): key for key in book}
    for candidate in names:
        hit = lookup.get(candidate.lower())
        if hit is not None:
            return hit
    return None
175
 
176
  def _nice_tick0(xmin: float, step: int = 5) -> float:
 
181
  numcols = out.select_dtypes(include=[np.number]).columns
182
  styler = (
183
  out.style
184
+ .format({c: "{:.2f}" for c in numcols})
185
+ .set_properties(**{"text-align": "center"})
186
+ .set_table_styles(TABLE_CENTER_CSS)
187
  )
188
  st.dataframe(styler, use_container_width=True, hide_index=hide_index)
189
 
 
197
  return x # "none"
198
 
199
  def to_actual_series(df: pd.DataFrame, target_col: str, actual_col_hint: str, transform: str) -> pd.Series:
200
+ # Prefer explicit GR column if available; else invert target
 
 
 
201
  if actual_col_hint and actual_col_hint in df.columns:
202
  return pd.Series(df[actual_col_hint], dtype=float)
203
  if target_col in df.columns:
 
206
  return pd.Series(df["GR"], dtype=float)
207
  raise ValueError("Cannot find actual GR column or target to invert.")
208
 
209
+ # === Excel export helpers (TS/Tc-style multiselect) =======================
210
  def _excel_engine() -> str:
211
  try:
212
  import xlsxwriter # noqa: F401
 
228
 
229
def _summary_table(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
    """Min/Max/Mean/Std summary (rounded to 2 decimals) for the requested columns.

    Columns absent from *df* are silently skipped; returns an empty DataFrame
    when none of them exist.
    """
    present = [c for c in cols if c in df.columns]
    if not present:
        return pd.DataFrame()
    stats = df[present].agg(['min', 'max', 'mean', 'std']).T
    stats = stats.rename(columns={"min": "Min", "max": "Max", "mean": "Mean", "std": "Std"})
    stats = stats.reset_index(names="Field")
    return _round_numeric(stats, 2)
237
 
238
def _train_ranges_df(ranges: dict[str, tuple[float, float]]) -> pd.DataFrame:
    """Convert the training {feature: (min, max)} dict into a 3-column table.

    Returns an empty DataFrame when no ranges have been recorded yet.
    """
    if not ranges:
        return pd.DataFrame()
    table = pd.DataFrame(ranges).T.reset_index()
    table.columns = ["Feature", "Min", "Max"]
    return _round_numeric(table, 2)
243
 
244
+ def _excel_autofit(writer, sheet_name: str, df: pd.DataFrame, min_w: int = 8, max_w: int = 40):
245
+ try:
246
+ import xlsxwriter # noqa: F401
247
+ except Exception:
248
+ return
249
+ ws = writer.sheets[sheet_name]
250
+ for i, col in enumerate(df.columns):
251
+ series = df[col].astype(str)
252
+ max_len = max([len(str(col))] + series.map(len).tolist())
253
+ ws.set_column(i, i, max(min_w, min(max_len + 2, max_w)))
254
+ ws.freeze_panes(1, 0)
255
+
256
def _available_sections() -> list[str]:
    """List the export sheet names available from the current session results.

    The options depend on which phases have produced results; "Info" is
    always offered last.
    """
    res = st.session_state.get("results", {})
    sections: list[str] = []
    groups = [
        ("Train", ["Training", "Training_Metrics", "Training_Summary"]),
        ("Test", ["Testing", "Testing_Metrics", "Testing_Summary"]),
        ("Validate", ["Validation", "Validation_Metrics", "Validation_Summary", "Validation_OOR"]),
        ("PredictOnly", ["Prediction", "Prediction_Summary", "Prediction_OOR"]),
    ]
    for result_key, sheet_names in groups:
        if result_key in res:
            sections.extend(sheet_names)
    if st.session_state.get("train_ranges"):
        sections.append("Training_Ranges")
    sections.append("Info")
    return sections
266
+
267
def build_export_workbook(selected: list[str], ndigits: int = 2, do_autofit: bool = True) -> tuple[bytes|None, str|None, list[str]]:
    """Build an in-memory Excel workbook from the selected result sections.

    Parameters
    ----------
    selected : sheet names chosen by the user (see ``_available_sections``).
    ndigits : rounding applied to numeric cells.
    do_autofit : when True, auto-size columns / freeze headers per sheet.
        FIX: this flag was previously accepted but ignored — autofit always ran.

    Returns ``(xlsx_bytes, filename, sheet_order)`` or ``(None, None, [])``
    when there is nothing to export.
    """
    res = st.session_state.get("results", {})
    if not res:
        return None, None, []

    sheets: dict[str, pd.DataFrame] = {}
    order: list[str] = []

    # Training
    if "Training" in selected and "Train" in res:
        sheets["Training"] = _round_numeric(res["Train"], ndigits); order.append("Training")
    if "Training_Metrics" in selected and res.get("m_train"):
        sheets["Training_Metrics"] = _round_numeric(pd.DataFrame([res["m_train"]]), ndigits); order.append("Training_Metrics")
    if "Training_Summary" in selected and "Train" in res:
        tr_cols = FEATURES + [c for c in ["GR_Actual", "GR_Pred"] if c in res["Train"].columns]
        s = _summary_table(res["Train"], tr_cols)
        if not s.empty:
            sheets["Training_Summary"] = s; order.append("Training_Summary")

    # Testing
    if "Testing" in selected and "Test" in res:
        sheets["Testing"] = _round_numeric(res["Test"], ndigits); order.append("Testing")
    if "Testing_Metrics" in selected and res.get("m_test"):
        sheets["Testing_Metrics"] = _round_numeric(pd.DataFrame([res["m_test"]]), ndigits); order.append("Testing_Metrics")
    if "Testing_Summary" in selected and "Test" in res:
        te_cols = FEATURES + [c for c in ["GR_Actual", "GR_Pred"] if c in res["Test"].columns]
        s = _summary_table(res["Test"], te_cols)
        if not s.empty:
            sheets["Testing_Summary"] = s; order.append("Testing_Summary")

    # Validation
    if "Validation" in selected and "Validate" in res:
        sheets["Validation"] = _round_numeric(res["Validate"], ndigits); order.append("Validation")
    if "Validation_Metrics" in selected and res.get("m_val"):
        sheets["Validation_Metrics"] = _round_numeric(pd.DataFrame([res["m_val"]]), ndigits); order.append("Validation_Metrics")
    if "Validation_Summary" in selected and res.get("sv_val"):
        sheets["Validation_Summary"] = _round_numeric(pd.DataFrame([res["sv_val"]]), ndigits); order.append("Validation_Summary")
    if "Validation_OOR" in selected and isinstance(res.get("oor_tbl"), pd.DataFrame) and not res["oor_tbl"].empty:
        sheets["Validation_OOR"] = _round_numeric(res["oor_tbl"].reset_index(drop=True), ndigits); order.append("Validation_OOR")

    # Prediction
    if "Prediction" in selected and "PredictOnly" in res:
        sheets["Prediction"] = _round_numeric(res["PredictOnly"], ndigits); order.append("Prediction")
    if "Prediction_Summary" in selected and res.get("sv_pred"):
        sheets["Prediction_Summary"] = _round_numeric(pd.DataFrame([res["sv_pred"]]), ndigits); order.append("Prediction_Summary")
    if "Prediction_OOR" in selected and isinstance(res.get("oor_tbl_pred"), pd.DataFrame) and not res["oor_tbl_pred"].empty:
        sheets["Prediction_OOR"] = _round_numeric(res["oor_tbl_pred"].reset_index(drop=True), ndigits); order.append("Prediction_OOR")

    # Training ranges
    if "Training_Ranges" in selected and st.session_state.get("train_ranges"):
        sheets["Training_Ranges"] = _train_ranges_df(st.session_state["train_ranges"]); order.append("Training_Ranges")

    # Info / provenance sheet
    if "Info" in selected:
        info = pd.DataFrame([
            {"Key": "AppName", "Value": APP_NAME},
            {"Key": "Tagline", "Value": TAGLINE},
            {"Key": "Target", "Value": TARGET},
            {"Key": "TargetTransform", "Value": TARGET_TRANSFORM},
            {"Key": "ActualColumn", "Value": ACTUAL_COL},
            {"Key": "Features", "Value": ", ".join(FEATURES)},
            {"Key": "ExportedAt", "Value": datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
        ])
        sheets["Info"] = info; order.append("Info")

    if not order:
        return None, None, []

    bio = io.BytesIO()
    engine = _excel_engine()
    with pd.ExcelWriter(bio, engine=engine) as writer:
        for name in order:
            df = sheets[name]
            sheet = _excel_safe_name(name)
            df.to_excel(writer, sheet_name=sheet, index=False)
            if do_autofit:  # honor the flag instead of always autofitting
                _excel_autofit(writer, sheet, df)
    bio.seek(0)

    fname = f"GR_Export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
    return bio.getvalue(), fname, order
344
 
345
def render_export_button(phase_key: str) -> None:
    """Render the "Export to Excel" section for the current app phase.

    Shows a sheet multiselect plus a download button; the button stays
    disabled until at least one sheet is selected and the workbook builds.
    *phase_key* namespaces the widget keys so each phase keeps its own state.
    Renders nothing when no results exist yet.
    """
    res = st.session_state.get("results", {})
    if not res: return
    st.divider()
    st.markdown("### Export to Excel")

    options = _available_sections()
    selected_sheets = st.multiselect(
        "Sheets to include",
        options=options,
        default=[],
        placeholder="Choose option(s)",
        help="Pick the sheets you want to include in the Excel export.",
        key=f"sheets_{phase_key}",
    )

    if not selected_sheets:
        # Nothing chosen yet: keep a disabled placeholder button visible.
        st.caption("Select one or more sheets above to enable the export.")
        st.download_button("⬇️ Export Excel", data=b"", file_name="GR_Export.xlsx",
                           mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                           disabled=True, key=f"download_{phase_key}")
        return

    # Build the workbook eagerly so the download is a single click.
    data, fname, names = build_export_workbook(selected=selected_sheets, ndigits=2, do_autofit=True)
    if names:
        st.caption("Will include: " + ", ".join(names))
    st.download_button(
        "⬇️ Export Excel",
        data=(data or b""),
        file_name=(fname or "GR_Export.xlsx"),
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        disabled=(data is None),
        key=f"download_{phase_key}",
    )
379
 
380
  # =========================
381
+ # Cross plot (Matplotlib)
382
  # =========================
383
  def _nice_bounds(arr_min, arr_max, n_ticks=6):
384
  if not np.isfinite(arr_min) or not np.isfinite(arr_max):
 
404
  ticks = np.arange(fixed_min, fixed_max + step, step)
405
 
406
  dpi = 110
407
+ fig, ax = plt.subplots(figsize=(CROSS_W / dpi, CROSS_H / dpi), dpi=dpi, constrained_layout=False)
 
 
 
 
408
 
409
  ax.scatter(a, p, s=14, c=COLORS["pred"], alpha=0.9, linewidths=0)
410
  ax.plot([fixed_min, fixed_max], [fixed_min, fixed_max],
 
412
 
413
  ax.set_xlim(fixed_min, fixed_max)
414
  ax.set_ylim(fixed_min, fixed_max)
415
+ ax.set_xticks(ticks); ax.set_yticks(ticks)
 
416
  ax.set_aspect("equal", adjustable="box")
417
 
418
  fmt = FuncFormatter(lambda x, _: f"{int(x):,}")
419
+ ax.xaxis.set_major_formatter(fmt); ax.yaxis.set_major_formatter(fmt)
 
420
 
421
+ ax.set_xlabel("Actual GR (API)", fontweight="bold", fontsize=10, color="black")
422
+ ax.set_ylabel("Predicted GR (API)", fontweight="bold", fontsize=10, color="black")
423
  ax.tick_params(labelsize=8, colors="black")
424
 
425
  ax.grid(True, linestyle=":", alpha=0.3)
426
  for spine in ax.spines.values():
427
+ spine.set_linewidth(1.1); spine.set_color("#444")
 
428
 
429
  fig.subplots_adjust(left=0.16, bottom=0.16, right=0.98, top=0.98)
430
  return fig
 
433
  # Track plot (Plotly) — y-axis reversed
434
  # =========================
435
  def track_plot(df, include_actual=True, pred_col="GR_Pred", actual_col="GR"):
 
436
  def _col_1d(frame: pd.DataFrame, col: str) -> pd.Series:
437
+ if col not in frame.columns: return pd.Series(dtype=float)
 
438
  v = frame[col]
439
+ if isinstance(v, pd.DataFrame): v = v.iloc[:, 0]
 
440
  return pd.Series(v, dtype=float)
441
 
 
442
  depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
443
  if depth_col is not None:
444
+ y = pd.Series(df[depth_col]).astype(float); ylab = depth_col
 
445
  else:
446
+ y = pd.Series(np.arange(1, len(df) + 1), dtype=float); ylab = "Point Index"
 
447
 
 
448
  x_pred = _col_1d(df, pred_col)
449
  if include_actual and actual_col in df.columns:
450
  x_act = _col_1d(df, actual_col)
 
478
  paper_bgcolor="#fff", plot_bgcolor="#fff",
479
  margin=dict(l=64, r=16, t=36, b=48), hovermode="closest",
480
  font=dict(size=FONT_SZ, color="#000"),
481
+ legend=dict(x=0.98, y=0.05, xanchor="right", yanchor="bottom",
482
+ bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1),
 
 
483
  legend_title_text=""
484
  )
485
  fig.update_xaxes(
 
487
  title_font=dict(size=20, family=BOLD_FONT, color="#000"),
488
  tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
489
  side="top", range=[xmin, xmax],
490
+ ticks="outside",
491
+ tickformat=",.0f",
492
+ tickmode="auto",
493
+ tick0=tick0,
494
  showline=True, linewidth=1.2, linecolor="#444", mirror=True,
495
+ showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True,
496
  )
 
497
  fig.update_yaxes(
498
  title_text=ylab,
499
  title_font=dict(size=20, family=BOLD_FONT, color="#000"),
 
501
  autorange="reversed",
502
  ticks="outside",
503
  showline=True, linewidth=1.2, linecolor="#444", mirror=True,
504
+ showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True,
505
  )
 
506
  return fig
507
 
508
+ # ---------- Preview (Matplotlib) — colorful tracks; shared Y; ticks only left ----------
509
def preview_tracks(df: pd.DataFrame, cols: list[str]):
    """Static Matplotlib preview: one vertical track per feature column.

    A column whose name contains 'depth' (case-insensitive) becomes the
    shared, reversed Y axis; otherwise the 1-based row index is used.
    Returns a matplotlib Figure.
    """
    cols = [c for c in cols if c in df.columns]
    n = len(cols)
    if n == 0:
        fig, ax = plt.subplots(figsize=(4, 2))
        ax.text(0.5, 0.5, "No selected columns", ha="center", va="center")
        ax.axis("off")
        return fig

    depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
    if depth_col is not None:
        y = pd.to_numeric(df[depth_col], errors="coerce")
        ylab = depth_col
    else:
        y = pd.Series(np.arange(1, len(df) + 1), dtype=float)
        ylab = "Point Index"

    # FIX: an all-NaN depth column used to crash np.nanmin below —
    # fall back to the row index when no finite depth values exist.
    if len(y) and not np.isfinite(y.to_numpy(dtype=float)).any():
        y = pd.Series(np.arange(1, len(df) + 1), dtype=float)
        ylab = "Point Index"
    # FIX: an empty frame also crashed np.nanmin — show a friendly placeholder.
    if len(y) == 0:
        fig, ax = plt.subplots(figsize=(4, 2))
        ax.text(0.5, 0.5, "No rows to preview", ha="center", va="center")
        ax.axis("off")
        return fig

    # Stable qualitative colors per track
    cmap = plt.get_cmap("tab20")
    col_colors = {col: cmap(i % cmap.N) for i, col in enumerate(cols)}

    fig, axes = plt.subplots(1, n, figsize=(2.3 * n, 7.0), sharey=True, dpi=100)
    if n == 1:
        axes = [axes]

    y_min, y_max = float(np.nanmin(y)), float(np.nanmax(y))

    for i, (ax, col) in enumerate(zip(axes, cols)):
        x = pd.to_numeric(df[col], errors="coerce")
        ax.plot(x, y, '-', lw=1.8, color=col_colors[col])
        ax.set_xlabel(col)
        ax.xaxis.set_label_position('top'); ax.xaxis.tick_top()
        ax.set_ylim(y_max, y_min)  # reversed Y: depth increases downward
        ax.grid(True, linestyle=":", alpha=0.3)
        if i == 0:
            ax.set_ylabel(ylab)
        else:
            # Shared Y axis: ticks/labels only on the left-most track
            ax.tick_params(axis='y', left=False, labelleft=False)

    fig.tight_layout()
    return fig
551
 
 
 
 
 
 
 
 
 
 
 
 
 
552
  # =========================
553
  # Load model + meta
554
  # =========================
 
566
  r.raise_for_status()
567
  with open(DEFAULT_MODEL, "wb") as f:
568
  for chunk in r.iter_content(1<<20):
569
+ if chunk: f.write(chunk)
 
570
  return DEFAULT_MODEL
571
  except Exception:
572
  return None
 
605
  st.session_state.setdefault("show_preview_modal", False)
606
 
607
  # =========================
608
+ # Sidebar branding
609
  # =========================
610
  st.sidebar.markdown(f"""
611
  <div class="centered-container">
 
616
  """, unsafe_allow_html=True
617
  )
618
 
 
619
  def sticky_header(title, message):
620
  st.markdown(
621
  f"""
 
638
  # =========================
639
  if st.session_state.app_step == "intro":
640
  st.header("Welcome!")
641
+ st.markdown("This software is developed by *Smart Thinking AI-Solutions Team* to estimate **Gamma Ray (GR)** from drilling data.")
642
  st.subheader("How It Works")
643
  st.markdown(
644
+ "1) **Upload your data to build the case and preview model performance.** \n"
645
  "2) Click **Run Model** to compute metrics and plots. \n"
646
  "3) **Proceed to Validation** (with actual GR) or **Proceed to Prediction** (no GR)."
647
  )
 
670
  st.session_state.dev_preview = True
671
 
672
  run = st.sidebar.button("Run Model", type="primary", use_container_width=True)
673
+ if st.sidebar.button("Proceed to Validation ▶", use_container_width=True): st.session_state.app_step="validate"; st.rerun()
674
+ if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
 
 
675
 
 
676
  if st.session_state.dev_file_loaded and st.session_state.dev_preview:
677
  sticky_header("Case Building", "Previewed ✓ — now click **Run Model**.")
678
  elif st.session_state.dev_file_loaded:
679
+ sticky_header("Case Building", "📄 **Preview uploaded data**, then click **Run Model**.")
680
  else:
681
+ sticky_header("Case Building", "Upload your data to build a case, then run the model to review development performance.")
682
 
683
  if run and st.session_state.dev_file_bytes:
684
  book = read_book_bytes(st.session_state.dev_file_bytes)
 
687
  if sh_train is None or sh_test is None:
688
  st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
689
  st.stop()
690
+
691
+ tr = normalize_df(book[sh_train].copy())
692
+ te = normalize_df(book[sh_test].copy())
693
  if not (ensure_cols(tr, FEATURES) and ensure_cols(te, FEATURES)):
694
  st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True)
695
  st.stop()
696
 
697
+ # Predict (model trained on transformed target)
698
  tr_pred_raw = model.predict(tr[FEATURES])
699
  te_pred_raw = model.predict(te[FEATURES])
700
  tr["GR_Pred"] = inverse_target(np.asarray(tr_pred_raw, dtype=float), TARGET_TRANSFORM)
701
  te["GR_Pred"] = inverse_target(np.asarray(te_pred_raw, dtype=float), TARGET_TRANSFORM)
702
 
703
+ # Actual GR for metrics/plots
704
  tr["GR_Actual"] = to_actual_series(tr, TARGET, ACTUAL_COL, TARGET_TRANSFORM)
705
  te["GR_Actual"] = to_actual_series(te, TARGET, ACTUAL_COL, TARGET_TRANSFORM)
706
 
 
720
  st.session_state.train_ranges = {f:(float(tr_min[f]), float(tr_max[f])) for f in FEATURES}
721
  st.markdown('<div class="st-message-box st-success">Case has been built and results are displayed below.</div>', unsafe_allow_html=True)
722
 
 
723
  def _dev_block(df, m):
724
  c1, c2, c3 = st.columns(3)
725
  c1.metric("R", f"{m['R']:.3f}")
726
  c2.metric("RMSE", f"{m['RMSE']:.3f}")
727
  c3.metric("MAE", f"{m['MAE']:.3f}")
728
+ st.markdown("""
 
 
729
  <div style='text-align:left;font-size:0.8em;color:#6b7280;margin-top:-16px;margin-bottom:8px;'>
730
  <strong>R:</strong> Pearson Correlation Coefficient<br>
731
  <strong>RMSE:</strong> Root Mean Square Error<br>
732
  <strong>MAE:</strong> Mean Absolute Error
733
  </div>
734
+ """, unsafe_allow_html=True)
 
 
735
 
736
  col_track, col_cross = st.columns([2, 3], gap="large")
737
  with col_track:
 
746
  if "Train" in st.session_state.results or "Test" in st.session_state.results:
747
  tab1, tab2 = st.tabs(["Training", "Testing"])
748
  if "Train" in st.session_state.results:
749
+ with tab1: _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
 
750
  if "Test" in st.session_state.results:
751
+ with tab2: _dev_block(st.session_state.results["Test"], st.session_state.results["m_test"])
752
+ render_export_button(phase_key="dev")
753
 
754
  # =========================
755
  # VALIDATION (with actual GR)
 
765
  if st.sidebar.button("Preview data", use_container_width=True, disabled=(up is None)):
766
  st.session_state.show_preview_modal = True
767
  go_btn = st.sidebar.button("Predict & Validate", type="primary", use_container_width=True)
768
+ if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
769
+ if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
 
 
770
 
771
  sticky_header("Validate the Model", "Upload a dataset with the same **features** and **GR** to evaluate performance.")
772
 
 
775
  name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
776
  df = normalize_df(book[name].copy())
777
  if not ensure_cols(df, FEATURES):
778
+ st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True); st.stop()
 
779
 
780
+ pred_raw = model.predict(df[FEATURES])
781
  df["GR_Pred"] = inverse_target(np.asarray(pred_raw, dtype=float), TARGET_TRANSFORM)
782
  try:
783
  df["GR_Actual"] = to_actual_series(df, TARGET, ACTUAL_COL, TARGET_TRANSFORM)
 
794
  if any_viol.any():
795
  tbl = df.loc[any_viol, FEATURES].copy()
796
  for c in FEATURES:
797
+ if pd.api.types.is_numeric_dtype(tbl[c]): tbl[c] = tbl[c].round(2)
798
+ tbl["Violations"] = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).loc[any_viol].apply(
799
+ lambda r:", ".join([c for c,v in r.items() if v]), axis=1
800
+ )
801
 
802
  st.session_state.results["m_val"]={
803
  "R": pearson_r(df["GR_Actual"], df["GR_Pred"]),
 
810
  if "Validate" in st.session_state.results:
811
  m = st.session_state.results["m_val"]
812
  c1,c2,c3 = st.columns(3)
813
+ c1.metric("R", f"{m['R']:.3f}"); c2.metric("RMSE", f"{m['RMSE']:.2f}"); c3.metric("MAE", f"{m['MAE']:.2f}")
 
 
814
  st.markdown("""
815
  <div style='text-align:left;font-size:0.8em;color:#6b7280;margin-top:-16px;margin-bottom:8px;'>
816
  <strong>R:</strong> Pearson Correlation Coefficient<br>
 
822
  col_track, col_cross = st.columns([2, 3], gap="large")
823
  with col_track:
824
  st.plotly_chart(
825
+ track_plot(st.session_state.results["Validate"], include_actual=True, pred_col="GR_Pred", actual_col="GR_Actual"),
 
826
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True}
827
  )
828
  with col_cross:
829
+ st.pyplot(cross_plot_static(st.session_state.results["Validate"]["GR_Actual"],
830
+ st.session_state.results["Validate"]["GR_Pred"]),
831
+ use_container_width=False)
832
+
833
+ render_export_button(phase_key="validate")
834
 
835
  sv = st.session_state.results["sv_val"]
836
+ if sv["oor"] > 0: st.markdown('<div class="st-message-box st-warning">Some inputs fall outside **training min–max** ranges.</div>', unsafe_allow_html=True)
 
837
  if st.session_state.results["oor_tbl"] is not None:
838
  st.write("*Out-of-range rows (vs. Training min–max):*")
839
  df_centered_rounded(st.session_state.results["oor_tbl"])
 
852
  if st.sidebar.button("Preview data", use_container_width=True, disabled=(up is None)):
853
  st.session_state.show_preview_modal = True
854
 
855
+ go_btn = st.sidebar.button("Predict", type="primary", use_container_width=True)
856
+ if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
857
 
858
  sticky_header("Prediction", "Upload a dataset with the feature columns (no **GR**).")
859
 
860
+ if go_btn and up is not None:
861
+ book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
862
+ df = normalize_df(book[name].copy())
863
+ if not ensure_cols(df, FEATURES):
864
+ st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True); st.stop()
865
+
866
+ pred_raw = model.predict(df[FEATURES])
867
+ df["GR_Pred"] = inverse_target(np.asarray(pred_raw, dtype=float), TARGET_TRANSFORM)
868
+ st.session_state.results["PredictOnly"]=df
869
+
870
+ ranges = st.session_state.train_ranges; oor_pct = 0.0; oor_tbl=None
871
+ if ranges:
872
+ any_viol = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).any(axis=1)
873
+ oor_pct = float(any_viol.mean()*100.0)
874
+ if any_viol.any():
875
+ oor_tbl = df.loc[any_viol, FEATURES].copy()
876
+ for c in FEATURES:
877
+ if pd.api.types.is_numeric_dtype(oor_tbl[c]): oor_tbl[c] = oor_tbl[c].round(2)
878
+ oor_tbl["Violations"] = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).loc[any_viol].apply(
879
+ lambda r:", ".join([c for c,v in r.items() if v]), axis=1
880
+ )
881
+ st.session_state.results["sv_pred"]={
882
+ "n":len(df),
883
+ "pred_min":float(df["GR_Pred"].min()),
884
+ "pred_max":float(df["GR_Pred"].max()),
885
+ "pred_mean":float(df["GR_Pred"].mean()),
886
+ "pred_std":float(df["GR_Pred"].std(ddof=0)),
887
+ "oor":oor_pct
888
+ }
889
+ st.session_state.results["oor_tbl_pred"] = oor_tbl
890
+
891
  if "PredictOnly" in st.session_state.results:
892
+ df = st.session_state.results["PredictOnly"]; sv = st.session_state.results["sv_pred"]
893
+
894
  col_left, col_right = st.columns([2,3], gap="large")
895
  with col_left:
896
  table = pd.DataFrame({
897
  "Metric": ["# points","Pred min","Pred max","Pred mean","Pred std","OOR %"],
898
+ "Value": [sv["n"], round(sv["pred_min"],2), round(sv["pred_max"],2),
899
+ round(sv["pred_mean"],2), round(sv["pred_std"],2), f'{sv["oor"]:.1f}%']
 
 
 
 
900
  })
901
  st.markdown('<div class="st-message-box st-success">Predictions ready ✓</div>', unsafe_allow_html=True)
902
  df_centered_rounded(table, hide_index=True)
 
913
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True}
914
  )
915
 
916
+ render_export_button(phase_key="predict")
917
+
918
  # =========================
919
  # Preview modal (re-usable)
920
  # =========================
 
936
  df = normalize_df(book_to_preview[name])
937
  t1, t2 = st.tabs(["Tracks", "Summary"])
938
  with t1:
939
+ present = [c for c in FEATURES if c in df.columns]
940
+ if present:
941
+ st.pyplot(preview_tracks(df, present), use_container_width=True)
942
  else:
943
+ st.info(f"No expected feature columns found. Expected any of: {FEATURES}. Found: {list(df.columns)}")
 
944
  with t2:
945
  present = [c for c in FEATURES if c in df.columns]
946
  if present:
947
  tbl = (df[present]
948
+ .agg(['min','max','mean','std'])
949
+ .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
950
+ .reset_index(names="Feature"))
951
+ df_centered_rounded(tbl)
952
  else:
953
  st.info("No expected feature columns found to summarize.")
954
  st.session_state.show_preview_modal = False
955
 
 
 
 
 
 
 
 
 
 
 
956
  # =========================
957
  # Footer
958
  # =========================