UCS2014 committed on
Commit
4d43e75
·
verified ·
1 Parent(s): fa1bd3b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +451 -174
app.py CHANGED
@@ -1,6 +1,6 @@
1
  # app.py — ST_Min_Horizontal_Stress (σhmin)
2
- # Streamlit app that LOADS THE MODEL/META FROM USER UPLOADS (memory only; no auth, no saving).
3
- # After the model is in memory, the rest of the workflow (Train/Test/Validate/Predict) is unchanged.
4
 
5
  import io, json, os, base64, math
6
  from pathlib import Path
@@ -29,7 +29,7 @@ TAGLINE = "Real-Time Minimum Horizontal Stress Prediction"
29
  FEATURES = ["Q (gpm)", "SPP (psi)", "ST (kft.lbf)", "WOB (klbf)", "ROP (ft/h)"]
30
  TARGET = "σhmin (MPa)"
31
  PRED_COL = "σhmin_Pred"
32
- ACTUAL_COL = None # If your workbook has a separate actual column, set it via meta.json (actual_col)
33
  TRANSFORM = "none" # "none" | "log10" | "ln"
34
  UNITS = "MPa"
35
 
@@ -42,6 +42,9 @@ BOLD_FONT = "Arial Black, Arial, sans-serif"
42
 
43
  STRICT_VERSION_CHECK = True
44
 
 
 
 
45
  # =========================
46
  # Page / CSS
47
  # =========================
@@ -71,7 +74,7 @@ TABLE_CENTER_CSS = [
71
  ]
72
 
73
  # =========================
74
- # Password gate (optional)
75
  # =========================
76
  def inline_logo(path="logo.png") -> str:
77
  try:
@@ -88,8 +91,8 @@ def add_password_gate() -> None:
88
  required = os.environ.get("APP_PASSWORD", "")
89
 
90
  if not required:
91
- return # disable gate if no password set
92
-
93
  if st.session_state.get("auth_ok", False):
94
  return
95
 
@@ -205,6 +208,266 @@ def _make_X(df: pd.DataFrame, features: list[str]) -> pd.DataFrame:
205
  X[c] = pd.to_numeric(X[c], errors="coerce")
206
  return X
207
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  # =========================
209
  # Session state
210
  # =========================
@@ -216,12 +479,9 @@ st.session_state.setdefault("dev_file_bytes",b"")
216
  st.session_state.setdefault("dev_file_loaded",False)
217
  st.session_state.setdefault("dev_preview",False)
218
  st.session_state.setdefault("show_preview_modal", False)
219
- st.session_state.setdefault("model_loaded", False)
220
- st.session_state.setdefault("model_obj", None)
221
- st.session_state.setdefault("meta_dict", {})
222
 
223
  # =========================
224
- # Sidebar: branding + model upload
225
  # =========================
226
  st.sidebar.markdown(f"""
227
  <div class="centered-container">
@@ -231,68 +491,6 @@ st.sidebar.markdown(f"""
231
  </div>
232
  """, unsafe_allow_html=True)
233
 
234
- with st.sidebar.expander("① Load model (upload)", expanded=True):
235
- up_model = st.file_uploader("Model file (.joblib)", type=["joblib","pkl"], key="mdl_up")
236
- up_meta = st.file_uploader("Meta file (.json)", type=["json"], key="meta_up")
237
- load_btn = st.button("Load model", type="primary")
238
-
239
- if load_btn:
240
- if not up_model:
241
- st.error("Please upload the model .joblib file.")
242
- st.stop()
243
- try:
244
- st.session_state.model_obj = joblib.load(io.BytesIO(up_model.getvalue()))
245
- st.session_state.model_loaded = True
246
- except Exception as e:
247
- st.error(f"Failed to load model: {e}")
248
- st.stop()
249
-
250
- if up_meta:
251
- try:
252
- st.session_state.meta_dict = json.loads(up_meta.getvalue().decode("utf-8"))
253
- except Exception as e:
254
- st.warning(f"Could not parse meta.json: {e}")
255
- st.session_state.meta_dict = {}
256
- else:
257
- st.warning("No meta.json uploaded — using app defaults.")
258
- st.session_state.meta_dict = {}
259
-
260
- st.success("Model loaded in memory ✓")
261
-
262
- # Apply meta (if provided)
263
- meta = st.session_state.meta_dict
264
- if meta:
265
- FEATURES = meta.get("features", FEATURES)
266
- TARGET = meta.get("target", TARGET)
267
- PRED_COL = meta.get("pred_col", PRED_COL)
268
- ACTUAL_COL = meta.get("actual_col", ACTUAL_COL)
269
- TRANSFORM = meta.get("transform", TRANSFORM)
270
- UNITS = meta.get("units", UNITS)
271
- ALIASES = meta.get("feature_aliases")
272
- if STRICT_VERSION_CHECK and meta.get("versions"):
273
- import numpy as _np, sklearn as _skl
274
- mv = meta["versions"]; msg=[]
275
- if mv.get("numpy") and mv["numpy"] != _np.__version__:
276
- msg.append(f"NumPy {mv['numpy']} expected, running {_np.__version__}")
277
- if mv.get("scikit_learn") and mv["scikit_learn"] != _skl.__version__:
278
- msg.append(f"scikit-learn {mv['scikit_learn']} expected, running {_skl.__version__}")
279
- if msg:
280
- st.warning("Environment mismatch: " + " | ".join(msg))
281
- else:
282
- ALIASES = None
283
-
284
- # Guard: require model first
285
- if not st.session_state.model_loaded:
286
- st.header("Welcome!")
287
- st.info("Upload your **model** (.joblib) and optional **meta.json** in the left sidebar, then click **Load model**.")
288
- st.stop()
289
-
290
- # Keep a short alias
291
- model = st.session_state.model_obj
292
-
293
- # =========================
294
- # Sticky header helper
295
- # =========================
296
  def sticky_header(title, message):
297
  st.markdown(
298
  f"""
@@ -310,20 +508,161 @@ def sticky_header(title, message):
310
  unsafe_allow_html=True
311
  )
312
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
313
  # =========================
314
  # INTRO
315
  # =========================
316
  if st.session_state.app_step == "intro":
317
- st.header("Model ready ✓")
 
 
318
  st.markdown(
319
- f"This software estimates **Minimum Horizontal Stress** ({UNITS}). "
320
- "Now build a case, validate, or predict."
 
321
  )
322
  if st.button("Start Showcase", type="primary"):
323
  st.session_state.app_step = "dev"; st.rerun()
324
 
325
  # =========================
326
- # CASE BUILDING (Train/Test)
327
  # =========================
328
  def _find_sheet(book, names):
329
  low2orig = {k.lower(): k for k in book.keys()}
@@ -332,8 +671,8 @@ def _find_sheet(book, names):
332
  return None
333
 
334
  if st.session_state.app_step == "dev":
335
- st.sidebar.header("Case Building")
336
- up = st.sidebar.file_uploader("Upload Train/Test Excel", type=["xlsx","xls"])
337
  if up is not None:
338
  st.session_state.dev_file_bytes = up.getvalue()
339
  st.session_state.dev_file_name = up.name
@@ -368,8 +707,8 @@ if st.session_state.app_step == "dev":
368
  st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
369
  st.stop()
370
 
371
- tr0 = _normalize_columns(book[sh_train].copy(), FEATURES, TARGET, ALIASES)
372
- te0 = _normalize_columns(book[sh_test].copy(), FEATURES, TARGET, ALIASES)
373
 
374
  actual_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in tr0.columns) else TARGET
375
  if not (ensure_cols(tr0, FEATURES+[actual_col]) and ensure_cols(te0, FEATURES+[actual_col])):
@@ -393,8 +732,8 @@ if st.session_state.app_step == "dev":
393
  "MAPE%": mape(te[actual_col], te[PRED_COL]),
394
  }
395
 
396
- tr_min = tr[FEATURES].min().to_dict(); tr_max = tr[FEATURES].max().to_dict()
397
- st.session_state.train_ranges = {f:(float(tr_min[f]), float(tr_max[f])) for f in FEATURES}
398
  st.markdown('<div class="st-message-box st-success">Case has been built and results are displayed below.</div>', unsafe_allow_html=True)
399
 
400
  def _dev_block(df, m):
@@ -414,8 +753,8 @@ if st.session_state.app_step == "dev":
414
  st.plotly_chart(track_plot(df, include_actual=True),
415
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
416
  with col_cross:
417
- act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df.columns) else TARGET
418
- st.pyplot(cross_plot_static(df[act_col], df[PRED_COL]), use_container_width=False)
419
 
420
  if "Train" in st.session_state.results or "Test" in st.session_state.results:
421
  tab1, tab2 = st.tabs(["Training", "Testing"])
@@ -423,61 +762,13 @@ if st.session_state.app_step == "dev":
423
  with tab1: _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
424
  if "Test" in st.session_state.results:
425
  with tab2: _dev_block(st.session_state.results["Test"], st.session_state.results["m_test"])
426
- # Export
427
- st.divider()
428
- st.markdown("### Export to Excel")
429
- options = ["Training","Training_Metrics","Training_Summary","Testing","Testing_Metrics","Testing_Summary","Info"]
430
- selected = st.multiselect("Sheets to include", options=options, default=[])
431
- def _summary_table(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
432
- cols = [c for c in cols if c in df.columns]
433
- if not cols: return pd.DataFrame()
434
- tbl = (df[cols].agg(['min','max','mean','std'])
435
- .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
436
- .reset_index(names="Field"))
437
- return _round_numeric(tbl, 3)
438
- def build_export(selected: list[str]) -> tuple[bytes|None, str|None]:
439
- res = st.session_state.get("results", {})
440
- if not res: return None, None
441
- sheets, order = {}, []
442
- def _add(n, d):
443
- if isinstance(d, pd.DataFrame) and not d.empty: sheets[n]=_round_numeric(d,3); order.append(n)
444
- if "Training" in selected and "Train" in res: _add("Training", res["Train"])
445
- if "Training_Metrics" in selected and res.get("m_train"): _add("Training_Metrics", pd.DataFrame([res["m_train"]]))
446
- if "Training_Summary" in selected and "Train" in res:
447
- tr_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in res["Train"].columns]
448
- _add("Training_Summary", _summary_table(res["Train"], tr_cols))
449
- if "Testing" in selected and "Test" in res: _add("Testing", res["Test"])
450
- if "Testing_Metrics" in selected and res.get("m_test"): _add("Testing_Metrics", pd.DataFrame([res["m_test"]]))
451
- if "Testing_Summary" in selected and "Test" in res:
452
- te_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in res["Test"].columns]
453
- _add("Testing_Summary", _summary_table(res["Test"], te_cols))
454
- if "Info" in selected:
455
- info = pd.DataFrame([
456
- {"Key":"AppName","Value":APP_NAME},
457
- {"Key":"Tagline","Value":TAGLINE},
458
- {"Key":"Target","Value":TARGET},
459
- {"Key":"PredColumn","Value":PRED_COL},
460
- {"Key":"Features","Value":", ".join(FEATURES)},
461
- {"Key":"ExportedAt","Value":datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
462
- ])
463
- _add("Info", info)
464
- if not order: return None, None
465
- bio = io.BytesIO()
466
- with pd.ExcelWriter(bio, engine=_excel_engine()) as w:
467
- for name in order:
468
- df = sheets[name]; df.to_excel(w, sheet_name=_excel_safe_name(name), index=False)
469
- bio.seek(0)
470
- return bio.getvalue(), f"MinStress_Export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
471
- data, fname = build_export(selected)
472
- st.download_button("⬇️ Export Excel", data=(data or b""), file_name=(fname or "MinStress_Export.xlsx"),
473
- mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
474
- disabled=(data is None))
475
 
476
  # =========================
477
  # VALIDATION (with actual)
478
  # =========================
479
  if st.session_state.app_step == "validate":
480
- st.sidebar.header("Validate the Model")
481
  up = st.sidebar.file_uploader("Upload Validation Excel", type=["xlsx","xls"])
482
  if up is not None:
483
  book = read_book_bytes(up.getvalue())
@@ -496,10 +787,12 @@ if st.session_state.app_step == "validate":
496
  book = read_book_bytes(up.getvalue())
497
  names = list(book.keys())
498
  name = next((s for s in names if s.lower() in ("validation","validate","validation2","val","val2")), names[0])
499
- df0 = _normalize_columns(book[name].copy(), FEATURES, TARGET, ALIASES)
500
- act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df0.columns) else TARGET
501
- if not ensure_cols(df0, FEATURES+[act_col]):
 
502
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
 
503
  df = df0.copy()
504
  df[PRED_COL] = _inv_transform(model.predict(_make_X(df0, FEATURES)), TRANSFORM)
505
  st.session_state.results["Validate"] = df
@@ -517,9 +810,9 @@ if st.session_state.app_step == "validate":
517
  )
518
 
519
  st.session_state.results["m_val"] = {
520
- "R": pearson_r(df[act_col], df[PRED_COL]),
521
- "RMSE": rmse(df[act_col], df[PRED_COL]),
522
- "MAPE%": mape(df[act_col], df[PRED_COL]),
523
  }
524
  st.session_state.results["sv_val"] = {"n":len(df), "pred_min":float(df[PRED_COL].min()), "pred_max":float(df[PRED_COL].max()), "oor":oor_pct}
525
  st.session_state.results["oor_tbl"] = tbl
@@ -541,28 +834,24 @@ if st.session_state.app_step == "validate":
541
  st.plotly_chart(track_plot(st.session_state.results["Validate"], include_actual=True),
542
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
543
  with col_cross:
544
- act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in st.session_state.results["Validate"].columns) else TARGET
545
- st.pyplot(cross_plot_static(st.session_state.results["Validate"][act_col],
546
  st.session_state.results["Validate"][PRED_COL]),
547
  use_container_width=False)
548
 
549
- # Export button
550
- st.divider()
551
- val_tbl = st.session_state.results["Validate"]
552
- bio = io.BytesIO()
553
- with pd.ExcelWriter(bio, engine=_excel_engine()) as w:
554
- val_tbl.to_excel(w, sheet_name="Validation", index=False)
555
- pd.DataFrame([m]).to_excel(w, sheet_name="Validation_Metrics", index=False)
556
- bio.seek(0)
557
- st.download_button("⬇️ Export Excel", data=bio.getvalue(),
558
- file_name=f"Validation_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx",
559
- mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
560
 
561
  # =========================
562
  # PREDICTION (no actual)
563
  # =========================
564
  if st.session_state.app_step == "predict":
565
- st.sidebar.header("Prediction (No Actual)")
566
  up = st.sidebar.file_uploader("Upload Prediction Excel", type=["xlsx","xls"])
567
  if up is not None:
568
  book = read_book_bytes(up.getvalue())
@@ -578,7 +867,7 @@ if st.session_state.app_step == "predict":
578
 
579
  if go_btn and up is not None:
580
  book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
581
- df0 = _normalize_columns(book[name].copy(), FEATURES, TARGET, ALIASES)
582
  if not ensure_cols(df0, FEATURES):
583
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
584
  df = df0.copy()
@@ -613,6 +902,7 @@ if st.session_state.app_step == "predict":
613
  with col_right:
614
  st.plotly_chart(track_plot(df, include_actual=False),
615
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
 
616
 
617
  # =========================
618
  # Preview modal
@@ -632,36 +922,23 @@ if st.session_state.show_preview_modal:
632
  tabs = st.tabs(names)
633
  for t, name in zip(tabs, names):
634
  with t:
635
- df = _normalize_columns(book_to_preview[name], FEATURES, TARGET, ALIASES)
636
  t1, t2 = st.tabs(["Tracks", "Summary"])
637
  with t1:
638
- # small quick-look plot of the features
639
- cols = [c for c in FEATURES if c in df.columns]
640
- if not cols:
641
- st.info("No feature columns to preview.")
642
- else:
643
- idx = np.arange(1, len(df)+1)
644
- fig, axes = plt.subplots(1, len(cols), figsize=(2.4*len(cols), 7.0), sharey=True, dpi=100)
645
- if len(cols)==1: axes=[axes]
646
- for ax, col in zip(axes, cols):
647
- x = pd.to_numeric(df[col], errors="coerce")
648
- ax.plot(x, idx, '-', lw=1.6)
649
- ax.set_xlabel(col); ax.xaxis.set_label_position('top'); ax.xaxis.tick_top()
650
- ax.set_ylim(idx.max(), idx.min()); ax.grid(True, linestyle=":", alpha=0.3)
651
- fig.tight_layout()
652
- st.pyplot(fig, use_container_width=True)
653
  with t2:
654
- cols = [c for c in FEATURES if c in df.columns]
655
- if not cols:
656
  st.info("No feature columns found to summarize.")
657
  else:
658
  tbl = (
659
- df[cols]
660
  .agg(['min','max','mean','std'])
661
  .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
662
  .reset_index(names="Feature")
663
  )
664
  df_centered_rounded(tbl)
 
665
  st.session_state.show_preview_modal = False
666
 
667
  # =========================
 
1
  # app.py — ST_Min_Horizontal_Stress (σhmin)
2
+ # Full Streamlit app trains the model inside the app (fixed best params or optional GridSearch).
3
+ # No external model file is required. Users can still download the trained .joblib + meta.json.
4
 
5
  import io, json, os, base64, math
6
  from pathlib import Path
 
29
  FEATURES = ["Q (gpm)", "SPP (psi)", "ST (kft.lbf)", "WOB (klbf)", "ROP (ft/h)"]
30
  TARGET = "σhmin (MPa)"
31
  PRED_COL = "σhmin_Pred"
32
+ ACTUAL_COL = None # If your workbook has a separate actual column, set via meta.json (actual_col)
33
  TRANSFORM = "none" # "none" | "log10" | "ln"
34
  UNITS = "MPa"
35
 
 
42
 
43
  STRICT_VERSION_CHECK = True
44
 
45
+ # Local (optional) — only used for Excel export helper sizing
46
+ MODELS_DIR = Path("models")
47
+
48
  # =========================
49
  # Page / CSS
50
  # =========================
 
74
  ]
75
 
76
  # =========================
77
+ # Password gate
78
  # =========================
79
  def inline_logo(path="logo.png") -> str:
80
  try:
 
91
  required = os.environ.get("APP_PASSWORD", "")
92
 
93
  if not required:
94
+ st.warning("Set APP_PASSWORD in Secrets (or environment) and restart.")
95
+ st.stop()
96
  if st.session_state.get("auth_ok", False):
97
  return
98
 
 
208
  X[c] = pd.to_numeric(X[c], errors="coerce")
209
  return X
210
 
211
+ # =========================
212
+ # Export helpers
213
+ # =========================
214
def _summary_table(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
    """Return a Min/Max/Mean/Std summary for the requested columns of *df*.

    Columns absent from *df* are silently skipped; an empty DataFrame is
    returned when none of the requested columns exist.
    """
    present = [c for c in cols if c in df.columns]
    if not present:
        return pd.DataFrame()
    stats = df[present].agg(['min', 'max', 'mean', 'std']).T
    stats = stats.rename(columns={"min": "Min", "max": "Max", "mean": "Mean", "std": "Std"})
    tbl = stats.reset_index(names="Field")
    # Shared rounding helper keeps export sheets consistent (3 decimals).
    return _round_numeric(tbl, 3)
222
+
223
def _train_ranges_df(ranges: dict[str, tuple[float, float]]) -> pd.DataFrame:
    """Convert a {feature: (min, max)} mapping into a Feature/Min/Max table."""
    if not ranges:
        return pd.DataFrame()
    out = pd.DataFrame(ranges).T.reset_index()
    out.columns = ["Feature", "Min", "Max"]
    return _round_numeric(out, 3)
228
+
229
def _excel_autofit(writer, sheet_name: str, df: pd.DataFrame, min_w: int = 8, max_w: int = 40):
    """Auto-size columns and freeze the header row of an exported sheet.

    Only works with the xlsxwriter engine (``set_column``/``freeze_panes``);
    when xlsxwriter is not importable this is a silent no-op.
    """
    try:
        import xlsxwriter  # noqa: F401
    except Exception:
        return
    ws = writer.sheets[sheet_name]
    for pos, column in enumerate(df.columns):
        cell_lengths = df[column].astype(str).map(len).tolist()
        widest = max([len(str(column))] + cell_lengths)
        # Clamp width to [min_w, max_w]; +2 leaves a little padding.
        ws.set_column(pos, pos, max(min_w, min(widest + 2, max_w)))
    ws.freeze_panes(1, 0)
240
+
241
def _available_sections() -> list[str]:
    """List the export-sheet names available given the current session results."""
    res = st.session_state.get("results", {})
    sections: list[str] = []
    if "Train" in res:
        sections.extend(["Training", "Training_Metrics", "Training_Summary"])
    if "Test" in res:
        sections.extend(["Testing", "Testing_Metrics", "Testing_Summary"])
    if "Validate" in res:
        sections.extend(["Validation", "Validation_Metrics", "Validation_Summary", "Validation_OOR"])
    if "PredictOnly" in res:
        sections.extend(["Prediction", "Prediction_Summary"])
    if st.session_state.get("train_ranges"):
        sections.append("Training_Ranges")
    # The Info sheet is always offered, even with no results loaded yet.
    sections.append("Info")
    return sections
251
+
252
def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: bool = True) -> tuple[bytes|None, str|None, list[str]]:
    """Assemble an in-memory Excel workbook from the session results.

    Parameters
    ----------
    selected : sheet names to include (subset of `_available_sections()`).
    ndigits : decimal places applied to every numeric cell via `_round_numeric`.
    do_autofit : when True, `_excel_autofit` sizes columns per sheet.

    Returns (bytes, filename, sheet_order), or (None, None, []) when there is
    nothing to export (no results, or no selected sheet had data).
    """
    res = st.session_state.get("results", {})
    if not res: return None, None, []
    sheets: dict[str, pd.DataFrame] = {}
    order: list[str] = []

    def _add(name: str, df: pd.DataFrame):
        # Skip None/empty frames so empty sheets never appear in the workbook.
        if df is None or (isinstance(df, pd.DataFrame) and df.empty): return
        sheets[name] = _round_numeric(df, ndigits); order.append(name)

    # Training sheets (raw rows, one-row metrics table, column summary).
    if "Training" in selected and "Train" in res: _add("Training", res["Train"])
    if "Training_Metrics" in selected and res.get("m_train"): _add("Training_Metrics", pd.DataFrame([res["m_train"]]))
    if "Training_Summary" in selected and "Train" in res:
        tr_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in res["Train"].columns]
        _add("Training_Summary", _summary_table(res["Train"], tr_cols))

    # Testing sheets (same shape as the training trio).
    if "Testing" in selected and "Test" in res: _add("Testing", res["Test"])
    if "Testing_Metrics" in selected and res.get("m_test"): _add("Testing_Metrics", pd.DataFrame([res["m_test"]]))
    if "Testing_Summary" in selected and "Test" in res:
        te_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in res["Test"].columns]
        _add("Testing_Summary", _summary_table(res["Test"], te_cols))

    # Validation sheets, including the out-of-range (OOR) table when present.
    if "Validation" in selected and "Validate" in res: _add("Validation", res["Validate"])
    if "Validation_Metrics" in selected and res.get("m_val"): _add("Validation_Metrics", pd.DataFrame([res["m_val"]]))
    if "Validation_Summary" in selected and res.get("sv_val"): _add("Validation_Summary", pd.DataFrame([res["sv_val"]]))
    if "Validation_OOR" in selected and isinstance(res.get("oor_tbl"), pd.DataFrame) and not res["oor_tbl"].empty:
        _add("Validation_OOR", res["oor_tbl"].reset_index(drop=True))

    # Prediction-only sheets (no actual column available).
    if "Prediction" in selected and "PredictOnly" in res: _add("Prediction", res["PredictOnly"])
    if "Prediction_Summary" in selected and res.get("sv_pred"): _add("Prediction_Summary", pd.DataFrame([res["sv_pred"]]))

    # Per-feature min/max observed during training (for range checks).
    if "Training_Ranges" in selected and st.session_state.get("train_ranges"):
        _add("Training_Ranges", _train_ranges_df(st.session_state["train_ranges"]))

    # Metadata sheet describing the app/model configuration at export time.
    if "Info" in selected:
        info = pd.DataFrame([
            {"Key": "AppName", "Value": APP_NAME},
            {"Key": "Tagline", "Value": TAGLINE},
            {"Key": "Target", "Value": TARGET},
            {"Key": "PredColumn", "Value": PRED_COL},
            {"Key": "Features", "Value": ", ".join(FEATURES)},
            {"Key": "ExportedAt", "Value": datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
        ])
        _add("Info", info)

    if not order: return None, None, []

    # Write sheets in selection order to an in-memory buffer (nothing on disk).
    bio = io.BytesIO()
    engine = _excel_engine()
    with pd.ExcelWriter(bio, engine=engine) as writer:
        for name in order:
            df = sheets[name]; sheet = _excel_safe_name(name)
            df.to_excel(writer, sheet_name=sheet, index=False)
            if do_autofit: _excel_autofit(writer, sheet, df)
    bio.seek(0)
    fname = f"MinStress_Export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
    return bio.getvalue(), fname, order
309
+
310
def render_export_button(phase_key: str) -> None:
    """Render the "Export to Excel" section for one app phase.

    *phase_key* namespaces the widget keys so the multiselect/download button
    can appear on several pages without Streamlit key collisions. Does nothing
    when there are no results in session state.
    """
    res = st.session_state.get("results", {})
    if not res: return
    st.divider()
    st.markdown("### Export to Excel")
    options = _available_sections()
    selected_sheets = st.multiselect(
        "Sheets to include",
        options=options,
        default=[],
        placeholder="Choose option(s)",
        help="Pick the sheets you want in the Excel export.",
        key=f"sheets_{phase_key}",
    )
    if not selected_sheets:
        # Keep a disabled button visible so the layout doesn't jump around.
        st.caption("Select one or more sheets above to enable export.")
        st.download_button("⬇️ Export Excel", data=b"", file_name="MinStress_Export.xlsx",
                           mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                           disabled=True, key=f"download_{phase_key}")
        return
    # Build the workbook eagerly; the button stays disabled if it came back empty.
    data, fname, names = build_export_workbook(selected=selected_sheets, ndigits=3, do_autofit=True)
    if names: st.caption("Will include: " + ", ".join(names))
    st.download_button("⬇️ Export Excel", data=(data or b""), file_name=(fname or "MinStress_Export.xlsx"),
                       mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                       disabled=(data is None), key=f"download_{phase_key}")
335
+
336
+ # =========================
337
+ # Plots
338
+ # =========================
339
def cross_plot_static(actual, pred):
    """Return a square matplotlib actual-vs-predicted cross plot.

    Both axes share the same padded range and ticks, with a dashed 1:1
    reference line. Figure size comes from the module-level CROSS_W/CROSS_H
    pixel constants; colors from the COLORS dict.
    """
    a = pd.Series(actual, dtype=float)
    p = pd.Series(pred, dtype=float)
    # Common range over both series, padded 3% so points don't touch the frame.
    lo = float(min(a.min(), p.min())); hi = float(max(a.max(), p.max()))
    pad = 0.03 * (hi - lo if hi > lo else 1.0)
    lo2, hi2 = lo - pad, hi + pad
    ticks = np.linspace(lo2, hi2, 5)

    dpi = 110
    # figsize is in inches; dividing the pixel constants by dpi pins pixel size.
    fig, ax = plt.subplots(figsize=(CROSS_W / dpi, CROSS_H / dpi), dpi=dpi, constrained_layout=False)
    ax.scatter(a, p, s=14, c=COLORS["pred"], alpha=0.9, linewidths=0)
    # Perfect-prediction (y = x) reference line.
    ax.plot([lo2, hi2], [lo2, hi2], linestyle="--", linewidth=1.2, color=COLORS["ref"])

    ax.set_xlim(lo2, hi2); ax.set_ylim(lo2, hi2)
    ax.set_xticks(ticks); ax.set_yticks(ticks)
    ax.set_aspect("equal", adjustable="box")

    # Two-decimal tick labels on both axes.
    fmt = FuncFormatter(lambda x, _: f"{x:.2f}")
    ax.xaxis.set_major_formatter(fmt); ax.yaxis.set_major_formatter(fmt)

    ax.set_xlabel(f"Actual Min Stress ({UNITS})", fontweight="bold", fontsize=10, color="black")
    ax.set_ylabel(f"Predicted Min Stress ({UNITS})", fontweight="bold", fontsize=10, color="black")
    ax.tick_params(labelsize=6, colors="black")
    ax.grid(True, linestyle=":", alpha=0.3)
    for spine in ax.spines.values():
        spine.set_linewidth(1.1); spine.set_color("#444")

    fig.subplots_adjust(left=0.16, bottom=0.16, right=0.98, top=0.98)
    return fig
368
+
369
def track_plot(df, include_actual=True):
    """Return a plotly depth-track figure of predicted (and optional actual) stress.

    The y axis is the first column whose name contains "depth" (reversed, so
    depth increases downward); otherwise a 1-based point index. The x axis is
    the prediction column, plus the actual column when *include_actual* is
    True and the column exists.
    """
    depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
    if depth_col is not None:
        y = pd.to_numeric(df[depth_col], errors="coerce"); ylab = depth_col
        y_range = [float(np.nanmax(y)), float(np.nanmin(y))]  # reversed: depth grows downward
    else:
        y = pd.Series(np.arange(1, len(df) + 1)); ylab = "Point Index"
        y_range = [float(y.max()), float(y.min())]

    # Shared x range over prediction (and actual, if plotted), padded 3%.
    x_series = pd.Series(df.get(PRED_COL, pd.Series(dtype=float))).astype(float)
    act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df.columns) else TARGET
    if include_actual and act_col in df.columns:
        x_series = pd.concat([x_series, pd.Series(df[act_col]).astype(float)], ignore_index=True)
    x_lo, x_hi = float(x_series.min()), float(x_series.max())
    x_pad = 0.03 * (x_hi - x_lo if x_hi > x_lo else 1.0)
    xmin, xmax = x_lo - x_pad, x_hi + x_pad
    # First "nice" tick position; helper defined elsewhere in this file.
    tick0 = _nice_tick0(xmin, step=max((xmax - xmin) / 10.0, 0.1))

    fig = go.Figure()
    if PRED_COL in df.columns:
        fig.add_trace(go.Scatter(
            x=df[PRED_COL], y=y, mode="lines",
            line=dict(color=COLORS["pred"], width=1.8),
            name=PRED_COL,
            hovertemplate=f"{PRED_COL}: "+"%{x:.2f}<br>"+ylab+": %{y}<extra></extra>"
        ))
    if include_actual and act_col in df.columns:
        # Actual curve drawn dotted so it stays distinguishable from prediction.
        fig.add_trace(go.Scatter(
            x=df[act_col], y=y, mode="lines",
            line=dict(color=COLORS["actual"], width=2.0, dash="dot"),
            name=f"{act_col} (actual)",
            hovertemplate=f"{act_col}: "+"%{x:.2f}<br>"+ylab+": %{y}<extra></extra>"
        ))

    # Fixed pixel size (TRACK_W x TRACK_H) with the legend pinned bottom-right.
    fig.update_layout(
        height=TRACK_H, width=TRACK_W, autosize=False,
        paper_bgcolor="#fff", plot_bgcolor="#fff",
        margin=dict(l=64, r=16, t=36, b=48), hovermode="closest",
        font=dict(size=FONT_SZ, color="#000"),
        legend=dict(x=0.98, y=0.05, xanchor="right", yanchor="bottom",
                    bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1),
        legend_title_text=""
    )
    # x axis rendered on top, log-style well-track convention.
    fig.update_xaxes(
        title_text=f"Min Stress ({UNITS})",
        title_font=dict(size=20, family=BOLD_FONT, color="#000"),
        tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
        side="top", range=[xmin, xmax],
        ticks="outside", tickformat=",.2f", tickmode="auto", tick0=tick0,
        showline=True, linewidth=1.2, linecolor="#444", mirror=True,
        showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
    )
    fig.update_yaxes(
        title_text=ylab,
        title_font=dict(size=20, family=BOLD_FONT, color="#000"),
        tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
        range=y_range, ticks="outside",
        showline=True, linewidth=1.2, linecolor="#444", mirror=True,
        showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
    )
    return fig
430
+
431
def preview_tracks(df: pd.DataFrame, cols: list[str]):
    """Return a matplotlib quick-look figure: one vertical track per column.

    Columns missing from *df* are dropped; with nothing to plot, a small
    placeholder figure is returned. The y axis is a depth column when one is
    present (name containing "depth"), reversed so depth increases downward;
    otherwise a 1-based point index.
    """
    cols = [c for c in cols if c in df.columns]
    n = len(cols)
    if n == 0:
        # Placeholder instead of raising, so callers can always st.pyplot() it.
        fig, ax = plt.subplots(figsize=(4, 2))
        ax.text(0.5, 0.5, "No selected columns", ha="center", va="center")
        ax.axis("off")
        return fig

    depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
    if depth_col is not None:
        idx = pd.to_numeric(df[depth_col], errors="coerce")
        y_label = depth_col
        y_min, y_max = float(np.nanmin(idx)), float(np.nanmax(idx))
    else:
        idx = pd.Series(np.arange(1, len(df) + 1))
        y_label = "Point Index"
        y_min, y_max = float(idx.min()), float(idx.max())

    # Stable per-column colors cycled from the tab20 palette.
    cmap = plt.get_cmap("tab20")
    col_colors = {col: cmap(i % cmap.N) for i, col in enumerate(cols)}

    fig, axes = plt.subplots(1, n, figsize=(2.4 * n, 7.0), sharey=True, dpi=100)
    if n == 1:
        axes = [axes]  # plt.subplots returns a bare Axes when n == 1

    for i, (ax, col) in enumerate(zip(axes, cols)):
        x = pd.to_numeric(df[col], errors="coerce")
        ax.plot(x, idx, '-', lw=1.6, color=col_colors[col])
        # Well-log convention: value labels along the top of each track.
        ax.set_xlabel(col); ax.xaxis.set_label_position('top'); ax.xaxis.tick_top()
        ax.set_ylim(y_max, y_min)  # reversed depth down
        ax.grid(True, linestyle=":", alpha=0.3)
        if i == 0:
            ax.set_ylabel(y_label)
        else:
            ax.tick_params(labelleft=False); ax.set_ylabel("")

    fig.tight_layout()
    return fig
470
+
471
  # =========================
472
  # Session state
473
  # =========================
 
479
  st.session_state.setdefault("dev_file_loaded",False)
480
  st.session_state.setdefault("dev_preview",False)
481
  st.session_state.setdefault("show_preview_modal", False)
 
 
 
482
 
483
  # =========================
484
+ # Sidebar branding
485
  # =========================
486
  st.sidebar.markdown(f"""
487
  <div class="centered-container">
 
491
  </div>
492
  """, unsafe_allow_html=True)
493
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
494
  def sticky_header(title, message):
495
  st.markdown(
496
  f"""
 
508
  unsafe_allow_html=True
509
  )
510
 
511
+ # ===============================================================
512
+ # TRAIN THE MODEL IN-APP (no external pickle needed)
513
+ # ===============================================================
514
+ from sklearn.ensemble import RandomForestRegressor
515
+ from sklearn.model_selection import train_test_split
516
+
517
+ # ❶ Set YOUR optimized hyperparameters here
518
+ BEST_PARAMS = {
519
+ "n_estimators": 300,
520
+ "max_depth": 22,
521
+ "max_features": "sqrt", # or "log2" / float in (0,1]
522
+ "min_samples_split": 2,
523
+ "min_samples_leaf": 1,
524
+ "bootstrap": True,
525
+ "random_state": 42,
526
+ "n_jobs": -1
527
+ }
528
+
529
# Sidebar: choose how the model is produced and supply the training workbook.
st.sidebar.markdown("### Model source")
source = st.sidebar.radio(
    "Choose how to get the model",
    ["Train now (fixed best params)", "Train with Grid Search (optional)"],
    help="Avoids uploading big pickles. Deterministic best-params training is recommended.",
)

st.sidebar.markdown("### Training data")
file_train = st.sidebar.file_uploader(
    "Upload Excel for training (has Train sheet or any sheet)",
    type=["xlsx", "xls"],
)
def _train_model_fixed(X: pd.DataFrame, y: pd.Series, params: dict) -> RandomForestRegressor:
    """Fit and return a RandomForestRegressor using the supplied hyperparameters."""
    forest = RandomForestRegressor(**params)
    forest.fit(X, y)
    return forest
def _download_buttons(model_obj, meta_dict):
    """Offer the trained model (.joblib) and its meta (.json) as browser downloads.

    Everything is serialized in memory — nothing is written to disk.
    """
    # Model: dump via joblib into an in-memory buffer.
    model_buf = io.BytesIO()
    joblib.dump(model_obj, model_buf)
    model_buf.seek(0)
    st.download_button("⬇️ Download trained model (.joblib)", model_buf.getvalue(), "minstress_model.joblib")

    # Meta: pretty-printed JSON bytes.
    meta_bytes = json.dumps(meta_dict, indent=2).encode("utf-8")
    st.download_button("⬇️ Download meta (.json)", meta_bytes, "minstress_meta.json")
if not file_train:
    st.info("Upload a training Excel file in the sidebar to build the model.")
    st.stop()

# Load train data: prefer a sheet literally named Train/Training, else the first sheet.
book_train = read_book_bytes(file_train.getvalue())
sheet_train = next((s for s in book_train if s.lower() in ("train", "training")), list(book_train)[0])
df_tr0 = _normalize_columns(book_train[sheet_train].copy(), FEATURES, TARGET, None)

# Build X/y — fall back to TARGET when no explicit actual column is configured.
act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df_tr0.columns) else TARGET
if not ensure_cols(df_tr0, FEATURES + [act_col]):
    st.stop()

X_all = _make_X(df_tr0, FEATURES).copy()
y_all = pd.to_numeric(df_tr0[act_col], errors="coerce")

# _make_X / to_numeric coerce bad cells to NaN, and RandomForestRegressor.fit
# raises on NaN — drop invalid rows up front instead of crashing mid-training.
valid = X_all.notna().all(axis=1) & y_all.notna()
X_all, y_all = X_all.loc[valid], y_all.loc[valid]
if X_all.empty:
    st.markdown('<div class="st-message-box st-error">No valid numeric rows found for training.</div>', unsafe_allow_html=True)
    st.stop()

# Hold out a split for metric reporting only; the final model is refit on all data.
tsz = st.sidebar.slider("Validation split for reporting", 0.10, 0.40, 0.20, 0.05)
seed = st.sidebar.number_input("Random seed", 0, 1_000_000, BEST_PARAMS.get("random_state", 42), step=1)
Xtr, Xva, ytr, yva = train_test_split(X_all, y_all, test_size=tsz, random_state=seed)
if source == "Train with Grid Search (optional)":
    from sklearn.model_selection import GridSearchCV

    # Let the user narrow the grid; empty selections fall back to BEST_PARAMS.
    st.sidebar.markdown("### Grid Search")
    n_list = st.sidebar.multiselect("n_estimators", [100, 200, 300, 400], default=[BEST_PARAMS["n_estimators"]])
    depth_list = st.sidebar.multiselect("max_depth", [12, 16, 20, 22, 26], default=[BEST_PARAMS["max_depth"]])
    maxf_list = st.sidebar.multiselect("max_features", ["sqrt", "log2"], default=[BEST_PARAMS["max_features"]])

    param_grid = {
        "n_estimators": n_list or [BEST_PARAMS["n_estimators"]],
        "max_depth": depth_list or [BEST_PARAMS["max_depth"]],
        "max_features": maxf_list or [BEST_PARAMS["max_features"]],
        "min_samples_split": [BEST_PARAMS["min_samples_split"]],
        "min_samples_leaf": [BEST_PARAMS["min_samples_leaf"]],
        "bootstrap": [BEST_PARAMS["bootstrap"]],
        "random_state": [seed],
    }
    base = RandomForestRegressor(n_jobs=-1)
    with st.spinner("Running GridSearchCV..."):
        gs = GridSearchCV(base, param_grid=param_grid, cv=3, n_jobs=-1, refit=True)
        gs.fit(Xtr, ytr)
    best = gs.best_estimator_
    st.success(f"GridSearch done. Best params: {gs.best_params_}")

    # Report metrics on the held-out split.
    pred_tr = best.predict(Xtr)
    pred_va = best.predict(Xva)
    m_train = {"R": pearson_r(ytr, pred_tr), "RMSE": rmse(ytr, pred_tr), "MAPE%": mape(ytr, pred_tr)}
    m_valid = {"R": pearson_r(yva, pred_va), "RMSE": rmse(yva, pred_va), "MAPE%": mape(yva, pred_va)}
    st.write("**Training split metrics**:", m_train)
    st.write("**Validation split metrics**:", m_valid)

    # Final model: refit on the full dataset with the winning parameters.
    model = RandomForestRegressor(**{**gs.best_params_, "n_jobs": -1, "random_state": seed})
    model.fit(X_all, y_all)
else:
    # Deterministic fixed-params training (recommended).
    params = {**BEST_PARAMS, "random_state": seed}
    with st.spinner("Training fixed-params model..."):
        tmp_model = _train_model_fixed(Xtr, ytr, params)

    # Report metrics on the held-out split before the full refit.
    pred_tr = tmp_model.predict(Xtr)
    pred_va = tmp_model.predict(Xva)
    m_train = {"R": pearson_r(ytr, pred_tr), "RMSE": rmse(ytr, pred_tr), "MAPE%": mape(ytr, pred_tr)}
    m_valid = {"R": pearson_r(yva, pred_va), "RMSE": rmse(yva, pred_va), "MAPE%": mape(yva, pred_va)}
    st.write("**Training split metrics**:", m_train)
    st.write("**Validation split metrics**:", m_valid)

    model = _train_model_fixed(X_all, y_all, params)
# Create meta + training ranges for OOR checks later.
meta = {
    "features": FEATURES,
    "target": TARGET,
    "pred_col": PRED_COL,
    "actual_col": ACTUAL_COL,
    "transform": TRANSFORM,
    "units": UNITS,
    "versions": {
        "numpy": np.__version__,
        "scikit_learn": __import__("sklearn").__version__,
    },
    "training": {
        "n_rows": int(len(X_all)),
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "used_grid_search": (source == "Train with Grid Search (optional)"),
    },
}

# Remember per-feature training min/max so later stages can flag out-of-range inputs.
feature_lo = X_all.min().to_dict()
feature_hi = X_all.max().to_dict()
st.session_state.train_ranges = {f: (float(feature_lo[f]), float(feature_hi[f])) for f in FEATURES}

st.success("Model ready ✓ — proceed to **Case Building**, **Validation**, or **Prediction**.")
_download_buttons(model, meta)
# =========================
# INTRO
# =========================
if st.session_state.app_step == "intro":
    # Landing page: brief description plus a three-step walkthrough.
    st.header("Welcome!")
    st.markdown(f"This software is developed by *Smart Thinking AI-Solutions Team* to estimate **Minimum Horizontal Stress** ({UNITS}) from drilling/offset data.")
    st.subheader("How It Works")
    st.markdown(
        "1) **Upload your data to build the case and preview the model performance.** \n"
        "2) Click **Run Model** to compute metrics and plots. \n"
        "3) **Proceed to Validation** (with actual) or **Proceed to Prediction** (no actual)."
    )
    if st.button("Start Showcase", type="primary"):
        st.session_state.app_step = "dev"; st.rerun()
664
  # =========================
665
+ # CASE BUILDING (Train/Test) — optional evaluation stage
666
  # =========================
667
  def _find_sheet(book, names):
668
  low2orig = {k.lower(): k for k in book.keys()}
 
671
  return None
672
 
673
  if st.session_state.app_step == "dev":
674
+ st.sidebar.header("Case Building")
675
+ up = st.sidebar.file_uploader("Upload Your Data File (Train/Test sheets)", type=["xlsx","xls"])
676
  if up is not None:
677
  st.session_state.dev_file_bytes = up.getvalue()
678
  st.session_state.dev_file_name = up.name
 
707
  st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
708
  st.stop()
709
 
710
+ tr0 = _normalize_columns(book[sh_train].copy(), FEATURES, TARGET, None)
711
+ te0 = _normalize_columns(book[sh_test].copy(), FEATURES, TARGET, None)
712
 
713
  actual_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in tr0.columns) else TARGET
714
  if not (ensure_cols(tr0, FEATURES+[actual_col]) and ensure_cols(te0, FEATURES+[actual_col])):
 
732
  "MAPE%": mape(te[actual_col], te[PRED_COL]),
733
  }
734
 
735
+ tr_min2 = tr[FEATURES].min().to_dict(); tr_max2 = tr[FEATURES].max().to_dict()
736
+ st.session_state.train_ranges = {f:(float(tr_min2[f]), float(tr_max2[f])) for f in FEATURES}
737
  st.markdown('<div class="st-message-box st-success">Case has been built and results are displayed below.</div>', unsafe_allow_html=True)
738
 
739
  def _dev_block(df, m):
 
753
  st.plotly_chart(track_plot(df, include_actual=True),
754
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
755
  with col_cross:
756
+ act_col2 = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df.columns) else TARGET
757
+ st.pyplot(cross_plot_static(df[act_col2], df[PRED_COL]), use_container_width=False)
758
 
759
  if "Train" in st.session_state.results or "Test" in st.session_state.results:
760
  tab1, tab2 = st.tabs(["Training", "Testing"])
 
762
  with tab1: _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
763
  if "Test" in st.session_state.results:
764
  with tab2: _dev_block(st.session_state.results["Test"], st.session_state.results["m_test"])
765
+ render_export_button(phase_key="dev")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
766
 
767
  # =========================
768
  # VALIDATION (with actual)
769
  # =========================
770
  if st.session_state.app_step == "validate":
771
+ st.sidebar.header("Validate the Model")
772
  up = st.sidebar.file_uploader("Upload Validation Excel", type=["xlsx","xls"])
773
  if up is not None:
774
  book = read_book_bytes(up.getvalue())
 
787
  book = read_book_bytes(up.getvalue())
788
  names = list(book.keys())
789
  name = next((s for s in names if s.lower() in ("validation","validate","validation2","val","val2")), names[0])
790
+ df0 = _normalize_columns(book[name].copy(), FEATURES, TARGET, None)
791
+
792
+ act_col2 = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df0.columns) else TARGET
793
+ if not ensure_cols(df0, FEATURES+[act_col2]):
794
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
795
+
796
  df = df0.copy()
797
  df[PRED_COL] = _inv_transform(model.predict(_make_X(df0, FEATURES)), TRANSFORM)
798
  st.session_state.results["Validate"] = df
 
810
  )
811
 
812
  st.session_state.results["m_val"] = {
813
+ "R": pearson_r(df[act_col2], df[PRED_COL]),
814
+ "RMSE": rmse(df[act_col2], df[PRED_COL]),
815
+ "MAPE%": mape(df[act_col2], df[PRED_COL]),
816
  }
817
  st.session_state.results["sv_val"] = {"n":len(df), "pred_min":float(df[PRED_COL].min()), "pred_max":float(df[PRED_COL].max()), "oor":oor_pct}
818
  st.session_state.results["oor_tbl"] = tbl
 
834
  st.plotly_chart(track_plot(st.session_state.results["Validate"], include_actual=True),
835
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
836
  with col_cross:
837
+ act_col3 = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in st.session_state.results["Validate"].columns) else TARGET
838
+ st.pyplot(cross_plot_static(st.session_state.results["Validate"][act_col3],
839
  st.session_state.results["Validate"][PRED_COL]),
840
  use_container_width=False)
841
 
842
+ render_export_button(phase_key="validate")
843
+
844
+ sv = st.session_state.results["sv_val"]
845
+ if sv["oor"] > 0: st.markdown('<div class="st-message-box st-warning">Some inputs fall outside **training min–max** ranges.</div>', unsafe_allow_html=True)
846
+ if st.session_state.results["oor_tbl"] is not None:
847
+ st.write("*Out-of-range rows (vs. Training min–max):*")
848
+ df_centered_rounded(st.session_state.results["oor_tbl"])
 
 
 
 
849
 
850
  # =========================
851
  # PREDICTION (no actual)
852
  # =========================
853
  if st.session_state.app_step == "predict":
854
+ st.sidebar.header("Prediction (No Actual)")
855
  up = st.sidebar.file_uploader("Upload Prediction Excel", type=["xlsx","xls"])
856
  if up is not None:
857
  book = read_book_bytes(up.getvalue())
 
867
 
868
  if go_btn and up is not None:
869
  book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
870
+ df0 = _normalize_columns(book[name].copy(), FEATURES, TARGET, None)
871
  if not ensure_cols(df0, FEATURES):
872
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
873
  df = df0.copy()
 
902
  with col_right:
903
  st.plotly_chart(track_plot(df, include_actual=False),
904
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
905
+ render_export_button(phase_key="predict")
906
 
907
  # =========================
908
  # Preview modal
 
922
  tabs = st.tabs(names)
923
  for t, name in zip(tabs, names):
924
  with t:
925
+ df = _normalize_columns(book_to_preview[name], FEATURES, TARGET, None)
926
  t1, t2 = st.tabs(["Tracks", "Summary"])
927
  with t1:
928
+ st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
929
  with t2:
930
+ feat_present = [c for c in FEATURES if c in df.columns]
931
+ if not feat_present:
932
  st.info("No feature columns found to summarize.")
933
  else:
934
  tbl = (
935
+ df[feat_present]
936
  .agg(['min','max','mean','std'])
937
  .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
938
  .reset_index(names="Feature")
939
  )
940
  df_centered_rounded(tbl)
941
+
942
  st.session_state.show_preview_modal = False
943
 
944
  # =========================