UCS2014 commited on
Commit
5408123
·
verified ·
1 Parent(s): 7d3aa07

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +312 -288
app.py CHANGED
@@ -1,12 +1,14 @@
1
- # app_ucs.pyST_GeoMech_UCS (Unified workflow like Tc) — MAPE instead of MAE
 
2
 
3
  import io, json, os, base64, math
4
  from pathlib import Path
 
 
5
  import streamlit as st
6
  import pandas as pd
7
  import numpy as np
8
  import joblib
9
- from datetime import datetime
10
 
11
  # Matplotlib (preview + cross-plot)
12
  import matplotlib
@@ -15,7 +17,7 @@ import matplotlib.pyplot as plt
15
  from matplotlib.ticker import FuncFormatter
16
 
17
  import plotly.graph_objects as go
18
- from sklearn.metrics import mean_squared_error # MAE removed
19
 
20
  # =========================
21
  # Constants / Defaults
@@ -23,16 +25,16 @@ from sklearn.metrics import mean_squared_error # MAE removed
23
  APP_NAME = "ST_TOC"
24
  TAGLINE = "Total Organic Carbon Estimation Using AI"
25
 
 
26
  FEATURES = ["AHT90", "DT", "GR", "K", "RHOB", "TNPH", "Th", "Ur"]
27
  TARGET = "TOC"
28
  PRED_COL = "TOC_Pred"
29
 
30
  MODELS_DIR = Path("models")
31
  DEFAULT_MODEL = MODELS_DIR / "toc_rf.joblib"
32
-
33
  MODEL_FALLBACKS = [MODELS_DIR / "model.joblib", MODELS_DIR / "model.pkl"]
34
- COLORS = {"pred": "#1f77b4", "actual": "#f2b702", "ref": "#5a5a5a"}
35
 
 
36
  STRICT_VERSION_CHECK = False # optional env banner
37
 
38
  # ---- Plot sizing ----
@@ -131,10 +133,6 @@ def pearson_r(y_true, y_pred) -> float:
131
  return float(np.corrcoef(a, p)[0, 1])
132
 
133
  def mape(y_true, y_pred, eps: float = 1e-8) -> float:
134
- """
135
- Mean Absolute Percentage Error in PERCENT.
136
- Ignores rows where |y_true| < eps to avoid division blowups.
137
- """
138
  a = np.asarray(y_true, dtype=float)
139
  p = np.asarray(y_pred, dtype=float)
140
  denom = np.where(np.abs(a) < eps, np.nan, np.abs(a))
@@ -155,68 +153,87 @@ def parse_excel(data_bytes: bytes):
155
  def read_book_bytes(b: bytes):
156
  return parse_excel(b) if b else {}
157
 
158
- # ---- Canonical feature aliasing (accept legacy headers) ----
159
- def _build_alias_map(canonical_features: list[str], target_name: str) -> dict:
160
- # Helper: prefer a canonical name from FEATURES if present
161
- def pick(expected, variants):
162
- for v in variants:
163
- if v in expected:
164
- return v
165
- return variants[0] # fall back
166
-
167
- # Canonical abbreviations (what we WANT in the DataFrame)
168
- can_AHT = pick(canonical_features, ["AHT90"])
169
- can_DT = pick(canonical_features, ["DT"])
170
- can_GR = pick(canonical_features, ["GR"])
171
- can_K = pick(canonical_features, ["K"])
172
- can_RHOB = pick(canonical_features, ["RHOB"])
173
- can_TNPH = pick(canonical_features, ["TNPH"])
174
- can_Th = pick(canonical_features, ["Th"])
175
- can_Ur = pick(canonical_features, ["Ur"])
176
-
177
  alias = {
178
- # Abbrev ↔ variants (map everything to the abbrev)
179
- "AHT90": can_AHT, "AHT_90": can_AHT, "AHT-90": can_AHT,
180
- "AHT90 (Average Hydrocarbon Tool 90° Phase)": can_AHT,
181
-
182
- "DT": can_DT, "AC": can_DT, "DT (us/ft)": can_DT,
183
- "DT (Delta-T Sonic Travel Time)": can_DT,
184
-
185
- "GR": can_GR, "Gamma Ray": can_GR, "GR (API)": can_GR,
186
- "GR (Gamma Ray)": can_GR,
187
-
188
- "K": can_K, "Potassium": can_K, "K (%)": can_K,
189
-
190
- "RHOB": can_RHOB, "Bulk Density": can_RHOB, "RHOB (g/cc)": can_RHOB,
191
- "RHOB (Bulk Density)": can_RHOB,
192
-
193
- "TNPH": can_TNPH, "NPHI": can_TNPH, "TNPH (%)": can_TNPH,
194
- "TNPH (Thermal Neutron Porosity)": can_TNPH,
195
-
196
- "Th": can_Th, "Thorium": can_Th, "Th (ppm)": can_Th,
197
-
198
- "Ur": can_Ur, "U": can_Ur, "U (ppm)": can_Ur,
199
-
200
- # Optional depth aliases (if you add later)
201
- "Depth": "Depth", "Depth (ft)": "Depth", "DEPTH": "Depth", "MD (ft)": "Depth",
202
-
203
- # Target aliases → canonical TARGET
204
- "TOC": target_name, "TOC (%)": target_name, "Total Organic Carbon": target_name,
205
  }
206
- return alias
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
 
209
- def _normalize_columns(df: pd.DataFrame, canonical_features: list[str], target_name: str) -> pd.DataFrame:
210
- out = df.copy()
211
- out.columns = [str(c).strip().replace(" ,", ",").replace(", ", ", ").replace(" ", " ") for c in out.columns]
212
- alias = _build_alias_map(canonical_features, target_name)
213
- actual = {k: v for k, v in alias.items() if k in out.columns and k != v}
214
- return out.rename(columns=actual)
 
215
 
216
- def ensure_cols(df: pd.DataFrame, cols: list[str]) -> bool:
217
- miss = [c for c in cols if c not in df.columns]
 
 
 
218
  if miss:
219
- st.error(f"Missing columns: {miss}\nFound: {list(df.columns)}")
220
  return False
221
  return True
222
 
@@ -240,182 +257,6 @@ def df_centered_rounded(df: pd.DataFrame, hide_index=True):
240
  )
241
  st.dataframe(styler, use_container_width=True, hide_index=hide_index)
242
 
243
- # ---------- Build X exactly as trained ----------
244
- def _make_X(df: pd.DataFrame, features: list[str]) -> pd.DataFrame:
245
- X = df.reindex(columns=features, copy=False)
246
- for c in features:
247
- X[c] = pd.to_numeric(X[c], errors="coerce")
248
- return X
249
-
250
- # === Excel export helpers =================================================
251
- def _excel_engine() -> str:
252
- try:
253
- import xlsxwriter # noqa: F401
254
- return "xlsxwriter"
255
- except Exception:
256
- return "openpyxl"
257
-
258
- def _excel_safe_name(name: str) -> str:
259
- bad = '[]:*?/\\'
260
- safe = ''.join('_' if ch in bad else ch for ch in str(name))
261
- return safe[:31]
262
-
263
- def _round_numeric(df: pd.DataFrame, ndigits: int = 2) -> pd.DataFrame:
264
- out = df.copy()
265
- for c in out.columns:
266
- if pd.api.types.is_float_dtype(out[c]) or pd.api.types.is_integer_dtype(out[c]):
267
- out[c] = pd.to_numeric(out[c], errors="coerce").round(ndigits)
268
- return out
269
-
270
- def _summary_table(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
271
- cols = [c for c in cols if c in df.columns]
272
- if not cols:
273
- return pd.DataFrame()
274
- tbl = (df[cols]
275
- .agg(['min','max','mean','std'])
276
- .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
277
- .reset_index(names="Field"))
278
- return _round_numeric(tbl)
279
-
280
- def _train_ranges_df(ranges: dict[str, tuple[float, float]]) -> pd.DataFrame:
281
- if not ranges:
282
- return pd.DataFrame()
283
- df = pd.DataFrame(ranges).T.reset_index()
284
- df.columns = ["Feature", "Min", "Max"]
285
- return _round_numeric(df)
286
-
287
- def _available_sections() -> list[str]:
288
- res = st.session_state.get("results", {})
289
- sections = []
290
- if "Train" in res: sections += ["Training","Training_Metrics","Training_Summary"]
291
- if "Test" in res: sections += ["Testing","Testing_Metrics","Testing_Summary"]
292
- if "Validate" in res: sections += ["Validation","Validation_Metrics","Validation_Summary","Validation_OOR"]
293
- if "PredictOnly" in res: sections += ["Prediction","Prediction_Summary"]
294
- if st.session_state.get("train_ranges"): sections += ["Training_Ranges"]
295
- sections += ["Info"]
296
- return sections
297
-
298
- def build_export_workbook(selected: list[str] | None = None) -> tuple[bytes|None, str|None, list[str]]:
299
- res = st.session_state.get("results", {})
300
- if not res: return None, None, []
301
-
302
- sheets: dict[str, pd.DataFrame] = {}
303
- order: list[str] = []
304
-
305
- # Training
306
- if ("Training" in (selected or _available_sections())) and "Train" in res:
307
- tr = _round_numeric(res["Train"])
308
- sheets["Training"] = tr; order.append("Training")
309
- m = res.get("m_train", {})
310
- if m:
311
- sheets["Training_Metrics"] = _round_numeric(pd.DataFrame([m])); order.append("Training_Metrics")
312
- tr_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in tr.columns]
313
- s = _summary_table(tr, tr_cols)
314
- if not s.empty:
315
- sheets["Training_Summary"] = s; order.append("Training_Summary")
316
-
317
- # Testing
318
- if ("Testing" in (selected or _available_sections())) and "Test" in res:
319
- te = _round_numeric(res["Test"])
320
- sheets["Testing"] = te; order.append("Testing")
321
- m = res.get("m_test", {})
322
- if m:
323
- sheets["Testing_Metrics"] = _round_numeric(pd.DataFrame([m])); order.append("Testing_Metrics")
324
- te_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in te.columns]
325
- s = _summary_table(te, te_cols)
326
- if not s.empty:
327
- sheets["Testing_Summary"] = s; order.append("Testing_Summary")
328
-
329
- # Validation
330
- if ("Validation" in (selected or _available_sections())) and "Validate" in res:
331
- va = _round_numeric(res["Validate"])
332
- sheets["Validation"] = va; order.append("Validation")
333
- m = res.get("m_val", {})
334
- if m:
335
- sheets["Validation_Metrics"] = _round_numeric(pd.DataFrame([m])); order.append("Validation_Metrics")
336
- sv = res.get("sv_val", {})
337
- if sv:
338
- sheets["Validation_Summary"] = _round_numeric(pd.DataFrame([sv])); order.append("Validation_Summary")
339
- oor_tbl = res.get("oor_tbl")
340
- if oor_tbl is not None and isinstance(oor_tbl, pd.DataFrame) and not oor_tbl.empty:
341
- sheets["Validation_OOR"] = _round_numeric(oor_tbl.reset_index(drop=True)); order.append("Validation_OOR")
342
-
343
- # Prediction
344
- if ("Prediction" in (selected or _available_sections())) and "PredictOnly" in res:
345
- pr = _round_numeric(res["PredictOnly"])
346
- sheets["Prediction"] = pr; order.append("Prediction")
347
- sv = res.get("sv_pred", {})
348
- if sv:
349
- sheets["Prediction_Summary"] = _round_numeric(pd.DataFrame([sv])); order.append("Prediction_Summary")
350
-
351
- # Ranges
352
- tr_ranges = st.session_state.get("train_ranges")
353
- if ("Training_Ranges" in (selected or _available_sections())) and tr_ranges:
354
- rr = _train_ranges_df(tr_ranges)
355
- if not rr.empty:
356
- sheets["Training_Ranges"] = rr; order.append("Training_Ranges")
357
-
358
- # Info
359
- info = pd.DataFrame([
360
- {"Key": "AppName", "Value": APP_NAME},
361
- {"Key": "Tagline", "Value": TAGLINE},
362
- {"Key": "Target", "Value": TARGET},
363
- {"Key": "PredColumn", "Value": PRED_COL},
364
- {"Key": "Features", "Value": ", ".join(FEATURES)},
365
- {"Key": "ExportedAt", "Value": datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
366
- ])
367
- sheets["Info"] = info; order.append("Info")
368
-
369
- bio = io.BytesIO()
370
- with pd.ExcelWriter(bio, engine=_excel_engine()) as writer:
371
- for name in order:
372
- df = sheets[name]
373
- df.to_excel(writer, sheet_name=_excel_safe_name(name), index=False)
374
- bio.seek(0)
375
-
376
- fname = f"TOC_Export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
377
- return bio.getvalue(), fname, order
378
-
379
- def render_export_button(phase_key: str) -> None:
380
- res = st.session_state.get("results", {})
381
- if not res: return
382
- st.divider()
383
- st.markdown("### Export to Excel")
384
-
385
- options = _available_sections()
386
- selected_sheets = st.multiselect(
387
- "Sheets to include",
388
- options=options,
389
- default=[],
390
- placeholder="Choose option(s)",
391
- help="Pick the sheets you want to include in the Excel export.",
392
- key=f"sheets_{phase_key}",
393
- )
394
-
395
- if not selected_sheets:
396
- st.caption("Select one or more sheets above to enable the export.")
397
- st.download_button(
398
- label="⬇️ Export Excel",
399
- data=b"",
400
- file_name="TOC_Export.xlsx",
401
- mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
402
- disabled=True,
403
- key=f"download_{phase_key}",
404
- )
405
- return
406
-
407
- data, fname, names = build_export_workbook(selected=selected_sheets)
408
- if names:
409
- st.caption("Will include: " + ", ".join(names))
410
- st.download_button(
411
- "⬇️ Export Excel",
412
- data=(data or b""),
413
- file_name=(fname or "TOC_Export.xlsx"),
414
- mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
415
- disabled=(data is None),
416
- key=f"download_{phase_key}",
417
- )
418
-
419
  # =========================
420
  # Cross plot (Matplotlib)
421
  # =========================
@@ -439,7 +280,7 @@ def cross_plot_static(actual, pred):
439
  ax.set_xticks(ticks); ax.set_yticks(ticks)
440
  ax.set_aspect("equal", adjustable="box")
441
 
442
- fmt = FuncFormatter(lambda x, _: f"{x:,.0f}")
443
  ax.xaxis.set_major_formatter(fmt); ax.yaxis.set_major_formatter(fmt)
444
 
445
  ax.set_xlabel("Actual TOC (%)", fontweight="bold", fontsize=10, color="black")
@@ -457,7 +298,8 @@ def cross_plot_static(actual, pred):
457
  # Track plot (Plotly)
458
  # =========================
459
  def track_plot(df, include_actual=True):
460
- depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
 
461
  if depth_col is not None:
462
  y = pd.Series(df[depth_col]).astype(float); ylab = depth_col
463
  y_range = [float(y.max()), float(y.min())] # reversed
@@ -471,7 +313,7 @@ def track_plot(df, include_actual=True):
471
  x_lo, x_hi = float(x_series.min()), float(x_series.max())
472
  x_pad = 0.03 * (x_hi - x_lo if x_hi > x_lo else 1.0)
473
  xmin, xmax = x_lo - x_pad, x_hi + x_pad
474
- tick0 = _nice_tick0(xmin, step=100)
475
 
476
  fig = go.Figure()
477
  if PRED_COL in df.columns:
@@ -479,14 +321,14 @@ def track_plot(df, include_actual=True):
479
  x=df[PRED_COL], y=y, mode="lines",
480
  line=dict(color=COLORS["pred"], width=1.8),
481
  name=PRED_COL,
482
- hovertemplate=f"{PRED_COL}: "+"%{x:.0f}<br>"+ylab+": %{y}<extra></extra>"
483
  ))
484
  if include_actual and TARGET in df.columns:
485
  fig.add_trace(go.Scatter(
486
  x=df[TARGET], y=y, mode="lines",
487
  line=dict(color=COLORS["actual"], width=2.0, dash="dot"),
488
  name=f"{TARGET} (actual)",
489
- hovertemplate=f"{TARGET}: "+"%{x:.0f}<br>"+ylab+": %{y}<extra></extra>"
490
  ))
491
 
492
  fig.update_layout(
@@ -497,13 +339,13 @@ def track_plot(df, include_actual=True):
497
  legend=dict(x=0.98, y=0.05, xanchor="right", yanchor="bottom",
498
  bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1),
499
  legend_title_text=""
500
- )
501
  fig.update_xaxes(
502
  title_text="TOC (%)",
503
  title_font=dict(size=20, family=BOLD_FONT, color="#000"),
504
  tickfont=dict(size=12, family=BOLD_FONT, color="#000"),
505
  side="top", range=[xmin, xmax],
506
- ticks="outside", tickformat=",.0f", tickmode="auto", tick0=tick0,
507
  showline=True, linewidth=1.2, linecolor="#444", mirror=True,
508
  showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
509
  )
@@ -517,13 +359,9 @@ def track_plot(df, include_actual=True):
517
  )
518
  return fig
519
 
520
- # ---------- Preview (Matplotlib) ----------
521
  def preview_tracks(df: pd.DataFrame, cols: list[str]):
522
- """
523
- Multi-track quick-look:
524
- - distinct color per input (stable tab20 palette)
525
- - shared Y & reversed (Depth down if available)
526
- """
527
  cols = [c for c in cols if c in df.columns]
528
  n = len(cols)
529
  if n == 0:
@@ -531,8 +369,7 @@ def preview_tracks(df: pd.DataFrame, cols: list[str]):
531
  ax.text(0.5, 0.5, "No selected columns", ha="center", va="center"); ax.axis("off")
532
  return fig
533
 
534
- # Depth or fallback
535
- depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
536
  if depth_col is not None:
537
  idx = pd.to_numeric(df[depth_col], errors="coerce")
538
  y_label = depth_col
@@ -550,8 +387,9 @@ def preview_tracks(df: pd.DataFrame, cols: list[str]):
550
  for i, (ax, col) in enumerate(zip(axes, cols)):
551
  x = pd.to_numeric(df[col], errors="coerce")
552
  ax.plot(x, idx, '-', lw=1.8, color=col_colors[col])
553
- ax.set_xlabel(col); ax.xaxis.set_label_position('top'); ax.xaxis.tick_top()
554
- ax.set_ylim(y_max, y_min) # reversed
 
555
  ax.grid(True, linestyle=":", alpha=0.3)
556
  if i == 0:
557
  ax.set_ylabel(y_label)
@@ -591,7 +429,7 @@ except Exception as e:
591
  st.error(f"Failed to load model: {e}")
592
  st.stop()
593
 
594
- # Prefer toc-specific meta
595
  meta = {}
596
  meta_candidates = [MODELS_DIR / "toc_meta.json", MODELS_DIR / "meta.json"]
597
  meta_path = next((p for p in meta_candidates if p.exists()), None)
@@ -604,7 +442,6 @@ if meta_path:
604
  except Exception as e:
605
  st.warning(f"Could not parse meta file ({meta_path.name}): {e}")
606
 
607
- # Optional: version banner
608
  if STRICT_VERSION_CHECK and meta.get("versions"):
609
  import numpy as _np, sklearn as _skl
610
  mv = meta["versions"]; msg=[]
@@ -711,15 +548,17 @@ if st.session_state.app_step == "dev":
711
  st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training/training2 and Test/Testing/testing2 sheets.</div>', unsafe_allow_html=True)
712
  st.stop()
713
 
714
- tr = _normalize_columns(book[sh_train].copy(), FEATURES, TARGET)
715
- te = _normalize_columns(book[sh_test].copy(), FEATURES, TARGET)
716
 
717
- if not (ensure_cols(tr, FEATURES+[TARGET]) and ensure_cols(te, FEATURES+[TARGET])):
718
- st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True)
719
  st.stop()
720
 
721
- tr[PRED_COL] = model.predict(_make_X(tr, FEATURES))
722
- te[PRED_COL] = model.predict(_make_X(te, FEATURES))
 
 
 
723
 
724
  st.session_state.results["Train"]=tr; st.session_state.results["Test"]=te
725
  st.session_state.results["m_train"]={
@@ -761,10 +600,137 @@ if st.session_state.app_step == "dev":
761
  with tab1: _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
762
  if "Test" in st.session_state.results:
763
  with tab2: _dev_block(st.session_state.results["Test"], st.session_state.results["m_test"])
764
- render_export_button(phase_key="dev")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
765
 
766
  # =========================
767
- # VALIDATION (with actual UCS)
768
  # =========================
769
  if st.session_state.app_step == "validate":
770
  st.sidebar.header("Validate the Model")
@@ -780,26 +746,29 @@ if st.session_state.app_step == "validate":
780
  if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
781
  if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
782
 
783
- sticky_header("Validate the Model", "Upload a dataset with the same **features** and **UCS** to evaluate performance.")
784
 
785
  if go_btn and up is not None:
786
  book = read_book_bytes(up.getvalue())
787
  name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
788
- df = _normalize_columns(book[name].copy(), FEATURES, TARGET)
789
- if not ensure_cols(df, FEATURES+[TARGET]):
790
- st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
791
- df[PRED_COL] = model.predict(_make_X(df, FEATURES))
 
 
 
792
  st.session_state.results["Validate"]=df
793
 
794
  ranges = st.session_state.train_ranges; oor_pct = 0.0; tbl=None
795
  if ranges:
796
- any_viol = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).any(axis=1)
797
  oor_pct = float(any_viol.mean()*100.0)
798
  if any_viol.any():
799
- tbl = df.loc[any_viol, FEATURES].copy()
800
- for c in FEATURES:
801
  if pd.api.types.is_numeric_dtype(tbl[c]): tbl[c] = tbl[c].round(2)
802
- tbl["Violations"] = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).loc[any_viol].apply(
803
  lambda r:", ".join([c for c,v in r.items() if v]), axis=1
804
  )
805
  st.session_state.results["m_val"]={
@@ -833,7 +802,40 @@ if st.session_state.app_step == "validate":
833
  st.session_state.results["Validate"][PRED_COL]),
834
  use_container_width=False)
835
 
836
- render_export_button(phase_key="validate")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
837
 
838
  sv = st.session_state.results["sv_val"]
839
  if sv["oor"] > 0: st.markdown('<div class="st-message-box st-warning">Some inputs fall outside **training min–max** ranges.</div>', unsafe_allow_html=True)
@@ -842,10 +844,10 @@ if st.session_state.app_step == "validate":
842
  df_centered_rounded(st.session_state.results["oor_tbl"])
843
 
844
  # =========================
845
- # PREDICTION (no actual UCS)
846
  # =========================
847
  if st.session_state.app_step == "predict":
848
- st.sidebar.header("Prediction (No Actual UCS)")
849
  up = st.sidebar.file_uploader("Upload Prediction Excel", type=["xlsx","xls"])
850
  if up is not None:
851
  book = read_book_bytes(up.getvalue())
@@ -857,19 +859,22 @@ if st.session_state.app_step == "predict":
857
  go_btn = st.sidebar.button("Predict", type="primary", use_container_width=True)
858
  if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
859
 
860
- sticky_header("Prediction", "Upload a dataset with the feature columns (no **UCS**).")
861
 
862
  if go_btn and up is not None:
863
  book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
864
- df = _normalize_columns(book[name].copy(), FEATURES, TARGET)
865
- if not ensure_cols(df, FEATURES):
866
- st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
867
- df[PRED_COL] = model.predict(_make_X(df, FEATURES))
 
 
 
868
  st.session_state.results["PredictOnly"]=df
869
 
870
  ranges = st.session_state.train_ranges; oor_pct = 0.0
871
  if ranges:
872
- any_viol = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).any(axis=1)
873
  oor_pct = float(any_viol.mean()*100.0)
874
  st.session_state.results["sv_pred"]={
875
  "n":len(df),
@@ -897,7 +902,26 @@ if st.session_state.app_step == "predict":
897
  st.plotly_chart(track_plot(df, include_actual=False),
898
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
899
 
900
- render_export_button(phase_key="predict")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
901
 
902
  # =========================
903
  # Preview modal
@@ -917,7 +941,7 @@ if st.session_state.show_preview_modal:
917
  tabs = st.tabs(names)
918
  for t, name in zip(tabs, names):
919
  with t:
920
- df = _normalize_columns(book_to_preview[name], FEATURES, TARGET)
921
  t1, t2 = st.tabs(["Tracks", "Summary"])
922
  with t1:
923
  st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)
 
1
+ # ST_TOCTotal Organic Carbon Estimation Using AI
2
+ # Abbrev-only UI + model-order-safe predictions (bypass sklearn feature-name check)
3
 
4
  import io, json, os, base64, math
5
  from pathlib import Path
6
+ from datetime import datetime
7
+
8
  import streamlit as st
9
  import pandas as pd
10
  import numpy as np
11
  import joblib
 
12
 
13
  # Matplotlib (preview + cross-plot)
14
  import matplotlib
 
17
  from matplotlib.ticker import FuncFormatter
18
 
19
  import plotly.graph_objects as go
20
+ from sklearn.metrics import mean_squared_error
21
 
22
  # =========================
23
  # Constants / Defaults
 
25
  APP_NAME = "ST_TOC"
26
  TAGLINE = "Total Organic Carbon Estimation Using AI"
27
 
28
+ # UI feature list (abbreviations only)
29
  FEATURES = ["AHT90", "DT", "GR", "K", "RHOB", "TNPH", "Th", "Ur"]
30
  TARGET = "TOC"
31
  PRED_COL = "TOC_Pred"
32
 
33
  MODELS_DIR = Path("models")
34
  DEFAULT_MODEL = MODELS_DIR / "toc_rf.joblib"
 
35
  MODEL_FALLBACKS = [MODELS_DIR / "model.joblib", MODELS_DIR / "model.pkl"]
 
36
 
37
+ COLORS = {"pred": "#1f77b4", "actual": "#f2b702", "ref": "#5a5a5a"}
38
  STRICT_VERSION_CHECK = False # optional env banner
39
 
40
  # ---- Plot sizing ----
 
133
  return float(np.corrcoef(a, p)[0, 1])
134
 
135
  def mape(y_true, y_pred, eps: float = 1e-8) -> float:
 
 
 
 
136
  a = np.asarray(y_true, dtype=float)
137
  p = np.asarray(y_pred, dtype=float)
138
  denom = np.where(np.abs(a) < eps, np.nan, np.abs(a))
 
153
  def read_book_bytes(b: bytes):
154
  return parse_excel(b) if b else {}
155
 
156
+ # ---------- Header normalization (to abbreviations for UI) ----------
157
+ def _strip_parens(name: str) -> str:
158
+ s = str(name).strip()
159
+ if "(" in s and s.endswith(")"):
160
+ s = s.split("(", 1)[0].strip()
161
+ return s
162
+
163
+ def _abbr(name: str) -> str:
164
+ """Turn any variant into the canonical abbreviation used in UI FEATURES."""
165
+ n = _strip_parens(name)
166
+ n = n.replace(" ", "").replace("_", "").replace("-", "")
 
 
 
 
 
 
 
 
167
  alias = {
168
+ "AC": "DT",
169
+ "DTus/ft": "DT", "DTusft": "DT",
170
+ "NPHI": "TNPH", "TNPHPercent": "TNPH", "TNPH%": "TNPH",
171
+ "GammaRay": "GR", "GRAPI": "GR",
172
+ "BulkDensity": "RHOB", "RHOBgcc": "RHOB",
173
+ "Thorium": "Th", "TH": "Th",
174
+ "U": "Ur", "UR": "Ur", "Uranium": "Ur",
175
+ "KPercent": "K", "K%": "K", "Potassium": "K",
176
+ "AHT_90": "AHT90", "AHT90AverageHydrocarbonTool90°Phase": "AHT90",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  }
178
+ # preserve core mnemonics
179
+ if n.upper() in {"GR", "DT", "RHOB"}: return n.upper() if n.upper() != "DT" else "DT"
180
+ if n.upper() == "AHT90": return "AHT90"
181
+ if n.upper() == "TNPH": return "TNPH"
182
+ if n.capitalize() == "Th": return "Th"
183
+ if n.capitalize() == "Ur": return "Ur"
184
+ return alias.get(n, n)
185
+
186
+ def normalize_to_abbr(df: pd.DataFrame) -> pd.DataFrame:
187
+ out = df.copy()
188
+ newcols = []
189
+ for c in out.columns:
190
+ ac = _abbr(c)
191
+ if ac in FEATURES: # map features to their abbreviations
192
+ newcols.append(ac)
193
+ elif str(c).strip().lower() in {"toc", "toc (%)", "totalorganiccarbon"}:
194
+ newcols.append(TARGET)
195
+ elif "depth" in str(c).lower():
196
+ newcols.append("Depth") # for track plotting only
197
+ else:
198
+ newcols.append(str(c))
199
+ out.columns = newcols
200
+ return out
201
 
202
+ # ---- Build X in the model's training order & avoid name check ----
203
+ def _make_X_for_model(df: pd.DataFrame, model, fallback_features: list[str]) -> np.ndarray:
204
+ """
205
+ Returns a NumPy array with columns ordered exactly as in model training.
206
+ Using np.ndarray bypasses sklearn's feature-name validation.
207
+ """
208
+ df_abbr = normalize_to_abbr(df)
209
+ # mapping abbr -> actual column present
210
+ colmap = { _abbr(c): c for c in df_abbr.columns }
211
+
212
+ train_names = list(getattr(model, "feature_names_in_", fallback_features))
213
+ order_cols = []
214
+ missing = []
215
+ for nm in train_names:
216
+ ab = _abbr(nm)
217
+ if ab in colmap:
218
+ order_cols.append(colmap[ab])
219
+ else:
220
+ missing.append(nm)
221
 
222
+ if missing:
223
+ st.markdown('<div class="st-message-box st-error">Missing required columns for prediction (by model training): '
224
+ + ", ".join(missing) + '</div>', unsafe_allow_html=True)
225
+ st.stop()
226
+
227
+ X = df_abbr[order_cols].apply(pd.to_numeric, errors="coerce")
228
+ return X.to_numpy()
229
 
230
+ def ensure_required_features(df: pd.DataFrame, model, fallback_features: list[str]) -> bool:
231
+ df_abbr = normalize_to_abbr(df)
232
+ need = [_abbr(nm) for nm in list(getattr(model, "feature_names_in_", fallback_features))]
233
+ have = {_abbr(c) for c in df_abbr.columns}
234
+ miss = [n for n in need if n not in have]
235
  if miss:
236
+ st.error(f"Missing columns: {miss}\nFound: {sorted(list(have))}")
237
  return False
238
  return True
239
 
 
257
  )
258
  st.dataframe(styler, use_container_width=True, hide_index=hide_index)
259
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  # =========================
261
  # Cross plot (Matplotlib)
262
  # =========================
 
280
  ax.set_xticks(ticks); ax.set_yticks(ticks)
281
  ax.set_aspect("equal", adjustable="box")
282
 
283
+ fmt = FuncFormatter(lambda x, _: f"{x:,.1f}")
284
  ax.xaxis.set_major_formatter(fmt); ax.yaxis.set_major_formatter(fmt)
285
 
286
  ax.set_xlabel("Actual TOC (%)", fontweight="bold", fontsize=10, color="black")
 
298
  # Track plot (Plotly)
299
  # =========================
300
  def track_plot(df, include_actual=True):
301
+ df = normalize_to_abbr(df)
302
+ depth_col = next((c for c in df.columns if 'depth' in str(c).lower() or c == "Depth"), None)
303
  if depth_col is not None:
304
  y = pd.Series(df[depth_col]).astype(float); ylab = depth_col
305
  y_range = [float(y.max()), float(y.min())] # reversed
 
313
  x_lo, x_hi = float(x_series.min()), float(x_series.max())
314
  x_pad = 0.03 * (x_hi - x_lo if x_hi > x_lo else 1.0)
315
  xmin, xmax = x_lo - x_pad, x_hi + x_pad
316
+ tick0 = _nice_tick0(xmin, step=0.5)
317
 
318
  fig = go.Figure()
319
  if PRED_COL in df.columns:
 
321
  x=df[PRED_COL], y=y, mode="lines",
322
  line=dict(color=COLORS["pred"], width=1.8),
323
  name=PRED_COL,
324
+ hovertemplate=f"{PRED_COL}: "+"%{x:.2f}<br>"+ylab+": %{y}<extra></extra>"
325
  ))
326
  if include_actual and TARGET in df.columns:
327
  fig.add_trace(go.Scatter(
328
  x=df[TARGET], y=y, mode="lines",
329
  line=dict(color=COLORS["actual"], width=2.0, dash="dot"),
330
  name=f"{TARGET} (actual)",
331
+ hovertemplate=f"{TARGET}: "+"%{x:.2f}<br>"+ylab+": %{y}<extra></extra>"
332
  ))
333
 
334
  fig.update_layout(
 
339
  legend=dict(x=0.98, y=0.05, xanchor="right", yanchor="bottom",
340
  bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1),
341
  legend_title_text=""
342
+ )
343
  fig.update_xaxes(
344
  title_text="TOC (%)",
345
  title_font=dict(size=20, family=BOLD_FONT, color="#000"),
346
  tickfont=dict(size=12, family=BOLD_FONT, color="#000"),
347
  side="top", range=[xmin, xmax],
348
+ ticks="outside", tickformat=",.2f", tickmode="auto", tick0=tick0,
349
  showline=True, linewidth=1.2, linecolor="#444", mirror=True,
350
  showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
351
  )
 
359
  )
360
  return fig
361
 
362
+ # ---------- Preview tracks (Matplotlib) ----------
363
  def preview_tracks(df: pd.DataFrame, cols: list[str]):
364
+ df = normalize_to_abbr(df)
 
 
 
 
365
  cols = [c for c in cols if c in df.columns]
366
  n = len(cols)
367
  if n == 0:
 
369
  ax.text(0.5, 0.5, "No selected columns", ha="center", va="center"); ax.axis("off")
370
  return fig
371
 
372
+ depth_col = next((c for c in df.columns if 'depth' in str(c).lower() or c == "Depth"), None)
 
373
  if depth_col is not None:
374
  idx = pd.to_numeric(df[depth_col], errors="coerce")
375
  y_label = depth_col
 
387
  for i, (ax, col) in enumerate(zip(axes, cols)):
388
  x = pd.to_numeric(df[col], errors="coerce")
389
  ax.plot(x, idx, '-', lw=1.8, color=col_colors[col])
390
+ ax.set_xlabel(col) # abbreviations only
391
+ ax.xaxis.set_label_position('top'); ax.xaxis.tick_top()
392
+ ax.set_ylim(y_max, y_min) # reversed
393
  ax.grid(True, linestyle=":", alpha=0.3)
394
  if i == 0:
395
  ax.set_ylabel(y_label)
 
429
  st.error(f"Failed to load model: {e}")
430
  st.stop()
431
 
432
+ # Optional meta to override defaults
433
  meta = {}
434
  meta_candidates = [MODELS_DIR / "toc_meta.json", MODELS_DIR / "meta.json"]
435
  meta_path = next((p for p in meta_candidates if p.exists()), None)
 
442
  except Exception as e:
443
  st.warning(f"Could not parse meta file ({meta_path.name}): {e}")
444
 
 
445
  if STRICT_VERSION_CHECK and meta.get("versions"):
446
  import numpy as _np, sklearn as _skl
447
  mv = meta["versions"]; msg=[]
 
548
  st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training/training2 and Test/Testing/testing2 sheets.</div>', unsafe_allow_html=True)
549
  st.stop()
550
 
551
+ tr_raw = book[sh_train].copy()
552
+ te_raw = book[sh_test].copy()
553
 
554
+ if not (ensure_required_features(tr_raw, model, FEATURES) and ensure_required_features(te_raw, model, FEATURES)):
 
555
  st.stop()
556
 
557
+ tr = normalize_to_abbr(tr_raw)
558
+ te = normalize_to_abbr(te_raw)
559
+
560
+ tr[PRED_COL] = model.predict(_make_X_for_model(tr_raw, model, FEATURES))
561
+ te[PRED_COL] = model.predict(_make_X_for_model(te_raw, model, FEATURES))
562
 
563
  st.session_state.results["Train"]=tr; st.session_state.results["Test"]=te
564
  st.session_state.results["m_train"]={
 
600
  with tab1: _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
601
  if "Test" in st.session_state.results:
602
  with tab2: _dev_block(st.session_state.results["Test"], st.session_state.results["m_test"])
603
+ st.divider()
604
+ st.markdown("### Export to Excel")
605
+
606
+ # Export builder
607
+ def _excel_engine() -> str:
608
+ try:
609
+ import xlsxwriter # noqa: F401
610
+ return "xlsxwriter"
611
+ except Exception:
612
+ return "openpyxl"
613
+
614
+ def _excel_safe_name(name: str) -> str:
615
+ bad = '[]:*?/\\'
616
+ safe = ''.join('_' if ch in bad else ch for ch in str(name))
617
+ return safe[:31]
618
+
619
+ def _round_numeric(df: pd.DataFrame, ndigits: int = 2) -> pd.DataFrame:
620
+ out = df.copy()
621
+ for c in out.columns:
622
+ if pd.api.types.is_float_dtype(out[c]) or pd.api.types.is_integer_dtype(out[c]):
623
+ out[c] = pd.to_numeric(out[c], errors="coerce").round(ndigits)
624
+ return out
625
+
626
def _summary_table(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
    """Build a Min/Max/Mean/Std summary (one row per field) for *cols* of *df*.

    Columns absent from *df* are ignored; returns an empty frame when none of
    the requested columns exist. Values are rounded via _round_numeric.
    """
    present = [c for c in cols if c in df.columns]
    if not present:
        return pd.DataFrame()
    stats = df[present].agg(['min', 'max', 'mean', 'std']).T
    stats = stats.rename(columns={"min": "Min", "max": "Max", "mean": "Mean", "std": "Std"})
    stats = stats.reset_index(names="Field")
    return _round_numeric(stats)
635
+
636
def _train_ranges_df(ranges: dict[str, tuple[float, float]]) -> pd.DataFrame:
    """Convert a {feature: (min, max)} mapping into a Feature/Min/Max table.

    Returns an empty frame for an empty/None mapping; values are rounded via
    _round_numeric.
    """
    if not ranges:
        return pd.DataFrame()
    table = pd.DataFrame(ranges).T.reset_index()
    table.columns = ["Feature", "Min", "Max"]
    return _round_numeric(table)
642
+
643
def _available_sections() -> list[str]:
    """List the exportable sheet names implied by current session-state results.

    "Info" is always offered; the rest depend on which result sets exist.
    """
    res = st.session_state.get("results", {})
    section_map = [
        ("Train", ["Training", "Training_Metrics", "Training_Summary"]),
        ("Test", ["Testing", "Testing_Metrics", "Testing_Summary"]),
        ("Validate", ["Validation", "Validation_Metrics", "Validation_Summary", "Validation_OOR"]),
        ("PredictOnly", ["Prediction", "Prediction_Summary"]),
    ]
    sections: list[str] = []
    for result_key, sheet_names in section_map:
        if result_key in res:
            sections.extend(sheet_names)
    if st.session_state.get("train_ranges"):
        sections.append("Training_Ranges")
    sections.append("Info")
    return sections
653
+
654
def build_export_workbook(selected: list[str] | None = None) -> tuple[bytes | None, str | None, list[str]]:
    """Build an in-memory Excel workbook from the session-state results.

    Parameters
    ----------
    selected : optional list of sheet names to include; when falsy, every
        section reported by _available_sections() is exported.

    Returns
    -------
    (xlsx_bytes, filename, sheet_order) — (None, None, []) when no results
    exist in session state.
    """
    res = st.session_state.get("results", {})
    if not res:
        return None, None, []

    # Fix: compute the wanted set once instead of re-evaluating
    # `selected or _available_sections()` for every section below.
    wanted = set(selected) if selected else set(_available_sections())

    sheets: dict[str, pd.DataFrame] = {}
    order: list[str] = []

    def _add(name: str, frame: pd.DataFrame) -> None:
        # Register a sheet and remember its position in the workbook.
        sheets[name] = frame
        order.append(name)

    if "Training" in wanted and "Train" in res:
        tr = _round_numeric(res["Train"])
        _add("Training", tr)
        m = res.get("m_train", {})
        if m:
            _add("Training_Metrics", _round_numeric(pd.DataFrame([m])))
        s = _summary_table(tr, FEATURES + [c for c in [TARGET, PRED_COL] if c in tr.columns])
        if not s.empty:
            _add("Training_Summary", s)

    if "Testing" in wanted and "Test" in res:
        te = _round_numeric(res["Test"])
        _add("Testing", te)
        m = res.get("m_test", {})
        if m:
            _add("Testing_Metrics", _round_numeric(pd.DataFrame([m])))
        s = _summary_table(te, FEATURES + [c for c in [TARGET, PRED_COL] if c in te.columns])
        if not s.empty:
            _add("Testing_Summary", s)

    if "Validation" in wanted and "Validate" in res:
        va = _round_numeric(res["Validate"])
        _add("Validation", va)
        m = res.get("m_val", {})
        if m:
            _add("Validation_Metrics", _round_numeric(pd.DataFrame([m])))
        sv = res.get("sv_val", {})
        if sv:
            _add("Validation_Summary", _round_numeric(pd.DataFrame([sv])))
        oor_tbl = res.get("oor_tbl")
        if isinstance(oor_tbl, pd.DataFrame) and not oor_tbl.empty:
            _add("Validation_OOR", _round_numeric(oor_tbl.reset_index(drop=True)))

    if "Prediction" in wanted and "PredictOnly" in res:
        pr = _round_numeric(res["PredictOnly"])
        _add("Prediction", pr)
        sv = res.get("sv_pred", {})
        if sv:
            _add("Prediction_Summary", _round_numeric(pd.DataFrame([sv])))

    tr_ranges = st.session_state.get("train_ranges")
    if "Training_Ranges" in wanted and tr_ranges:
        rr = _train_ranges_df(tr_ranges)
        if not rr.empty:
            _add("Training_Ranges", rr)

    # Fix: the Info sheet was previously appended unconditionally, ignoring the
    # user's selection even though "Info" is offered in the multiselect.
    if "Info" in wanted:
        info = pd.DataFrame([
            {"Key": "AppName", "Value": APP_NAME},
            {"Key": "Tagline", "Value": TAGLINE},
            {"Key": "Target", "Value": TARGET},
            {"Key": "PredColumn", "Value": PRED_COL},
            {"Key": "Features", "Value": ", ".join(FEATURES)},
            {"Key": "ExportedAt", "Value": datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
        ])
        _add("Info", info)

    bio = io.BytesIO()
    with pd.ExcelWriter(bio, engine=_excel_engine()) as writer:
        for name in order:
            sheets[name].to_excel(writer, sheet_name=_excel_safe_name(name), index=False)
    bio.seek(0)
    fname = f"TOC_Export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
    return bio.getvalue(), fname, order
711
+
712
+ options = _available_sections()
713
+ selected_sheets = st.multiselect(
714
+ "Sheets to include",
715
+ options=options, default=[],
716
+ placeholder="Choose option(s)",
717
+ help="Pick the sheets you want to include in the Excel export.",
718
+ key="sheets_dev",
719
+ )
720
+ if not selected_sheets:
721
+ st.caption("Select one or more sheets above to enable the export.")
722
+ st.download_button("⬇️ Export Excel", data=b"", file_name="TOC_Export.xlsx",
723
+ mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
724
+ disabled=True, key="download_dev_disabled")
725
+ else:
726
+ data, fname, names = build_export_workbook(selected=selected_sheets)
727
+ if names: st.caption("Will include: " + ", ".join(names))
728
+ st.download_button("⬇️ Export Excel", data=(data or b""), file_name=(fname or "TOC_Export.xlsx"),
729
+ mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
730
+ disabled=(data is None), key="download_dev")
731
 
732
  # =========================
733
+ # VALIDATION (with actual TOC)
734
  # =========================
735
  if st.session_state.app_step == "validate":
736
  st.sidebar.header("Validate the Model")
 
746
  if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
747
  if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
748
 
749
+ sticky_header("Validate the Model", "Upload a dataset with the same **features** and **TOC** to evaluate performance.")
750
 
751
  if go_btn and up is not None:
752
  book = read_book_bytes(up.getvalue())
753
  name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
754
+ df_raw = book[name].copy()
755
+
756
+ if not ensure_required_features(df_raw, model, FEATURES):
757
+ st.stop()
758
+
759
+ df = normalize_to_abbr(df_raw)
760
+ df[PRED_COL] = model.predict(_make_X_for_model(df_raw, model, FEATURES))
761
  st.session_state.results["Validate"]=df
762
 
763
  ranges = st.session_state.train_ranges; oor_pct = 0.0; tbl=None
764
  if ranges:
765
+ any_viol = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES if f in df.columns}).any(axis=1)
766
  oor_pct = float(any_viol.mean()*100.0)
767
  if any_viol.any():
768
+ tbl = df.loc[any_viol, [c for c in FEATURES if c in df.columns]].copy()
769
+ for c in [c for c in FEATURES if c in tbl.columns]:
770
  if pd.api.types.is_numeric_dtype(tbl[c]): tbl[c] = tbl[c].round(2)
771
+ tbl["Violations"] = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES if f in df.columns}).loc[any_viol].apply(
772
  lambda r:", ".join([c for c,v in r.items() if v]), axis=1
773
  )
774
  st.session_state.results["m_val"]={
 
802
  st.session_state.results["Validate"][PRED_COL]),
803
  use_container_width=False)
804
 
805
+ st.divider()
806
+ st.markdown("### Export to Excel")
807
+ # Reuse export from dev by enabling chosen sections
808
def _available_sections_val():
    """Sheet names available for the validation-only export."""
    results = st.session_state.get("results", {})
    names = ["Validation", "Validation_Metrics", "Validation_Summary"]
    oor = results.get("oor_tbl")
    if isinstance(oor, pd.DataFrame) and not oor.empty:
        names.append("Validation_OOR")
    names.append("Info")
    return names
815
+ # Minimal export for validation
816
def _export_val():
    """Build the validation-only Excel export.

    Returns
    -------
    (xlsx_bytes, filename) for the download button.
    """
    res = st.session_state.get("results", {})
    sheets = {}
    sheets["Validation"] = res["Validate"]
    sheets["Validation_Metrics"] = pd.DataFrame([res.get("m_val", {})])
    if "sv_val" in res:
        sheets["Validation_Summary"] = pd.DataFrame([res["sv_val"]])
    if isinstance(res.get("oor_tbl"), pd.DataFrame) and not res["oor_tbl"].empty:
        sheets["Validation_OOR"] = res["oor_tbl"].reset_index(drop=True)
    sheets["Info"] = pd.DataFrame([
        {"Key":"AppName","Value":APP_NAME},
        {"Key":"Target","Value":TARGET},
        {"Key":"PredColumn","Value":PRED_COL},
        {"Key":"ExportedAt","Value":datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
    ])
    # Fix: engine was hard-coded to "xlsxwriter" and raised at runtime when
    # only openpyxl is installed; fall back like the dev-step export does.
    # (Inlined here because _excel_engine is only defined in the "dev" branch
    # of this flat Streamlit script.)
    try:
        import xlsxwriter  # noqa: F401
        engine = "xlsxwriter"
    except Exception:
        engine = "openpyxl"
    bio = io.BytesIO()
    with pd.ExcelWriter(bio, engine=engine) as writer:
        for sheet_name, frame in sheets.items():
            # Excel caps sheet names at 31 characters
            frame.to_excel(writer, sheet_name=sheet_name[:31], index=False)
    bio.seek(0)
    return bio.getvalue(), f"TOC_Validation_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
836
+ data_x, fn_x = _export_val()
837
+ st.download_button("⬇️ Export Excel", data=data_x, file_name=fn_x,
838
+ mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
839
 
840
  sv = st.session_state.results["sv_val"]
841
  if sv["oor"] > 0: st.markdown('<div class="st-message-box st-warning">Some inputs fall outside **training min–max** ranges.</div>', unsafe_allow_html=True)
 
844
  df_centered_rounded(st.session_state.results["oor_tbl"])
845
 
846
  # =========================
847
+ # PREDICTION (no actual TOC)
848
  # =========================
849
  if st.session_state.app_step == "predict":
850
+ st.sidebar.header("Prediction (No Actual TOC)")
851
  up = st.sidebar.file_uploader("Upload Prediction Excel", type=["xlsx","xls"])
852
  if up is not None:
853
  book = read_book_bytes(up.getvalue())
 
859
  go_btn = st.sidebar.button("Predict", type="primary", use_container_width=True)
860
  if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
861
 
862
+ sticky_header("Prediction", "Upload a dataset with the feature columns (no **TOC**).")
863
 
864
  if go_btn and up is not None:
865
  book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
866
+ df_raw = book[name].copy()
867
+
868
+ if not ensure_required_features(df_raw, model, FEATURES):
869
+ st.stop()
870
+
871
+ df = normalize_to_abbr(df_raw)
872
+ df[PRED_COL] = model.predict(_make_X_for_model(df_raw, model, FEATURES))
873
  st.session_state.results["PredictOnly"]=df
874
 
875
  ranges = st.session_state.train_ranges; oor_pct = 0.0
876
  if ranges:
877
+ any_viol = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES if f in df.columns}).any(axis=1)
878
  oor_pct = float(any_viol.mean()*100.0)
879
  st.session_state.results["sv_pred"]={
880
  "n":len(df),
 
902
  st.plotly_chart(track_plot(df, include_actual=False),
903
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
904
 
905
+ st.divider()
906
+ # Simple export
907
def _export_pred():
    """Build the prediction-only Excel export.

    Returns
    -------
    (xlsx_bytes, filename) for the download button.

    NOTE(review): reads `sv` (the prediction summary dict) from the enclosing
    scope — confirm it is always defined before this function is called.
    """
    res = st.session_state.get("results", {})
    sheets = {"Prediction": res["PredictOnly"], "Prediction_Summary": pd.DataFrame([sv])}
    sheets["Info"] = pd.DataFrame([
        {"Key":"AppName","Value":APP_NAME},
        {"Key":"Target","Value":TARGET},
        {"Key":"PredColumn","Value":PRED_COL},
        {"Key":"ExportedAt","Value":datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
    ])
    # Fix: engine was hard-coded to "xlsxwriter" and raised at runtime when
    # only openpyxl is installed; fall back like the dev-step export does.
    try:
        import xlsxwriter  # noqa: F401
        engine = "xlsxwriter"
    except Exception:
        engine = "openpyxl"
    bio = io.BytesIO()
    with pd.ExcelWriter(bio, engine=engine) as writer:
        for sheet_name, frame in sheets.items():
            # Excel caps sheet names at 31 characters
            frame.to_excel(writer, sheet_name=sheet_name[:31], index=False)
    bio.seek(0)
    return bio.getvalue(), f"TOC_Prediction_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
922
+ data_x, fn_x = _export_pred()
923
+ st.download_button("⬇️ Export Excel", data=data_x, file_name=fn_x,
924
+ mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
925
 
926
  # =========================
927
  # Preview modal
 
941
  tabs = st.tabs(names)
942
  for t, name in zip(tabs, names):
943
  with t:
944
+ df = normalize_to_abbr(book_to_preview[name])
945
  t1, t2 = st.tabs(["Tracks", "Summary"])
946
  with t1:
947
  st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)