UCS2014 committed on
Commit
2308c33
·
verified ·
1 Parent(s): 4d43e75

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +104 -199
app.py CHANGED
@@ -1,6 +1,6 @@
1
  # app.py — ST_Min_Horizontal_Stress (σhmin)
2
- # Full Streamlit app trains the model inside the app (fixed best params or optional GridSearch).
3
- # No external model file is required. Users can still download the trained .joblib + meta.json.
4
 
5
  import io, json, os, base64, math
6
  from pathlib import Path
@@ -9,7 +9,7 @@ from datetime import datetime
9
  import streamlit as st
10
  import pandas as pd
11
  import numpy as np
12
- import joblib
13
 
14
  # Matplotlib for static previews & cross-plot
15
  import matplotlib
@@ -19,6 +19,9 @@ from matplotlib.ticker import FuncFormatter
19
 
20
  import plotly.graph_objects as go
21
  from sklearn.metrics import mean_squared_error
 
 
 
22
 
23
  # =========================
24
  # App constants / defaults
@@ -26,13 +29,26 @@ from sklearn.metrics import mean_squared_error
26
  APP_NAME = "ST_Min_Horizontal_Stress"
27
  TAGLINE = "Real-Time Minimum Horizontal Stress Prediction"
28
 
29
- FEATURES = ["Q (gpm)", "SPP (psi)", "ST (kft.lbf)", "WOB (klbf)", "ROP (ft/h)"]
30
- TARGET = "σhmin (MPa)"
31
- PRED_COL = "σhmin_Pred"
32
- ACTUAL_COL = None # If your workbook has a separate actual column, set via meta.json (actual_col)
33
- TRANSFORM = "none" # "none" | "log10" | "ln"
 
34
  UNITS = "MPa"
35
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  # Color / layout
37
  COLORS = {"pred": "#1f77b4", "actual": "#f2b702", "ref": "#5a5a5a"}
38
  CROSS_W, CROSS_H = 350, 350
@@ -40,10 +56,7 @@ TRACK_H, TRACK_W = 1000, 500
40
  FONT_SZ = 13
41
  BOLD_FONT = "Arial Black, Arial, sans-serif"
42
 
43
- STRICT_VERSION_CHECK = True
44
-
45
- # Local (optional) — only used for Excel export helper sizing
46
- MODELS_DIR = Path("models")
47
 
48
  # =========================
49
  # Page / CSS
@@ -74,7 +87,7 @@ TABLE_CENTER_CSS = [
74
  ]
75
 
76
  # =========================
77
- # Password gate
78
  # =========================
79
  def inline_logo(path="logo.png") -> str:
80
  try:
@@ -91,8 +104,8 @@ def add_password_gate() -> None:
91
  required = os.environ.get("APP_PASSWORD", "")
92
 
93
  if not required:
94
- st.warning("Set APP_PASSWORD in Secrets (or environment) and restart.")
95
- st.stop()
96
  if st.session_state.get("auth_ok", False):
97
  return
98
 
@@ -182,18 +195,6 @@ def ensure_cols(df: pd.DataFrame, cols: list[str]) -> bool:
def _nice_tick0(xmin: float, step: float = 0.1) -> float:
    """Snap ``xmin`` onto the ``step`` grid via ``step * floor(xmin / step)``.

    For a positive ``step`` this is the largest multiple of ``step`` that does
    not exceed ``xmin``.

    Args:
        xmin: Value to snap. Non-finite values (NaN, +/-inf) are returned
            unchanged.
        step: Grid spacing. When it is zero or non-finite there is no usable
            grid, so ``xmin`` is returned unchanged instead of raising.

    Returns:
        The snapped value, or ``xmin`` itself when snapping is not possible.
    """
    if not np.isfinite(xmin):
        return xmin
    if step == 0 or not np.isfinite(step):
        # Guard: the division below would raise ZeroDivisionError for a zero
        # step, and math.floor() rejects the NaN produced by a NaN step.
        return xmin
    return step * math.floor(xmin / step)
184
 
185
- # ---------- Aliasing / Normalization ----------
186
- def _normalize_columns(df: pd.DataFrame, canonical_features: list[str], target_name: str, aliases: dict|None) -> pd.DataFrame:
187
- out = df.copy()
188
- out.columns = [str(c).strip().replace(" ,", ",").replace(", ", ", ").replace(" ", " ") for c in out.columns]
189
- if not aliases: return out
190
- mapping = {}
191
- for can, alist in aliases.items():
192
- for a in alist:
193
- if a in out.columns and can != a:
194
- mapping[a] = can
195
- return out.rename(columns=mapping)
196
-
197
  # ---------- Transform helpers ----------
198
  def _inv_transform(x: np.ndarray, transform: str) -> np.ndarray:
199
  t = (transform or "none").lower()
@@ -468,6 +469,23 @@ def preview_tracks(df: pd.DataFrame, cols: list[str]):
468
  fig.tight_layout()
469
  return fig
470
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471
  # =========================
472
  # Session state
473
  # =========================
@@ -479,6 +497,7 @@ st.session_state.setdefault("dev_file_bytes",b"")
479
  st.session_state.setdefault("dev_file_loaded",False)
480
  st.session_state.setdefault("dev_preview",False)
481
  st.session_state.setdefault("show_preview_modal", False)
 
482
 
483
  # =========================
484
  # Sidebar branding
@@ -508,144 +527,6 @@ def sticky_header(title, message):
508
  unsafe_allow_html=True
509
  )
510
 
511
- # ===============================================================
512
- # TRAIN THE MODEL IN-APP (no external pickle needed)
513
- # ===============================================================
514
- from sklearn.ensemble import RandomForestRegressor
515
- from sklearn.model_selection import train_test_split
516
-
# Fixed hyperparameters for the in-app RandomForestRegressor.
# Edit these values to change the model that "Train now" builds; the same
# values also seed the default selections of the optional grid search.
BEST_PARAMS = {
    "n_estimators": 300,
    "max_depth": 22,
    "max_features": "sqrt",  # or "log2" / float in (0,1]
    "min_samples_split": 2,
    "min_samples_leaf": 1,
    "bootstrap": True,
    "random_state": 42,
    "n_jobs": -1,
}
528
-
529
- st.sidebar.markdown("### Model source")
530
- source = st.sidebar.radio(
531
- "Choose how to get the model",
532
- ["Train now (fixed best params)", "Train with Grid Search (optional)"],
533
- help="Avoids uploading big pickles. Deterministic best-params training is recommended."
534
- )
535
-
536
- st.sidebar.markdown("### Training data")
537
- file_train = st.sidebar.file_uploader("Upload Excel for training (has Train sheet or any sheet)", type=["xlsx","xls"])
538
-
def _train_model_fixed(X: pd.DataFrame, y: pd.Series, params: dict) -> RandomForestRegressor:
    """Fit a fresh random forest on ``(X, y)`` with the given hyperparameters.

    Args:
        X: Feature matrix passed straight to ``fit``.
        y: Target values passed straight to ``fit``.
        params: Keyword arguments forwarded to ``RandomForestRegressor``.

    Returns:
        The fitted ``RandomForestRegressor`` instance.
    """
    rf = RandomForestRegressor(**params)
    rf.fit(X, y)
    return rf
543
-
def _download_buttons(model_obj, meta_dict):
    """Render two Streamlit download buttons: the model and its metadata.

    Args:
        model_obj: Object serialized with ``joblib.dump`` into an in-memory
            buffer and offered as ``minstress_model.joblib``.
        meta_dict: JSON-serializable dict offered as ``minstress_meta.json``
            (indented, UTF-8 encoded).
    """
    # Model: dump into memory so nothing is written to disk.
    buf_model = io.BytesIO()
    joblib.dump(model_obj, buf_model)
    # getvalue() returns the whole buffer regardless of the current position,
    # so no seek(0) is needed before reading it.
    st.download_button("⬇️ Download trained model (.joblib)", buf_model.getvalue(), "minstress_model.joblib")

    # Metadata
    meta_bytes = json.dumps(meta_dict, indent=2).encode("utf-8")
    st.download_button("⬇️ Download meta (.json)", meta_bytes, "minstress_meta.json")
554
-
555
- if not file_train:
556
- st.info("Upload a training Excel file in the sidebar to build the model.")
557
- st.stop()
558
-
559
- # Load train data
560
- book_train = read_book_bytes(file_train.getvalue())
561
- sheet_train = next((s for s in book_train if s.lower() in ("train", "training")), list(book_train)[0])
562
- df_tr0 = _normalize_columns(book_train[sheet_train].copy(), FEATURES, TARGET, None)
563
-
564
- # Build X/y
565
- act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df_tr0.columns) else TARGET
566
- if not ensure_cols(df_tr0, FEATURES + [act_col]):
567
- st.stop()
568
-
569
- X_all = _make_X(df_tr0, FEATURES).copy()
570
- y_all = pd.to_numeric(df_tr0[act_col], errors="coerce")
571
-
572
- # Split for reporting
573
- tsz = st.sidebar.slider("Validation split for reporting", 0.10, 0.40, 0.20, 0.05)
574
- seed = st.sidebar.number_input("Random seed", 0, 1_000_000, BEST_PARAMS.get("random_state", 42), step=1)
575
- Xtr, Xva, ytr, yva = train_test_split(X_all, y_all, test_size=tsz, random_state=seed)
576
-
577
- if source == "Train with Grid Search (optional)":
578
- from sklearn.model_selection import GridSearchCV
579
- st.sidebar.markdown("### Grid Search")
580
- n_list = st.sidebar.multiselect("n_estimators", [100, 200, 300, 400], default=[BEST_PARAMS["n_estimators"]])
581
- depth_list= st.sidebar.multiselect("max_depth", [12, 16, 20, 22, 26], default=[BEST_PARAMS["max_depth"]])
582
- maxf_list = st.sidebar.multiselect("max_features", ["sqrt", "log2"], default=[BEST_PARAMS["max_features"]])
583
-
584
- param_grid = {
585
- "n_estimators": n_list or [BEST_PARAMS["n_estimators"]],
586
- "max_depth": depth_list or [BEST_PARAMS["max_depth"]],
587
- "max_features": maxf_list or [BEST_PARAMS["max_features"]],
588
- "min_samples_split": [BEST_PARAMS["min_samples_split"]],
589
- "min_samples_leaf": [BEST_PARAMS["min_samples_leaf"]],
590
- "bootstrap": [BEST_PARAMS["bootstrap"]],
591
- "random_state": [seed]
592
- }
593
- base = RandomForestRegressor(n_jobs=-1)
594
- with st.spinner("Running GridSearchCV..."):
595
- gs = GridSearchCV(base, param_grid=param_grid, cv=3, n_jobs=-1, refit=True)
596
- gs.fit(Xtr, ytr)
597
- best = gs.best_estimator_
598
- st.success(f"GridSearch done. Best params: {gs.best_params_}")
599
-
600
- # Validation report
601
- pred_tr = best.predict(Xtr); pred_va = best.predict(Xva)
602
- m_train = {"R": pearson_r(ytr, pred_tr), "RMSE": rmse(ytr, pred_tr), "MAPE%": mape(ytr, pred_tr)}
603
- m_valid = {"R": pearson_r(yva, pred_va), "RMSE": rmse(yva, pred_va), "MAPE%": mape(yva, pred_va)}
604
- st.write("**Training split metrics**:", m_train)
605
- st.write("**Validation split metrics**:", m_valid)
606
-
607
- # Final fit on all data with best params
608
- model = RandomForestRegressor(**{**gs.best_params_, "n_jobs": -1, "random_state": seed})
609
- model.fit(X_all, y_all)
610
- else:
611
- # Deterministic fixed-params training (recommended)
612
- params = {**BEST_PARAMS, "random_state": seed}
613
- with st.spinner("Training fixed-params model..."):
614
- tmp_model = _train_model_fixed(Xtr, ytr, params)
615
- pred_tr = tmp_model.predict(Xtr); pred_va = tmp_model.predict(Xva)
616
- m_train = {"R": pearson_r(ytr, pred_tr), "RMSE": rmse(ytr, pred_tr), "MAPE%": mape(ytr, pred_tr)}
617
- m_valid = {"R": pearson_r(yva, pred_va), "RMSE": rmse(yva, pred_va), "MAPE%": mape(yva, pred_va)}
618
- st.write("**Training split metrics**:", m_train)
619
- st.write("**Validation split metrics**:", m_valid)
620
-
621
- model = _train_model_fixed(X_all, y_all, params)
622
-
623
- # Create meta + training ranges for OOR checks later
624
- meta = {
625
- "features": FEATURES,
626
- "target": TARGET,
627
- "pred_col": PRED_COL,
628
- "actual_col": ACTUAL_COL,
629
- "transform": TRANSFORM,
630
- "units": UNITS,
631
- "versions": {
632
- "numpy": np.__version__,
633
- "scikit_learn": __import__("sklearn").__version__
634
- },
635
- "training": {
636
- "n_rows": int(len(X_all)),
637
- "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
638
- "used_grid_search": (source == "Train with Grid Search (optional)")
639
- }
640
- }
641
-
642
- tr_min = X_all.min().to_dict()
643
- tr_max = X_all.max().to_dict()
644
- st.session_state.train_ranges = {f: (float(tr_min[f]), float(tr_max[f])) for f in FEATURES}
645
-
646
- st.success("Model ready ✓ — proceed to **Case Building**, **Validation**, or **Prediction**.")
647
- _download_buttons(model, meta)
648
-
649
  # =========================
650
  # INTRO
651
  # =========================
@@ -654,15 +535,15 @@ if st.session_state.app_step == "intro":
654
  st.markdown(f"This software is developed by *Smart Thinking AI-Solutions Team* to estimate **Minimum Horizontal Stress** ({UNITS}) from drilling/offset data.")
655
  st.subheader("How It Works")
656
  st.markdown(
657
- "1) **Upload your data to build the case and preview the model performance.** \n"
658
- "2) Click **Run Model** to compute metrics and plots. \n"
659
- "3) **Proceed to Validation** (with actual) or **Proceed to Prediction** (no actual)."
660
  )
661
  if st.button("Start Showcase", type="primary"):
662
  st.session_state.app_step = "dev"; st.rerun()
663
 
664
  # =========================
665
- # CASE BUILDING (Train/Test) — optional evaluation stage
666
  # =========================
667
  def _find_sheet(book, names):
668
  low2orig = {k.lower(): k for k in book.keys()}
@@ -678,6 +559,8 @@ if st.session_state.app_step == "dev":
678
  st.session_state.dev_file_name = up.name
679
  st.session_state.dev_file_loaded = True
680
  st.session_state.dev_preview = False
 
 
681
  if st.session_state.dev_file_loaded:
682
  tmp = read_book_bytes(st.session_state.dev_file_bytes)
683
  if tmp:
@@ -707,33 +590,50 @@ if st.session_state.app_step == "dev":
707
  st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
708
  st.stop()
709
 
710
- tr0 = _normalize_columns(book[sh_train].copy(), FEATURES, TARGET, None)
711
- te0 = _normalize_columns(book[sh_test].copy(), FEATURES, TARGET, None)
712
 
713
- actual_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in tr0.columns) else TARGET
714
- if not (ensure_cols(tr0, FEATURES+[actual_col]) and ensure_cols(te0, FEATURES+[actual_col])):
715
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True)
716
  st.stop()
717
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
718
  tr = tr0.copy(); te = te0.copy()
719
- tr[PRED_COL] = _inv_transform(model.predict(_make_X(tr0, FEATURES)), TRANSFORM)
720
- te[PRED_COL] = _inv_transform(model.predict(_make_X(te0, FEATURES)), TRANSFORM)
721
 
722
  st.session_state.results["Train"] = tr
723
  st.session_state.results["Test"] = te
724
  st.session_state.results["m_train"] = {
725
- "R": pearson_r(tr[actual_col], tr[PRED_COL]),
726
- "RMSE": rmse(tr[actual_col], tr[PRED_COL]),
727
- "MAPE%": mape(tr[actual_col], tr[PRED_COL]),
728
  }
729
  st.session_state.results["m_test"] = {
730
- "R": pearson_r(te[actual_col], te[PRED_COL]),
731
- "RMSE": rmse(te[actual_col], te[PRED_COL]),
732
- "MAPE%": mape(te[actual_col], te[PRED_COL]),
733
  }
734
 
735
- tr_min2 = tr[FEATURES].min().to_dict(); tr_max2 = tr[FEATURES].max().to_dict()
736
- st.session_state.train_ranges = {f:(float(tr_min2[f]), float(tr_max2[f])) for f in FEATURES}
737
  st.markdown('<div class="st-message-box st-success">Case has been built and results are displayed below.</div>', unsafe_allow_html=True)
738
 
739
  def _dev_block(df, m):
@@ -753,8 +653,7 @@ if st.session_state.app_step == "dev":
753
  st.plotly_chart(track_plot(df, include_actual=True),
754
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
755
  with col_cross:
756
- act_col2 = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df.columns) else TARGET
757
- st.pyplot(cross_plot_static(df[act_col2], df[PRED_COL]), use_container_width=False)
758
 
759
  if "Train" in st.session_state.results or "Test" in st.session_state.results:
760
  tab1, tab2 = st.tabs(["Training", "Testing"])
@@ -781,20 +680,23 @@ if st.session_state.app_step == "validate":
781
  if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
782
  if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
783
 
784
- sticky_header("Validate the Model", "Upload a dataset with the same **features** and **actual stress** to evaluate performance.")
785
 
786
  if go_btn and up is not None:
 
 
 
 
787
  book = read_book_bytes(up.getvalue())
788
  names = list(book.keys())
789
  name = next((s for s in names if s.lower() in ("validation","validate","validation2","val","val2")), names[0])
790
- df0 = _normalize_columns(book[name].copy(), FEATURES, TARGET, None)
791
 
792
- act_col2 = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df0.columns) else TARGET
793
- if not ensure_cols(df0, FEATURES+[act_col2]):
794
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
795
 
796
  df = df0.copy()
797
- df[PRED_COL] = _inv_transform(model.predict(_make_X(df0, FEATURES)), TRANSFORM)
798
  st.session_state.results["Validate"] = df
799
 
800
  ranges = st.session_state.train_ranges; oor_pct = 0.0; tbl=None
@@ -810,9 +712,9 @@ if st.session_state.app_step == "validate":
810
  )
811
 
812
  st.session_state.results["m_val"] = {
813
- "R": pearson_r(df[act_col2], df[PRED_COL]),
814
- "RMSE": rmse(df[act_col2], df[PRED_COL]),
815
- "MAPE%": mape(df[act_col2], df[PRED_COL]),
816
  }
817
  st.session_state.results["sv_val"] = {"n":len(df), "pred_min":float(df[PRED_COL].min()), "pred_max":float(df[PRED_COL].max()), "oor":oor_pct}
818
  st.session_state.results["oor_tbl"] = tbl
@@ -834,8 +736,7 @@ if st.session_state.app_step == "validate":
834
  st.plotly_chart(track_plot(st.session_state.results["Validate"], include_actual=True),
835
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
836
  with col_cross:
837
- act_col3 = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in st.session_state.results["Validate"].columns) else TARGET
838
- st.pyplot(cross_plot_static(st.session_state.results["Validate"][act_col3],
839
  st.session_state.results["Validate"][PRED_COL]),
840
  use_container_width=False)
841
 
@@ -863,15 +764,19 @@ if st.session_state.app_step == "predict":
863
  go_btn = st.sidebar.button("Predict", type="primary", use_container_width=True)
864
  if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
865
 
866
- sticky_header("Prediction", "Upload a dataset with the feature columns (no actual column).")
867
 
868
  if go_btn and up is not None:
 
 
 
 
869
  book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
870
- df0 = _normalize_columns(book[name].copy(), FEATURES, TARGET, None)
871
  if not ensure_cols(df0, FEATURES):
872
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
873
  df = df0.copy()
874
- df[PRED_COL] = _inv_transform(model.predict(_make_X(df0, FEATURES)), TRANSFORM)
875
  st.session_state.results["PredictOnly"] = df
876
 
877
  ranges = st.session_state.train_ranges; oor_pct = 0.0
@@ -922,7 +827,7 @@ if st.session_state.show_preview_modal:
922
  tabs = st.tabs(names)
923
  for t, name in zip(tabs, names):
924
  with t:
925
- df = _normalize_columns(book_to_preview[name], FEATURES, TARGET, None)
926
  t1, t2 = st.tabs(["Tracks", "Summary"])
927
  with t1:
928
  st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)
 
1
  # app.py — ST_Min_Horizontal_Stress (σhmin)
2
+ # Self-contained Streamlit app that TRAINS a fixed, optimized ML pipeline in-app.
3
+ # No external model files, no model-source UI. Upload Excel and go.
4
 
5
  import io, json, os, base64, math
6
  from pathlib import Path
 
9
  import streamlit as st
10
  import pandas as pd
11
  import numpy as np
12
+ import joblib # only used to cache pipeline inside session
13
 
14
  # Matplotlib for static previews & cross-plot
15
  import matplotlib
 
19
 
20
  import plotly.graph_objects as go
21
  from sklearn.metrics import mean_squared_error
22
+ from sklearn.ensemble import RandomForestRegressor
23
+ from sklearn.pipeline import Pipeline
24
+ from sklearn.impute import SimpleImputer
25
 
26
  # =========================
27
  # App constants / defaults
 
29
  APP_NAME = "ST_Min_Horizontal_Stress"
30
  TAGLINE = "Real-Time Minimum Horizontal Stress Prediction"
31
 
32
+ # -------- Option B (canonical names = what your files already have) --------
33
+ FEATURES = ["Q (gpm)", "SPP (psi)", "T (kft.lbf)", "WOB (klbf)", "ROP (ft/h)"]
34
+ TARGET = "MINStress_Actual"
35
+ PRED_COL = "MINStress_Pred"
36
+ ACTUAL_COL = TARGET
37
+ TRANSFORM = "none" # "none" | "log10" | "ln"
38
  UNITS = "MPa"
39
 
# ---- "Best" (fixed) model setup baked into the code (no grid search UI) ----
# These keyword arguments are unpacked into RandomForestRegressor by
# build_pipeline(); change them here to change the trained model.
BEST_PARAMS = dict(
    n_estimators=400,
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features=0.6,
    bootstrap=True,
    random_state=42,
    n_jobs=-1,
)
51
+
52
  # Color / layout
53
  COLORS = {"pred": "#1f77b4", "actual": "#f2b702", "ref": "#5a5a5a"}
54
  CROSS_W, CROSS_H = 350, 350
 
56
  FONT_SZ = 13
57
  BOLD_FONT = "Arial Black, Arial, sans-serif"
58
 
59
+ STRICT_VERSION_CHECK = False # we now train in this env, so no version pin warnings
 
 
 
60
 
61
  # =========================
62
  # Page / CSS
 
87
  ]
88
 
89
  # =========================
90
+ # Password gate (optional)
91
  # =========================
92
  def inline_logo(path="logo.png") -> str:
93
  try:
 
104
  required = os.environ.get("APP_PASSWORD", "")
105
 
106
  if not required:
107
+ return # no password configured
108
+
109
  if st.session_state.get("auth_ok", False):
110
  return
111
 
 
def _nice_tick0(xmin: float, step: float = 0.1) -> float:
    """Snap ``xmin`` onto the ``step`` grid via ``step * floor(xmin / step)``.

    For a positive ``step`` this is the largest multiple of ``step`` that does
    not exceed ``xmin``.

    Args:
        xmin: Value to snap. Non-finite values (NaN, +/-inf) are returned
            unchanged.
        step: Grid spacing. When it is zero or non-finite there is no usable
            grid, so ``xmin`` is returned unchanged instead of raising.

    Returns:
        The snapped value, or ``xmin`` itself when snapping is not possible.
    """
    if not np.isfinite(xmin):
        return xmin
    if step == 0 or not np.isfinite(step):
        # Guard: the division below would raise ZeroDivisionError for a zero
        # step, and math.floor() rejects the NaN produced by a NaN step.
        return xmin
    return step * math.floor(xmin / step)
197
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  # ---------- Transform helpers ----------
199
  def _inv_transform(x: np.ndarray, transform: str) -> np.ndarray:
200
  t = (transform or "none").lower()
 
469
  fig.tight_layout()
470
  return fig
471
 
472
+ # =========================
473
+ # Fixed training pipeline
474
+ # =========================
def build_pipeline() -> Pipeline:
    """Build the unfitted two-step training pipeline.

    Steps, in order:
      1. ``"imputer"``: ``SimpleImputer(strategy="median")`` fills missing
         feature values with the per-column median.
      2. ``"model"``: ``RandomForestRegressor`` configured from the
         module-level ``BEST_PARAMS``.

    No scaling step is included; tree-based models do not need one.

    Returns:
        A new, unfitted ``Pipeline``; the caller is responsible for ``fit``.
    """
    return Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("model", RandomForestRegressor(**BEST_PARAMS)),
    ])
488
+
489
  # =========================
490
  # Session state
491
  # =========================
 
497
  st.session_state.setdefault("dev_file_loaded",False)
498
  st.session_state.setdefault("dev_preview",False)
499
  st.session_state.setdefault("show_preview_modal", False)
500
+ st.session_state.setdefault("fitted_model", None) # cache trained pipeline
501
 
502
  # =========================
503
  # Sidebar branding
 
527
  unsafe_allow_html=True
528
  )
529
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
530
  # =========================
531
  # INTRO
532
  # =========================
 
535
  st.markdown(f"This software is developed by *Smart Thinking AI-Solutions Team* to estimate **Minimum Horizontal Stress** ({UNITS}) from drilling/offset data.")
536
  st.subheader("How It Works")
537
  st.markdown(
538
+ "1) **Upload your Train/Test file** and click **Run Model** to fit the baked-in pipeline. \n"
539
+ "2) **Validate** on held-out wells (with actual). \n"
540
+ "3) **Predict** on wells without actual."
541
  )
542
  if st.button("Start Showcase", type="primary"):
543
  st.session_state.app_step = "dev"; st.rerun()
544
 
545
  # =========================
546
+ # CASE BUILDING (Train/Test)
547
  # =========================
548
  def _find_sheet(book, names):
549
  low2orig = {k.lower(): k for k in book.keys()}
 
559
  st.session_state.dev_file_name = up.name
560
  st.session_state.dev_file_loaded = True
561
  st.session_state.dev_preview = False
562
+ st.session_state.fitted_model = None # reset
563
+
564
  if st.session_state.dev_file_loaded:
565
  tmp = read_book_bytes(st.session_state.dev_file_bytes)
566
  if tmp:
 
590
  st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
591
  st.stop()
592
 
593
+ tr0 = book[sh_train].copy()
594
+ te0 = book[sh_test].copy()
595
 
596
+ # Ensure columns exist
597
+ if not (ensure_cols(tr0, FEATURES+[TARGET]) and ensure_cols(te0, FEATURES+[TARGET])):
598
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True)
599
  st.stop()
600
 
601
+ # Prepare X,y
602
+ X_tr = _make_X(tr0, FEATURES)
603
+ y_tr = pd.to_numeric(tr0[TARGET], errors="coerce")
604
+ X_te = _make_X(te0, FEATURES)
605
+ y_te = pd.to_numeric(te0[TARGET], errors="coerce")
606
+
607
+ # Drop rows with NA in y
608
+ mask_tr = np.isfinite(y_tr)
609
+ X_tr, y_tr = X_tr.loc[mask_tr], y_tr.loc[mask_tr]
610
+ mask_te = np.isfinite(y_te)
611
+ X_te, y_te = X_te.loc[mask_te], y_te.loc[mask_te]
612
+
613
+ pipe = build_pipeline()
614
+ pipe.fit(X_tr, y_tr)
615
+ st.session_state.fitted_model = pipe # cache
616
+
617
+ # Predictions
618
  tr = tr0.copy(); te = te0.copy()
619
+ tr[PRED_COL] = _inv_transform(pipe.predict(_make_X(tr0, FEATURES)), TRANSFORM)
620
+ te[PRED_COL] = _inv_transform(pipe.predict(_make_X(te0, FEATURES)), TRANSFORM)
621
 
622
  st.session_state.results["Train"] = tr
623
  st.session_state.results["Test"] = te
624
  st.session_state.results["m_train"] = {
625
+ "R": pearson_r(tr[TARGET], tr[PRED_COL]),
626
+ "RMSE": rmse(tr[TARGET], tr[PRED_COL]),
627
+ "MAPE%": mape(tr[TARGET], tr[PRED_COL]),
628
  }
629
  st.session_state.results["m_test"] = {
630
+ "R": pearson_r(te[TARGET], te[PRED_COL]),
631
+ "RMSE": rmse(te[TARGET], te[PRED_COL]),
632
+ "MAPE%": mape(te[TARGET], te[PRED_COL]),
633
  }
634
 
635
+ tr_min = tr[FEATURES].min().to_dict(); tr_max = tr[FEATURES].max().to_dict()
636
+ st.session_state.train_ranges = {f:(float(tr_min[f]), float(tr_max[f])) for f in FEATURES}
637
  st.markdown('<div class="st-message-box st-success">Case has been built and results are displayed below.</div>', unsafe_allow_html=True)
638
 
639
  def _dev_block(df, m):
 
653
  st.plotly_chart(track_plot(df, include_actual=True),
654
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
655
  with col_cross:
656
+ st.pyplot(cross_plot_static(df[TARGET], df[PRED_COL]), use_container_width=False)
 
657
 
658
  if "Train" in st.session_state.results or "Test" in st.session_state.results:
659
  tab1, tab2 = st.tabs(["Training", "Testing"])
 
680
  if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
681
  if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
682
 
683
+ sticky_header("Validate the Model", "Upload a dataset with the same **features** and **MINStress_Actual** to evaluate performance.")
684
 
685
  if go_btn and up is not None:
686
+ if st.session_state.fitted_model is None:
687
+ st.error("Please train the model first in Case Building.")
688
+ st.stop()
689
+
690
  book = read_book_bytes(up.getvalue())
691
  names = list(book.keys())
692
  name = next((s for s in names if s.lower() in ("validation","validate","validation2","val","val2")), names[0])
693
+ df0 = book[name].copy()
694
 
695
+ if not ensure_cols(df0, FEATURES+[TARGET]):
 
696
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
697
 
698
  df = df0.copy()
699
+ df[PRED_COL] = _inv_transform(st.session_state.fitted_model.predict(_make_X(df0, FEATURES)), TRANSFORM)
700
  st.session_state.results["Validate"] = df
701
 
702
  ranges = st.session_state.train_ranges; oor_pct = 0.0; tbl=None
 
712
  )
713
 
714
  st.session_state.results["m_val"] = {
715
+ "R": pearson_r(df[TARGET], df[PRED_COL]),
716
+ "RMSE": rmse(df[TARGET], df[PRED_COL]),
717
+ "MAPE%": mape(df[TARGET], df[PRED_COL]),
718
  }
719
  st.session_state.results["sv_val"] = {"n":len(df), "pred_min":float(df[PRED_COL].min()), "pred_max":float(df[PRED_COL].max()), "oor":oor_pct}
720
  st.session_state.results["oor_tbl"] = tbl
 
736
  st.plotly_chart(track_plot(st.session_state.results["Validate"], include_actual=True),
737
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
738
  with col_cross:
739
+ st.pyplot(cross_plot_static(st.session_state.results["Validate"][TARGET],
 
740
  st.session_state.results["Validate"][PRED_COL]),
741
  use_container_width=False)
742
 
 
764
  go_btn = st.sidebar.button("Predict", type="primary", use_container_width=True)
765
  if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
766
 
767
+ sticky_header("Prediction", "Upload a dataset with the 5 feature columns (no actual column).")
768
 
769
  if go_btn and up is not None:
770
+ if st.session_state.fitted_model is None:
771
+ st.error("Please train the model first in Case Building.")
772
+ st.stop()
773
+
774
  book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
775
+ df0 = book[name].copy()
776
  if not ensure_cols(df0, FEATURES):
777
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
778
  df = df0.copy()
779
+ df[PRED_COL] = _inv_transform(st.session_state.fitted_model.predict(_make_X(df0, FEATURES)), TRANSFORM)
780
  st.session_state.results["PredictOnly"] = df
781
 
782
  ranges = st.session_state.train_ranges; oor_pct = 0.0
 
827
  tabs = st.tabs(names)
828
  for t, name in zip(tabs, names):
829
  with t:
830
+ df = book_to_preview[name]
831
  t1, t2 = st.tabs(["Tracks", "Summary"])
832
  with t1:
833
  st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)