Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# app.py — ST_Min_Horizontal_Stress (σhmin)
|
| 2 |
-
# Streamlit app
|
| 3 |
-
#
|
| 4 |
|
| 5 |
import io, json, os, base64, math
|
| 6 |
from pathlib import Path
|
|
@@ -29,7 +29,7 @@ TAGLINE = "Real-Time Minimum Horizontal Stress Prediction"
|
|
| 29 |
FEATURES = ["Q (gpm)", "SPP (psi)", "ST (kft.lbf)", "WOB (klbf)", "ROP (ft/h)"]
|
| 30 |
TARGET = "σhmin (MPa)"
|
| 31 |
PRED_COL = "σhmin_Pred"
|
| 32 |
-
ACTUAL_COL = None # If your workbook has a separate actual column, set
|
| 33 |
TRANSFORM = "none" # "none" | "log10" | "ln"
|
| 34 |
UNITS = "MPa"
|
| 35 |
|
|
@@ -42,6 +42,9 @@ BOLD_FONT = "Arial Black, Arial, sans-serif"
|
|
| 42 |
|
| 43 |
STRICT_VERSION_CHECK = True
|
| 44 |
|
|
|
|
|
|
|
|
|
|
| 45 |
# =========================
|
| 46 |
# Page / CSS
|
| 47 |
# =========================
|
|
@@ -71,7 +74,7 @@ TABLE_CENTER_CSS = [
|
|
| 71 |
]
|
| 72 |
|
| 73 |
# =========================
|
| 74 |
-
# Password gate
|
| 75 |
# =========================
|
| 76 |
def inline_logo(path="logo.png") -> str:
|
| 77 |
try:
|
|
@@ -88,8 +91,8 @@ def add_password_gate() -> None:
|
|
| 88 |
required = os.environ.get("APP_PASSWORD", "")
|
| 89 |
|
| 90 |
if not required:
|
| 91 |
-
|
| 92 |
-
|
| 93 |
if st.session_state.get("auth_ok", False):
|
| 94 |
return
|
| 95 |
|
|
@@ -205,6 +208,266 @@ def _make_X(df: pd.DataFrame, features: list[str]) -> pd.DataFrame:
|
|
| 205 |
X[c] = pd.to_numeric(X[c], errors="coerce")
|
| 206 |
return X
|
| 207 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
# =========================
|
| 209 |
# Session state
|
| 210 |
# =========================
|
|
@@ -216,12 +479,9 @@ st.session_state.setdefault("dev_file_bytes",b"")
|
|
| 216 |
st.session_state.setdefault("dev_file_loaded",False)
|
| 217 |
st.session_state.setdefault("dev_preview",False)
|
| 218 |
st.session_state.setdefault("show_preview_modal", False)
|
| 219 |
-
st.session_state.setdefault("model_loaded", False)
|
| 220 |
-
st.session_state.setdefault("model_obj", None)
|
| 221 |
-
st.session_state.setdefault("meta_dict", {})
|
| 222 |
|
| 223 |
# =========================
|
| 224 |
-
# Sidebar
|
| 225 |
# =========================
|
| 226 |
st.sidebar.markdown(f"""
|
| 227 |
<div class="centered-container">
|
|
@@ -231,68 +491,6 @@ st.sidebar.markdown(f"""
|
|
| 231 |
</div>
|
| 232 |
""", unsafe_allow_html=True)
|
| 233 |
|
| 234 |
-
with st.sidebar.expander("① Load model (upload)", expanded=True):
|
| 235 |
-
up_model = st.file_uploader("Model file (.joblib)", type=["joblib","pkl"], key="mdl_up")
|
| 236 |
-
up_meta = st.file_uploader("Meta file (.json)", type=["json"], key="meta_up")
|
| 237 |
-
load_btn = st.button("Load model", type="primary")
|
| 238 |
-
|
| 239 |
-
if load_btn:
|
| 240 |
-
if not up_model:
|
| 241 |
-
st.error("Please upload the model .joblib file.")
|
| 242 |
-
st.stop()
|
| 243 |
-
try:
|
| 244 |
-
st.session_state.model_obj = joblib.load(io.BytesIO(up_model.getvalue()))
|
| 245 |
-
st.session_state.model_loaded = True
|
| 246 |
-
except Exception as e:
|
| 247 |
-
st.error(f"Failed to load model: {e}")
|
| 248 |
-
st.stop()
|
| 249 |
-
|
| 250 |
-
if up_meta:
|
| 251 |
-
try:
|
| 252 |
-
st.session_state.meta_dict = json.loads(up_meta.getvalue().decode("utf-8"))
|
| 253 |
-
except Exception as e:
|
| 254 |
-
st.warning(f"Could not parse meta.json: {e}")
|
| 255 |
-
st.session_state.meta_dict = {}
|
| 256 |
-
else:
|
| 257 |
-
st.warning("No meta.json uploaded — using app defaults.")
|
| 258 |
-
st.session_state.meta_dict = {}
|
| 259 |
-
|
| 260 |
-
st.success("Model loaded in memory ✓")
|
| 261 |
-
|
| 262 |
-
# Apply meta (if provided)
|
| 263 |
-
meta = st.session_state.meta_dict
|
| 264 |
-
if meta:
|
| 265 |
-
FEATURES = meta.get("features", FEATURES)
|
| 266 |
-
TARGET = meta.get("target", TARGET)
|
| 267 |
-
PRED_COL = meta.get("pred_col", PRED_COL)
|
| 268 |
-
ACTUAL_COL = meta.get("actual_col", ACTUAL_COL)
|
| 269 |
-
TRANSFORM = meta.get("transform", TRANSFORM)
|
| 270 |
-
UNITS = meta.get("units", UNITS)
|
| 271 |
-
ALIASES = meta.get("feature_aliases")
|
| 272 |
-
if STRICT_VERSION_CHECK and meta.get("versions"):
|
| 273 |
-
import numpy as _np, sklearn as _skl
|
| 274 |
-
mv = meta["versions"]; msg=[]
|
| 275 |
-
if mv.get("numpy") and mv["numpy"] != _np.__version__:
|
| 276 |
-
msg.append(f"NumPy {mv['numpy']} expected, running {_np.__version__}")
|
| 277 |
-
if mv.get("scikit_learn") and mv["scikit_learn"] != _skl.__version__:
|
| 278 |
-
msg.append(f"scikit-learn {mv['scikit_learn']} expected, running {_skl.__version__}")
|
| 279 |
-
if msg:
|
| 280 |
-
st.warning("Environment mismatch: " + " | ".join(msg))
|
| 281 |
-
else:
|
| 282 |
-
ALIASES = None
|
| 283 |
-
|
| 284 |
-
# Guard: require model first
|
| 285 |
-
if not st.session_state.model_loaded:
|
| 286 |
-
st.header("Welcome!")
|
| 287 |
-
st.info("Upload your **model** (.joblib) and optional **meta.json** in the left sidebar, then click **Load model**.")
|
| 288 |
-
st.stop()
|
| 289 |
-
|
| 290 |
-
# Keep a short alias
|
| 291 |
-
model = st.session_state.model_obj
|
| 292 |
-
|
| 293 |
-
# =========================
|
| 294 |
-
# Sticky header helper
|
| 295 |
-
# =========================
|
| 296 |
def sticky_header(title, message):
|
| 297 |
st.markdown(
|
| 298 |
f"""
|
|
@@ -310,20 +508,161 @@ def sticky_header(title, message):
|
|
| 310 |
unsafe_allow_html=True
|
| 311 |
)
|
| 312 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
# =========================
|
| 314 |
# INTRO
|
| 315 |
# =========================
|
| 316 |
if st.session_state.app_step == "intro":
|
| 317 |
-
st.header("
|
|
|
|
|
|
|
| 318 |
st.markdown(
|
| 319 |
-
|
| 320 |
-
"
|
|
|
|
| 321 |
)
|
| 322 |
if st.button("Start Showcase", type="primary"):
|
| 323 |
st.session_state.app_step = "dev"; st.rerun()
|
| 324 |
|
| 325 |
# =========================
|
| 326 |
-
# CASE BUILDING (Train/Test)
|
| 327 |
# =========================
|
| 328 |
def _find_sheet(book, names):
|
| 329 |
low2orig = {k.lower(): k for k in book.keys()}
|
|
@@ -332,8 +671,8 @@ def _find_sheet(book, names):
|
|
| 332 |
return None
|
| 333 |
|
| 334 |
if st.session_state.app_step == "dev":
|
| 335 |
-
st.sidebar.header("
|
| 336 |
-
up = st.sidebar.file_uploader("Upload Train/Test
|
| 337 |
if up is not None:
|
| 338 |
st.session_state.dev_file_bytes = up.getvalue()
|
| 339 |
st.session_state.dev_file_name = up.name
|
|
@@ -368,8 +707,8 @@ if st.session_state.app_step == "dev":
|
|
| 368 |
st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
|
| 369 |
st.stop()
|
| 370 |
|
| 371 |
-
tr0 = _normalize_columns(book[sh_train].copy(), FEATURES, TARGET,
|
| 372 |
-
te0 = _normalize_columns(book[sh_test].copy(), FEATURES, TARGET,
|
| 373 |
|
| 374 |
actual_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in tr0.columns) else TARGET
|
| 375 |
if not (ensure_cols(tr0, FEATURES+[actual_col]) and ensure_cols(te0, FEATURES+[actual_col])):
|
|
@@ -393,8 +732,8 @@ if st.session_state.app_step == "dev":
|
|
| 393 |
"MAPE%": mape(te[actual_col], te[PRED_COL]),
|
| 394 |
}
|
| 395 |
|
| 396 |
-
|
| 397 |
-
st.session_state.train_ranges = {f:(float(
|
| 398 |
st.markdown('<div class="st-message-box st-success">Case has been built and results are displayed below.</div>', unsafe_allow_html=True)
|
| 399 |
|
| 400 |
def _dev_block(df, m):
|
|
@@ -414,8 +753,8 @@ if st.session_state.app_step == "dev":
|
|
| 414 |
st.plotly_chart(track_plot(df, include_actual=True),
|
| 415 |
use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
|
| 416 |
with col_cross:
|
| 417 |
-
|
| 418 |
-
st.pyplot(cross_plot_static(df[
|
| 419 |
|
| 420 |
if "Train" in st.session_state.results or "Test" in st.session_state.results:
|
| 421 |
tab1, tab2 = st.tabs(["Training", "Testing"])
|
|
@@ -423,61 +762,13 @@ if st.session_state.app_step == "dev":
|
|
| 423 |
with tab1: _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
|
| 424 |
if "Test" in st.session_state.results:
|
| 425 |
with tab2: _dev_block(st.session_state.results["Test"], st.session_state.results["m_test"])
|
| 426 |
-
|
| 427 |
-
st.divider()
|
| 428 |
-
st.markdown("### Export to Excel")
|
| 429 |
-
options = ["Training","Training_Metrics","Training_Summary","Testing","Testing_Metrics","Testing_Summary","Info"]
|
| 430 |
-
selected = st.multiselect("Sheets to include", options=options, default=[])
|
| 431 |
-
def _summary_table(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
|
| 432 |
-
cols = [c for c in cols if c in df.columns]
|
| 433 |
-
if not cols: return pd.DataFrame()
|
| 434 |
-
tbl = (df[cols].agg(['min','max','mean','std'])
|
| 435 |
-
.T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
|
| 436 |
-
.reset_index(names="Field"))
|
| 437 |
-
return _round_numeric(tbl, 3)
|
| 438 |
-
def build_export(selected: list[str]) -> tuple[bytes|None, str|None]:
|
| 439 |
-
res = st.session_state.get("results", {})
|
| 440 |
-
if not res: return None, None
|
| 441 |
-
sheets, order = {}, []
|
| 442 |
-
def _add(n, d):
|
| 443 |
-
if isinstance(d, pd.DataFrame) and not d.empty: sheets[n]=_round_numeric(d,3); order.append(n)
|
| 444 |
-
if "Training" in selected and "Train" in res: _add("Training", res["Train"])
|
| 445 |
-
if "Training_Metrics" in selected and res.get("m_train"): _add("Training_Metrics", pd.DataFrame([res["m_train"]]))
|
| 446 |
-
if "Training_Summary" in selected and "Train" in res:
|
| 447 |
-
tr_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in res["Train"].columns]
|
| 448 |
-
_add("Training_Summary", _summary_table(res["Train"], tr_cols))
|
| 449 |
-
if "Testing" in selected and "Test" in res: _add("Testing", res["Test"])
|
| 450 |
-
if "Testing_Metrics" in selected and res.get("m_test"): _add("Testing_Metrics", pd.DataFrame([res["m_test"]]))
|
| 451 |
-
if "Testing_Summary" in selected and "Test" in res:
|
| 452 |
-
te_cols = FEATURES + [c for c in [TARGET, PRED_COL] if c in res["Test"].columns]
|
| 453 |
-
_add("Testing_Summary", _summary_table(res["Test"], te_cols))
|
| 454 |
-
if "Info" in selected:
|
| 455 |
-
info = pd.DataFrame([
|
| 456 |
-
{"Key":"AppName","Value":APP_NAME},
|
| 457 |
-
{"Key":"Tagline","Value":TAGLINE},
|
| 458 |
-
{"Key":"Target","Value":TARGET},
|
| 459 |
-
{"Key":"PredColumn","Value":PRED_COL},
|
| 460 |
-
{"Key":"Features","Value":", ".join(FEATURES)},
|
| 461 |
-
{"Key":"ExportedAt","Value":datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
|
| 462 |
-
])
|
| 463 |
-
_add("Info", info)
|
| 464 |
-
if not order: return None, None
|
| 465 |
-
bio = io.BytesIO()
|
| 466 |
-
with pd.ExcelWriter(bio, engine=_excel_engine()) as w:
|
| 467 |
-
for name in order:
|
| 468 |
-
df = sheets[name]; df.to_excel(w, sheet_name=_excel_safe_name(name), index=False)
|
| 469 |
-
bio.seek(0)
|
| 470 |
-
return bio.getvalue(), f"MinStress_Export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
|
| 471 |
-
data, fname = build_export(selected)
|
| 472 |
-
st.download_button("⬇️ Export Excel", data=(data or b""), file_name=(fname or "MinStress_Export.xlsx"),
|
| 473 |
-
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
| 474 |
-
disabled=(data is None))
|
| 475 |
|
| 476 |
# =========================
|
| 477 |
# VALIDATION (with actual)
|
| 478 |
# =========================
|
| 479 |
if st.session_state.app_step == "validate":
|
| 480 |
-
st.sidebar.header("
|
| 481 |
up = st.sidebar.file_uploader("Upload Validation Excel", type=["xlsx","xls"])
|
| 482 |
if up is not None:
|
| 483 |
book = read_book_bytes(up.getvalue())
|
|
@@ -496,10 +787,12 @@ if st.session_state.app_step == "validate":
|
|
| 496 |
book = read_book_bytes(up.getvalue())
|
| 497 |
names = list(book.keys())
|
| 498 |
name = next((s for s in names if s.lower() in ("validation","validate","validation2","val","val2")), names[0])
|
| 499 |
-
df0 = _normalize_columns(book[name].copy(), FEATURES, TARGET,
|
| 500 |
-
|
| 501 |
-
if
|
|
|
|
| 502 |
st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
|
|
|
|
| 503 |
df = df0.copy()
|
| 504 |
df[PRED_COL] = _inv_transform(model.predict(_make_X(df0, FEATURES)), TRANSFORM)
|
| 505 |
st.session_state.results["Validate"] = df
|
|
@@ -517,9 +810,9 @@ if st.session_state.app_step == "validate":
|
|
| 517 |
)
|
| 518 |
|
| 519 |
st.session_state.results["m_val"] = {
|
| 520 |
-
"R": pearson_r(df[
|
| 521 |
-
"RMSE": rmse(df[
|
| 522 |
-
"MAPE%": mape(df[
|
| 523 |
}
|
| 524 |
st.session_state.results["sv_val"] = {"n":len(df), "pred_min":float(df[PRED_COL].min()), "pred_max":float(df[PRED_COL].max()), "oor":oor_pct}
|
| 525 |
st.session_state.results["oor_tbl"] = tbl
|
|
@@ -541,28 +834,24 @@ if st.session_state.app_step == "validate":
|
|
| 541 |
st.plotly_chart(track_plot(st.session_state.results["Validate"], include_actual=True),
|
| 542 |
use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
|
| 543 |
with col_cross:
|
| 544 |
-
|
| 545 |
-
st.pyplot(cross_plot_static(st.session_state.results["Validate"][
|
| 546 |
st.session_state.results["Validate"][PRED_COL]),
|
| 547 |
use_container_width=False)
|
| 548 |
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
bio.seek(0)
|
| 557 |
-
st.download_button("⬇️ Export Excel", data=bio.getvalue(),
|
| 558 |
-
file_name=f"Validation_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx",
|
| 559 |
-
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
|
| 560 |
|
| 561 |
# =========================
|
| 562 |
# PREDICTION (no actual)
|
| 563 |
# =========================
|
| 564 |
if st.session_state.app_step == "predict":
|
| 565 |
-
st.sidebar.header("
|
| 566 |
up = st.sidebar.file_uploader("Upload Prediction Excel", type=["xlsx","xls"])
|
| 567 |
if up is not None:
|
| 568 |
book = read_book_bytes(up.getvalue())
|
|
@@ -578,7 +867,7 @@ if st.session_state.app_step == "predict":
|
|
| 578 |
|
| 579 |
if go_btn and up is not None:
|
| 580 |
book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
|
| 581 |
-
df0 = _normalize_columns(book[name].copy(), FEATURES, TARGET,
|
| 582 |
if not ensure_cols(df0, FEATURES):
|
| 583 |
st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
|
| 584 |
df = df0.copy()
|
|
@@ -613,6 +902,7 @@ if st.session_state.app_step == "predict":
|
|
| 613 |
with col_right:
|
| 614 |
st.plotly_chart(track_plot(df, include_actual=False),
|
| 615 |
use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
|
|
|
|
| 616 |
|
| 617 |
# =========================
|
| 618 |
# Preview modal
|
|
@@ -632,36 +922,23 @@ if st.session_state.show_preview_modal:
|
|
| 632 |
tabs = st.tabs(names)
|
| 633 |
for t, name in zip(tabs, names):
|
| 634 |
with t:
|
| 635 |
-
df = _normalize_columns(book_to_preview[name], FEATURES, TARGET,
|
| 636 |
t1, t2 = st.tabs(["Tracks", "Summary"])
|
| 637 |
with t1:
|
| 638 |
-
|
| 639 |
-
cols = [c for c in FEATURES if c in df.columns]
|
| 640 |
-
if not cols:
|
| 641 |
-
st.info("No feature columns to preview.")
|
| 642 |
-
else:
|
| 643 |
-
idx = np.arange(1, len(df)+1)
|
| 644 |
-
fig, axes = plt.subplots(1, len(cols), figsize=(2.4*len(cols), 7.0), sharey=True, dpi=100)
|
| 645 |
-
if len(cols)==1: axes=[axes]
|
| 646 |
-
for ax, col in zip(axes, cols):
|
| 647 |
-
x = pd.to_numeric(df[col], errors="coerce")
|
| 648 |
-
ax.plot(x, idx, '-', lw=1.6)
|
| 649 |
-
ax.set_xlabel(col); ax.xaxis.set_label_position('top'); ax.xaxis.tick_top()
|
| 650 |
-
ax.set_ylim(idx.max(), idx.min()); ax.grid(True, linestyle=":", alpha=0.3)
|
| 651 |
-
fig.tight_layout()
|
| 652 |
-
st.pyplot(fig, use_container_width=True)
|
| 653 |
with t2:
|
| 654 |
-
|
| 655 |
-
if not
|
| 656 |
st.info("No feature columns found to summarize.")
|
| 657 |
else:
|
| 658 |
tbl = (
|
| 659 |
-
df[
|
| 660 |
.agg(['min','max','mean','std'])
|
| 661 |
.T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
|
| 662 |
.reset_index(names="Feature")
|
| 663 |
)
|
| 664 |
df_centered_rounded(tbl)
|
|
|
|
| 665 |
st.session_state.show_preview_modal = False
|
| 666 |
|
| 667 |
# =========================
|
|
|
|
| 1 |
# app.py — ST_Min_Horizontal_Stress (σhmin)
|
| 2 |
+
# Full Streamlit app — trains the model inside the app (fixed best params or optional GridSearch).
|
| 3 |
+
# No external model file is required. Users can still download the trained .joblib + meta.json.
|
| 4 |
|
| 5 |
import io, json, os, base64, math
|
| 6 |
from pathlib import Path
|
|
|
|
| 29 |
FEATURES = ["Q (gpm)", "SPP (psi)", "ST (kft.lbf)", "WOB (klbf)", "ROP (ft/h)"]
|
| 30 |
TARGET = "σhmin (MPa)"
|
| 31 |
PRED_COL = "σhmin_Pred"
|
| 32 |
+
ACTUAL_COL = None # If your workbook has a separate actual column, set via meta.json (actual_col)
|
| 33 |
TRANSFORM = "none" # "none" | "log10" | "ln"
|
| 34 |
UNITS = "MPa"
|
| 35 |
|
|
|
|
| 42 |
|
| 43 |
STRICT_VERSION_CHECK = True
|
| 44 |
|
| 45 |
+
# Local (optional) — only used for Excel export helper sizing
|
| 46 |
+
MODELS_DIR = Path("models")
|
| 47 |
+
|
| 48 |
# =========================
|
| 49 |
# Page / CSS
|
| 50 |
# =========================
|
|
|
|
| 74 |
]
|
| 75 |
|
| 76 |
# =========================
|
| 77 |
+
# Password gate
|
| 78 |
# =========================
|
| 79 |
def inline_logo(path="logo.png") -> str:
|
| 80 |
try:
|
|
|
|
| 91 |
required = os.environ.get("APP_PASSWORD", "")
|
| 92 |
|
| 93 |
if not required:
|
| 94 |
+
st.warning("Set APP_PASSWORD in Secrets (or environment) and restart.")
|
| 95 |
+
st.stop()
|
| 96 |
if st.session_state.get("auth_ok", False):
|
| 97 |
return
|
| 98 |
|
|
|
|
| 208 |
X[c] = pd.to_numeric(X[c], errors="coerce")
|
| 209 |
return X
|
| 210 |
|
| 211 |
+
# =========================
|
| 212 |
+
# Export helpers
|
| 213 |
+
# =========================
|
| 214 |
+
def _summary_table(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
    """Build a Min/Max/Mean/Std table for the requested columns of ``df``.

    Names in ``cols`` that are not columns of ``df`` are ignored. When none
    remain, an empty DataFrame is returned. Otherwise the result has one row
    per column (its name stored under ``Field``) and is passed through
    ``_round_numeric`` with 3 digits.
    """
    present = [name for name in cols if name in df.columns]
    if len(present) == 0:
        return pd.DataFrame()

    stats = df[present].agg(['min', 'max', 'mean', 'std']).T
    stats = stats.rename(columns={"min": "Min", "max": "Max", "mean": "Mean", "std": "Std"})
    stats = stats.reset_index(names="Field")
    return _round_numeric(stats, 3)
|
| 222 |
+
|
| 223 |
+
def _train_ranges_df(ranges: dict[str, tuple[float, float]]) -> pd.DataFrame:
    """Convert a ``{feature: (min, max)}`` mapping into a Feature/Min/Max table.

    A falsy ``ranges`` yields an empty DataFrame. Otherwise each key becomes
    one row and the result is passed through ``_round_numeric`` with 3 digits.
    """
    if not ranges:
        return pd.DataFrame()

    # Keys become the index after the transpose; reset_index turns them into
    # the first column so the three labels below line up.
    table = pd.DataFrame(ranges).T.reset_index()
    table = table.set_axis(["Feature", "Min", "Max"], axis=1)
    return _round_numeric(table, 3)
|
| 228 |
+
|
| 229 |
+
def _excel_autofit(writer, sheet_name: str, df: pd.DataFrame, min_w: int = 8, max_w: int = 40):
    """Best-effort column sizing and header freeze for one exported sheet.

    Each column width is set to the longest rendered cell (or the header,
    whichever is longer) plus 2, clamped to ``[min_w, max_w]``; the first row
    is then frozen.

    Args:
        writer: Excel writer exposing a ``sheets`` mapping of sheet name to
            worksheet object.
        sheet_name: Key of the worksheet inside ``writer.sheets``.
        df: The frame that was written to that sheet.
        min_w: Lower bound for a column width.
        max_w: Upper bound for a column width.

    Returns:
        None. The function silently does nothing when the worksheet is missing
        or does not offer ``set_column`` (e.g. a non-xlsxwriter engine).
    """
    ws = getattr(writer, "sheets", {}).get(sheet_name)
    # Check the worksheet itself rather than whether xlsxwriter is importable:
    # the package can be installed while the writer uses another engine whose
    # worksheets have no set_column(), which would raise AttributeError.
    if ws is None or not callable(getattr(ws, "set_column", None)):
        return

    for i, col in enumerate(df.columns):
        # Positional access keeps this a Series even with duplicate labels.
        series = df.iloc[:, i].astype(str)
        max_len = max([len(str(col))] + series.map(len).tolist())
        ws.set_column(i, i, max(min_w, min(max_len + 2, max_w)))

    if callable(getattr(ws, "freeze_panes", None)):
        ws.freeze_panes(1, 0)
|
| 240 |
+
|
| 241 |
+
def _available_sections() -> list[str]:
    """List the export sheet names that can be offered right now.

    Sheet groups are included when their result key is present in
    ``st.session_state["results"]``; ``Training_Ranges`` is added when
    ``st.session_state["train_ranges"]`` is truthy; ``Info`` is always last.
    """
    results = st.session_state.get("results", {})
    groups = (
        ("Train", ["Training", "Training_Metrics", "Training_Summary"]),
        ("Test", ["Testing", "Testing_Metrics", "Testing_Summary"]),
        ("Validate", ["Validation", "Validation_Metrics", "Validation_Summary", "Validation_OOR"]),
        ("PredictOnly", ["Prediction", "Prediction_Summary"]),
    )

    names: list[str] = []
    for result_key, sheet_names in groups:
        if result_key in results:
            names.extend(sheet_names)
    if st.session_state.get("train_ranges"):
        names.append("Training_Ranges")
    names.append("Info")
    return names
|
| 251 |
+
|
| 252 |
+
def build_export_workbook(selected: list[str], ndigits: int = 3, do_autofit: bool = True) -> tuple[bytes|None, str|None, list[str]]:
    """Assemble the selected result sheets into an in-memory Excel workbook.

    Args:
        selected: Sheet names the user asked for.
        ndigits: Digits passed to ``_round_numeric`` for every sheet.
        do_autofit: When true, ``_excel_autofit`` is called after each sheet
            is written.

    Returns:
        ``(workbook_bytes, file_name, sheet_names)``. When there are no stored
        results, or nothing selected produced a non-empty sheet, the result is
        ``(None, None, [])``.
    """
    results = st.session_state.get("results", {})
    if not results:
        return None, None, []

    # Insertion order of this dict is the sheet order of the workbook.
    staged: dict[str, pd.DataFrame] = {}

    def _stage(sheet_name: str, frame: pd.DataFrame) -> None:
        # Skip missing or empty frames so they never become blank sheets.
        if frame is None:
            return
        if isinstance(frame, pd.DataFrame) and frame.empty:
            return
        staged[sheet_name] = _round_numeric(frame, ndigits)

    def _stage_frame(sheet_name: str, result_key: str) -> None:
        # A stored DataFrame exported as-is.
        if sheet_name in selected and result_key in results:
            _stage(sheet_name, results[result_key])

    def _stage_record(sheet_name: str, result_key: str) -> None:
        # A stored dict exported as a single-row table.
        if sheet_name in selected and results.get(result_key):
            _stage(sheet_name, pd.DataFrame([results[result_key]]))

    def _stage_summary(sheet_name: str, result_key: str) -> None:
        # Descriptive statistics of features plus target/prediction if present.
        if sheet_name in selected and result_key in results:
            frame = results[result_key]
            extra = [c for c in [TARGET, PRED_COL] if c in frame.columns]
            _stage(sheet_name, _summary_table(frame, FEATURES + extra))

    _stage_frame("Training", "Train")
    _stage_record("Training_Metrics", "m_train")
    _stage_summary("Training_Summary", "Train")

    _stage_frame("Testing", "Test")
    _stage_record("Testing_Metrics", "m_test")
    _stage_summary("Testing_Summary", "Test")

    _stage_frame("Validation", "Validate")
    _stage_record("Validation_Metrics", "m_val")
    _stage_record("Validation_Summary", "sv_val")
    if "Validation_OOR" in selected:
        oor = results.get("oor_tbl")
        if isinstance(oor, pd.DataFrame) and not oor.empty:
            _stage("Validation_OOR", oor.reset_index(drop=True))

    _stage_frame("Prediction", "PredictOnly")
    _stage_record("Prediction_Summary", "sv_pred")

    if "Training_Ranges" in selected:
        ranges = st.session_state.get("train_ranges")
        if ranges:
            _stage("Training_Ranges", _train_ranges_df(ranges))

    if "Info" in selected:
        facts = [
            ("AppName", APP_NAME),
            ("Tagline", TAGLINE),
            ("Target", TARGET),
            ("PredColumn", PRED_COL),
            ("Features", ", ".join(FEATURES)),
            ("ExportedAt", datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
        ]
        _stage("Info", pd.DataFrame([{"Key": key, "Value": value} for key, value in facts]))

    if not staged:
        return None, None, []

    names = list(staged)
    buffer = io.BytesIO()
    with pd.ExcelWriter(buffer, engine=_excel_engine()) as writer:
        for name in names:
            frame = staged[name]
            safe_name = _excel_safe_name(name)
            frame.to_excel(writer, sheet_name=safe_name, index=False)
            if do_autofit:
                _excel_autofit(writer, safe_name, frame)
    buffer.seek(0)

    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    return buffer.getvalue(), f"MinStress_Export_{stamp}.xlsx", names
|
| 309 |
+
|
| 310 |
+
def render_export_button(phase_key: str) -> None:
    """Render the "Export to Excel" sheet picker and download button.

    Nothing is drawn when ``st.session_state["results"]`` is empty or absent.
    ``phase_key`` is appended to the widget keys (``sheets_…`` and
    ``download_…``). With no sheets selected, a hint caption and a disabled
    button are shown; otherwise the workbook is built and offered.
    """
    if not st.session_state.get("results", {}):
        return

    st.divider()
    st.markdown("### Export to Excel")
    chosen = st.multiselect(
        "Sheets to include",
        options=_available_sections(),
        default=[],
        placeholder="Choose option(s)",
        help="Pick the sheets you want in the Excel export.",
        key=f"sheets_{phase_key}",
    )

    # With nothing chosen these defaults yield an empty, disabled button.
    payload, filename, included = None, None, []
    if chosen:
        payload, filename, included = build_export_workbook(selected=chosen, ndigits=3, do_autofit=True)
        if included:
            st.caption("Will include: " + ", ".join(included))
    else:
        st.caption("Select one or more sheets above to enable export.")

    st.download_button(
        "⬇️ Export Excel",
        data=(payload or b""),
        file_name=(filename or "MinStress_Export.xlsx"),
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        disabled=(payload is None),
        key=f"download_{phase_key}",
    )
|
| 335 |
+
|
| 336 |
+
# =========================
|
| 337 |
+
# Plots
|
| 338 |
+
# =========================
|
| 339 |
+
def cross_plot_static(actual, pred):
    """Draw a square actual-vs-predicted scatter with a dashed 1:1 line.

    Both inputs are converted to float Series. The two axes share one range:
    the combined min/max padded by 3 % of the span (or of 1.0 when the span
    is not positive), with five evenly spaced ticks formatted to 2 decimals.

    Returns:
        The matplotlib Figure.
    """
    actual_s = pd.Series(actual, dtype=float)
    pred_s = pd.Series(pred, dtype=float)

    lo = float(min(actual_s.min(), pred_s.min()))
    hi = float(max(actual_s.max(), pred_s.max()))
    span = hi - lo if hi > lo else 1.0
    pad = 0.03 * span
    axis_lo = lo - pad
    axis_hi = hi + pad
    ticks = np.linspace(axis_lo, axis_hi, 5)

    # Figure size is given in pixels by CROSS_W/CROSS_H; matplotlib wants inches.
    dpi = 110
    fig, ax = plt.subplots(figsize=(CROSS_W / dpi, CROSS_H / dpi), dpi=dpi, constrained_layout=False)
    ax.scatter(actual_s, pred_s, s=14, c=COLORS["pred"], alpha=0.9, linewidths=0)
    ax.plot([axis_lo, axis_hi], [axis_lo, axis_hi], linestyle="--", linewidth=1.2, color=COLORS["ref"])

    ax.set_xlim(axis_lo, axis_hi)
    ax.set_ylim(axis_lo, axis_hi)
    ax.set_xticks(ticks)
    ax.set_yticks(ticks)
    ax.set_aspect("equal", adjustable="box")

    two_decimals = FuncFormatter(lambda x, _: f"{x:.2f}")
    ax.xaxis.set_major_formatter(two_decimals)
    ax.yaxis.set_major_formatter(two_decimals)

    label_style = dict(fontweight="bold", fontsize=10, color="black")
    ax.set_xlabel(f"Actual Min Stress ({UNITS})", **label_style)
    ax.set_ylabel(f"Predicted Min Stress ({UNITS})", **label_style)
    ax.tick_params(labelsize=6, colors="black")
    ax.grid(True, linestyle=":", alpha=0.3)
    for spine in ax.spines.values():
        spine.set_linewidth(1.1)
        spine.set_color("#444")

    fig.subplots_adjust(left=0.16, bottom=0.16, right=0.98, top=0.98)
    return fig
|
| 368 |
+
|
| 369 |
+
def track_plot(df, include_actual=True):
    """Build a Plotly track of predicted (and optionally actual) values.

    The vertical axis is the first column whose name contains "depth"
    (case-insensitive), coerced to numeric; without one, a 1-based point
    index is used. The axis range is given max-first so values increase
    downward. The horizontal range covers the plotted series padded by 3 %.

    Args:
        df: Frame that may hold ``PRED_COL`` and the actual column
            (``ACTUAL_COL`` when set and present, otherwise ``TARGET``).
        include_actual: Whether to add the actual trace when its column exists.

    Returns:
        The Plotly Figure.
    """
    depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
    if depth_col is None:
        y = pd.Series(np.arange(1, len(df) + 1))
        ylab = "Point Index"
        y_range = [float(y.max()), float(y.min())]
    else:
        y = pd.to_numeric(df[depth_col], errors="coerce")
        ylab = depth_col
        y_range = [float(np.nanmax(y)), float(np.nanmin(y))]  # reversed

    act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df.columns) else TARGET
    show_actual = include_actual and act_col in df.columns

    # Pool every plotted value to derive one shared horizontal range.
    x_series = pd.Series(df.get(PRED_COL, pd.Series(dtype=float))).astype(float)
    if show_actual:
        x_series = pd.concat([x_series, pd.Series(df[act_col]).astype(float)], ignore_index=True)
    x_lo = float(x_series.min())
    x_hi = float(x_series.max())
    x_pad = 0.03 * (x_hi - x_lo if x_hi > x_lo else 1.0)
    xmin = x_lo - x_pad
    xmax = x_hi + x_pad
    tick0 = _nice_tick0(xmin, step=max((xmax - xmin) / 10.0, 0.1))

    def _hover(label):
        return f"{label}: " + "%{x:.2f}<br>" + ylab + ": %{y}<extra></extra>"

    fig = go.Figure()
    if PRED_COL in df.columns:
        fig.add_trace(go.Scatter(
            x=df[PRED_COL], y=y, mode="lines",
            line=dict(color=COLORS["pred"], width=1.8),
            name=PRED_COL,
            hovertemplate=_hover(PRED_COL),
        ))
    if show_actual:
        fig.add_trace(go.Scatter(
            x=df[act_col], y=y, mode="lines",
            line=dict(color=COLORS["actual"], width=2.0, dash="dot"),
            name=f"{act_col} (actual)",
            hovertemplate=_hover(act_col),
        ))

    fig.update_layout(
        height=TRACK_H, width=TRACK_W, autosize=False,
        paper_bgcolor="#fff", plot_bgcolor="#fff",
        margin=dict(l=64, r=16, t=36, b=48), hovermode="closest",
        font=dict(size=FONT_SZ, color="#000"),
        legend=dict(x=0.98, y=0.05, xanchor="right", yanchor="bottom",
                    bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1),
        legend_title_text="",
    )

    # Styling common to both axes.
    axis_style = dict(
        title_font=dict(size=20, family=BOLD_FONT, color="#000"),
        tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
        ticks="outside",
        showline=True, linewidth=1.2, linecolor="#444", mirror=True,
        showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True,
    )
    fig.update_xaxes(
        title_text=f"Min Stress ({UNITS})",
        side="top", range=[xmin, xmax],
        tickformat=",.2f", tickmode="auto", tick0=tick0,
        **axis_style,
    )
    fig.update_yaxes(
        title_text=ylab,
        range=y_range,
        **axis_style,
    )
    return fig
|
| 430 |
+
|
| 431 |
+
def preview_tracks(df: pd.DataFrame, cols: list[str]):
    """Draw one matplotlib line track per requested column, side by side.

    Parameters
    ----------
    df : workbook sheet to preview.
    cols : column names to plot; names absent from ``df`` are ignored.

    Returns
    -------
    A matplotlib figure. If none of ``cols`` is present the figure only
    carries a "No selected columns" placeholder.

    The shared y axis uses the first column whose name contains "depth"
    (case-insensitive). When no such column exists, or it holds no finite
    numeric value, a 1-based point index is used instead.
    """
    cols = [c for c in cols if c in df.columns]
    n = len(cols)
    if n == 0:
        fig, ax = plt.subplots(figsize=(4, 2))
        ax.text(0.5, 0.5, "No selected columns", ha="center", va="center")
        ax.axis("off")
        return fig

    idx = None
    y_label = "Point Index"
    depth_col = next((c for c in df.columns if "depth" in str(c).lower()), None)
    if depth_col is not None:
        depth = pd.to_numeric(df[depth_col], errors="coerce")
        depth_vals = depth.to_numpy(dtype=float, na_value=np.nan)
        # A depth column that is entirely text/blank coerces to NaN; such a
        # column cannot define an axis, so fall through to the point index.
        if np.isfinite(depth_vals).any():
            idx = depth
            y_label = depth_col
    if idx is None:
        idx = pd.Series(np.arange(1, len(df) + 1))

    # Axis limits must be finite: set_ylim rejects NaN/Inf, which is what an
    # empty frame or an all-NaN column would otherwise produce.
    y_vals = idx.to_numpy(dtype=float, na_value=np.nan)
    finite = y_vals[np.isfinite(y_vals)]
    y_limits = (float(finite.max()), float(finite.min())) if finite.size else None

    cmap = plt.get_cmap("tab20")
    col_colors = {col: cmap(i % cmap.N) for i, col in enumerate(cols)}

    fig, axes = plt.subplots(1, n, figsize=(2.4 * n, 7.0), sharey=True, dpi=100)
    if n == 1:
        # plt.subplots returns a bare Axes (not a sequence) for a single panel.
        axes = [axes]

    for i, (ax, col) in enumerate(zip(axes, cols)):
        x = pd.to_numeric(df[col], errors="coerce")
        ax.plot(x, idx, "-", lw=1.6, color=col_colors[col])
        ax.set_xlabel(col)
        ax.xaxis.set_label_position("top")
        ax.xaxis.tick_top()
        if y_limits is not None:
            ax.set_ylim(*y_limits)  # (max, min): reversed so depth increases downward
        ax.grid(True, linestyle=":", alpha=0.3)
        if i == 0:
            ax.set_ylabel(y_label)
        else:
            ax.tick_params(labelleft=False)
            ax.set_ylabel("")

    fig.tight_layout()
    return fig
|
| 470 |
+
|
| 471 |
# =========================
|
| 472 |
# Session state
|
| 473 |
# =========================
|
|
|
|
| 479 |
st.session_state.setdefault("dev_file_loaded",False)
|
| 480 |
st.session_state.setdefault("dev_preview",False)
|
| 481 |
st.session_state.setdefault("show_preview_modal", False)
|
|
|
|
|
|
|
|
|
|
| 482 |
|
| 483 |
# =========================
|
| 484 |
+
# Sidebar branding
|
| 485 |
# =========================
|
| 486 |
st.sidebar.markdown(f"""
|
| 487 |
<div class="centered-container">
|
|
|
|
| 491 |
</div>
|
| 492 |
""", unsafe_allow_html=True)
|
| 493 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 494 |
def sticky_header(title, message):
|
| 495 |
st.markdown(
|
| 496 |
f"""
|
|
|
|
| 508 |
unsafe_allow_html=True
|
| 509 |
)
|
| 510 |
|
| 511 |
+
# ===============================================================
|
| 512 |
+
# TRAIN THE MODEL IN-APP (no external pickle needed)
|
| 513 |
+
# ===============================================================
|
| 514 |
+
from sklearn.ensemble import RandomForestRegressor
|
| 515 |
+
from sklearn.model_selection import train_test_split
|
| 516 |
+
|
| 517 |
+
# ❶ Set YOUR optimized hyperparameters here
# These are unpacked as keyword arguments into RandomForestRegressor by the
# fixed-params training path, and also seed the default selections of the
# grid-search widgets. "random_state" is overridden by the sidebar seed.
BEST_PARAMS = {
    "n_estimators": 300,
    "max_depth": 22,
    "max_features": "sqrt",  # or "log2" / float in (0,1]
    "min_samples_split": 2,
    "min_samples_leaf": 1,
    "bootstrap": True,
    "random_state": 42,
    "n_jobs": -1
}

st.sidebar.markdown("### Model source")
# The selected label is compared verbatim further down to pick the training
# branch, so the option strings here must stay in sync with that comparison.
source = st.sidebar.radio(
    "Choose how to get the model",
    ["Train now (fixed best params)", "Train with Grid Search (optional)"],
    help="Avoids uploading big pickles. Deterministic best-params training is recommended."
)

st.sidebar.markdown("### Training data")
# Falsy until the user uploads a workbook; the training code reads it via getvalue().
file_train = st.sidebar.file_uploader("Upload Excel for training (has Train sheet or any sheet)", type=["xlsx","xls"])
|
| 538 |
+
|
| 539 |
+
def _train_model_fixed(X: pd.DataFrame, y: pd.Series, params: dict) -> RandomForestRegressor:
    """Build a RandomForestRegressor from ``params``, fit it on (X, y) and return it."""
    forest = RandomForestRegressor(**params)
    # fit() returns the estimator itself, so the fitted forest is handed back directly.
    return forest.fit(X, y)
|
| 543 |
+
|
| 544 |
+
def _download_buttons(model_obj, meta_dict):
    """Render two download buttons: the joblib-serialised model and its JSON metadata.

    Both payloads are built in memory; nothing is written to disk.
    """
    # Serialise the estimator into an in-memory buffer and grab its bytes.
    with io.BytesIO() as model_stream:
        joblib.dump(model_obj, model_stream)
        model_payload = model_stream.getvalue()
    st.download_button("⬇️ Download trained model (.joblib)", model_payload, "minstress_model.joblib")

    # Metadata travels as pretty-printed UTF-8 JSON.
    meta_payload = json.dumps(meta_dict, indent=2).encode("utf-8")
    st.download_button("⬇️ Download meta (.json)", meta_payload, "minstress_meta.json")
|
| 554 |
+
|
| 555 |
+
# ---------------------------------------------------------------
# In-app training: load the uploaded workbook, fit the forest, report
# hold-out metrics, then refit on every usable row. Leaves `model`, `meta`
# and st.session_state.train_ranges for the steps that follow.
# ---------------------------------------------------------------

# Nothing below can run without training data, so halt this script run
# until a workbook has been uploaded.
if not file_train:
    st.info("Upload a training Excel file in the sidebar to build the model.")
    st.stop()

# Load train data
book_train = read_book_bytes(file_train.getvalue())
if not book_train:
    # Guard the first-sheet fallback below, which would raise IndexError on an empty book.
    st.error("The uploaded training workbook contains no readable sheets.")
    st.stop()

# Prefer a sheet named Train/Training (case-insensitive); otherwise use the first sheet.
sheet_train = next((s for s in book_train if s.lower() in ("train", "training")), list(book_train)[0])
df_tr0 = _normalize_columns(book_train[sheet_train].copy(), FEATURES, TARGET, None)

# Build X/y
act_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df_tr0.columns) else TARGET
if not ensure_cols(df_tr0, FEATURES + [act_col]):
    st.stop()

X_all = _make_X(df_tr0, FEATURES).copy()
y_all = pd.to_numeric(df_tr0[act_col], errors="coerce")

# errors="coerce" turns blank / non-numeric cells into NaN. A NaN target makes
# the fit raise, and NaN features may too depending on the scikit-learn
# version, so train only on complete rows and tell the user what was skipped.
usable = X_all.notna().all(axis=1).to_numpy() & y_all.notna().to_numpy()
n_dropped = int((~usable).sum())
if n_dropped:
    st.warning(f"Ignoring {n_dropped} training row(s) with missing or non-numeric values.")
    X_all = X_all[usable]
    y_all = y_all[usable]
if len(X_all) < 2:
    st.error("Not enough complete training rows to build a model.")
    st.stop()


def _split_metrics(y_true, y_pred) -> dict:
    """Return the R / RMSE / MAPE% dict reported for one data split."""
    return {
        "R": pearson_r(y_true, y_pred),
        "RMSE": rmse(y_true, y_pred),
        "MAPE%": mape(y_true, y_pred),
    }


# Split for reporting
tsz = st.sidebar.slider("Validation split for reporting", 0.10, 0.40, 0.20, 0.05)
seed = st.sidebar.number_input("Random seed", 0, 1_000_000, BEST_PARAMS.get("random_state", 42), step=1)
Xtr, Xva, ytr, yva = train_test_split(X_all, y_all, test_size=tsz, random_state=seed)

if source == "Train with Grid Search (optional)":
    from sklearn.model_selection import GridSearchCV

    st.sidebar.markdown("### Grid Search")
    n_list = st.sidebar.multiselect("n_estimators", [100, 200, 300, 400], default=[BEST_PARAMS["n_estimators"]])
    depth_list = st.sidebar.multiselect("max_depth", [12, 16, 20, 22, 26], default=[BEST_PARAMS["max_depth"]])
    maxf_list = st.sidebar.multiselect("max_features", ["sqrt", "log2"], default=[BEST_PARAMS["max_features"]])

    # An emptied multiselect falls back to the single BEST_PARAMS value so the
    # grid is never empty; the remaining hyperparameters stay fixed.
    param_grid = {
        "n_estimators": n_list or [BEST_PARAMS["n_estimators"]],
        "max_depth": depth_list or [BEST_PARAMS["max_depth"]],
        "max_features": maxf_list or [BEST_PARAMS["max_features"]],
        "min_samples_split": [BEST_PARAMS["min_samples_split"]],
        "min_samples_leaf": [BEST_PARAMS["min_samples_leaf"]],
        "bootstrap": [BEST_PARAMS["bootstrap"]],
        "random_state": [seed]
    }
    base = RandomForestRegressor(n_jobs=-1)
    with st.spinner("Running GridSearchCV..."):
        gs = GridSearchCV(base, param_grid=param_grid, cv=3, n_jobs=-1, refit=True)
        gs.fit(Xtr, ytr)
    best = gs.best_estimator_
    st.success(f"GridSearch done. Best params: {gs.best_params_}")

    # Validation report
    pred_tr = best.predict(Xtr)
    pred_va = best.predict(Xva)
    m_train = _split_metrics(ytr, pred_tr)
    m_valid = _split_metrics(yva, pred_va)
    st.write("**Training split metrics**:", m_train)
    st.write("**Validation split metrics**:", m_valid)

    # Final fit on all data with best params
    model = RandomForestRegressor(**{**gs.best_params_, "n_jobs": -1, "random_state": seed})
    model.fit(X_all, y_all)
else:
    # Deterministic fixed-params training (recommended)
    params = {**BEST_PARAMS, "random_state": seed}
    with st.spinner("Training fixed-params model..."):
        tmp_model = _train_model_fixed(Xtr, ytr, params)
    pred_tr = tmp_model.predict(Xtr)
    pred_va = tmp_model.predict(Xva)
    m_train = _split_metrics(ytr, pred_tr)
    m_valid = _split_metrics(yva, pred_va)
    st.write("**Training split metrics**:", m_train)
    st.write("**Validation split metrics**:", m_valid)

    # The reported metrics come from the hold-out model above; the model that
    # is kept is refit on every usable row.
    model = _train_model_fixed(X_all, y_all, params)

# Create meta + training ranges for OOR checks later
meta = {
    "features": FEATURES,
    "target": TARGET,
    "pred_col": PRED_COL,
    "actual_col": ACTUAL_COL,
    "transform": TRANSFORM,
    "units": UNITS,
    "versions": {
        "numpy": np.__version__,
        "scikit_learn": __import__("sklearn").__version__
    },
    "training": {
        "n_rows": int(len(X_all)),  # rows actually used for the final fit
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "used_grid_search": (source == "Train with Grid Search (optional)")
    }
}

# Per-feature (min, max) of the training inputs, kept in session state.
tr_min = X_all.min().to_dict()
tr_max = X_all.max().to_dict()
st.session_state.train_ranges = {f: (float(tr_min[f]), float(tr_max[f])) for f in FEATURES}

st.success("Model ready ✓ — proceed to **Case Building**, **Validation**, or **Prediction**.")
_download_buttons(model, meta)
|
| 648 |
+
|
| 649 |
# =========================
|
| 650 |
# INTRO
|
| 651 |
# =========================
|
| 652 |
# Landing page: shown while the app is on the "intro" step.
if st.session_state.app_step == "intro":
    st.header("Welcome!")
    intro_blurb = f"This software is developed by *Smart Thinking AI-Solutions Team* to estimate **Minimum Horizontal Stress** ({UNITS}) from drilling/offset data."
    st.markdown(intro_blurb)

    st.subheader("How It Works")
    how_it_works = (
        "1) **Upload your data to build the case and preview the model performance.** \n"
        "2) Click **Run Model** to compute metrics and plots. \n"
        "3) **Proceed to Validation** (with actual) or **Proceed to Prediction** (no actual)."
    )
    st.markdown(how_it_works)

    # Advance to the case-building ("dev") step and rerun so it renders at once.
    start_clicked = st.button("Start Showcase", type="primary")
    if start_clicked:
        st.session_state.app_step = "dev"
        st.rerun()
|
| 663 |
|
| 664 |
# =========================
|
| 665 |
+
# CASE BUILDING (Train/Test) — optional evaluation stage
|
| 666 |
# =========================
|
| 667 |
def _find_sheet(book, names):
|
| 668 |
low2orig = {k.lower(): k for k in book.keys()}
|
|
|
|
| 671 |
return None
|
| 672 |
|
| 673 |
if st.session_state.app_step == "dev":
|
| 674 |
+
st.sidebar.header("Case Building")
|
| 675 |
+
up = st.sidebar.file_uploader("Upload Your Data File (Train/Test sheets)", type=["xlsx","xls"])
|
| 676 |
if up is not None:
|
| 677 |
st.session_state.dev_file_bytes = up.getvalue()
|
| 678 |
st.session_state.dev_file_name = up.name
|
|
|
|
| 707 |
st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
|
| 708 |
st.stop()
|
| 709 |
|
| 710 |
+
tr0 = _normalize_columns(book[sh_train].copy(), FEATURES, TARGET, None)
|
| 711 |
+
te0 = _normalize_columns(book[sh_test].copy(), FEATURES, TARGET, None)
|
| 712 |
|
| 713 |
actual_col = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in tr0.columns) else TARGET
|
| 714 |
if not (ensure_cols(tr0, FEATURES+[actual_col]) and ensure_cols(te0, FEATURES+[actual_col])):
|
|
|
|
| 732 |
"MAPE%": mape(te[actual_col], te[PRED_COL]),
|
| 733 |
}
|
| 734 |
|
| 735 |
+
tr_min2 = tr[FEATURES].min().to_dict(); tr_max2 = tr[FEATURES].max().to_dict()
|
| 736 |
+
st.session_state.train_ranges = {f:(float(tr_min2[f]), float(tr_max2[f])) for f in FEATURES}
|
| 737 |
st.markdown('<div class="st-message-box st-success">Case has been built and results are displayed below.</div>', unsafe_allow_html=True)
|
| 738 |
|
| 739 |
def _dev_block(df, m):
|
|
|
|
| 753 |
st.plotly_chart(track_plot(df, include_actual=True),
|
| 754 |
use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
|
| 755 |
with col_cross:
|
| 756 |
+
act_col2 = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df.columns) else TARGET
|
| 757 |
+
st.pyplot(cross_plot_static(df[act_col2], df[PRED_COL]), use_container_width=False)
|
| 758 |
|
| 759 |
if "Train" in st.session_state.results or "Test" in st.session_state.results:
|
| 760 |
tab1, tab2 = st.tabs(["Training", "Testing"])
|
|
|
|
| 762 |
with tab1: _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
|
| 763 |
if "Test" in st.session_state.results:
|
| 764 |
with tab2: _dev_block(st.session_state.results["Test"], st.session_state.results["m_test"])
|
| 765 |
+
render_export_button(phase_key="dev")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 766 |
|
| 767 |
# =========================
|
| 768 |
# VALIDATION (with actual)
|
| 769 |
# =========================
|
| 770 |
if st.session_state.app_step == "validate":
|
| 771 |
+
st.sidebar.header("Validate the Model")
|
| 772 |
up = st.sidebar.file_uploader("Upload Validation Excel", type=["xlsx","xls"])
|
| 773 |
if up is not None:
|
| 774 |
book = read_book_bytes(up.getvalue())
|
|
|
|
| 787 |
book = read_book_bytes(up.getvalue())
|
| 788 |
names = list(book.keys())
|
| 789 |
name = next((s for s in names if s.lower() in ("validation","validate","validation2","val","val2")), names[0])
|
| 790 |
+
df0 = _normalize_columns(book[name].copy(), FEATURES, TARGET, None)
|
| 791 |
+
|
| 792 |
+
act_col2 = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in df0.columns) else TARGET
|
| 793 |
+
if not ensure_cols(df0, FEATURES+[act_col2]):
|
| 794 |
st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
|
| 795 |
+
|
| 796 |
df = df0.copy()
|
| 797 |
df[PRED_COL] = _inv_transform(model.predict(_make_X(df0, FEATURES)), TRANSFORM)
|
| 798 |
st.session_state.results["Validate"] = df
|
|
|
|
| 810 |
)
|
| 811 |
|
| 812 |
st.session_state.results["m_val"] = {
|
| 813 |
+
"R": pearson_r(df[act_col2], df[PRED_COL]),
|
| 814 |
+
"RMSE": rmse(df[act_col2], df[PRED_COL]),
|
| 815 |
+
"MAPE%": mape(df[act_col2], df[PRED_COL]),
|
| 816 |
}
|
| 817 |
st.session_state.results["sv_val"] = {"n":len(df), "pred_min":float(df[PRED_COL].min()), "pred_max":float(df[PRED_COL].max()), "oor":oor_pct}
|
| 818 |
st.session_state.results["oor_tbl"] = tbl
|
|
|
|
| 834 |
st.plotly_chart(track_plot(st.session_state.results["Validate"], include_actual=True),
|
| 835 |
use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
|
| 836 |
with col_cross:
|
| 837 |
+
act_col3 = ACTUAL_COL if (ACTUAL_COL and ACTUAL_COL in st.session_state.results["Validate"].columns) else TARGET
|
| 838 |
+
st.pyplot(cross_plot_static(st.session_state.results["Validate"][act_col3],
|
| 839 |
st.session_state.results["Validate"][PRED_COL]),
|
| 840 |
use_container_width=False)
|
| 841 |
|
| 842 |
+
render_export_button(phase_key="validate")
|
| 843 |
+
|
| 844 |
+
sv = st.session_state.results["sv_val"]
|
| 845 |
+
if sv["oor"] > 0: st.markdown('<div class="st-message-box st-warning">Some inputs fall outside **training min–max** ranges.</div>', unsafe_allow_html=True)
|
| 846 |
+
if st.session_state.results["oor_tbl"] is not None:
|
| 847 |
+
st.write("*Out-of-range rows (vs. Training min–max):*")
|
| 848 |
+
df_centered_rounded(st.session_state.results["oor_tbl"])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 849 |
|
| 850 |
# =========================
|
| 851 |
# PREDICTION (no actual)
|
| 852 |
# =========================
|
| 853 |
if st.session_state.app_step == "predict":
|
| 854 |
+
st.sidebar.header("Prediction (No Actual)")
|
| 855 |
up = st.sidebar.file_uploader("Upload Prediction Excel", type=["xlsx","xls"])
|
| 856 |
if up is not None:
|
| 857 |
book = read_book_bytes(up.getvalue())
|
|
|
|
| 867 |
|
| 868 |
if go_btn and up is not None:
|
| 869 |
book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
|
| 870 |
+
df0 = _normalize_columns(book[name].copy(), FEATURES, TARGET, None)
|
| 871 |
if not ensure_cols(df0, FEATURES):
|
| 872 |
st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
|
| 873 |
df = df0.copy()
|
|
|
|
| 902 |
with col_right:
|
| 903 |
st.plotly_chart(track_plot(df, include_actual=False),
|
| 904 |
use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
|
| 905 |
+
render_export_button(phase_key="predict")
|
| 906 |
|
| 907 |
# =========================
|
| 908 |
# Preview modal
|
|
|
|
| 922 |
tabs = st.tabs(names)
|
| 923 |
for t, name in zip(tabs, names):
|
| 924 |
with t:
|
| 925 |
+
df = _normalize_columns(book_to_preview[name], FEATURES, TARGET, None)
|
| 926 |
t1, t2 = st.tabs(["Tracks", "Summary"])
|
| 927 |
with t1:
|
| 928 |
+
st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 929 |
with t2:
|
| 930 |
+
feat_present = [c for c in FEATURES if c in df.columns]
|
| 931 |
+
if not feat_present:
|
| 932 |
st.info("No feature columns found to summarize.")
|
| 933 |
else:
|
| 934 |
tbl = (
|
| 935 |
+
df[feat_present]
|
| 936 |
.agg(['min','max','mean','std'])
|
| 937 |
.T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
|
| 938 |
.reset_index(names="Feature")
|
| 939 |
)
|
| 940 |
df_centered_rounded(tbl)
|
| 941 |
+
|
| 942 |
st.session_state.show_preview_modal = False
|
| 943 |
|
| 944 |
# =========================
|