Update app.py
Browse files
app.py
CHANGED
|
@@ -28,15 +28,15 @@ from sklearn.impute import SimpleImputer
|
|
| 28 |
APP_NAME = "ST_GeoMech_SHmax"
|
| 29 |
TAGLINE = "Real-Time Maximum Horizontal Stress Prediction"
|
| 30 |
|
| 31 |
-
# Canonical
|
| 32 |
FEATURES = ["Q (gpm)", "SPP (psi)", "T (kft.lbf)", "WOB (klbf)", "ROP (ft/h)"]
|
| 33 |
-
TARGET = "MaxStress
|
| 34 |
PRED_COL = "SHmax_Pred"
|
| 35 |
ACTUAL_COL = TARGET
|
| 36 |
TRANSFORM = "none" # "none" | "log10" | "ln"
|
| 37 |
UNITS = "Psi"
|
| 38 |
|
| 39 |
-
#
|
| 40 |
BEST_PARAMS = dict(
|
| 41 |
n_estimators=100,
|
| 42 |
max_depth=22,
|
|
@@ -81,7 +81,7 @@ TABLE_CENTER_CSS = [
|
|
| 81 |
]
|
| 82 |
|
| 83 |
# =========================
|
| 84 |
-
# Password gate (
|
| 85 |
# =========================
|
| 86 |
def inline_logo(path="logo.png") -> str:
|
| 87 |
try:
|
|
@@ -96,10 +96,13 @@ def add_password_gate() -> None:
|
|
| 96 |
required = st.secrets.get("APP_PASSWORD", "")
|
| 97 |
except Exception:
|
| 98 |
required = os.environ.get("APP_PASSWORD", "")
|
|
|
|
| 99 |
if not required:
|
| 100 |
-
return
|
|
|
|
| 101 |
if st.session_state.get("auth_ok", False):
|
| 102 |
return
|
|
|
|
| 103 |
st.sidebar.markdown(f"""
|
| 104 |
<div class="centered-container">
|
| 105 |
<img src="{inline_logo('logo.png')}" class="brand-logo">
|
|
@@ -116,7 +119,7 @@ def add_password_gate() -> None:
|
|
| 116 |
st.error("Incorrect key.")
|
| 117 |
st.stop()
|
| 118 |
|
| 119 |
-
|
| 120 |
|
| 121 |
# =========================
|
| 122 |
# Utilities
|
|
@@ -186,26 +189,6 @@ def ensure_cols(df: pd.DataFrame, cols: list[str]) -> bool:
|
|
| 186 |
def _nice_tick0(xmin: float, step: float = 0.1) -> float:
|
| 187 |
return step * math.floor(xmin / step) if np.isfinite(xmin) else xmin
|
| 188 |
|
| 189 |
-
# ---------- Column normalization / aliases ----------
|
| 190 |
-
ALIASES = {
|
| 191 |
-
"Q (gpm)": ["Q, gpm", "Q_gpm", "Q(gpm)", "Q (gpm)"],
|
| 192 |
-
"SPP (psi)": ["SPP(psi)", "SPP (psi)", "SPP psi"],
|
| 193 |
-
"T (kft.lbf)": ["T(kft.lbf)", "T (kft.lbf)"],
|
| 194 |
-
"WOB (klbf)": ["WOB(klbf)", "WOB (klbf)"],
|
| 195 |
-
"ROP (ft/h)": ["ROP(ft/h)", "ROP (ft/h)"],
|
| 196 |
-
"MaxStress": ["Max Stress", "MAXStress", "SHmax", "SHmax_Actual"],
|
| 197 |
-
}
|
| 198 |
-
def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
|
| 199 |
-
out = df.copy()
|
| 200 |
-
out.columns = [str(c).strip().replace(" ,", ",").replace(", ", ", ").replace(" ", " ")
|
| 201 |
-
.replace("psi)", "psi)").replace("(psi", "(psi") for c in out.columns]
|
| 202 |
-
mapping = {}
|
| 203 |
-
for canonical, alts in ALIASES.items():
|
| 204 |
-
for a in alts:
|
| 205 |
-
if a in out.columns and canonical != a:
|
| 206 |
-
mapping[a] = canonical
|
| 207 |
-
return out.rename(columns=mapping)
|
| 208 |
-
|
| 209 |
# ---------- Transform helpers ----------
|
| 210 |
def _inv_transform(x: np.ndarray, transform: str) -> np.ndarray:
|
| 211 |
t = (transform or "none").lower()
|
|
@@ -346,7 +329,7 @@ def render_export_button(phase_key: str) -> None:
|
|
| 346 |
disabled=(data is None), key=f"download_{phase_key}")
|
| 347 |
|
| 348 |
# =========================
|
| 349 |
-
# Plots
|
| 350 |
# =========================
|
| 351 |
def cross_plot_static(actual, pred):
|
| 352 |
a = pd.Series(actual, dtype=float)
|
|
@@ -365,7 +348,7 @@ def cross_plot_static(actual, pred):
|
|
| 365 |
ax.set_xticks(ticks); ax.set_yticks(ticks)
|
| 366 |
ax.set_aspect("equal", adjustable="box")
|
| 367 |
|
| 368 |
-
fmt = FuncFormatter(lambda x, _: f"{x:.0f}")
|
| 369 |
ax.xaxis.set_major_formatter(fmt); ax.yaxis.set_major_formatter(fmt)
|
| 370 |
|
| 371 |
ax.set_xlabel(f"Actual Max Stress ({UNITS})", fontweight="bold", fontsize=10, color="black")
|
|
@@ -421,13 +404,14 @@ def track_plot(df, include_actual=True):
|
|
| 421 |
bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1),
|
| 422 |
legend_title_text=""
|
| 423 |
)
|
| 424 |
-
# no decimals on the x ticks
|
| 425 |
fig.update_xaxes(
|
| 426 |
title_text=f"Max Stress ({UNITS})",
|
| 427 |
title_font=dict(size=20, family=BOLD_FONT, color="#000"),
|
| 428 |
tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
|
| 429 |
side="top", range=[xmin, xmax],
|
| 430 |
-
ticks="outside",
|
|
|
|
|
|
|
| 431 |
showline=True, linewidth=1.2, linecolor="#444", mirror=True,
|
| 432 |
showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
|
| 433 |
)
|
|
@@ -447,7 +431,8 @@ def preview_tracks(df: pd.DataFrame, cols: list[str]):
|
|
| 447 |
if n == 0:
|
| 448 |
fig, ax = plt.subplots(figsize=(4, 2))
|
| 449 |
ax.text(0.5, 0.5, "No selected columns", ha="center", va="center")
|
| 450 |
-
ax.axis("off")
|
|
|
|
| 451 |
|
| 452 |
depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
|
| 453 |
if depth_col is not None:
|
|
@@ -463,16 +448,20 @@ def preview_tracks(df: pd.DataFrame, cols: list[str]):
|
|
| 463 |
col_colors = {col: cmap(i % cmap.N) for i, col in enumerate(cols)}
|
| 464 |
|
| 465 |
fig, axes = plt.subplots(1, n, figsize=(2.4 * n, 7.0), sharey=True, dpi=100)
|
| 466 |
-
if n == 1:
|
|
|
|
|
|
|
| 467 |
for i, (ax, col) in enumerate(zip(axes, cols)):
|
| 468 |
x = pd.to_numeric(df[col], errors="coerce")
|
| 469 |
ax.plot(x, idx, '-', lw=1.6, color=col_colors[col])
|
| 470 |
ax.set_xlabel(col); ax.xaxis.set_label_position('top'); ax.xaxis.tick_top()
|
| 471 |
ax.set_ylim(y_max, y_min) # reversed depth down
|
| 472 |
ax.grid(True, linestyle=":", alpha=0.3)
|
| 473 |
-
if i == 0:
|
|
|
|
| 474 |
else:
|
| 475 |
ax.tick_params(labelleft=False); ax.set_ylabel("")
|
|
|
|
| 476 |
fig.tight_layout()
|
| 477 |
return fig
|
| 478 |
|
|
@@ -480,7 +469,12 @@ def preview_tracks(df: pd.DataFrame, cols: list[str]):
|
|
| 480 |
# Fixed training pipeline
|
| 481 |
# =========================
|
| 482 |
def build_pipeline() -> Pipeline:
|
| 483 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 484 |
model = RandomForestRegressor(**BEST_PARAMS)
|
| 485 |
pipe = Pipeline(steps=[
|
| 486 |
("imputer", SimpleImputer(strategy="median")),
|
|
@@ -498,8 +492,11 @@ st.session_state.setdefault("dev_file_name","")
|
|
| 498 |
st.session_state.setdefault("dev_file_bytes",b"")
|
| 499 |
st.session_state.setdefault("dev_file_loaded",False)
|
| 500 |
st.session_state.setdefault("dev_preview",False)
|
| 501 |
-
st.session_state.setdefault("
|
| 502 |
-
|
|
|
|
|
|
|
|
|
|
| 503 |
|
| 504 |
# =========================
|
| 505 |
# Sidebar branding
|
|
@@ -529,40 +526,49 @@ def sticky_header(title, message):
|
|
| 529 |
unsafe_allow_html=True
|
| 530 |
)
|
| 531 |
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 535 |
return
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
.reset_index(names="Feature")
|
| 563 |
-
)
|
| 564 |
-
df_centered_rounded(tbl)
|
| 565 |
-
# keep it open; do not reset flag so user can collapse if they want
|
| 566 |
|
| 567 |
# =========================
|
| 568 |
# INTRO
|
|
@@ -572,7 +578,7 @@ if st.session_state.app_step == "intro":
|
|
| 572 |
st.markdown(f"This software is developed by *Smart Thinking AI-Solutions Team* to estimate **Maximum Horizontal Stress** ({UNITS}) from drilling/offset data.")
|
| 573 |
st.subheader("How It Works")
|
| 574 |
st.markdown(
|
| 575 |
-
"1) **Upload your
|
| 576 |
"2) **Validate** on held-out wells (with actual). \n"
|
| 577 |
"3) **Predict** on wells without actual."
|
| 578 |
)
|
|
@@ -590,7 +596,7 @@ def _find_sheet(book, names):
|
|
| 590 |
|
| 591 |
if st.session_state.app_step == "dev":
|
| 592 |
st.sidebar.header("Case Building")
|
| 593 |
-
up = st.sidebar.file_uploader("Upload Your Data File
|
| 594 |
if up is not None:
|
| 595 |
st.session_state.dev_file_bytes = up.getvalue()
|
| 596 |
st.session_state.dev_file_name = up.name
|
|
@@ -604,18 +610,25 @@ if st.session_state.app_step == "dev":
|
|
| 604 |
df0 = next(iter(tmp.values()))
|
| 605 |
st.sidebar.caption(f"**Data loaded:** {st.session_state.dev_file_name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
|
| 606 |
|
|
|
|
| 607 |
if st.sidebar.button("Preview data", use_container_width=True, disabled=not st.session_state.dev_file_loaded):
|
| 608 |
-
st.session_state.
|
| 609 |
-
st.session_state.
|
|
|
|
| 610 |
|
| 611 |
run = st.sidebar.button("Run Model", type="primary", use_container_width=True)
|
| 612 |
if st.sidebar.button("Proceed to Validation ▶", use_container_width=True): st.session_state.app_step="validate"; st.rerun()
|
| 613 |
if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
|
| 614 |
|
| 615 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 616 |
|
| 617 |
-
#
|
| 618 |
-
|
| 619 |
|
| 620 |
if run and st.session_state.dev_file_bytes:
|
| 621 |
book = read_book_bytes(st.session_state.dev_file_bytes)
|
|
@@ -625,9 +638,10 @@ if st.session_state.app_step == "dev":
|
|
| 625 |
st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
|
| 626 |
st.stop()
|
| 627 |
|
| 628 |
-
tr0 =
|
| 629 |
-
te0 =
|
| 630 |
|
|
|
|
| 631 |
if not (ensure_cols(tr0, FEATURES+[TARGET]) and ensure_cols(te0, FEATURES+[TARGET])):
|
| 632 |
st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True)
|
| 633 |
st.stop()
|
|
@@ -639,8 +653,10 @@ if st.session_state.app_step == "dev":
|
|
| 639 |
y_te = pd.to_numeric(te0[TARGET], errors="coerce")
|
| 640 |
|
| 641 |
# Drop rows with NA in y
|
| 642 |
-
mask_tr = np.isfinite(y_tr)
|
| 643 |
-
|
|
|
|
|
|
|
| 644 |
|
| 645 |
pipe = build_pipeline()
|
| 646 |
pipe.fit(X_tr, y_tr)
|
|
@@ -689,7 +705,7 @@ if st.session_state.app_step == "dev":
|
|
| 689 |
|
| 690 |
if "Train" in st.session_state.results or "Test" in st.session_state.results:
|
| 691 |
tab1, tab2 = st.tabs(["Training", "Testing"])
|
| 692 |
-
if "Train" in st.session_state.results:
|
| 693 |
with tab1: _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
|
| 694 |
if "Test" in st.session_state.results:
|
| 695 |
with tab2: _dev_block(st.session_state.results["Test"], st.session_state.results["m_test"])
|
|
@@ -706,25 +722,29 @@ if st.session_state.app_step == "validate":
|
|
| 706 |
if book:
|
| 707 |
df0 = next(iter(book.values()))
|
| 708 |
st.sidebar.caption(f"**Data loaded:** {up.name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
|
|
|
|
|
|
|
| 709 |
if st.sidebar.button("Preview data", use_container_width=True, disabled=(up is None)):
|
| 710 |
-
st.session_state.
|
|
|
|
|
|
|
|
|
|
| 711 |
go_btn = st.sidebar.button("Predict & Validate", type="primary", use_container_width=True)
|
| 712 |
if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
|
| 713 |
if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
|
| 714 |
|
| 715 |
-
sticky_header("Validate the Model", "Upload a dataset with the same **features** and **
|
| 716 |
-
|
| 717 |
-
# Preview on top
|
| 718 |
-
render_preview_top(None, upload_obj=up)
|
| 719 |
|
| 720 |
if go_btn and up is not None:
|
| 721 |
if st.session_state.fitted_model is None:
|
| 722 |
-
st.error("Please train the model first in Case Building.")
|
|
|
|
| 723 |
|
| 724 |
book = read_book_bytes(up.getvalue())
|
| 725 |
names = list(book.keys())
|
| 726 |
name = next((s for s in names if s.lower() in ("validation","validate","validation2","val","val2")), names[0])
|
| 727 |
-
df0 =
|
| 728 |
|
| 729 |
if not ensure_cols(df0, FEATURES+[TARGET]):
|
| 730 |
st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
|
|
@@ -793,22 +813,26 @@ if st.session_state.app_step == "predict":
|
|
| 793 |
if book:
|
| 794 |
df0 = next(iter(book.values()))
|
| 795 |
st.sidebar.caption(f"**Data loaded:** {up.name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
|
|
|
|
|
|
|
| 796 |
if st.sidebar.button("Preview data", use_container_width=True, disabled=(up is None)):
|
| 797 |
-
st.session_state.
|
|
|
|
|
|
|
|
|
|
| 798 |
go_btn = st.sidebar.button("Predict", type="primary", use_container_width=True)
|
| 799 |
if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
|
| 800 |
|
| 801 |
sticky_header("Prediction", "Upload a dataset with the 5 feature columns (no actual column).")
|
| 802 |
-
|
| 803 |
-
# Preview on top
|
| 804 |
-
render_preview_top(None, upload_obj=up)
|
| 805 |
|
| 806 |
if go_btn and up is not None:
|
| 807 |
if st.session_state.fitted_model is None:
|
| 808 |
-
st.error("Please train the model first in Case Building.")
|
|
|
|
| 809 |
|
| 810 |
book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
|
| 811 |
-
df0 =
|
| 812 |
if not ensure_cols(df0, FEATURES):
|
| 813 |
st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
|
| 814 |
df = df0.copy()
|
|
|
|
| 28 |
APP_NAME = "ST_GeoMech_SHmax"
|
| 29 |
TAGLINE = "Real-Time Maximum Horizontal Stress Prediction"
|
| 30 |
|
| 31 |
+
# -------- Canonical names (match your files) --------
|
| 32 |
FEATURES = ["Q (gpm)", "SPP (psi)", "T (kft.lbf)", "WOB (klbf)", "ROP (ft/h)"]
|
| 33 |
+
TARGET = "MaxStress_Actual" # <-- matches your sheet (was 'MaxStress', causing the error)
|
| 34 |
PRED_COL = "SHmax_Pred"
|
| 35 |
ACTUAL_COL = TARGET
|
| 36 |
TRANSFORM = "none" # "none" | "log10" | "ln"
|
| 37 |
UNITS = "Psi"
|
| 38 |
|
| 39 |
+
# ---- Fixed ("best") model params baked into the code ----
|
| 40 |
BEST_PARAMS = dict(
|
| 41 |
n_estimators=100,
|
| 42 |
max_depth=22,
|
|
|
|
| 81 |
]
|
| 82 |
|
| 83 |
# =========================
|
| 84 |
+
# Password gate (same as shmin)
|
| 85 |
# =========================
|
| 86 |
def inline_logo(path="logo.png") -> str:
|
| 87 |
try:
|
|
|
|
| 96 |
required = st.secrets.get("APP_PASSWORD", "")
|
| 97 |
except Exception:
|
| 98 |
required = os.environ.get("APP_PASSWORD", "")
|
| 99 |
+
|
| 100 |
if not required:
|
| 101 |
+
return # no password configured
|
| 102 |
+
|
| 103 |
if st.session_state.get("auth_ok", False):
|
| 104 |
return
|
| 105 |
+
|
| 106 |
st.sidebar.markdown(f"""
|
| 107 |
<div class="centered-container">
|
| 108 |
<img src="{inline_logo('logo.png')}" class="brand-logo">
|
|
|
|
| 119 |
st.error("Incorrect key.")
|
| 120 |
st.stop()
|
| 121 |
|
| 122 |
+
add_password_gate()
|
| 123 |
|
| 124 |
# =========================
|
| 125 |
# Utilities
|
|
|
|
| 189 |
def _nice_tick0(xmin: float, step: float = 0.1) -> float:
    """Snap ``xmin`` down to the nearest multiple of ``step``.

    Args:
        xmin: Lower bound of an axis range.
        step: Tick spacing to align to.

    Returns:
        ``step * floor(xmin / step)`` when both inputs are usable.
        ``xmin`` is returned unchanged when it is NaN/inf, or when ``step``
        is zero or non-finite.
    """
    # A zero step would raise ZeroDivisionError and a NaN step would make
    # math.floor raise ValueError; neither defines a tick grid, so fall back
    # to the raw bound instead of crashing the plot.
    if not np.isfinite(xmin) or not np.isfinite(step) or step == 0:
        return xmin
    return step * math.floor(xmin / step)
|
| 191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
# ---------- Transform helpers ----------
|
| 193 |
def _inv_transform(x: np.ndarray, transform: str) -> np.ndarray:
|
| 194 |
t = (transform or "none").lower()
|
|
|
|
| 329 |
disabled=(data is None), key=f"download_{phase_key}")
|
| 330 |
|
| 331 |
# =========================
|
| 332 |
+
# Plots (no decimals on X)
|
| 333 |
# =========================
|
| 334 |
def cross_plot_static(actual, pred):
|
| 335 |
a = pd.Series(actual, dtype=float)
|
|
|
|
| 348 |
ax.set_xticks(ticks); ax.set_yticks(ticks)
|
| 349 |
ax.set_aspect("equal", adjustable="box")
|
| 350 |
|
| 351 |
+
fmt = FuncFormatter(lambda x, _: f"{x:.0f}") # no decimals
|
| 352 |
ax.xaxis.set_major_formatter(fmt); ax.yaxis.set_major_formatter(fmt)
|
| 353 |
|
| 354 |
ax.set_xlabel(f"Actual Max Stress ({UNITS})", fontweight="bold", fontsize=10, color="black")
|
|
|
|
| 404 |
bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1),
|
| 405 |
legend_title_text=""
|
| 406 |
)
|
|
|
|
| 407 |
fig.update_xaxes(
|
| 408 |
title_text=f"Max Stress ({UNITS})",
|
| 409 |
title_font=dict(size=20, family=BOLD_FONT, color="#000"),
|
| 410 |
tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
|
| 411 |
side="top", range=[xmin, xmax],
|
| 412 |
+
ticks="outside",
|
| 413 |
+
tickformat=",.0f", # <— no decimals on ticks
|
| 414 |
+
tickmode="auto", tick0=tick0,
|
| 415 |
showline=True, linewidth=1.2, linecolor="#444", mirror=True,
|
| 416 |
showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
|
| 417 |
)
|
|
|
|
| 431 |
if n == 0:
|
| 432 |
fig, ax = plt.subplots(figsize=(4, 2))
|
| 433 |
ax.text(0.5, 0.5, "No selected columns", ha="center", va="center")
|
| 434 |
+
ax.axis("off")
|
| 435 |
+
return fig
|
| 436 |
|
| 437 |
depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
|
| 438 |
if depth_col is not None:
|
|
|
|
| 448 |
col_colors = {col: cmap(i % cmap.N) for i, col in enumerate(cols)}
|
| 449 |
|
| 450 |
fig, axes = plt.subplots(1, n, figsize=(2.4 * n, 7.0), sharey=True, dpi=100)
|
| 451 |
+
if n == 1:
|
| 452 |
+
axes = [axes]
|
| 453 |
+
|
| 454 |
for i, (ax, col) in enumerate(zip(axes, cols)):
|
| 455 |
x = pd.to_numeric(df[col], errors="coerce")
|
| 456 |
ax.plot(x, idx, '-', lw=1.6, color=col_colors[col])
|
| 457 |
ax.set_xlabel(col); ax.xaxis.set_label_position('top'); ax.xaxis.tick_top()
|
| 458 |
ax.set_ylim(y_max, y_min) # reversed depth down
|
| 459 |
ax.grid(True, linestyle=":", alpha=0.3)
|
| 460 |
+
if i == 0:
|
| 461 |
+
ax.set_ylabel(y_label)
|
| 462 |
else:
|
| 463 |
ax.tick_params(labelleft=False); ax.set_ylabel("")
|
| 464 |
+
|
| 465 |
fig.tight_layout()
|
| 466 |
return fig
|
| 467 |
|
|
|
|
| 469 |
# Fixed training pipeline
|
| 470 |
# =========================
|
| 471 |
def build_pipeline() -> Pipeline:
|
| 472 |
+
"""
|
| 473 |
+
Fixed, optimized pipeline:
|
| 474 |
+
- Numeric imputation (median)
|
| 475 |
+
- RandomForestRegressor with tuned params (BEST_PARAMS)
|
| 476 |
+
Trees don't need scaling; robust to feature distributions.
|
| 477 |
+
"""
|
| 478 |
model = RandomForestRegressor(**BEST_PARAMS)
|
| 479 |
pipe = Pipeline(steps=[
|
| 480 |
("imputer", SimpleImputer(strategy="median")),
|
|
|
|
| 492 |
st.session_state.setdefault("dev_file_bytes",b"")
|
| 493 |
st.session_state.setdefault("dev_file_loaded",False)
|
| 494 |
st.session_state.setdefault("dev_preview",False)
|
| 495 |
+
st.session_state.setdefault("fitted_model", None) # cache trained pipeline
|
| 496 |
+
|
| 497 |
+
# NEW: persistent top-of-page preview panel state (same as shmin)
|
| 498 |
+
st.session_state.setdefault("show_preview_panel", False)
|
| 499 |
+
st.session_state.setdefault("preview_book", {}) # parsed Excel sheets to preview
|
| 500 |
|
| 501 |
# =========================
|
| 502 |
# Sidebar branding
|
|
|
|
| 526 |
unsafe_allow_html=True
|
| 527 |
)
|
| 528 |
|
| 529 |
+
# ---------- Top-of-page Preview Panel ----------
|
| 530 |
+
def render_preview_panel():
    """Draw the data-preview panel when it has been switched on.

    Reads two ``st.session_state`` entries:

    * ``show_preview_panel`` -- when falsy, nothing is rendered.
    * ``preview_book`` -- mapping of sheet name to DataFrame to preview.

    With an empty book an info message and a "Hide preview" button are
    shown. Otherwise one tab per sheet is rendered (each holding a "Tracks"
    plot and a "Summary" statistics table) plus a trailing tab whose button
    hides the panel again.
    """
    if not st.session_state.get("show_preview_panel"):
        return

    st.markdown("## 🔎 Data preview")
    book = st.session_state.get("preview_book", {}) or {}
    if not book:
        st.info("No data loaded yet.")
        # The dismiss button sits in the right-hand half of a two-column row.
        with st.columns(2)[1]:
            if st.button("Hide preview"):
                _dismiss_preview_panel()
        return

    names = list(book.keys())
    tabs = st.tabs(names + ["✖ Hide preview"])
    # zip stops at the last sheet name, leaving the extra "hide" tab for below.
    for tab, name in zip(tabs, names):
        with tab:
            df = book[name]
            tracks_tab, summary_tab = st.tabs(["Tracks", "Summary"])
            with tracks_tab:
                st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)
            with summary_tab:
                feat_present = [c for c in FEATURES if c in df.columns]
                if not feat_present:
                    st.info("No feature columns found to summarize.")
                else:
                    df_centered_rounded(_feature_summary_table(df, feat_present))
    with tabs[-1]:
        if st.button("Hide preview", use_container_width=True):
            _dismiss_preview_panel()


def _dismiss_preview_panel():
    """Switch the preview panel off, drop the cached sheets and rerun the app."""
    st.session_state.show_preview_panel = False
    st.session_state.preview_book = {}
    st.rerun()


def _feature_summary_table(df, columns):
    """Return a Feature / Min / Max / Mean / Std table for ``columns`` of ``df``."""
    # Coerce to numeric first (as the track plot does for the same columns) so
    # stray text cells become NaN instead of breaking mean/std.
    numeric = df[columns].apply(pd.to_numeric, errors="coerce")
    return (
        numeric.agg(["min", "max", "mean", "std"])
        # agg() labels its rows with the lowercase function names, so the
        # rename keys must be lowercase for every header to be capitalised.
        .T.rename(columns={"min": "Min", "max": "Max", "mean": "Mean", "std": "Std"})
        .reset_index(names="Feature")
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
| 572 |
|
| 573 |
# =========================
|
| 574 |
# INTRO
|
|
|
|
| 578 |
st.markdown(f"This software is developed by *Smart Thinking AI-Solutions Team* to estimate **Maximum Horizontal Stress** ({UNITS}) from drilling/offset data.")
|
| 579 |
st.subheader("How It Works")
|
| 580 |
st.markdown(
|
| 581 |
+
"1) **Upload your data file** and click **Run Model** to fit the baked-in pipeline. \n"
|
| 582 |
"2) **Validate** on held-out wells (with actual). \n"
|
| 583 |
"3) **Predict** on wells without actual."
|
| 584 |
)
|
|
|
|
| 596 |
|
| 597 |
if st.session_state.app_step == "dev":
|
| 598 |
st.sidebar.header("Case Building")
|
| 599 |
+
up = st.sidebar.file_uploader("Upload Your Data File", type=["xlsx","xls"])
|
| 600 |
if up is not None:
|
| 601 |
st.session_state.dev_file_bytes = up.getvalue()
|
| 602 |
st.session_state.dev_file_name = up.name
|
|
|
|
| 610 |
df0 = next(iter(tmp.values()))
|
| 611 |
st.sidebar.caption(f"**Data loaded:** {st.session_state.dev_file_name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
|
| 612 |
|
| 613 |
+
# PREVIEW button -> show preview panel at top
|
| 614 |
if st.sidebar.button("Preview data", use_container_width=True, disabled=not st.session_state.dev_file_loaded):
|
| 615 |
+
st.session_state.preview_book = read_book_bytes(st.session_state.dev_file_bytes) if st.session_state.dev_file_bytes else {}
|
| 616 |
+
st.session_state.show_preview_panel = True
|
| 617 |
+
st.rerun()
|
| 618 |
|
| 619 |
run = st.sidebar.button("Run Model", type="primary", use_container_width=True)
|
| 620 |
if st.sidebar.button("Proceed to Validation ▶", use_container_width=True): st.session_state.app_step="validate"; st.rerun()
|
| 621 |
if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
|
| 622 |
|
| 623 |
+
if st.session_state.dev_file_loaded and st.session_state.show_preview_panel:
|
| 624 |
+
sticky_header("Case Building", "Previewed ✓ — now click **Run Model**.")
|
| 625 |
+
elif st.session_state.dev_file_loaded:
|
| 626 |
+
sticky_header("Case Building", "📄 **Preview uploaded data** using the sidebar button, then click **Run Model**.")
|
| 627 |
+
else:
|
| 628 |
+
sticky_header("Case Building", "**Upload your data to build a case, then run the model to review performance.**")
|
| 629 |
|
| 630 |
+
# Render the preview panel at the very top (above results)
|
| 631 |
+
render_preview_panel()
|
| 632 |
|
| 633 |
if run and st.session_state.dev_file_bytes:
|
| 634 |
book = read_book_bytes(st.session_state.dev_file_bytes)
|
|
|
|
| 638 |
st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
|
| 639 |
st.stop()
|
| 640 |
|
| 641 |
+
tr0 = book[sh_train].copy()
|
| 642 |
+
te0 = book[sh_test].copy()
|
| 643 |
|
| 644 |
+
# Ensure columns exist
|
| 645 |
if not (ensure_cols(tr0, FEATURES+[TARGET]) and ensure_cols(te0, FEATURES+[TARGET])):
|
| 646 |
st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True)
|
| 647 |
st.stop()
|
|
|
|
| 653 |
y_te = pd.to_numeric(te0[TARGET], errors="coerce")
|
| 654 |
|
| 655 |
# Drop rows with NA in y
|
| 656 |
+
mask_tr = np.isfinite(y_tr)
|
| 657 |
+
X_tr, y_tr = X_tr.loc[mask_tr], y_tr.loc[mask_tr]
|
| 658 |
+
mask_te = np.isfinite(y_te)
|
| 659 |
+
X_te, y_te = X_te.loc[mask_te], y_te.loc[mask_te]
|
| 660 |
|
| 661 |
pipe = build_pipeline()
|
| 662 |
pipe.fit(X_tr, y_tr)
|
|
|
|
| 705 |
|
| 706 |
if "Train" in st.session_state.results or "Test" in st.session_state.results:
|
| 707 |
tab1, tab2 = st.tabs(["Training", "Testing"])
|
| 708 |
+
if "Train" in st.session_state.results:
|
| 709 |
with tab1: _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
|
| 710 |
if "Test" in st.session_state.results:
|
| 711 |
with tab2: _dev_block(st.session_state.results["Test"], st.session_state.results["m_test"])
|
|
|
|
| 722 |
if book:
|
| 723 |
df0 = next(iter(book.values()))
|
| 724 |
st.sidebar.caption(f"**Data loaded:** {up.name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
|
| 725 |
+
|
| 726 |
+
# PREVIEW button -> show preview panel at top
|
| 727 |
if st.sidebar.button("Preview data", use_container_width=True, disabled=(up is None)):
|
| 728 |
+
st.session_state.preview_book = read_book_bytes(up.getvalue()) if up is not None else {}
|
| 729 |
+
st.session_state.show_preview_panel = True
|
| 730 |
+
st.rerun()
|
| 731 |
+
|
| 732 |
go_btn = st.sidebar.button("Predict & Validate", type="primary", use_container_width=True)
|
| 733 |
if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
|
| 734 |
if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
|
| 735 |
|
| 736 |
+
sticky_header("Validate the Model", "Upload a dataset with the same **features** and **MaxStress_Actual** to evaluate performance.")
|
| 737 |
+
render_preview_panel() # top-of-page preview
|
|
|
|
|
|
|
| 738 |
|
| 739 |
if go_btn and up is not None:
|
| 740 |
if st.session_state.fitted_model is None:
|
| 741 |
+
st.error("Please train the model first in Case Building.")
|
| 742 |
+
st.stop()
|
| 743 |
|
| 744 |
book = read_book_bytes(up.getvalue())
|
| 745 |
names = list(book.keys())
|
| 746 |
name = next((s for s in names if s.lower() in ("validation","validate","validation2","val","val2")), names[0])
|
| 747 |
+
df0 = book[name].copy()
|
| 748 |
|
| 749 |
if not ensure_cols(df0, FEATURES+[TARGET]):
|
| 750 |
st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
|
|
|
|
| 813 |
if book:
|
| 814 |
df0 = next(iter(book.values()))
|
| 815 |
st.sidebar.caption(f"**Data loaded:** {up.name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
|
| 816 |
+
|
| 817 |
+
# PREVIEW button -> show preview panel at top
|
| 818 |
if st.sidebar.button("Preview data", use_container_width=True, disabled=(up is None)):
|
| 819 |
+
st.session_state.preview_book = read_book_bytes(up.getvalue()) if up is not None else {}
|
| 820 |
+
st.session_state.show_preview_panel = True
|
| 821 |
+
st.rerun()
|
| 822 |
+
|
| 823 |
go_btn = st.sidebar.button("Predict", type="primary", use_container_width=True)
|
| 824 |
if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
|
| 825 |
|
| 826 |
sticky_header("Prediction", "Upload a dataset with the 5 feature columns (no actual column).")
|
| 827 |
+
render_preview_panel() # top-of-page preview
|
|
|
|
|
|
|
| 828 |
|
| 829 |
if go_btn and up is not None:
|
| 830 |
if st.session_state.fitted_model is None:
|
| 831 |
+
st.error("Please train the model first in Case Building.")
|
| 832 |
+
st.stop()
|
| 833 |
|
| 834 |
book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
|
| 835 |
+
df0 = book[name].copy()
|
| 836 |
if not ensure_cols(df0, FEATURES):
|
| 837 |
st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
|
| 838 |
df = df0.copy()
|