Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
# app.py — ST_Sonic_Ts (Shear Slowness Ts)
|
|
|
|
| 2 |
import io, json, os, base64, math
|
| 3 |
from pathlib import Path
|
| 4 |
import streamlit as st
|
|
@@ -22,9 +23,16 @@ from sklearn.metrics import mean_squared_error, mean_absolute_error
|
|
| 22 |
APP_NAME = "ST_Log_Sonic (Ts)"
|
| 23 |
TAGLINE = "Real-Time Shear Slowness (Ts) Prediction"
|
| 24 |
|
| 25 |
-
# Defaults (
|
| 26 |
-
FEATURES = [
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
PRED_COL = "Ts_Pred"
|
| 29 |
|
| 30 |
MODELS_DIR = Path("models")
|
|
@@ -32,7 +40,7 @@ DEFAULT_MODEL = MODELS_DIR / "ts_model.joblib"
|
|
| 32 |
MODEL_FALLBACKS = [MODELS_DIR / "model.joblib", MODELS_DIR / "model.pkl"]
|
| 33 |
COLORS = {"pred": "#1f77b4", "actual": "#f2b702", "ref": "#5a5a5a"}
|
| 34 |
|
| 35 |
-
#
|
| 36 |
STRICT_VERSION_CHECK = False
|
| 37 |
|
| 38 |
# ---- Plot sizing ----
|
|
@@ -50,14 +58,11 @@ st.set_page_config(page_title=APP_NAME, page_icon="logo.png", layout="wide")
|
|
| 50 |
st.markdown("""
|
| 51 |
<style>
|
| 52 |
.brand-logo { width: 200px; height: auto; object-fit: contain; }
|
| 53 |
-
.sidebar-header { display:flex; align-items:center; gap:12px; }
|
| 54 |
-
.sidebar-header .text h1 { font-size: 1.05rem; margin:0; line-height:1.1; }
|
| 55 |
-
.sidebar-header .text .tag { font-size: .85rem; color:#6b7280; margin:2px 0 0; }
|
| 56 |
.centered-container { display: flex; flex-direction: column; align-items: center; text-align: center; }
|
| 57 |
.st-message-box { background-color: #f0f2f6; color: #333; padding: 10px; border-radius: 10px; border: 1px solid #e6e9ef; }
|
| 58 |
.st-message-box.st-success { background-color: #d4edda; color: #155724; border-color: #c3e6cb; }
|
| 59 |
.st-message-box.st-warning { background-color: #fff3cd; color: #856404; border-color: #ffeeba; }
|
| 60 |
-
.st-message-box.st-error
|
| 61 |
.main .block-container { overflow: unset !important; }
|
| 62 |
div[data-testid="stVerticalBlock"] { overflow: unset !important; }
|
| 63 |
div[data-testid="stExpander"] > details > summary {
|
|
@@ -140,47 +145,49 @@ def parse_excel(data_bytes: bytes):
|
|
| 140 |
xl = pd.ExcelFile(bio)
|
| 141 |
return {sh: xl.parse(sh) for sh in xl.sheet_names}
|
| 142 |
|
| 143 |
-
def read_book_bytes(b: bytes):
|
| 144 |
return parse_excel(b) if b else {}
|
| 145 |
|
| 146 |
# ---- Canonical feature aliasing ------------------------------------------
|
| 147 |
def _build_alias_map(canonical_features: list[str], target_name: str) -> dict:
|
| 148 |
"""
|
| 149 |
-
|
| 150 |
-
|
| 151 |
"""
|
| 152 |
-
def pick(expected_list,
|
| 153 |
-
|
| 154 |
-
for v in family_variants:
|
| 155 |
if v in expected_list:
|
| 156 |
return v
|
| 157 |
-
return
|
| 158 |
|
| 159 |
-
can_WOB = pick(canonical_features, ["WOB, klbf","WOB(klbf)","WOB
|
| 160 |
-
can_TORQUE = pick(canonical_features, ["Torque(kft.lbf)","TORQUE(kft.lbf)"])
|
| 161 |
-
can_SPP = pick(canonical_features, ["SPP(psi)"])
|
| 162 |
-
can_RPM = pick(canonical_features, ["RPM(1/min)","RPM
|
| 163 |
-
can_ROP = pick(canonical_features, ["ROP(ft/h)","ROP
|
| 164 |
-
can_FR = pick(canonical_features, ["Flow Rate
|
|
|
|
| 165 |
|
| 166 |
alias = {
|
| 167 |
-
#
|
| 168 |
-
"WOB
|
| 169 |
-
|
| 170 |
-
"
|
| 171 |
-
|
| 172 |
-
"
|
| 173 |
-
|
| 174 |
-
"
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
#
|
| 180 |
-
"
|
| 181 |
-
|
| 182 |
-
"Ts
|
| 183 |
-
"
|
|
|
|
|
|
|
| 184 |
}
|
| 185 |
return alias
|
| 186 |
|
|
@@ -188,7 +195,6 @@ def _normalize_columns(df: pd.DataFrame, canonical_features: list[str], target_n
|
|
| 188 |
out = df.copy()
|
| 189 |
out.columns = [str(c).strip().replace(" ,", ",").replace(", ", ", ").replace(" ", " ") for c in out.columns]
|
| 190 |
alias = _build_alias_map(canonical_features, target_name)
|
| 191 |
-
# only rename keys that actually exist
|
| 192 |
actual = {k: v for k, v in alias.items() if k in out.columns and k != v}
|
| 193 |
return out.rename(columns=actual)
|
| 194 |
|
|
@@ -219,6 +225,17 @@ def df_centered_rounded(df: pd.DataFrame, hide_index=True):
|
|
| 219 |
)
|
| 220 |
st.dataframe(styler, use_container_width=True, hide_index=hide_index)
|
| 221 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
# === Excel export helpers =================================================
|
| 223 |
def _excel_engine() -> str:
|
| 224 |
try:
|
|
@@ -497,27 +514,54 @@ def track_plot(df, include_actual=True):
|
|
| 497 |
|
| 498 |
# ---------- Preview (matplotlib) ----------
|
| 499 |
def preview_tracks(df: pd.DataFrame, cols: list[str]):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 500 |
cols = [c for c in cols if c in df.columns]
|
| 501 |
n = len(cols)
|
| 502 |
if n == 0:
|
| 503 |
fig, ax = plt.subplots(figsize=(4, 2))
|
| 504 |
-
ax.text(0.5,0.5,"No selected columns",ha="center",va="center")
|
|
|
|
| 505 |
return fig
|
| 506 |
-
|
| 507 |
-
|
| 508 |
depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
|
| 509 |
if depth_col is not None:
|
| 510 |
-
idx = pd.to_numeric(df[depth_col], errors="coerce")
|
|
|
|
| 511 |
else:
|
| 512 |
-
idx = pd.Series(np.arange(1, len(df) + 1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 513 |
for i, (ax, col) in enumerate(zip(axes, cols)):
|
| 514 |
-
|
| 515 |
-
ax.
|
| 516 |
-
ax.
|
|
|
|
|
|
|
|
|
|
| 517 |
ax.grid(True, linestyle=":", alpha=0.3)
|
| 518 |
-
|
|
|
|
|
|
|
| 519 |
else:
|
| 520 |
-
ax.tick_params(labelleft=False)
|
|
|
|
|
|
|
| 521 |
fig.tight_layout()
|
| 522 |
return fig
|
| 523 |
|
|
@@ -551,7 +595,7 @@ except Exception as e:
|
|
| 551 |
st.error(f"Failed to load model: {e}")
|
| 552 |
st.stop()
|
| 553 |
|
| 554 |
-
#
|
| 555 |
meta = {}
|
| 556 |
meta_candidates = [MODELS_DIR / "ts_meta.json", MODELS_DIR / "meta.json", MODELS_DIR / "ym_meta.json"]
|
| 557 |
meta_path = next((p for p in meta_candidates if p.exists()), None)
|
|
@@ -564,7 +608,7 @@ if meta_path:
|
|
| 564 |
except Exception as e:
|
| 565 |
st.warning(f"Could not parse meta file ({meta_path.name}): {e}")
|
| 566 |
|
| 567 |
-
# Optional: version banner
|
| 568 |
if STRICT_VERSION_CHECK and meta.get("versions"):
|
| 569 |
import numpy as _np, sklearn as _skl
|
| 570 |
mv = meta["versions"]; msg=[]
|
|
@@ -671,7 +715,6 @@ if st.session_state.app_step == "dev":
|
|
| 671 |
st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training/training2 and Test/Testing/testing2 sheets.</div>', unsafe_allow_html=True)
|
| 672 |
st.stop()
|
| 673 |
|
| 674 |
-
# Use meta FEATURES as canonical when normalizing
|
| 675 |
tr = _normalize_columns(book[sh_train].copy(), FEATURES, TARGET)
|
| 676 |
te = _normalize_columns(book[sh_test].copy(), FEATURES, TARGET)
|
| 677 |
|
|
@@ -679,9 +722,9 @@ if st.session_state.app_step == "dev":
|
|
| 679 |
st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True)
|
| 680 |
st.stop()
|
| 681 |
|
| 682 |
-
# Predict with exactly the
|
| 683 |
-
tr[PRED_COL] = model.predict(tr
|
| 684 |
-
te[PRED_COL] = model.predict(te
|
| 685 |
|
| 686 |
st.session_state.results["Train"]=tr; st.session_state.results["Test"]=te
|
| 687 |
st.session_state.results["m_train"]={
|
|
@@ -746,9 +789,9 @@ if st.session_state.app_step == "validate":
|
|
| 746 |
book = read_book_bytes(up.getvalue())
|
| 747 |
name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
|
| 748 |
df = _normalize_columns(book[name].copy(), FEATURES, TARGET)
|
| 749 |
-
if not ensure_cols(df, FEATURES+[TARGET]):
|
| 750 |
st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
|
| 751 |
-
df[PRED_COL] = model.predict(df
|
| 752 |
st.session_state.results["Validate"]=df
|
| 753 |
|
| 754 |
ranges = st.session_state.train_ranges; oor_pct = 0.0; tbl=None
|
|
@@ -759,7 +802,9 @@ if st.session_state.app_step == "validate":
|
|
| 759 |
tbl = df.loc[any_viol, FEATURES].copy()
|
| 760 |
for c in FEATURES:
|
| 761 |
if pd.api.types.is_numeric_dtype(tbl[c]): tbl[c] = tbl[c].round(2)
|
| 762 |
-
tbl["Violations"] = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).loc[any_viol].apply(
|
|
|
|
|
|
|
| 763 |
st.session_state.results["m_val"]={
|
| 764 |
"R": pearson_r(df[TARGET], df[PRED_COL]),
|
| 765 |
"RMSE": rmse(df[TARGET], df[PRED_COL]),
|
|
@@ -818,9 +863,9 @@ if st.session_state.app_step == "predict":
|
|
| 818 |
if go_btn and up is not None:
|
| 819 |
book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
|
| 820 |
df = _normalize_columns(book[name].copy(), FEATURES, TARGET)
|
| 821 |
-
if not ensure_cols(df, FEATURES):
|
| 822 |
st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
|
| 823 |
-
df[PRED_COL] = model.predict(df
|
| 824 |
st.session_state.results["PredictOnly"]=df
|
| 825 |
|
| 826 |
ranges = st.session_state.train_ranges; oor_pct = 0.0
|
|
|
|
| 1 |
# app.py — ST_Sonic_Ts (Shear Slowness Ts)
|
| 2 |
+
|
| 3 |
import io, json, os, base64, math
|
| 4 |
from pathlib import Path
|
| 5 |
import streamlit as st
|
|
|
|
| 23 |
APP_NAME = "ST_Log_Sonic (Ts)"
|
| 24 |
TAGLINE = "Real-Time Shear Slowness (Ts) Prediction"
|
| 25 |
|
| 26 |
+
# Defaults (overridden by ts_meta.json if present)
|
| 27 |
+
FEATURES = [
|
| 28 |
+
"WOB (klbf)",
|
| 29 |
+
"Torque (kft.lbf)",
|
| 30 |
+
"SPP (psi)",
|
| 31 |
+
"RPM (1/min)",
|
| 32 |
+
"ROP (ft/h)",
|
| 33 |
+
"Flow Rate (gpm)",
|
| 34 |
+
]
|
| 35 |
+
TARGET = "Ts (us/ft_Actual)"
|
| 36 |
PRED_COL = "Ts_Pred"
|
| 37 |
|
| 38 |
MODELS_DIR = Path("models")
|
|
|
|
| 40 |
MODEL_FALLBACKS = [MODELS_DIR / "model.joblib", MODELS_DIR / "model.pkl"]
|
| 41 |
COLORS = {"pred": "#1f77b4", "actual": "#f2b702", "ref": "#5a5a5a"}
|
| 42 |
|
| 43 |
+
# Optional env banner from meta
|
| 44 |
STRICT_VERSION_CHECK = False
|
| 45 |
|
| 46 |
# ---- Plot sizing ----
|
|
|
|
| 58 |
st.markdown("""
|
| 59 |
<style>
|
| 60 |
.brand-logo { width: 200px; height: auto; object-fit: contain; }
|
|
|
|
|
|
|
|
|
|
| 61 |
.centered-container { display: flex; flex-direction: column; align-items: center; text-align: center; }
|
| 62 |
.st-message-box { background-color: #f0f2f6; color: #333; padding: 10px; border-radius: 10px; border: 1px solid #e6e9ef; }
|
| 63 |
.st-message-box.st-success { background-color: #d4edda; color: #155724; border-color: #c3e6cb; }
|
| 64 |
.st-message-box.st-warning { background-color: #fff3cd; color: #856404; border-color: #ffeeba; }
|
| 65 |
+
.st-message-box.st-error { background-color: #f8d7da; color: #721c24; border-color: #f5c6cb; }
|
| 66 |
.main .block-container { overflow: unset !important; }
|
| 67 |
div[data-testid="stVerticalBlock"] { overflow: unset !important; }
|
| 68 |
div[data-testid="stExpander"] > details > summary {
|
|
|
|
| 145 |
xl = pd.ExcelFile(bio)
|
| 146 |
return {sh: xl.parse(sh) for sh in xl.sheet_names}
|
| 147 |
|
| 148 |
+
def read_book_bytes(b: bytes):
    """Parse uploaded workbook bytes via ``parse_excel``.

    An empty or missing payload short-circuits to an empty dict so callers
    never hand nothing to the Excel parser.
    """
    if not b:
        return {}
    return parse_excel(b)
|
| 150 |
|
| 151 |
# ---- Canonical feature aliasing ------------------------------------------
|
| 152 |
def _build_alias_map(canonical_features: list[str], target_name: str) -> dict:
|
| 153 |
"""
|
| 154 |
+
Map common header variants -> the *canonical* names in canonical_features.
|
| 155 |
+
Whatever appears in canonical_features (from ts_meta.json) wins.
|
| 156 |
"""
|
| 157 |
+
def pick(expected_list, variants):
|
| 158 |
+
for v in variants:
|
|
|
|
| 159 |
if v in expected_list:
|
| 160 |
return v
|
| 161 |
+
return variants[0]
|
| 162 |
|
| 163 |
+
can_WOB = pick(canonical_features, ["WOB (klbf)", "WOB, klbf", "WOB(klbf)", "WOB( klbf)"])
|
| 164 |
+
can_TORQUE = pick(canonical_features, ["Torque (kft.lbf)", "Torque(kft.lbf)", "TORQUE(kft.lbf)"])
|
| 165 |
+
can_SPP = pick(canonical_features, ["SPP (psi)", "SPP(psi)"])
|
| 166 |
+
can_RPM = pick(canonical_features, ["RPM (1/min)", "RPM(1/min)"])
|
| 167 |
+
can_ROP = pick(canonical_features, ["ROP (ft/h)", "ROP(ft/h)"])
|
| 168 |
+
can_FR = pick(canonical_features, ["Flow Rate (gpm)", "Flow Rate, gpm", "Flow Rate,gpm", "Flow Rate , gpm", "Fow Rate, gpm", "Fow Rate, gpm "])
|
| 169 |
+
can_DEPTH = "Depth (ft)"
|
| 170 |
|
| 171 |
alias = {
|
| 172 |
+
# Features
|
| 173 |
+
"WOB (klbf)": can_WOB, "WOB, klbf": can_WOB, "WOB(klbf)": can_WOB, "WOB( klbf)": can_WOB,
|
| 174 |
+
"Torque (kft.lbf)": can_TORQUE, "Torque(kft.lbf)": can_TORQUE, "TORQUE(kft.lbf)": can_TORQUE,
|
| 175 |
+
"SPP (psi)": can_SPP, "SPP(psi)": can_SPP,
|
| 176 |
+
"RPM (1/min)": can_RPM, "RPM(1/min)": can_RPM,
|
| 177 |
+
"ROP (ft/h)": can_ROP, "ROP(ft/h)": can_ROP,
|
| 178 |
+
"Flow Rate (gpm)": can_FR, "Flow Rate, gpm": can_FR, "Flow Rate,gpm": can_FR, "Flow Rate , gpm": can_FR,
|
| 179 |
+
"Fow Rate, gpm": can_FR, "Fow Rate, gpm ": can_FR,
|
| 180 |
+
|
| 181 |
+
# Depth (plot only)
|
| 182 |
+
"Depth (ft)": can_DEPTH, "Depth, ft": can_DEPTH, "Depth(ft)": can_DEPTH, "DEPTH, ft": can_DEPTH,
|
| 183 |
+
|
| 184 |
+
# Target family
|
| 185 |
+
"Ts (us/ft_Actual)": target_name,
|
| 186 |
+
"Ts,us/ft_Actual": target_name,
|
| 187 |
+
"Ts, us/ft_Actual": target_name,
|
| 188 |
+
"Ts": target_name,
|
| 189 |
+
"TS_Actual": target_name,
|
| 190 |
+
"Ts (us/ft)_Actual": target_name,
|
| 191 |
}
|
| 192 |
return alias
|
| 193 |
|
|
|
|
| 195 |
out = df.copy()
|
| 196 |
out.columns = [str(c).strip().replace(" ,", ",").replace(", ", ", ").replace(" ", " ") for c in out.columns]
|
| 197 |
alias = _build_alias_map(canonical_features, target_name)
|
|
|
|
| 198 |
actual = {k: v for k, v in alias.items() if k in out.columns and k != v}
|
| 199 |
return out.rename(columns=actual)
|
| 200 |
|
|
|
|
| 225 |
)
|
| 226 |
st.dataframe(styler, use_container_width=True, hide_index=hide_index)
|
| 227 |
|
| 228 |
+
# ---------- Build X exactly as trained ----------
|
| 229 |
+
def _make_X(df: pd.DataFrame, features: list[str]) -> pd.DataFrame:
|
| 230 |
+
"""
|
| 231 |
+
Reindex columns to the exact training feature order and coerce to numeric.
|
| 232 |
+
Prevents scikit-learn 'feature names should match' errors.
|
| 233 |
+
"""
|
| 234 |
+
X = df.reindex(columns=features, copy=False)
|
| 235 |
+
for c in X.columns:
|
| 236 |
+
X[c] = pd.to_numeric(X[c], errors="coerce")
|
| 237 |
+
return X
|
| 238 |
+
|
| 239 |
# === Excel export helpers =================================================
|
| 240 |
def _excel_engine() -> str:
|
| 241 |
try:
|
|
|
|
| 514 |
|
| 515 |
# ---------- Preview (matplotlib) ----------
|
| 516 |
def preview_tracks(df: pd.DataFrame, cols: list[str]):
    """
    Quick-look multi-track preview (matplotlib).

    - one subplot per requested column that exists in ``df``
    - a distinct colour per column, taken from the "tab20" palette
    - shared & reversed Y-axis (depth increasing downwards)

    The Y values come from the first column whose name contains "depth"
    (case-insensitive). When there is no such column, or it holds no numeric
    values at all, a 1-based point index is used instead.

    Returns the matplotlib figure. If none of ``cols`` is present in ``df``,
    a small placeholder figure showing "No selected columns" is returned.
    """
    cols = [c for c in cols if c in df.columns]
    n = len(cols)
    if n == 0:
        fig, ax = plt.subplots(figsize=(4, 2))
        ax.text(0.5, 0.5, "No selected columns", ha="center", va="center")
        ax.axis("off")
        return fig

    # Depth or fallback to index
    depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
    idx = None
    if depth_col is not None:
        depth = pd.to_numeric(df[depth_col], errors="coerce")
        # An all-NaN depth column would give NaN axis limits below; treat it
        # the same as "no depth column" and plot against the point index.
        if depth.notna().any():
            idx = depth
            y_label = depth_col
    if idx is None:
        idx = pd.Series(np.arange(1, len(df) + 1))
        y_label = "Point Index"

    y_min, y_max = float(idx.min()), float(idx.max())
    # Empty input leaves min/max as NaN; matplotlib rejects NaN axis limits.
    has_limits = bool(np.isfinite(y_min) and np.isfinite(y_max))

    # Stable qualitative palette
    cmap = plt.get_cmap("tab20")
    col_colors = {col: cmap(i % cmap.N) for i, col in enumerate(cols)}

    fig, axes = plt.subplots(1, n, figsize=(2.3 * n, 7.0), sharey=True, dpi=100)
    if n == 1:
        # plt.subplots returns a bare Axes for a single subplot
        axes = [axes]

    for i, (ax, col) in enumerate(zip(axes, cols)):
        x = pd.to_numeric(df[col], errors="coerce")
        ax.plot(x, idx, '-', lw=1.8, color=col_colors[col])
        ax.set_xlabel(col)
        ax.xaxis.set_label_position('top')
        ax.xaxis.tick_top()
        if has_limits:
            ax.set_ylim(y_max, y_min)  # reversed Y (Depth down)
        ax.grid(True, linestyle=":", alpha=0.3)

        if i == 0:
            ax.set_ylabel(y_label)
        else:
            # Only the left-most track carries the Y tick labels and title
            ax.tick_params(labelleft=False)
            ax.set_ylabel("")

    fig.tight_layout()
    return fig
|
| 567 |
|
|
|
|
| 595 |
st.error(f"Failed to load model: {e}")
|
| 596 |
st.stop()
|
| 597 |
|
| 598 |
+
# Load meta (prefer Ts-specific)
|
| 599 |
meta = {}
|
| 600 |
meta_candidates = [MODELS_DIR / "ts_meta.json", MODELS_DIR / "meta.json", MODELS_DIR / "ym_meta.json"]
|
| 601 |
meta_path = next((p for p in meta_candidates if p.exists()), None)
|
|
|
|
| 608 |
except Exception as e:
|
| 609 |
st.warning(f"Could not parse meta file ({meta_path.name}): {e}")
|
| 610 |
|
| 611 |
+
# Optional: version banner
|
| 612 |
if STRICT_VERSION_CHECK and meta.get("versions"):
|
| 613 |
import numpy as _np, sklearn as _skl
|
| 614 |
mv = meta["versions"]; msg=[]
|
|
|
|
| 715 |
st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training/training2 and Test/Testing/testing2 sheets.</div>', unsafe_allow_html=True)
|
| 716 |
st.stop()
|
| 717 |
|
|
|
|
| 718 |
tr = _normalize_columns(book[sh_train].copy(), FEATURES, TARGET)
|
| 719 |
te = _normalize_columns(book[sh_test].copy(), FEATURES, TARGET)
|
| 720 |
|
|
|
|
| 722 |
st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True)
|
| 723 |
st.stop()
|
| 724 |
|
| 725 |
+
# Predict with exactly the training feature order
|
| 726 |
+
tr[PRED_COL] = model.predict(_make_X(tr, FEATURES))
|
| 727 |
+
te[PRED_COL] = model.predict(_make_X(te, FEATURES))
|
| 728 |
|
| 729 |
st.session_state.results["Train"]=tr; st.session_state.results["Test"]=te
|
| 730 |
st.session_state.results["m_train"]={
|
|
|
|
| 789 |
book = read_book_bytes(up.getvalue())
|
| 790 |
name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
|
| 791 |
df = _normalize_columns(book[name].copy(), FEATURES, TARGET)
|
| 792 |
+
if not ensure_cols(df, FEATURES+[TARGET]):
|
| 793 |
st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
|
| 794 |
+
df[PRED_COL] = model.predict(_make_X(df, FEATURES))
|
| 795 |
st.session_state.results["Validate"]=df
|
| 796 |
|
| 797 |
ranges = st.session_state.train_ranges; oor_pct = 0.0; tbl=None
|
|
|
|
| 802 |
tbl = df.loc[any_viol, FEATURES].copy()
|
| 803 |
for c in FEATURES:
|
| 804 |
if pd.api.types.is_numeric_dtype(tbl[c]): tbl[c] = tbl[c].round(2)
|
| 805 |
+
tbl["Violations"] = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).loc[any_viol].apply(
|
| 806 |
+
lambda r:", ".join([c for c,v in r.items() if v]), axis=1
|
| 807 |
+
)
|
| 808 |
st.session_state.results["m_val"]={
|
| 809 |
"R": pearson_r(df[TARGET], df[PRED_COL]),
|
| 810 |
"RMSE": rmse(df[TARGET], df[PRED_COL]),
|
|
|
|
| 863 |
if go_btn and up is not None:
|
| 864 |
book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
|
| 865 |
df = _normalize_columns(book[name].copy(), FEATURES, TARGET)
|
| 866 |
+
if not ensure_cols(df, FEATURES):
|
| 867 |
st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
|
| 868 |
+
df[PRED_COL] = model.predict(_make_X(df, FEATURES))
|
| 869 |
st.session_state.results["PredictOnly"]=df
|
| 870 |
|
| 871 |
ranges = st.session_state.train_ranges; oor_pct = 0.0
|