Update app.py
app.py (CHANGED)
@@ -175,7 +175,6 @@ def _abbr(name: str) -> str:
         "KPercent": "K", "K%": "K", "Potassium": "K",
         "AHT_90": "AHT90", "AHT90AverageHydrocarbonTool90°Phase": "AHT90",
     }
-    # preserve core mnemonics
     if n.upper() in {"GR", "DT", "RHOB"}: return n.upper() if n.upper() != "DT" else "DT"
     if n.upper() == "AHT90": return "AHT90"
     if n.upper() == "TNPH": return "TNPH"
@@ -188,12 +187,12 @@ def normalize_to_abbr(df: pd.DataFrame) -> pd.DataFrame:
     newcols = []
     for c in out.columns:
         ac = _abbr(c)
-        if ac in FEATURES:
+        if ac in FEATURES:
             newcols.append(ac)
         elif str(c).strip().lower() in {"toc", "toc (%)", "totalorganiccarbon"}:
             newcols.append(TARGET)
         elif "depth" in str(c).lower():
-            newcols.append("Depth")
+            newcols.append("Depth")
         else:
             newcols.append(str(c))
     out.columns = newcols
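For orientation, the renaming above works like this on a toy frame (a sketch; `FEATURES` and `TARGET` are module constants defined earlier in app.py, and the input columns here are hypothetical):

```python
import pandas as pd

# Hypothetical raw columns, named the way a LAS/Excel export might name them.
df = pd.DataFrame({
    "Potassium": [1.2],    # listed in the _abbr table above -> "K"
    "AHT_90": [14.0],      # listed in the _abbr table above -> "AHT90"
    "toc (%)": [2.3],      # case-insensitive match -> TARGET column
    "MD Depth": [1500.0],  # contains "depth" -> "Depth"
})
# normalize_to_abbr(df) keeps the data and renames the columns to
# ["K", "AHT90", TARGET, "Depth"]; unrecognized names pass through unchanged.
```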
@@ -201,43 +200,30 @@ def normalize_to_abbr(df: pd.DataFrame) -> pd.DataFrame:
 
 # ---- Model feature order + X builder (returns NumPy to bypass name checks) ----
 def _training_feature_order(model, fallback_features: list[str]) -> list[str]:
-    """Return the model's training feature order if available, else fallback."""
     names = list(getattr(model, "feature_names_in_", []))
     if names:
         return [str(n) for n in names]
     return list(fallback_features)
 
 def _make_X(df_raw: pd.DataFrame, model, fallback_features: list[str]) -> np.ndarray:
-    """
-    Returns a NumPy array with columns ordered exactly as in model training.
-    Using np.ndarray bypasses sklearn's feature-name validation.
-    """
     df_abbr = normalize_to_abbr(df_raw)
-    # mapping abbr -> actual column present
     colmap = { _abbr(c): c for c in df_abbr.columns }
-
     train_names = _training_feature_order(model, fallback_features)
-    order_cols = []
-    missing = []
+    order_cols, missing = [], []
     for nm in train_names:
         ab = _abbr(nm)
         if ab in colmap:
             order_cols.append(colmap[ab])
         else:
             missing.append(nm)
-
     if missing:
         st.markdown(
             '<div class="st-message-box st-error">Missing required columns for prediction (by model training): '
-            + ", ".join(missing) + '</div>',
-            unsafe_allow_html=True
+            + ", ".join(missing) + '</div>', unsafe_allow_html=True
         )
         st.stop()
-
     X_df = df_abbr[order_cols].apply(pd.to_numeric, errors="coerce")
-
-    # Safety: ensure plain ndarray (no pandas attrs)
-    return np.asarray(X_np, dtype=float)
+    return np.asarray(X_df.to_numpy(dtype=float, copy=False), dtype=float)
 
 def ensure_required_features(df: pd.DataFrame, model, fallback_features: list[str]) -> bool:
     df_abbr = normalize_to_abbr(df)
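The old `return np.asarray(X_np, dtype=float)` referenced a name that was never defined (`X_np`); the replacement converts `X_df` itself. `_training_feature_order` leans on a scikit-learn convention worth spelling out; a minimal sketch with hypothetical training data:

```python
import pandas as pd
from sklearn.linear_model import LinearRegression

# Estimators fitted on a DataFrame remember the column names they saw.
X_train = pd.DataFrame({"GR": [80.0, 95.0], "RHOB": [2.45, 2.50]})
model = LinearRegression().fit(X_train, [1.2, 1.8])
print(list(model.feature_names_in_))  # ['GR', 'RHOB'] -- the training order

# Estimators fitted on a bare ndarray have no feature_names_in_ at all,
# which is why the helper falls back to the FEATURES constant via getattr.
```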
@@ -250,16 +236,10 @@ def ensure_required_features(df: pd.DataFrame, model, fallback_features: list[str]) -> bool:
     return True
 
 def safe_predict(model, df_raw: pd.DataFrame, fallback_features: list[str]) -> np.ndarray:
-    """
-    Centralized, name-check-proof prediction:
-    - Builds X in training order
-    - Converts to NumPy (bypasses sklearn feature-name validation)
-    """
     X = _make_X(df_raw, model, fallback_features)
     try:
         return model.predict(X)
-    except Exception
-        # As a last resort, try basic float casting
+    except Exception:
         return model.predict(np.asarray(X, dtype=float))
 
 def find_sheet(book, names):
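For context on why `_make_X` hands back a plain ndarray (a sketch, not code from app.py; the model and columns are hypothetical): in recent scikit-learn, passing a DataFrame at predict time triggers validation of column names against `feature_names_in_`, while an ndarray is checked only for feature count.

```python
import pandas as pd
from sklearn.linear_model import LinearRegression

model = LinearRegression().fit(
    pd.DataFrame({"GR": [1.0, 2.0], "DT": [3.0, 4.0]}), [0.0, 1.0]
)
df = pd.DataFrame({"DT": [3.5], "GR": [1.5]})  # same features, different order

# model.predict(df) raises a ValueError about feature names not matching
# those seen during fit (order matters). Reordering to the training order
# and dropping down to NumPy sidesteps the name check entirely:
X = df[list(model.feature_names_in_)].to_numpy(dtype=float)
print(model.predict(X))
```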
@@ -282,40 +262,44 @@ def df_centered_rounded(df: pd.DataFrame, hide_index=True):
     )
     st.dataframe(styler, use_container_width=True, hide_index=hide_index)
 
+# ---- Excel writer engine (robust to missing xlsxwriter) ----
+def _excel_engine() -> str | None:
+    try:
+        import xlsxwriter  # noqa: F401
+        return "xlsxwriter"
+    except Exception:
+        try:
+            import openpyxl  # noqa: F401
+            return "openpyxl"
+        except Exception:
+            return None  # let pandas choose if possible
+
 # =========================
 # Cross plot (Matplotlib)
 # =========================
 def cross_plot_static(actual, pred):
     a = pd.Series(actual, dtype=float)
     p = pd.Series(pred, dtype=float)
-
     lo = float(min(a.min(), p.min()))
     hi = float(max(a.max(), p.max()))
     pad = 0.03 * (hi - lo if hi > lo else 1.0)
     lo2, hi2 = lo - pad, hi + pad
     ticks = np.linspace(lo2, hi2, 5)
-
     dpi = 110
     fig, ax = plt.subplots(figsize=(CROSS_W / dpi, CROSS_H / dpi), dpi=dpi, constrained_layout=False)
-
     ax.scatter(a, p, s=14, c=COLORS["pred"], alpha=0.9, linewidths=0)
     ax.plot([lo2, hi2], [lo2, hi2], linestyle="--", linewidth=1.2, color=COLORS["ref"])
-
     ax.set_xlim(lo2, hi2); ax.set_ylim(lo2, hi2)
     ax.set_xticks(ticks); ax.set_yticks(ticks)
     ax.set_aspect("equal", adjustable="box")
-
     fmt = FuncFormatter(lambda x, _: f"{x:,.1f}")
     ax.xaxis.set_major_formatter(fmt); ax.yaxis.set_major_formatter(fmt)
-
     ax.set_xlabel("Actual TOC (%)", fontweight="bold", fontsize=10, color="black")
     ax.set_ylabel("Predicted TOC (%)", fontweight="bold", fontsize=10, color="black")
     ax.tick_params(labelsize=6, colors="black")
-
     ax.grid(True, linestyle=":", alpha=0.3)
     for spine in ax.spines.values():
         spine.set_linewidth(1.1); spine.set_color("#444")
-
     fig.subplots_adjust(left=0.16, bottom=0.16, right=0.98, top=0.98)
     return fig
 
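The new `_excel_engine` helper is consumed by the export blocks later in this diff; a usage sketch of the pattern (hypothetical sheet data):

```python
import io
import pandas as pd

bio = io.BytesIO()
engine = _excel_engine()  # "xlsxwriter", "openpyxl", or None (let pandas pick)
with pd.ExcelWriter(bio, engine=engine) as writer:
    pd.DataFrame({"TOC_pred": [2.1, 2.4]}).to_excel(
        writer, sheet_name="Prediction", index=False
    )
bio.seek(0)
data = bio.getvalue()  # bytes ready for st.download_button(data=...)
```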
@@ -327,11 +311,10 @@ def track_plot(df, include_actual=True):
     depth_col = next((c for c in df.columns if 'depth' in str(c).lower() or c == "Depth"), None)
     if depth_col is not None:
         y = pd.Series(df[depth_col]).astype(float); ylab = depth_col
-        y_range = [float(y.max()), float(y.min())]
+        y_range = [float(y.max()), float(y.min())]
     else:
         y = pd.Series(np.arange(1, len(df) + 1)); ylab = "Point Index"
         y_range = [float(y.max()), float(y.min())]
-
     x_series = pd.Series(df.get(PRED_COL, pd.Series(dtype=float))).astype(float)
     if include_actual and TARGET in df.columns:
         x_series = pd.concat([x_series, pd.Series(df[TARGET]).astype(float)], ignore_index=True)
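`y_range = [max, min]` is deliberate: log tracks plot depth increasing downward, so the y-axis is fed in reversed order. The same effect in isolation (a sketch with hypothetical values):

```python
import plotly.graph_objects as go

fig = go.Figure(go.Scatter(x=[2.1, 2.4, 2.2], y=[1500.0, 1510.0, 1520.0], mode="lines"))
fig.update_yaxes(range=[1520.0, 1500.0])  # [max, min] -> depth grows downward
```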
@@ -339,7 +322,6 @@
     x_pad = 0.03 * (x_hi - x_lo if x_hi > x_lo else 1.0)
     xmin, xmax = x_lo - x_pad, x_hi + x_pad
     tick0 = _nice_tick0(xmin, step=0.5)
-
     fig = go.Figure()
     if PRED_COL in df.columns:
         fig.add_trace(go.Scatter(
@@ -355,7 +337,6 @@
             name=f"{TARGET} (actual)",
             hovertemplate=f"{TARGET}: "+"%{x:.2f}<br>"+ylab+": %{y}<extra></extra>"
         ))
-
     fig.update_layout(
         height=TRACK_H, width=TRACK_W, autosize=False,
         paper_bgcolor="#fff", plot_bgcolor="#fff",
@@ -393,7 +374,6 @@ def preview_tracks(df: pd.DataFrame, cols: list[str]):
         fig, ax = plt.subplots(figsize=(4, 2))
         ax.text(0.5, 0.5, "No selected columns", ha="center", va="center"); ax.axis("off")
         return fig
-
     depth_col = next((c for c in df.columns if 'depth' in str(c).lower() or c == "Depth"), None)
     if depth_col is not None:
         idx = pd.to_numeric(df[depth_col], errors="coerce")
@@ -401,20 +381,17 @@ def preview_tracks(df: pd.DataFrame, cols: list[str]):
     else:
         idx = pd.Series(np.arange(1, len(df) + 1))
         y_label = "Point Index"
-
     cmap = plt.get_cmap("tab20")
     col_colors = {col: cmap(i % cmap.N) for i, col in enumerate(cols)}
-
     fig, axes = plt.subplots(1, n, figsize=(2.3 * n, 7.0), sharey=True, dpi=100)
     if n == 1: axes = [axes]
-
     y_min, y_max = float(idx.min()), float(idx.max())
     for i, (ax, col) in enumerate(zip(axes, cols)):
         x = pd.to_numeric(df[col], errors="coerce")
         ax.plot(x, idx, '-', lw=1.8, color=col_colors[col])
-        ax.set_xlabel(col)
+        ax.set_xlabel(col)
         ax.xaxis.set_label_position('top'); ax.xaxis.tick_top()
-        ax.set_ylim(y_max, y_min)
+        ax.set_ylim(y_max, y_min)
         ax.grid(True, linestyle=":", alpha=0.3)
         if i == 0:
             ax.set_ylabel(y_label)
@@ -530,6 +507,20 @@ if st.session_state.app_step == "intro":
         "2) Click **Run Model** to compute metrics and plots. \n"
         "3) **Proceed to Validation** (with actual TOC) or **Proceed to Prediction** (no TOC)."
     )
+    st.subheader("Input Features Used by the Model")
+    st.markdown("""
+The TOC estimation model uses the following eight well-logging features:
+
+- **AHT90 (Average Hydrocarbon Tool 90° Phase)**
+- **DT (Delta-T Sonic Travel Time)**
+- **GR (Gamma Ray)**
+- **K (Potassium)**
+- **RHOB (Bulk Density)**
+- **TNPH (Thermal Neutron Porosity)**
+- **Th (Thorium)**
+- **Ur (Uranium)**
+""")
+
     if st.button("Start Showcase", type="primary"):
         st.session_state.app_step = "dev"; st.rerun()
 
@@ -582,7 +573,6 @@ if st.session_state.app_step == "dev":
         tr = normalize_to_abbr(tr_raw)
         te = normalize_to_abbr(te_raw)
 
-        # ---- SAFE PREDICT (NumPy only) ----
         tr[PRED_COL] = safe_predict(model, tr_raw, FEATURES)
         te[PRED_COL] = safe_predict(model, te_raw, FEATURES)
 
@@ -629,14 +619,6 @@ if st.session_state.app_step == "dev":
         st.divider()
         st.markdown("### Export to Excel")
 
-        # Export builder
-        def _excel_engine() -> str:
-            try:
-                import xlsxwriter  # noqa: F401
-                return "xlsxwriter"
-            except Exception:
-                return "openpyxl"
-
         def _excel_safe_name(name: str) -> str:
             bad = '[]:*?/\\'
             safe = ''.join('_' if ch in bad else ch for ch in str(name))
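For reference, Excel rejects sheet names containing `[]:*?/\` or longer than 31 characters, which is what `_excel_safe_name` (together with the `k[:31]` slices later in this diff) guards against; a quick sketch with a hypothetical name:

```python
name = "Validation/OOR [flagged]"
bad = '[]:*?/\\'
safe = ''.join('_' if ch in bad else ch for ch in name)[:31]
print(safe)  # Validation_OOR _flagged_
```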
@@ -728,7 +710,8 @@
             sheets["Info"] = info; order.append("Info")
 
             bio = io.BytesIO()
-
+            engine = _excel_engine()
+            with pd.ExcelWriter(bio, engine=engine) as writer:
                 for name in order:
                     sheets[name].to_excel(writer, sheet_name=_excel_safe_name(name), index=False)
             bio.seek(0)
@@ -830,15 +813,7 @@ if st.session_state.app_step == "validate":
 
         st.divider()
         st.markdown("### Export to Excel")
-
-        def _available_sections_val():
-            res = st.session_state.get("results", {})
-            sections = ["Validation","Validation_Metrics","Validation_Summary"]
-            if isinstance(res.get("oor_tbl"), pd.DataFrame) and not res["oor_tbl"].empty:
-                sections += ["Validation_OOR"]
-            sections += ["Info"]
-            return sections
-        # Minimal export for validation
+
         def _export_val():
             res = st.session_state.get("results", {})
             sheets = {}
@@ -854,11 +829,13 @@
                 {"Key":"ExportedAt","Value":datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
             ])
             bio = io.BytesIO()
-
+            engine = _excel_engine()
+            with pd.ExcelWriter(bio, engine=engine) as writer:
                 for k,v in sheets.items():
                     v.to_excel(writer, sheet_name=k[:31], index=False)
             bio.seek(0)
             return bio.getvalue(), f"TOC_Validation_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
+
         data_x, fn_x = _export_val()
         st.download_button("⬇️ Export Excel", data=data_x, file_name=fn_x,
                            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
@@ -929,7 +906,6 @@ if st.session_state.app_step == "predict":
             use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
 
         st.divider()
-        # Simple export
         def _export_pred():
             res = st.session_state.get("results", {})
             sheets = {"Prediction": res["PredictOnly"], "Prediction_Summary": pd.DataFrame([sv])}
@@ -940,11 +916,13 @@
                 {"Key":"ExportedAt","Value":datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
             ])
             bio = io.BytesIO()
-
+            engine = _excel_engine()
+            with pd.ExcelWriter(bio, engine=engine) as writer:
                 for k,v in sheets.items():
                     v.to_excel(writer, sheet_name=k[:31], index=False)
             bio.seek(0)
             return bio.getvalue(), f"TOC_Prediction_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
+
         data_x, fn_x = _export_pred()
         st.download_button("⬇️ Export Excel", data=data_x, file_name=fn_x,
                            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")