Update app.py
app.py (CHANGED)
@@ -175,7 +175,6 @@ def _abbr(name: str) -> str:
         "KPercent": "K", "K%": "K", "Potassium": "K",
         "AHT_90": "AHT90", "AHT90AverageHydrocarbonTool90°Phase": "AHT90",
     }
-    # preserve core mnemonics
     if n.upper() in {"GR", "DT", "RHOB"}: return n.upper() if n.upper() != "DT" else "DT"
     if n.upper() == "AHT90": return "AHT90"
     if n.upper() == "TNPH": return "TNPH"
@@ -188,12 +187,12 @@ def normalize_to_abbr(df: pd.DataFrame) -> pd.DataFrame:
     newcols = []
     for c in out.columns:
         ac = _abbr(c)
-        if ac in FEATURES:
+        if ac in FEATURES:
             newcols.append(ac)
         elif str(c).strip().lower() in {"toc", "toc (%)", "totalorganiccarbon"}:
             newcols.append(TARGET)
         elif "depth" in str(c).lower():
-            newcols.append("Depth")
+            newcols.append("Depth")
         else:
             newcols.append(str(c))
     out.columns = newcols
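For orientation, the renaming above works like this on a toy frame (a sketch; `FEATURES` and `TARGET` are module constants defined earlier in app.py, and the input columns here are hypothetical):

```python
import pandas as pd

# Hypothetical raw columns, named the way a LAS/Excel export might name them.
df = pd.DataFrame({
    "Potassium": [1.2],    # listed in the _abbr table above -> "K"
    "AHT_90": [14.0],      # listed in the _abbr table above -> "AHT90"
    "toc (%)": [2.3],      # case-insensitive match -> TARGET column
    "MD Depth": [1500.0],  # contains "depth" -> "Depth"
})
# normalize_to_abbr(df) keeps the data and renames the columns to
# ["K", "AHT90", TARGET, "Depth"]; unrecognized names pass through unchanged.
```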
@@ -201,43 +200,30 @@ def normalize_to_abbr(df: pd.DataFrame) -> pd.DataFrame:
 
 # ---- Model feature order + X builder (returns NumPy to bypass name checks) ----
 def _training_feature_order(model, fallback_features: list[str]) -> list[str]:
-    """Return the model's training feature order if available, else fallback."""
     names = list(getattr(model, "feature_names_in_", []))
     if names:
         return [str(n) for n in names]
     return list(fallback_features)
 
 def _make_X(df_raw: pd.DataFrame, model, fallback_features: list[str]) -> np.ndarray:
-    """
-    Returns a NumPy array with columns ordered exactly as in model training.
-    Using np.ndarray bypasses sklearn's feature-name validation.
-    """
     df_abbr = normalize_to_abbr(df_raw)
-    # mapping abbr -> actual column present
     colmap = { _abbr(c): c for c in df_abbr.columns }
-
     train_names = _training_feature_order(model, fallback_features)
-    order_cols = []
-    missing = []
+    order_cols, missing = [], []
     for nm in train_names:
         ab = _abbr(nm)
         if ab in colmap:
             order_cols.append(colmap[ab])
         else:
             missing.append(nm)
-
     if missing:
         st.markdown(
             '<div class="st-message-box st-error">Missing required columns for prediction (by model training): '
-            + ", ".join(missing) + '</div>',
-            unsafe_allow_html=True
+            + ", ".join(missing) + '</div>', unsafe_allow_html=True
         )
         st.stop()
-
     X_df = df_abbr[order_cols].apply(pd.to_numeric, errors="coerce")
-
-    # Safety: ensure plain ndarray (no pandas attrs)
-    return np.asarray(X_np, dtype=float)
+    return np.asarray(X_df.to_numpy(dtype=float, copy=False), dtype=float)
 
 def ensure_required_features(df: pd.DataFrame, model, fallback_features: list[str]) -> bool:
     df_abbr = normalize_to_abbr(df)
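The old `return np.asarray(X_np, dtype=float)` referenced a name that was never defined (`X_np`); the replacement converts `X_df` itself. `_training_feature_order` leans on a scikit-learn convention worth spelling out; a minimal sketch with hypothetical training data:

```python
import pandas as pd
from sklearn.linear_model import LinearRegression

# Estimators fitted on a DataFrame remember the column names they saw.
X_train = pd.DataFrame({"GR": [80.0, 95.0], "RHOB": [2.45, 2.50]})
model = LinearRegression().fit(X_train, [1.2, 1.8])
print(list(model.feature_names_in_))  # ['GR', 'RHOB'] -- the training order

# Estimators fitted on a bare ndarray have no feature_names_in_ at all,
# which is why the helper falls back to the FEATURES constant via getattr.
```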
@@ -250,16 +236,10 @@ def ensure_required_features(df: pd.DataFrame, model, fallback_features: list[str]) -> bool:
     return True
 
 def safe_predict(model, df_raw: pd.DataFrame, fallback_features: list[str]) -> np.ndarray:
-    """
-    Centralized, name-check-proof prediction:
-    - Builds X in training order
-    - Converts to NumPy (bypasses sklearn feature-name validation)
-    """
     X = _make_X(df_raw, model, fallback_features)
     try:
         return model.predict(X)
-    except Exception
-        # As a last resort, try basic float casting
+    except Exception:
         return model.predict(np.asarray(X, dtype=float))
 
 def find_sheet(book, names):
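For context on why `_make_X` hands back a plain ndarray (a sketch, not code from app.py; the model and columns are hypothetical): in recent scikit-learn, passing a DataFrame at predict time triggers validation of column names against `feature_names_in_`, while an ndarray is checked only for feature count.

```python
import pandas as pd
from sklearn.linear_model import LinearRegression

model = LinearRegression().fit(
    pd.DataFrame({"GR": [1.0, 2.0], "DT": [3.0, 4.0]}), [0.0, 1.0]
)
df = pd.DataFrame({"DT": [3.5], "GR": [1.5]})  # same features, different order

# model.predict(df) raises a ValueError about feature names not matching
# those seen during fit (order matters). Reordering to the training order
# and dropping down to NumPy sidesteps the name check entirely:
X = df[list(model.feature_names_in_)].to_numpy(dtype=float)
print(model.predict(X))
```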
@@ -282,40 +262,44 @@ def df_centered_rounded(df: pd.DataFrame, hide_index=True):
     )
     st.dataframe(styler, use_container_width=True, hide_index=hide_index)
 
+# ---- Excel writer engine (robust to missing xlsxwriter) ----
+def _excel_engine() -> str | None:
+    try:
+        import xlsxwriter  # noqa: F401
+        return "xlsxwriter"
+    except Exception:
+        try:
+            import openpyxl  # noqa: F401
+            return "openpyxl"
+        except Exception:
+            return None  # let pandas choose if possible
+
 # =========================
 # Cross plot (Matplotlib)
 # =========================
 def cross_plot_static(actual, pred):
     a = pd.Series(actual, dtype=float)
     p = pd.Series(pred, dtype=float)
-
     lo = float(min(a.min(), p.min()))
     hi = float(max(a.max(), p.max()))
     pad = 0.03 * (hi - lo if hi > lo else 1.0)
     lo2, hi2 = lo - pad, hi + pad
     ticks = np.linspace(lo2, hi2, 5)
-
     dpi = 110
     fig, ax = plt.subplots(figsize=(CROSS_W / dpi, CROSS_H / dpi), dpi=dpi, constrained_layout=False)
-
     ax.scatter(a, p, s=14, c=COLORS["pred"], alpha=0.9, linewidths=0)
     ax.plot([lo2, hi2], [lo2, hi2], linestyle="--", linewidth=1.2, color=COLORS["ref"])
-
     ax.set_xlim(lo2, hi2); ax.set_ylim(lo2, hi2)
     ax.set_xticks(ticks); ax.set_yticks(ticks)
     ax.set_aspect("equal", adjustable="box")
-
     fmt = FuncFormatter(lambda x, _: f"{x:,.1f}")
     ax.xaxis.set_major_formatter(fmt); ax.yaxis.set_major_formatter(fmt)
-
     ax.set_xlabel("Actual TOC (%)", fontweight="bold", fontsize=10, color="black")
     ax.set_ylabel("Predicted TOC (%)", fontweight="bold", fontsize=10, color="black")
     ax.tick_params(labelsize=6, colors="black")
-
     ax.grid(True, linestyle=":", alpha=0.3)
     for spine in ax.spines.values():
         spine.set_linewidth(1.1); spine.set_color("#444")
-
     fig.subplots_adjust(left=0.16, bottom=0.16, right=0.98, top=0.98)
     return fig
 
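The new `_excel_engine` helper is consumed by the export blocks later in this diff; a usage sketch of the pattern (hypothetical sheet data):

```python
import io
import pandas as pd

bio = io.BytesIO()
engine = _excel_engine()  # "xlsxwriter", "openpyxl", or None (let pandas pick)
with pd.ExcelWriter(bio, engine=engine) as writer:
    pd.DataFrame({"TOC_pred": [2.1, 2.4]}).to_excel(
        writer, sheet_name="Prediction", index=False
    )
bio.seek(0)
data = bio.getvalue()  # bytes ready for st.download_button(data=...)
```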
@@ -327,11 +311,10 @@ def track_plot(df, include_actual=True):
     depth_col = next((c for c in df.columns if 'depth' in str(c).lower() or c == "Depth"), None)
     if depth_col is not None:
         y = pd.Series(df[depth_col]).astype(float); ylab = depth_col
-        y_range = [float(y.max()), float(y.min())]
+        y_range = [float(y.max()), float(y.min())]
     else:
         y = pd.Series(np.arange(1, len(df) + 1)); ylab = "Point Index"
         y_range = [float(y.max()), float(y.min())]
-
     x_series = pd.Series(df.get(PRED_COL, pd.Series(dtype=float))).astype(float)
     if include_actual and TARGET in df.columns:
         x_series = pd.concat([x_series, pd.Series(df[TARGET]).astype(float)], ignore_index=True)
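`y_range = [max, min]` is deliberate: log tracks plot depth increasing downward, so the y-axis is fed in reversed order. The same effect in isolation (a sketch with hypothetical values):

```python
import plotly.graph_objects as go

fig = go.Figure(go.Scatter(x=[2.1, 2.4, 2.2], y=[1500.0, 1510.0, 1520.0], mode="lines"))
fig.update_yaxes(range=[1520.0, 1500.0])  # [max, min] -> depth grows downward
```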
@@ -339,7 +322,6 @@
     x_pad = 0.03 * (x_hi - x_lo if x_hi > x_lo else 1.0)
     xmin, xmax = x_lo - x_pad, x_hi + x_pad
     tick0 = _nice_tick0(xmin, step=0.5)
-
     fig = go.Figure()
     if PRED_COL in df.columns:
         fig.add_trace(go.Scatter(
@@ -355,7 +337,6 @@
             name=f"{TARGET} (actual)",
             hovertemplate=f"{TARGET}: "+"%{x:.2f}<br>"+ylab+": %{y}<extra></extra>"
         ))
-
     fig.update_layout(
         height=TRACK_H, width=TRACK_W, autosize=False,
         paper_bgcolor="#fff", plot_bgcolor="#fff",
@@ -393,7 +374,6 @@ def preview_tracks(df: pd.DataFrame, cols: list[str]):
         fig, ax = plt.subplots(figsize=(4, 2))
         ax.text(0.5, 0.5, "No selected columns", ha="center", va="center"); ax.axis("off")
         return fig
-
     depth_col = next((c for c in df.columns if 'depth' in str(c).lower() or c == "Depth"), None)
     if depth_col is not None:
         idx = pd.to_numeric(df[depth_col], errors="coerce")
@@ -401,20 +381,17 @@ def preview_tracks(df: pd.DataFrame, cols: list[str]):
     else:
         idx = pd.Series(np.arange(1, len(df) + 1))
         y_label = "Point Index"
-
     cmap = plt.get_cmap("tab20")
     col_colors = {col: cmap(i % cmap.N) for i, col in enumerate(cols)}
-
     fig, axes = plt.subplots(1, n, figsize=(2.3 * n, 7.0), sharey=True, dpi=100)
     if n == 1: axes = [axes]
-
     y_min, y_max = float(idx.min()), float(idx.max())
     for i, (ax, col) in enumerate(zip(axes, cols)):
         x = pd.to_numeric(df[col], errors="coerce")
         ax.plot(x, idx, '-', lw=1.8, color=col_colors[col])
-        ax.set_xlabel(col)
+        ax.set_xlabel(col)
         ax.xaxis.set_label_position('top'); ax.xaxis.tick_top()
-        ax.set_ylim(y_max, y_min)
+        ax.set_ylim(y_max, y_min)
         ax.grid(True, linestyle=":", alpha=0.3)
         if i == 0:
             ax.set_ylabel(y_label)
@@ -530,6 +507,20 @@ if st.session_state.app_step == "intro":
         "2) Click **Run Model** to compute metrics and plots. \n"
         "3) **Proceed to Validation** (with actual TOC) or **Proceed to Prediction** (no TOC)."
     )
+    st.subheader("Input Features Used by the Model")
+    st.markdown("""
+The TOC estimation model uses the following eight well-logging features:
+
+- **AHT90 (Average Hydrocarbon Tool 90° Phase)**
+- **DT (Delta-T Sonic Travel Time)**
+- **GR (Gamma Ray)**
+- **K (Potassium)**
+- **RHOB (Bulk Density)**
+- **TNPH (Thermal Neutron Porosity)**
+- **Th (Thorium)**
+- **Ur (Uranium)**
+""")
+
     if st.button("Start Showcase", type="primary"):
         st.session_state.app_step = "dev"; st.rerun()
 
@@ -582,7 +573,6 @@ if st.session_state.app_step == "dev":
         tr = normalize_to_abbr(tr_raw)
         te = normalize_to_abbr(te_raw)
 
-        # ---- SAFE PREDICT (NumPy only) ----
         tr[PRED_COL] = safe_predict(model, tr_raw, FEATURES)
         te[PRED_COL] = safe_predict(model, te_raw, FEATURES)
 
@@ -629,14 +619,6 @@ if st.session_state.app_step == "dev":
         st.divider()
         st.markdown("### Export to Excel")
 
-        # Export builder
-        def _excel_engine() -> str:
-            try:
-                import xlsxwriter  # noqa: F401
-                return "xlsxwriter"
-            except Exception:
-                return "openpyxl"
-
         def _excel_safe_name(name: str) -> str:
             bad = '[]:*?/\\'
             safe = ''.join('_' if ch in bad else ch for ch in str(name))
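For reference, Excel rejects sheet names containing `[]:*?/\` or longer than 31 characters, which is what `_excel_safe_name` (together with the `k[:31]` slices later in this diff) guards against; a quick sketch with a hypothetical name:

```python
name = "Validation/OOR [flagged]"
bad = '[]:*?/\\'
safe = ''.join('_' if ch in bad else ch for ch in name)[:31]
print(safe)  # Validation_OOR _flagged_
```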
@@ -728,7 +710,8 @@
             sheets["Info"] = info; order.append("Info")
 
             bio = io.BytesIO()
-
+            engine = _excel_engine()
+            with pd.ExcelWriter(bio, engine=engine) as writer:
                 for name in order:
                     sheets[name].to_excel(writer, sheet_name=_excel_safe_name(name), index=False)
             bio.seek(0)
@@ -830,15 +813,7 @@ if st.session_state.app_step == "validate":
 
         st.divider()
         st.markdown("### Export to Excel")
-
-        def _available_sections_val():
-            res = st.session_state.get("results", {})
-            sections = ["Validation","Validation_Metrics","Validation_Summary"]
-            if isinstance(res.get("oor_tbl"), pd.DataFrame) and not res["oor_tbl"].empty:
-                sections += ["Validation_OOR"]
-            sections += ["Info"]
-            return sections
-        # Minimal export for validation
+
         def _export_val():
             res = st.session_state.get("results", {})
             sheets = {}
@@ -854,11 +829,13 @@
                 {"Key":"ExportedAt","Value":datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
             ])
             bio = io.BytesIO()
-
+            engine = _excel_engine()
+            with pd.ExcelWriter(bio, engine=engine) as writer:
                 for k,v in sheets.items():
                     v.to_excel(writer, sheet_name=k[:31], index=False)
             bio.seek(0)
             return bio.getvalue(), f"TOC_Validation_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
+
         data_x, fn_x = _export_val()
         st.download_button("⬇️ Export Excel", data=data_x, file_name=fn_x,
                            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
@@ -929,7 +906,6 @@ if st.session_state.app_step == "predict":
             use_container_width=False, config={"displayModeBar": False, "scrollZoom": True})
 
         st.divider()
-        # Simple export
         def _export_pred():
             res = st.session_state.get("results", {})
             sheets = {"Prediction": res["PredictOnly"], "Prediction_Summary": pd.DataFrame([sv])}
@@ -940,11 +916,13 @@
                 {"Key":"ExportedAt","Value":datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
             ])
             bio = io.BytesIO()
-
+            engine = _excel_engine()
+            with pd.ExcelWriter(bio, engine=engine) as writer:
                 for k,v in sheets.items():
                     v.to_excel(writer, sheet_name=k[:31], index=False)
             bio.seek(0)
             return bio.getvalue(), f"TOC_Prediction_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
+
         data_x, fn_x = _export_pred()
         st.download_button("⬇️ Export Excel", data=data_x, file_name=fn_x,
                            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")