GR

Paused

App Files Community

UCS2014 committed on Sep 2, 2025

Commit

b6e23ab

verified ·

1 Parent(s): b914b72

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -28

app.py CHANGED Viewed

@@ -20,10 +20,12 @@ from sklearn.metrics import mean_squared_error, mean_absolute_error
 # =========================
 APP_NAME = "ST_GR"
 TAGLINE  = "Gamma Ray Prediction"
 # If meta.json is present, these will be overridden
 FEATURES = ["GPM", "SPP", "RPM", "WOB", "T", "ROP"]
-TARGET   = "log(GR)"     # or "GR" if your model predicts GR directly
-PRED_POSTPROCESS = "exp" # or None if direct GR
 MODELS_DIR = Path("models")
 DEFAULT_MODEL = MODELS_DIR / "gr_rf.joblib"
@@ -63,7 +65,7 @@ div[data-testid="stVerticalBlock"] { overflow: unset !important; }
 </style>
 """, unsafe_allow_html=True)
-# Hide uploader helper text ("Drag and drop file here", limits, etc.)
 st.markdown("""
 <style>
 section[data-testid="stFileUploader"] div[data-testid="stMarkdownContainer"]{display:none !important;}
@@ -166,7 +168,13 @@ def parse_excel(data_bytes: bytes):
     xl = pd.ExcelFile(bio)
     return {sh: xl.parse(sh) for sh in xl.sheet_names}
-def read_book_bytes(b: bytes): return parse_excel(b) if b else {}
 def ensure_cols(df, cols):
     miss = [c for c in cols if c not in df.columns]
@@ -211,10 +219,8 @@ def to_actual_series(df: pd.DataFrame, target_col: str, actual_col_hint: str, tr
     """
     if actual_col_hint and actual_col_hint in df.columns:
         return pd.Series(df[actual_col_hint], dtype=float)
-    # else, if target exists, invert:
     if target_col in df.columns:
         return pd.Series(inverse_target(np.asarray(df[target_col], dtype=float), transform), dtype=float)
-    # fallback: if a column named "GR" exists, use it
     if "GR" in df.columns:
         return pd.Series(df["GR"], dtype=float)
     raise ValueError("Cannot find actual GR column or target to invert.")
@@ -222,8 +228,7 @@ def to_actual_series(df: pd.DataFrame, target_col: str, actual_col_hint: str, tr
 # =========================
 # Cross plot (Matplotlib) — auto limits for GR
 # =========================
-def _nice_bounds(arr_min, arr_max, n_ticks=5):
-    # pick a "nice" range and step for GR (typically 0–200+ API)
     if not np.isfinite(arr_min) or not np.isfinite(arr_max):
         return 0.0, 100.0, 20.0
     span = arr_max - arr_min
@@ -241,7 +246,6 @@ def cross_plot_static(actual, pred):
     a = pd.Series(actual, dtype=float)
     p = pd.Series(pred,   dtype=float)
-    # auto bounds & ticks for GR
     lo = min(a.min(), p.min())
     hi = max(a.max(), p.max())
     fixed_min, fixed_max, step = _nice_bounds(lo, hi, n_ticks=6)
@@ -262,7 +266,7 @@ def cross_plot_static(actual, pred):
     ax.set_ylim(fixed_min, fixed_max)
     ax.set_xticks(ticks)
     ax.set_yticks(ticks)
-    ax.set_aspect("equal", adjustable="box")  # true 1:1
     fmt = FuncFormatter(lambda x, _: f"{int(x):,}")
     ax.xaxis.set_major_formatter(fmt)
@@ -517,7 +521,7 @@ if st.session_state.app_step == "dev":
         if sh_train is None or sh_test is None:
             st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
             st.stop()
-        tr = book[sh_train].copy(); te = book[sh_test].copy()
         if not (ensure_cols(tr, FEATURES) and ensure_cols(te, FEATURES)):
             st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True); st.stop()
@@ -562,9 +566,9 @@ if st.session_state.app_step == "dev":
         col_track, col_cross = st.columns([2, 3], gap="large")
         with col_track:
             st.plotly_chart(
-                track_plot(df.rename(columns={"GR_Actual":"GR"}), include_actual=True,
-                           pred_col="GR_Pred", actual_col="GR"),
                 use_container_width=False,
                 config={"displayModeBar": False, "scrollZoom": True}
             )
@@ -600,16 +604,18 @@ if st.session_state.app_step == "validate":
     if go_btn and up is not None:
         book = read_book_bytes(up.getvalue())
         name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
-        df = book[name].copy()
-        if not ensure_cols(df, FEATURES): st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True); st.stop()
         pred_raw = model.predict(df[FEATURES])
         df["GR_Pred"] = inverse_target(np.asarray(pred_raw, dtype=float), TARGET_TRANSFORM)
-        # actual GR
         try:
             df["GR_Actual"] = to_actual_series(df, TARGET, ACTUAL_COL, TARGET_TRANSFORM)
         except Exception:
-            st.markdown('<div class="st-message-box st-error">Validation sheet must include actual GR (or a target column that can be inverse-transformed).</div>', unsafe_allow_html=True); st.stop()
         st.session_state.results["Validate"]=df
@@ -685,8 +691,10 @@ if st.session_state.app_step == "predict":
     if go_btn and up is not None:
         book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
-        df = book[name].copy()
-        if not ensure_cols(df, FEATURES): st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True); st.stop()
         pred_raw = model.predict(df[FEATURES])
         df["GR_Pred"] = inverse_target(np.asarray(pred_raw, dtype=float), TARGET_TRANSFORM)
@@ -724,8 +732,7 @@ if st.session_state.app_step == "predict":
             st.caption("**★ OOR** = % of rows whose input features fall outside the training min–max range.")
         with col_right:
             st.plotly_chart(
-                track_plot(df.rename(columns={"GR_Pred":"GR_Pred"}), include_actual=False,
-                           pred_col="GR_Pred", actual_col="GR"),
                 use_container_width=False, config={"displayModeBar": False, "scrollZoom": True}
             )
@@ -747,15 +754,23 @@ if st.session_state.show_preview_modal:
             tabs = st.tabs(names)
             for t, name in zip(tabs, names):
                 with t:
-                    df = book_to_preview[name]
                     t1, t2 = st.tabs(["Tracks", "Summary"])
                     with t1:
-                        st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)
                     with t2:
-                        tbl = (df[FEATURES]
-                                 .agg(['min','max','mean','std'])
-                                 .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"}))
-                        df_centered_rounded(tbl.reset_index(names="Feature"))
     st.session_state.show_preview_modal = False
 # =========================
@@ -767,4 +782,4 @@ st.markdown("""
 <div style='text-align:center;color:#6b7280;font-size:0.8em;'>
     © 2024 Smart Thinking AI-Solutions Team. All rights reserved.
 </div>
-""", unsafe_allow_html=True)

 # =========================
 APP_NAME = "ST_GR"
 TAGLINE  = "Gamma Ray Prediction"
 # If meta.json is present, these will be overridden
 FEATURES = ["GPM", "SPP", "RPM", "WOB", "T", "ROP"]
+TARGET   = "log(GR)"       # or "GR" if your model predicts GR directly
+TARGET_TRANSFORM = "ln"    # "ln" (exp back), "log10" (10**x), or "none"
+ACTUAL_COL = "GR"          # where the actual API values live (if present)
 MODELS_DIR = Path("models")
 DEFAULT_MODEL = MODELS_DIR / "gr_rf.joblib"
 </style>
 """, unsafe_allow_html=True)
+# Hide uploader helper text
 st.markdown("""
 <style>
 section[data-testid="stFileUploader"] div[data-testid="stMarkdownContainer"]{display:none !important;}
     xl = pd.ExcelFile(bio)
     return {sh: xl.parse(sh) for sh in xl.sheet_names}
+def read_book_bytes(b: bytes):
+    return parse_excel(b) if b else {}
+def normalize_df(df: pd.DataFrame) -> pd.DataFrame:
+    out = df.copy()
+    out.columns = [str(c).strip() for c in out.columns]
+    return out
 def ensure_cols(df, cols):
     miss = [c for c in cols if c not in df.columns]
     """
     if actual_col_hint and actual_col_hint in df.columns:
         return pd.Series(df[actual_col_hint], dtype=float)
     if target_col in df.columns:
         return pd.Series(inverse_target(np.asarray(df[target_col], dtype=float), transform), dtype=float)
     if "GR" in df.columns:
         return pd.Series(df["GR"], dtype=float)
     raise ValueError("Cannot find actual GR column or target to invert.")
 # =========================
 # Cross plot (Matplotlib) — auto limits for GR
 # =========================
+def _nice_bounds(arr_min, arr_max, n_ticks=6):
     if not np.isfinite(arr_min) or not np.isfinite(arr_max):
         return 0.0, 100.0, 20.0
     span = arr_max - arr_min
     a = pd.Series(actual, dtype=float)
     p = pd.Series(pred,   dtype=float)
     lo = min(a.min(), p.min())
     hi = max(a.max(), p.max())
     fixed_min, fixed_max, step = _nice_bounds(lo, hi, n_ticks=6)
     ax.set_ylim(fixed_min, fixed_max)
     ax.set_xticks(ticks)
     ax.set_yticks(ticks)
+    ax.set_aspect("equal", adjustable="box")
     fmt = FuncFormatter(lambda x, _: f"{int(x):,}")
     ax.xaxis.set_major_formatter(fmt)
         if sh_train is None or sh_test is None:
             st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
             st.stop()
+        tr = normalize_df(book[sh_train].copy()); te = normalize_df(book[sh_test].copy())
         if not (ensure_cols(tr, FEATURES) and ensure_cols(te, FEATURES)):
             st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True); st.stop()
         col_track, col_cross = st.columns([2, 3], gap="large")
         with col_track:
+            df_for_plot = df.rename(columns={"GR_Actual":"GR"})
             st.plotly_chart(
+                track_plot(df_for_plot, include_actual=True, pred_col="GR_Pred", actual_col="GR"),
                 use_container_width=False,
                 config={"displayModeBar": False, "scrollZoom": True}
             )
     if go_btn and up is not None:
         book = read_book_bytes(up.getvalue())
         name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
+        df = normalize_df(book[name].copy())
+        if not ensure_cols(df, FEATURES):
+            st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True);
+            st.stop()
         pred_raw = model.predict(df[FEATURES])
         df["GR_Pred"] = inverse_target(np.asarray(pred_raw, dtype=float), TARGET_TRANSFORM)
         try:
             df["GR_Actual"] = to_actual_series(df, TARGET, ACTUAL_COL, TARGET_TRANSFORM)
         except Exception:
+            st.markdown('<div class="st-message-box st-error">Validation sheet must include actual GR (or a target column that can be inverse-transformed).</div>', unsafe_allow_html=True);
+            st.stop()
         st.session_state.results["Validate"]=df
     if go_btn and up is not None:
         book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
+        df = normalize_df(book[name].copy())
+        if not ensure_cols(df, FEATURES):
+            st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True);
+            st.stop()
         pred_raw = model.predict(df[FEATURES])
         df["GR_Pred"] = inverse_target(np.asarray(pred_raw, dtype=float), TARGET_TRANSFORM)
             st.caption("**★ OOR** = % of rows whose input features fall outside the training min–max range.")
         with col_right:
             st.plotly_chart(
+                track_plot(df, include_actual=False, pred_col="GR_Pred", actual_col="GR"),
                 use_container_width=False, config={"displayModeBar": False, "scrollZoom": True}
             )
             tabs = st.tabs(names)
             for t, name in zip(tabs, names):
                 with t:
+                    df = normalize_df(book_to_preview[name])
                     t1, t2 = st.tabs(["Tracks", "Summary"])
                     with t1:
+                        if any(c in df.columns for c in FEATURES):
+                            st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)
+                        else:
+                            st.info(f"None of the expected feature columns were found in this sheet. "
+                                    f"Expected any of: {FEATURES}. Found: {list(df.columns)}")
                     with t2:
+                        present = [c for c in FEATURES if c in df.columns]
+                        if present:
+                            tbl = (df[present]
+                                     .agg(['min','max','mean','std'])
+                                     .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"}))
+                            df_centered_rounded(tbl.reset_index(names="Feature"))
+                        else:
+                            st.info("No expected feature columns found to summarize.")
     st.session_state.show_preview_modal = False
 # =========================
 <div style='text-align:center;color:#6b7280;font-size:0.8em;'>
     © 2024 Smart Thinking AI-Solutions Team. All rights reserved.
 </div>
+""", unsafe_allow_html=True)