UCS2014 committed on
Commit
b6e23ab
·
verified ·
1 Parent(s): b914b72

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -28
app.py CHANGED
@@ -20,10 +20,12 @@ from sklearn.metrics import mean_squared_error, mean_absolute_error
20
  # =========================
21
  APP_NAME = "ST_GR"
22
  TAGLINE = "Gamma Ray Prediction"
 
23
  # If meta.json is present, these will be overridden
24
  FEATURES = ["GPM", "SPP", "RPM", "WOB", "T", "ROP"]
25
- TARGET = "log(GR)" # or "GR" if your model predicts GR directly
26
- PRED_POSTPROCESS = "exp" # or None if direct GR
 
27
 
28
  MODELS_DIR = Path("models")
29
  DEFAULT_MODEL = MODELS_DIR / "gr_rf.joblib"
@@ -63,7 +65,7 @@ div[data-testid="stVerticalBlock"] { overflow: unset !important; }
63
  </style>
64
  """, unsafe_allow_html=True)
65
 
66
- # Hide uploader helper text ("Drag and drop file here", limits, etc.)
67
  st.markdown("""
68
  <style>
69
  section[data-testid="stFileUploader"] div[data-testid="stMarkdownContainer"]{display:none !important;}
@@ -166,7 +168,13 @@ def parse_excel(data_bytes: bytes):
166
  xl = pd.ExcelFile(bio)
167
  return {sh: xl.parse(sh) for sh in xl.sheet_names}
168
 
169
- def read_book_bytes(b: bytes): return parse_excel(b) if b else {}
 
 
 
 
 
 
170
 
171
  def ensure_cols(df, cols):
172
  miss = [c for c in cols if c not in df.columns]
@@ -211,10 +219,8 @@ def to_actual_series(df: pd.DataFrame, target_col: str, actual_col_hint: str, tr
211
  """
212
  if actual_col_hint and actual_col_hint in df.columns:
213
  return pd.Series(df[actual_col_hint], dtype=float)
214
- # else, if target exists, invert:
215
  if target_col in df.columns:
216
  return pd.Series(inverse_target(np.asarray(df[target_col], dtype=float), transform), dtype=float)
217
- # fallback: if a column named "GR" exists, use it
218
  if "GR" in df.columns:
219
  return pd.Series(df["GR"], dtype=float)
220
  raise ValueError("Cannot find actual GR column or target to invert.")
@@ -222,8 +228,7 @@ def to_actual_series(df: pd.DataFrame, target_col: str, actual_col_hint: str, tr
222
  # =========================
223
  # Cross plot (Matplotlib) — auto limits for GR
224
  # =========================
225
- def _nice_bounds(arr_min, arr_max, n_ticks=5):
226
- # pick a "nice" range and step for GR (typically 0–200+ API)
227
  if not np.isfinite(arr_min) or not np.isfinite(arr_max):
228
  return 0.0, 100.0, 20.0
229
  span = arr_max - arr_min
@@ -241,7 +246,6 @@ def cross_plot_static(actual, pred):
241
  a = pd.Series(actual, dtype=float)
242
  p = pd.Series(pred, dtype=float)
243
 
244
- # auto bounds & ticks for GR
245
  lo = min(a.min(), p.min())
246
  hi = max(a.max(), p.max())
247
  fixed_min, fixed_max, step = _nice_bounds(lo, hi, n_ticks=6)
@@ -262,7 +266,7 @@ def cross_plot_static(actual, pred):
262
  ax.set_ylim(fixed_min, fixed_max)
263
  ax.set_xticks(ticks)
264
  ax.set_yticks(ticks)
265
- ax.set_aspect("equal", adjustable="box") # true 1:1
266
 
267
  fmt = FuncFormatter(lambda x, _: f"{int(x):,}")
268
  ax.xaxis.set_major_formatter(fmt)
@@ -517,7 +521,7 @@ if st.session_state.app_step == "dev":
517
  if sh_train is None or sh_test is None:
518
  st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
519
  st.stop()
520
- tr = book[sh_train].copy(); te = book[sh_test].copy()
521
  if not (ensure_cols(tr, FEATURES) and ensure_cols(te, FEATURES)):
522
  st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True); st.stop()
523
 
@@ -562,9 +566,9 @@ if st.session_state.app_step == "dev":
562
 
563
  col_track, col_cross = st.columns([2, 3], gap="large")
564
  with col_track:
 
565
  st.plotly_chart(
566
- track_plot(df.rename(columns={"GR_Actual":"GR"}), include_actual=True,
567
- pred_col="GR_Pred", actual_col="GR"),
568
  use_container_width=False,
569
  config={"displayModeBar": False, "scrollZoom": True}
570
  )
@@ -600,16 +604,18 @@ if st.session_state.app_step == "validate":
600
  if go_btn and up is not None:
601
  book = read_book_bytes(up.getvalue())
602
  name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
603
- df = book[name].copy()
604
- if not ensure_cols(df, FEATURES): st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True); st.stop()
 
 
605
 
606
  pred_raw = model.predict(df[FEATURES])
607
  df["GR_Pred"] = inverse_target(np.asarray(pred_raw, dtype=float), TARGET_TRANSFORM)
608
- # actual GR
609
  try:
610
  df["GR_Actual"] = to_actual_series(df, TARGET, ACTUAL_COL, TARGET_TRANSFORM)
611
  except Exception:
612
- st.markdown('<div class="st-message-box st-error">Validation sheet must include actual GR (or a target column that can be inverse-transformed).</div>', unsafe_allow_html=True); st.stop()
 
613
 
614
  st.session_state.results["Validate"]=df
615
 
@@ -685,8 +691,10 @@ if st.session_state.app_step == "predict":
685
 
686
  if go_btn and up is not None:
687
  book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
688
- df = book[name].copy()
689
- if not ensure_cols(df, FEATURES): st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True); st.stop()
 
 
690
 
691
  pred_raw = model.predict(df[FEATURES])
692
  df["GR_Pred"] = inverse_target(np.asarray(pred_raw, dtype=float), TARGET_TRANSFORM)
@@ -724,8 +732,7 @@ if st.session_state.app_step == "predict":
724
  st.caption("**★ OOR** = % of rows whose input features fall outside the training min–max range.")
725
  with col_right:
726
  st.plotly_chart(
727
- track_plot(df.rename(columns={"GR_Pred":"GR_Pred"}), include_actual=False,
728
- pred_col="GR_Pred", actual_col="GR"),
729
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True}
730
  )
731
 
@@ -747,15 +754,23 @@ if st.session_state.show_preview_modal:
747
  tabs = st.tabs(names)
748
  for t, name in zip(tabs, names):
749
  with t:
750
- df = book_to_preview[name]
751
  t1, t2 = st.tabs(["Tracks", "Summary"])
752
  with t1:
753
- st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)
 
 
 
 
754
  with t2:
755
- tbl = (df[FEATURES]
756
- .agg(['min','max','mean','std'])
757
- .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"}))
758
- df_centered_rounded(tbl.reset_index(names="Feature"))
 
 
 
 
759
  st.session_state.show_preview_modal = False
760
 
761
  # =========================
@@ -767,4 +782,4 @@ st.markdown("""
767
  <div style='text-align:center;color:#6b7280;font-size:0.8em;'>
768
  © 2024 Smart Thinking AI-Solutions Team. All rights reserved.
769
  </div>
770
- """, unsafe_allow_html=True)
 
20
  # =========================
21
  APP_NAME = "ST_GR"
22
  TAGLINE = "Gamma Ray Prediction"
23
+
24
  # If meta.json is present, these will be overridden
25
  FEATURES = ["GPM", "SPP", "RPM", "WOB", "T", "ROP"]
26
+ TARGET = "log(GR)" # or "GR" if your model predicts GR directly
27
+ TARGET_TRANSFORM = "ln" # "ln" (exp back), "log10" (10**x), or "none"
28
+ ACTUAL_COL = "GR" # where the actual API values live (if present)
29
 
30
  MODELS_DIR = Path("models")
31
  DEFAULT_MODEL = MODELS_DIR / "gr_rf.joblib"
 
65
  </style>
66
  """, unsafe_allow_html=True)
67
 
68
+ # Hide uploader helper text
69
  st.markdown("""
70
  <style>
71
  section[data-testid="stFileUploader"] div[data-testid="stMarkdownContainer"]{display:none !important;}
 
168
  xl = pd.ExcelFile(bio)
169
  return {sh: xl.parse(sh) for sh in xl.sheet_names}
170
 
171
+ def read_book_bytes(b: bytes):
172
+ return parse_excel(b) if b else {}
173
+
174
+ def normalize_df(df: pd.DataFrame) -> pd.DataFrame:
175
+ out = df.copy()
176
+ out.columns = [str(c).strip() for c in out.columns]
177
+ return out
178
 
179
  def ensure_cols(df, cols):
180
  miss = [c for c in cols if c not in df.columns]
 
219
  """
220
  if actual_col_hint and actual_col_hint in df.columns:
221
  return pd.Series(df[actual_col_hint], dtype=float)
 
222
  if target_col in df.columns:
223
  return pd.Series(inverse_target(np.asarray(df[target_col], dtype=float), transform), dtype=float)
 
224
  if "GR" in df.columns:
225
  return pd.Series(df["GR"], dtype=float)
226
  raise ValueError("Cannot find actual GR column or target to invert.")
 
228
  # =========================
229
  # Cross plot (Matplotlib) — auto limits for GR
230
  # =========================
231
+ def _nice_bounds(arr_min, arr_max, n_ticks=6):
 
232
  if not np.isfinite(arr_min) or not np.isfinite(arr_max):
233
  return 0.0, 100.0, 20.0
234
  span = arr_max - arr_min
 
246
  a = pd.Series(actual, dtype=float)
247
  p = pd.Series(pred, dtype=float)
248
 
 
249
  lo = min(a.min(), p.min())
250
  hi = max(a.max(), p.max())
251
  fixed_min, fixed_max, step = _nice_bounds(lo, hi, n_ticks=6)
 
266
  ax.set_ylim(fixed_min, fixed_max)
267
  ax.set_xticks(ticks)
268
  ax.set_yticks(ticks)
269
+ ax.set_aspect("equal", adjustable="box")
270
 
271
  fmt = FuncFormatter(lambda x, _: f"{int(x):,}")
272
  ax.xaxis.set_major_formatter(fmt)
 
521
  if sh_train is None or sh_test is None:
522
  st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
523
  st.stop()
524
+ tr = normalize_df(book[sh_train].copy()); te = normalize_df(book[sh_test].copy())
525
  if not (ensure_cols(tr, FEATURES) and ensure_cols(te, FEATURES)):
526
  st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True); st.stop()
527
 
 
566
 
567
  col_track, col_cross = st.columns([2, 3], gap="large")
568
  with col_track:
569
+ df_for_plot = df.rename(columns={"GR_Actual":"GR"})
570
  st.plotly_chart(
571
+ track_plot(df_for_plot, include_actual=True, pred_col="GR_Pred", actual_col="GR"),
 
572
  use_container_width=False,
573
  config={"displayModeBar": False, "scrollZoom": True}
574
  )
 
604
  if go_btn and up is not None:
605
  book = read_book_bytes(up.getvalue())
606
  name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
607
+ df = normalize_df(book[name].copy())
608
+ if not ensure_cols(df, FEATURES):
609
+ st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True);
610
+ st.stop()
611
 
612
  pred_raw = model.predict(df[FEATURES])
613
  df["GR_Pred"] = inverse_target(np.asarray(pred_raw, dtype=float), TARGET_TRANSFORM)
 
614
  try:
615
  df["GR_Actual"] = to_actual_series(df, TARGET, ACTUAL_COL, TARGET_TRANSFORM)
616
  except Exception:
617
+ st.markdown('<div class="st-message-box st-error">Validation sheet must include actual GR (or a target column that can be inverse-transformed).</div>', unsafe_allow_html=True);
618
+ st.stop()
619
 
620
  st.session_state.results["Validate"]=df
621
 
 
691
 
692
  if go_btn and up is not None:
693
  book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
694
+ df = normalize_df(book[name].copy())
695
+ if not ensure_cols(df, FEATURES):
696
+ st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True);
697
+ st.stop()
698
 
699
  pred_raw = model.predict(df[FEATURES])
700
  df["GR_Pred"] = inverse_target(np.asarray(pred_raw, dtype=float), TARGET_TRANSFORM)
 
732
  st.caption("**★ OOR** = % of rows whose input features fall outside the training min–max range.")
733
  with col_right:
734
  st.plotly_chart(
735
+ track_plot(df, include_actual=False, pred_col="GR_Pred", actual_col="GR"),
 
736
  use_container_width=False, config={"displayModeBar": False, "scrollZoom": True}
737
  )
738
 
 
754
  tabs = st.tabs(names)
755
  for t, name in zip(tabs, names):
756
  with t:
757
+ df = normalize_df(book_to_preview[name])
758
  t1, t2 = st.tabs(["Tracks", "Summary"])
759
  with t1:
760
+ if any(c in df.columns for c in FEATURES):
761
+ st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)
762
+ else:
763
+ st.info(f"None of the expected feature columns were found in this sheet. "
764
+ f"Expected any of: {FEATURES}. Found: {list(df.columns)}")
765
  with t2:
766
+ present = [c for c in FEATURES if c in df.columns]
767
+ if present:
768
+ tbl = (df[present]
769
+ .agg(['min','max','mean','std'])
770
+ .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"}))
771
+ df_centered_rounded(tbl.reset_index(names="Feature"))
772
+ else:
773
+ st.info("No expected feature columns found to summarize.")
774
  st.session_state.show_preview_modal = False
775
 
776
  # =========================
 
782
  <div style='text-align:center;color:#6b7280;font-size:0.8em;'>
783
  © 2024 Smart Thinking AI-Solutions Team. All rights reserved.
784
  </div>
785
+ """, unsafe_allow_html=True)