UCS2014 committed on
Commit
876c0a9
·
verified ·
1 Parent(s): 703b520

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -60
app.py CHANGED
@@ -1,4 +1,5 @@
1
  # app.py — ST_Sonic_Ts (Shear Slowness Ts)
 
2
  import io, json, os, base64, math
3
  from pathlib import Path
4
  import streamlit as st
@@ -22,9 +23,16 @@ from sklearn.metrics import mean_squared_error, mean_absolute_error
22
  APP_NAME = "ST_Log_Sonic (Ts)"
23
  TAGLINE = "Real-Time Shear Slowness (Ts) Prediction"
24
 
25
- # Defaults (will be overridden by meta if present)
26
- FEATURES = ["WOB(klbf)", "TORQUE(kft.lbf)", "SPP(psi)", "RPM(1/min)", "ROP(ft/h)", "Flow Rate, gpm"]
27
- TARGET = "Ts"
 
 
 
 
 
 
 
28
  PRED_COL = "Ts_Pred"
29
 
30
  MODELS_DIR = Path("models")
@@ -32,7 +40,7 @@ DEFAULT_MODEL = MODELS_DIR / "ts_model.joblib"
32
  MODEL_FALLBACKS = [MODELS_DIR / "model.joblib", MODELS_DIR / "model.pkl"]
33
  COLORS = {"pred": "#1f77b4", "actual": "#f2b702", "ref": "#5a5a5a"}
34
 
35
- # Toggle to show strict version banner from meta
36
  STRICT_VERSION_CHECK = False
37
 
38
  # ---- Plot sizing ----
@@ -50,14 +58,11 @@ st.set_page_config(page_title=APP_NAME, page_icon="logo.png", layout="wide")
50
  st.markdown("""
51
  <style>
52
  .brand-logo { width: 200px; height: auto; object-fit: contain; }
53
- .sidebar-header { display:flex; align-items:center; gap:12px; }
54
- .sidebar-header .text h1 { font-size: 1.05rem; margin:0; line-height:1.1; }
55
- .sidebar-header .text .tag { font-size: .85rem; color:#6b7280; margin:2px 0 0; }
56
  .centered-container { display: flex; flex-direction: column; align-items: center; text-align: center; }
57
  .st-message-box { background-color: #f0f2f6; color: #333; padding: 10px; border-radius: 10px; border: 1px solid #e6e9ef; }
58
  .st-message-box.st-success { background-color: #d4edda; color: #155724; border-color: #c3e6cb; }
59
  .st-message-box.st-warning { background-color: #fff3cd; color: #856404; border-color: #ffeeba; }
60
- .st-message-box.st-error { background-color: #f8d7da; color: #721c24; border-color: #f5c6cb; }
61
  .main .block-container { overflow: unset !important; }
62
  div[data-testid="stVerticalBlock"] { overflow: unset !important; }
63
  div[data-testid="stExpander"] > details > summary {
@@ -140,47 +145,49 @@ def parse_excel(data_bytes: bytes):
140
  xl = pd.ExcelFile(bio)
141
  return {sh: xl.parse(sh) for sh in xl.sheet_names}
142
 
143
- def read_book_bytes(b: bytes):
144
  return parse_excel(b) if b else {}
145
 
146
  # ---- Canonical feature aliasing ------------------------------------------
147
  def _build_alias_map(canonical_features: list[str], target_name: str) -> dict:
148
  """
149
- Returns a dict of common header variants -> canonical names as per the model's FEATURES.
150
- We choose the canonical for each family by checking which string exists in canonical_features.
151
  """
152
- def pick(expected_list, family_variants):
153
- # pick the first variant that exists in expected_list, else fall back to first in family_variants
154
- for v in family_variants:
155
  if v in expected_list:
156
  return v
157
- return family_variants[0]
158
 
159
- can_WOB = pick(canonical_features, ["WOB, klbf","WOB(klbf)","WOB (klbf)"])
160
- can_TORQUE = pick(canonical_features, ["Torque(kft.lbf)","TORQUE(kft.lbf)"])
161
- can_SPP = pick(canonical_features, ["SPP(psi)"])
162
- can_RPM = pick(canonical_features, ["RPM(1/min)","RPM (1/min)"])
163
- can_ROP = pick(canonical_features, ["ROP(ft/h)","ROP (ft/h)"])
164
- can_FR = pick(canonical_features, ["Flow Rate, gpm","Flow Rate , gpm","Flow Rate,gpm"])
 
165
 
166
  alias = {
167
- # WOB
168
- "WOB, klbf": can_WOB, "WOB(klbf)": can_WOB, "WOB (klbf)": can_WOB, "WOB( klbf)": can_WOB, "WOB , klbf": can_WOB,
169
- # Torque
170
- "Torque(kft.lbf)": can_TORQUE, "TORQUE(kft.lbf)": can_TORQUE,
171
- # SPP
172
- "SPP(psi)": can_SPP,
173
- # RPM
174
- "RPM(1/min)": can_RPM, "RPM (1/min)": can_RPM,
175
- # ROP
176
- "ROP(ft/h)": can_ROP, "ROP (ft/h)": can_ROP,
177
- # Flow
178
- "Flow Rate, gpm": can_FR, "Fow Rate, gpm": can_FR, "Fow Rate, gpm ": can_FR, "Flow Rate , gpm": can_FR, "Flow Rate,gpm": can_FR,
179
- # Depth (for plotting only)
180
- "Depth, ft": "Depth, ft", "Depth(ft)": "Depth, ft", "DEPTH, ft": "Depth, ft",
181
- # Ts targets (map all to the chosen TARGET)
182
- "Ts": target_name, "Ts,us/ft_Actual": target_name, "Ts, us/ft_Actual": target_name,
183
- "TS_Actual": target_name, "Ts (us/ft)_Actual": target_name
 
 
184
  }
185
  return alias
186
 
@@ -188,7 +195,6 @@ def _normalize_columns(df: pd.DataFrame, canonical_features: list[str], target_n
188
  out = df.copy()
189
  out.columns = [str(c).strip().replace(" ,", ",").replace(", ", ", ").replace(" ", " ") for c in out.columns]
190
  alias = _build_alias_map(canonical_features, target_name)
191
- # only rename keys that actually exist
192
  actual = {k: v for k, v in alias.items() if k in out.columns and k != v}
193
  return out.rename(columns=actual)
194
 
@@ -219,6 +225,17 @@ def df_centered_rounded(df: pd.DataFrame, hide_index=True):
219
  )
220
  st.dataframe(styler, use_container_width=True, hide_index=hide_index)
221
 
 
 
 
 
 
 
 
 
 
 
 
222
  # === Excel export helpers =================================================
223
  def _excel_engine() -> str:
224
  try:
@@ -497,27 +514,54 @@ def track_plot(df, include_actual=True):
497
 
498
  # ---------- Preview (matplotlib) ----------
499
  def preview_tracks(df: pd.DataFrame, cols: list[str]):
 
 
 
 
 
 
500
  cols = [c for c in cols if c in df.columns]
501
  n = len(cols)
502
  if n == 0:
503
  fig, ax = plt.subplots(figsize=(4, 2))
504
- ax.text(0.5,0.5,"No selected columns",ha="center",va="center"); ax.axis("off")
 
505
  return fig
506
- fig, axes = plt.subplots(1, n, figsize=(2.2*n, 7.0), sharey=True, dpi=100)
507
- if n == 1: axes = [axes]
508
  depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
509
  if depth_col is not None:
510
- idx = pd.to_numeric(df[depth_col], errors="coerce"); y_label = depth_col
 
511
  else:
512
- idx = pd.Series(np.arange(1, len(df) + 1)); y_label = "Point Index"
 
 
 
 
 
 
 
 
 
 
 
 
513
  for i, (ax, col) in enumerate(zip(axes, cols)):
514
- ax.plot(pd.to_numeric(df[col], errors="coerce"), idx, '-', lw=1.6, color="#333")
515
- ax.set_xlabel(col); ax.xaxis.set_label_position('top'); ax.xaxis.tick_top()
516
- ax.set_ylim(float(idx.max()), float(idx.min()))
 
 
 
517
  ax.grid(True, linestyle=":", alpha=0.3)
518
- if i == 0: ax.set_ylabel(y_label)
 
 
519
  else:
520
- ax.tick_params(labelleft=False); ax.set_ylabel("")
 
 
521
  fig.tight_layout()
522
  return fig
523
 
@@ -551,7 +595,7 @@ except Exception as e:
551
  st.error(f"Failed to load model: {e}")
552
  st.stop()
553
 
554
- # Prefer Ts meta
555
  meta = {}
556
  meta_candidates = [MODELS_DIR / "ts_meta.json", MODELS_DIR / "meta.json", MODELS_DIR / "ym_meta.json"]
557
  meta_path = next((p for p in meta_candidates if p.exists()), None)
@@ -564,7 +608,7 @@ if meta_path:
564
  except Exception as e:
565
  st.warning(f"Could not parse meta file ({meta_path.name}): {e}")
566
 
567
- # Optional: version banner (silenced by default)
568
  if STRICT_VERSION_CHECK and meta.get("versions"):
569
  import numpy as _np, sklearn as _skl
570
  mv = meta["versions"]; msg=[]
@@ -671,7 +715,6 @@ if st.session_state.app_step == "dev":
671
  st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training/training2 and Test/Testing/testing2 sheets.</div>', unsafe_allow_html=True)
672
  st.stop()
673
 
674
- # Use meta FEATURES as canonical when normalizing
675
  tr = _normalize_columns(book[sh_train].copy(), FEATURES, TARGET)
676
  te = _normalize_columns(book[sh_test].copy(), FEATURES, TARGET)
677
 
@@ -679,9 +722,9 @@ if st.session_state.app_step == "dev":
679
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True)
680
  st.stop()
681
 
682
- # Predict with exactly the columns the model was trained on
683
- tr[PRED_COL] = model.predict(tr[FEATURES])
684
- te[PRED_COL] = model.predict(te[FEATURES])
685
 
686
  st.session_state.results["Train"]=tr; st.session_state.results["Test"]=te
687
  st.session_state.results["m_train"]={
@@ -746,9 +789,9 @@ if st.session_state.app_step == "validate":
746
  book = read_book_bytes(up.getvalue())
747
  name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
748
  df = _normalize_columns(book[name].copy(), FEATURES, TARGET)
749
- if not ensure_cols(df, FEATURES+[TARGET]):
750
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
751
- df[PRED_COL] = model.predict(df[FEATURES])
752
  st.session_state.results["Validate"]=df
753
 
754
  ranges = st.session_state.train_ranges; oor_pct = 0.0; tbl=None
@@ -759,7 +802,9 @@ if st.session_state.app_step == "validate":
759
  tbl = df.loc[any_viol, FEATURES].copy()
760
  for c in FEATURES:
761
  if pd.api.types.is_numeric_dtype(tbl[c]): tbl[c] = tbl[c].round(2)
762
- tbl["Violations"] = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).loc[any_viol].apply(lambda r:", ".join([c for c,v in r.items() if v]), axis=1)
 
 
763
  st.session_state.results["m_val"]={
764
  "R": pearson_r(df[TARGET], df[PRED_COL]),
765
  "RMSE": rmse(df[TARGET], df[PRED_COL]),
@@ -818,9 +863,9 @@ if st.session_state.app_step == "predict":
818
  if go_btn and up is not None:
819
  book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
820
  df = _normalize_columns(book[name].copy(), FEATURES, TARGET)
821
- if not ensure_cols(df, FEATURES):
822
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
823
- df[PRED_COL] = model.predict(df[FEATURES])
824
  st.session_state.results["PredictOnly"]=df
825
 
826
  ranges = st.session_state.train_ranges; oor_pct = 0.0
 
1
  # app.py — ST_Sonic_Ts (Shear Slowness Ts)
2
+
3
  import io, json, os, base64, math
4
  from pathlib import Path
5
  import streamlit as st
 
23
  APP_NAME = "ST_Log_Sonic (Ts)"
24
  TAGLINE = "Real-Time Shear Slowness (Ts) Prediction"
25
 
26
+ # Defaults (overridden by ts_meta.json if present)
27
+ FEATURES = [
28
+ "WOB (klbf)",
29
+ "Torque (kft.lbf)",
30
+ "SPP (psi)",
31
+ "RPM (1/min)",
32
+ "ROP (ft/h)",
33
+ "Flow Rate (gpm)",
34
+ ]
35
+ TARGET = "Ts (us/ft_Actual)"
36
  PRED_COL = "Ts_Pred"
37
 
38
  MODELS_DIR = Path("models")
 
40
  MODEL_FALLBACKS = [MODELS_DIR / "model.joblib", MODELS_DIR / "model.pkl"]
41
  COLORS = {"pred": "#1f77b4", "actual": "#f2b702", "ref": "#5a5a5a"}
42
 
43
+ # Optional env banner from meta
44
  STRICT_VERSION_CHECK = False
45
 
46
  # ---- Plot sizing ----
 
58
  st.markdown("""
59
  <style>
60
  .brand-logo { width: 200px; height: auto; object-fit: contain; }
 
 
 
61
  .centered-container { display: flex; flex-direction: column; align-items: center; text-align: center; }
62
  .st-message-box { background-color: #f0f2f6; color: #333; padding: 10px; border-radius: 10px; border: 1px solid #e6e9ef; }
63
  .st-message-box.st-success { background-color: #d4edda; color: #155724; border-color: #c3e6cb; }
64
  .st-message-box.st-warning { background-color: #fff3cd; color: #856404; border-color: #ffeeba; }
65
+ .st-message-box.st-error { background-color: #f8d7da; color: #721c24; border-color: #f5c6cb; }
66
  .main .block-container { overflow: unset !important; }
67
  div[data-testid="stVerticalBlock"] { overflow: unset !important; }
68
  div[data-testid="stExpander"] > details > summary {
 
145
  xl = pd.ExcelFile(bio)
146
  return {sh: xl.parse(sh) for sh in xl.sheet_names}
147
 
148
def read_book_bytes(b: bytes):
    """Parse an uploaded workbook supplied as raw bytes.

    Returns the result of ``parse_excel(b)`` for non-empty input, and an
    empty dict when ``b`` is falsy (``None`` or ``b""``), so callers can
    always treat the result as a mapping.
    """
    # Guard clause: nothing uploaded -> no sheets.
    if not b:
        return {}
    return parse_excel(b)
150
 
151
  # ---- Canonical feature aliasing ------------------------------------------
152
  def _build_alias_map(canonical_features: list[str], target_name: str) -> dict:
153
  """
154
+ Map common header variants -> the *canonical* names in canonical_features.
155
+ Whatever appears in canonical_features (from ts_meta.json) wins.
156
  """
157
+ def pick(expected_list, variants):
158
+ for v in variants:
 
159
  if v in expected_list:
160
  return v
161
+ return variants[0]
162
 
163
+ can_WOB = pick(canonical_features, ["WOB (klbf)", "WOB, klbf", "WOB(klbf)", "WOB( klbf)"])
164
+ can_TORQUE = pick(canonical_features, ["Torque (kft.lbf)", "Torque(kft.lbf)", "TORQUE(kft.lbf)"])
165
+ can_SPP = pick(canonical_features, ["SPP (psi)", "SPP(psi)"])
166
+ can_RPM = pick(canonical_features, ["RPM (1/min)", "RPM(1/min)"])
167
+ can_ROP = pick(canonical_features, ["ROP (ft/h)", "ROP(ft/h)"])
168
+ can_FR = pick(canonical_features, ["Flow Rate (gpm)", "Flow Rate, gpm", "Flow Rate,gpm", "Flow Rate , gpm", "Fow Rate, gpm", "Fow Rate, gpm "])
169
+ can_DEPTH = "Depth (ft)"
170
 
171
  alias = {
172
+ # Features
173
+ "WOB (klbf)": can_WOB, "WOB, klbf": can_WOB, "WOB(klbf)": can_WOB, "WOB( klbf)": can_WOB,
174
+ "Torque (kft.lbf)": can_TORQUE, "Torque(kft.lbf)": can_TORQUE, "TORQUE(kft.lbf)": can_TORQUE,
175
+ "SPP (psi)": can_SPP, "SPP(psi)": can_SPP,
176
+ "RPM (1/min)": can_RPM, "RPM(1/min)": can_RPM,
177
+ "ROP (ft/h)": can_ROP, "ROP(ft/h)": can_ROP,
178
+ "Flow Rate (gpm)": can_FR, "Flow Rate, gpm": can_FR, "Flow Rate,gpm": can_FR, "Flow Rate , gpm": can_FR,
179
+ "Fow Rate, gpm": can_FR, "Fow Rate, gpm ": can_FR,
180
+
181
+ # Depth (plot only)
182
+ "Depth (ft)": can_DEPTH, "Depth, ft": can_DEPTH, "Depth(ft)": can_DEPTH, "DEPTH, ft": can_DEPTH,
183
+
184
+ # Target family
185
+ "Ts (us/ft_Actual)": target_name,
186
+ "Ts,us/ft_Actual": target_name,
187
+ "Ts, us/ft_Actual": target_name,
188
+ "Ts": target_name,
189
+ "TS_Actual": target_name,
190
+ "Ts (us/ft)_Actual": target_name,
191
  }
192
  return alias
193
 
 
195
  out = df.copy()
196
  out.columns = [str(c).strip().replace(" ,", ",").replace(", ", ", ").replace(" ", " ") for c in out.columns]
197
  alias = _build_alias_map(canonical_features, target_name)
 
198
  actual = {k: v for k, v in alias.items() if k in out.columns and k != v}
199
  return out.rename(columns=actual)
200
 
 
225
  )
226
  st.dataframe(styler, use_container_width=True, hide_index=hide_index)
227
 
228
+ # ---------- Build X exactly as trained ----------
229
+ def _make_X(df: pd.DataFrame, features: list[str]) -> pd.DataFrame:
230
+ """
231
+ Reindex columns to the exact training feature order and coerce to numeric.
232
+ Prevents scikit-learn 'feature names should match' errors.
233
+ """
234
+ X = df.reindex(columns=features, copy=False)
235
+ for c in X.columns:
236
+ X[c] = pd.to_numeric(X[c], errors="coerce")
237
+ return X
238
+
239
  # === Excel export helpers =================================================
240
  def _excel_engine() -> str:
241
  try:
 
514
 
515
  # ---------- Preview (matplotlib) ----------
516
  def preview_tracks(df: pd.DataFrame, cols: list[str]):
517
+ """
518
+ Quick-look multi-track preview:
519
+ - one subplot per selected column
520
+ - distinct stable colors per column
521
+ - shared & reversed Y-axis (Depth downwards)
522
+ """
523
  cols = [c for c in cols if c in df.columns]
524
  n = len(cols)
525
  if n == 0:
526
  fig, ax = plt.subplots(figsize=(4, 2))
527
+ ax.text(0.5, 0.5, "No selected columns", ha="center", va="center")
528
+ ax.axis("off")
529
  return fig
530
+
531
+ # Depth or fallback to index
532
  depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
533
  if depth_col is not None:
534
+ idx = pd.to_numeric(df[depth_col], errors="coerce")
535
+ y_label = depth_col
536
  else:
537
+ idx = pd.Series(np.arange(1, len(df) + 1))
538
+ y_label = "Point Index"
539
+
540
+ y_min, y_max = float(idx.min()), float(idx.max())
541
+
542
+ # Stable qualitative palette
543
+ cmap = plt.get_cmap("tab20")
544
+ col_colors = {col: cmap(i % cmap.N) for i, col in enumerate(cols)}
545
+
546
+ fig, axes = plt.subplots(1, n, figsize=(2.3 * n, 7.0), sharey=True, dpi=100)
547
+ if n == 1:
548
+ axes = [axes]
549
+
550
  for i, (ax, col) in enumerate(zip(axes, cols)):
551
+ x = pd.to_numeric(df[col], errors="coerce")
552
+ ax.plot(x, idx, '-', lw=1.8, color=col_colors[col])
553
+ ax.set_xlabel(col)
554
+ ax.xaxis.set_label_position('top')
555
+ ax.xaxis.tick_top()
556
+ ax.set_ylim(y_max, y_min) # reversed Y (Depth down)
557
  ax.grid(True, linestyle=":", alpha=0.3)
558
+
559
+ if i == 0:
560
+ ax.set_ylabel(y_label)
561
  else:
562
+ ax.tick_params(labelleft=False)
563
+ ax.set_ylabel("")
564
+
565
  fig.tight_layout()
566
  return fig
567
 
 
595
  st.error(f"Failed to load model: {e}")
596
  st.stop()
597
 
598
+ # Load meta (prefer Ts-specific)
599
  meta = {}
600
  meta_candidates = [MODELS_DIR / "ts_meta.json", MODELS_DIR / "meta.json", MODELS_DIR / "ym_meta.json"]
601
  meta_path = next((p for p in meta_candidates if p.exists()), None)
 
608
  except Exception as e:
609
  st.warning(f"Could not parse meta file ({meta_path.name}): {e}")
610
 
611
+ # Optional: version banner
612
  if STRICT_VERSION_CHECK and meta.get("versions"):
613
  import numpy as _np, sklearn as _skl
614
  mv = meta["versions"]; msg=[]
 
715
  st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training/training2 and Test/Testing/testing2 sheets.</div>', unsafe_allow_html=True)
716
  st.stop()
717
 
 
718
  tr = _normalize_columns(book[sh_train].copy(), FEATURES, TARGET)
719
  te = _normalize_columns(book[sh_test].copy(), FEATURES, TARGET)
720
 
 
722
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True)
723
  st.stop()
724
 
725
+ # Predict with exactly the training feature order
726
+ tr[PRED_COL] = model.predict(_make_X(tr, FEATURES))
727
+ te[PRED_COL] = model.predict(_make_X(te, FEATURES))
728
 
729
  st.session_state.results["Train"]=tr; st.session_state.results["Test"]=te
730
  st.session_state.results["m_train"]={
 
789
  book = read_book_bytes(up.getvalue())
790
  name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
791
  df = _normalize_columns(book[name].copy(), FEATURES, TARGET)
792
+ if not ensure_cols(df, FEATURES+[TARGET]):
793
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
794
+ df[PRED_COL] = model.predict(_make_X(df, FEATURES))
795
  st.session_state.results["Validate"]=df
796
 
797
  ranges = st.session_state.train_ranges; oor_pct = 0.0; tbl=None
 
802
  tbl = df.loc[any_viol, FEATURES].copy()
803
  for c in FEATURES:
804
  if pd.api.types.is_numeric_dtype(tbl[c]): tbl[c] = tbl[c].round(2)
805
+ tbl["Violations"] = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).loc[any_viol].apply(
806
+ lambda r:", ".join([c for c,v in r.items() if v]), axis=1
807
+ )
808
  st.session_state.results["m_val"]={
809
  "R": pearson_r(df[TARGET], df[PRED_COL]),
810
  "RMSE": rmse(df[TARGET], df[PRED_COL]),
 
863
  if go_btn and up is not None:
864
  book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
865
  df = _normalize_columns(book[name].copy(), FEATURES, TARGET)
866
+ if not ensure_cols(df, FEATURES):
867
  st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
868
+ df[PRED_COL] = model.predict(_make_X(df, FEATURES))
869
  st.session_state.results["PredictOnly"]=df
870
 
871
  ranges = st.session_state.train_ranges; oor_pct = 0.0