UCS2014 committed on
Commit
031665d
·
verified ·
1 Parent(s): f8836fa

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -405
app.py DELETED
@@ -1,405 +0,0 @@
1
-
2
- import io, json, os
3
- from pathlib import Path
4
- import streamlit as st
5
- import pandas as pd
6
- import numpy as np
7
- import joblib
8
- import matplotlib.pyplot as plt
9
- from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
10
-
11
- # =========================
12
- # Defaults (overridden by models/meta.json or model.feature_names_in_)
13
- # =========================
14
# Default schema and model locations. The feature list and target can be
# replaced further down from models/meta.json or from the loaded model.
TARGET = "UCS"
FEATURES = ["Q, gpm", "SPP(psi)", "T (kft.lbf)", "WOB (klbf)", "ROP (ft/h)"]
MODELS_DIR = Path("models")
DEFAULT_MODEL = MODELS_DIR.joinpath("ucs_rf.joblib")
# Alternative file names tried, in this order, when the default is absent.
MODEL_FALLBACKS = [MODELS_DIR.joinpath(fname) for fname in ("model.joblib", "model.pkl")]
19
-
20
- # =========================
21
- # Page / Theme
22
- # =========================
23
# Page setup comes first, followed by the two CSS snippets: one hides the
# page's header/footer elements, the other styles the sidebar cards,
# buttons and feature pills.
st.set_page_config(page_title="ST_GeoMech_UCS", page_icon="🛠️", layout="wide")

_HIDE_CHROME_CSS = "<style>header, footer{visibility:hidden !important;}</style>"
_THEME_CSS = """

<style>
.stApp { background: #FFFFFF; }
section[data-testid="stSidebar"] { background: #F6F9FC; }
.sidebar-card{ border:1px solid #E5E7EB; border-radius:12px; background:#FFFFFF;
padding:10px 12px; margin:8px 0; box-shadow:0 1px 3px rgba(0,0,0,.06); display:inline-block; }
.sidebar-card h3{ margin:0; font-size:1rem; line-height:1.2; text-align:center; }
.stButton>button{ background:#007bff; color:#fff; font-weight:bold; border-radius:8px; border:none; padding:10px 24px; }
.stButton>button:hover{ background:#0056b3; }
.pill { display:inline-block; padding:2px 10px; border-radius:999px; border:1px solid #e5e7eb; margin:2px; background:#fff; font-size:.9rem; }
</style>
"""
for _css in (_HIDE_CHROME_CSS, _THEME_CSS):
    st.markdown(_css, unsafe_allow_html=True)
38
-
39
- # =========================
40
- # Helpers
41
- # =========================
42
def rmse(y_true, y_pred):
    """Return the root-mean-squared error of ``y_pred`` against ``y_true`` as a float."""
    mse = mean_squared_error(y_true, y_pred)
    return float(np.sqrt(mse))
44
-
45
def ensure_cols(df, cols):
    """Check that every name in ``cols`` is a column of ``df``.

    When something is absent, a Streamlit error listing the missing and
    the available columns is shown and False is returned; otherwise the
    result is True.
    """
    available = df.columns
    absent = [name for name in cols if name not in available]
    if not absent:
        return True
    st.error(f"Missing columns: {absent}\nFound: {list(df.columns)}")
    return False
51
-
52
@st.cache_resource(show_spinner=False)
def load_model(model_path: str):
    """Deserialize the model stored at ``model_path`` with joblib.

    The function is wrapped in ``st.cache_resource`` with the spinner
    disabled.
    """
    # NOTE(review): joblib.load unpickles the file, which can run arbitrary
    # code - confirm the model file always comes from a trusted source.
    loaded = joblib.load(model_path)
    return loaded
55
-
56
@st.cache_data(show_spinner=False)
def parse_excel(data_bytes: bytes):
    """Parse an in-memory Excel workbook into ``{sheet_name: DataFrame}``.

    Args:
        data_bytes: Raw bytes of the workbook.

    Returns:
        A dict holding one DataFrame per sheet, keyed by sheet name in the
        order reported by ``sheet_names``.
    """
    # The context manager closes the Excel reader once all sheets are parsed.
    with pd.ExcelFile(io.BytesIO(data_bytes)) as workbook:
        return {sheet: workbook.parse(sheet) for sheet in workbook.sheet_names}
61
-
62
def read_book(upload):
    """Return all sheets of an uploaded workbook as ``{sheet_name: DataFrame}``.

    The result is an empty dict when nothing was uploaded or when parsing
    fails; a parse failure is also reported through ``st.error``.
    """
    sheets = {}
    if upload is not None:
        try:
            sheets = parse_excel(upload.getvalue())
        except Exception as e:
            st.error(f"Failed to read Excel: {e}")
    return sheets
70
-
71
def find_sheet(book, names):
    """Look up a sheet by name, ignoring case.

    ``names`` is tried in order; the first candidate matching a key of
    ``book`` wins and that key is returned with its original casing.
    Returns None when no candidate matches.
    """
    by_lower = {}
    for sheet_name in book:
        by_lower[sheet_name.lower()] = sheet_name
    matches = (by_lower[cand.lower()] for cand in names if cand.lower() in by_lower)
    return next(matches, None)
77
-
78
def cross_plot(actual, pred, title, size=(5.6,5.6)):
    """Scatter predicted against actual values with a dashed 1:1 reference line.

    ``actual`` and ``pred`` must provide ``.min()`` and ``.max()``.
    Returns the Matplotlib figure.
    """
    fig, ax = plt.subplots(figsize=size)
    ax.scatter(actual, pred, s=16, alpha=0.7)

    # One shared range so the reference diagonal spans both series.
    low = float(np.nanmin([actual.min(), pred.min()]))
    high = float(np.nanmax([actual.max(), pred.max()]))
    ax.plot([low, high], [low, high], '--')

    ax.set_xlabel("Actual UCS")
    ax.set_ylabel("Predicted UCS")
    ax.set_title(title)
    ax.grid(True, ls=":", alpha=0.4)
    return fig
87
-
88
def depth_or_index_track(df, title, include_actual=True):
    """Plot UCS_Pred (and optionally the actual target) as a vertical track.

    The vertical axis is the first column whose name contains "depth"
    (case-insensitive); without such a column a 1-based point index is
    used. The y-axis is inverted and the x-axis is drawn on top.
    Returns the Matplotlib figure.
    """
    depth_col = next((col for col in df.columns if 'depth' in str(col).lower()), None)

    fig, ax = plt.subplots(figsize=(5.8, 7.5))
    if depth_col is None:
        y_values = np.arange(1, len(df) + 1)
        y_label = "Point Index"
    else:
        y_values = df[depth_col]
        y_label = depth_col

    ax.plot(df["UCS_Pred"], y_values, label="UCS_Pred")
    if include_actual and TARGET in df.columns:
        ax.plot(df[TARGET], y_values, alpha=0.7, label="UCS (actual)")
    ax.set_ylabel(y_label)
    ax.set_xlabel("UCS")
    ax.xaxis.set_label_position('top')
    ax.xaxis.tick_top()
    ax.invert_yaxis()

    ax.grid(True, linestyle=":", alpha=0.4)
    ax.set_title(title, pad=12)
    ax.legend()
    return fig
110
-
111
def export_workbook(sheets_dict, summary_df=None):
    """Serialize DataFrames into an in-memory .xlsx workbook.

    Args:
        sheets_dict: Mapping of sheet name -> DataFrame. Each name is cut
            to its first 31 characters before being used as a sheet name.
        summary_df: Optional DataFrame written to an extra "Summary" sheet.

    Returns:
        The workbook content as bytes.

    Raises:
        RuntimeError: If openpyxl cannot be imported, or if there is
            nothing to write.
    """
    try:
        import openpyxl  # noqa: F401  (ensure engine available)
    except Exception as err:
        raise RuntimeError("Export requires openpyxl. Please add it to requirements or install it.") from err
    if not sheets_dict and summary_df is None:
        # Saving a workbook without any sheet fails inside the writer;
        # report it with the exception type callers already handle.
        raise RuntimeError("Nothing to export.")
    buf = io.BytesIO()
    with pd.ExcelWriter(buf, engine="openpyxl") as xw:
        for name, frame in sheets_dict.items():
            frame.to_excel(xw, sheet_name=name[:31], index=False)
        if summary_df is not None:
            summary_df.to_excel(xw, sheet_name="Summary", index=False)
    return buf.getvalue()
123
-
124
def toast(msg):
    """Show ``msg`` via ``st.toast``; if that call raises, show it via ``st.info`` instead."""
    try:
        st.toast(msg)
    except Exception:
        st.info(msg)
127
-
128
def infer_features_from_model(m):
    """Return the feature names recorded on a model as a list of strings.

    The object itself is inspected first; if it has a non-empty ``steps``
    sequence, the estimator in the last step is inspected next. A
    non-empty ``feature_names_in_`` on either one is returned with every
    entry converted via ``str``. Any exception during a lookup is swallowed
    and the next candidate is tried. Returns None when nothing is found.
    """
    def _final_step(pipeline):
        # Estimator of the last (name, estimator) pair, or None without steps.
        if hasattr(pipeline, "steps") and len(pipeline.steps):
            return pipeline.steps[-1][1]
        return None

    for pick in (lambda obj: obj, _final_step):
        try:
            candidate = pick(m)
            if candidate is None:
                continue
            if hasattr(candidate, "feature_names_in_") and len(candidate.feature_names_in_):
                return [str(entry) for entry in candidate.feature_names_in_]
        except Exception:
            continue
    return None
143
-
144
- # =========================
145
- # Model availability (download on cloud if needed)
146
- # =========================
147
def _get_model_url():
    """Return the configured model download URL, or "" when there is none.

    Streamlit secrets are consulted first and the MODEL_URL environment
    variable second. Any exception raised while reading the secrets (for
    instance when the Space has no secrets.toml) falls back to the
    environment variable alone. Surrounding whitespace is stripped.
    """
    env_url = os.environ.get("MODEL_URL", "") or ""
    try:
        configured = st.secrets.get("MODEL_URL", "") or env_url
        return configured.strip()
    except Exception:
        return env_url.strip()


MODEL_URL = _get_model_url()
155
-
156
def ensure_model_present() -> "Path | None":
    """Locate the model file, downloading it when only a URL is configured.

    Local candidates are checked first: DEFAULT_MODEL, then each entry of
    MODEL_FALLBACKS. If none exists and MODEL_URL is non-empty, the file is
    streamed from that URL and stored at DEFAULT_MODEL.

    Returns:
        The path of the model file, or None when no local file exists and
        no download was possible. Download problems are reported through
        ``st.error``.
    """
    # Check local paths first
    for p in [DEFAULT_MODEL, *MODEL_FALLBACKS]:
        if p.exists():
            return p
    if not MODEL_URL:
        return None

    # Download if MODEL_URL provided
    # NOTE(review): the downloaded file is later loaded with joblib, which
    # unpickles it - MODEL_URL must point to a trusted source.
    try:
        import requests
    except Exception:
        st.error("requests is required to download the model. Add 'requests' to requirements.txt.")
        return None

    # Stream into a side file and rename on success, so an interrupted
    # download never leaves a truncated file at DEFAULT_MODEL that the
    # exists() check above would accept on the next run.
    partial = DEFAULT_MODEL.with_name(DEFAULT_MODEL.name + ".part")
    try:
        DEFAULT_MODEL.parent.mkdir(parents=True, exist_ok=True)
        # The timeout keeps an unresponsive server from blocking the app forever.
        with requests.get(MODEL_URL, stream=True, timeout=60) as r:
            r.raise_for_status()
            with open(partial, "wb") as f:
                for chunk in r.iter_content(chunk_size=1<<20):
                    if chunk:
                        f.write(chunk)
        partial.replace(DEFAULT_MODEL)
        return DEFAULT_MODEL
    except Exception as e:
        # Best-effort cleanup of the incomplete side file.
        try:
            partial.unlink()
        except OSError:
            pass
        st.error(f"Failed to download model from MODEL_URL. {e}")
        return None
179
-
180
# Resolve the model file; the script stops here when none is available.
model_path = ensure_model_present()
if not model_path:
    st.error("Model not found. Upload models/ucs_rf.joblib (or set MODEL_URL in Secrets).")
    st.stop()

# Load model (any failure is shown to the user and stops the script)
try:
    model = load_model(str(model_path))
except Exception as load_err:
    st.error(f"Failed to load model: {model_path}\n{load_err}")
    st.stop()
191
-
192
# Meta overrides
# Schema precedence: models/meta.json first, then the feature names
# recorded on the loaded model, then the hard-coded defaults.
meta_path = MODELS_DIR / "meta.json"
meta = {}
if meta_path.exists():
    try:
        meta = json.loads(meta_path.read_text(encoding="utf-8"))
        if not isinstance(meta, dict):
            raise ValueError("top-level JSON value must be an object")
    except (OSError, ValueError) as meta_err:
        # Report a broken meta file instead of silently ignoring it, and
        # continue as if it were absent.
        st.warning(f"Ignoring {meta_path}: {meta_err}")
        meta = {}

TARGET = meta.get("target", TARGET)
if "features" in meta:
    FEATURES = meta["features"]
else:
    # No usable feature list in meta.json: fall back to the model itself.
    infer = infer_features_from_model(model)
    if infer:
        FEATURES = infer
205
-
206
- # =========================
207
- # Session state
208
- # =========================
209
# Seed the session-state keys the rest of the script relies on; keys that
# already exist are left untouched.
for _state_key, _state_default in (
    ("app_step", "intro"),
    ("results", {}),
    ("train_ranges", None),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
212
-
213
- # =========================
214
- # Sidebar: Model & schema
215
- # =========================
216
# Sidebar card: loaded model file, target column and expected features.
_sidebar = st.sidebar
_sidebar.markdown('<div class="sidebar-card"><h3>Model</h3>', unsafe_allow_html=True)
_sidebar.write(f"**Loaded:** `{Path(model_path).name}`")
_sidebar.write("**Target:**", TARGET)
_sidebar.write("**Features:**")
for feature_name in FEATURES:
    # One pill element per feature.
    _sidebar.markdown(f"<span class='pill'>{feature_name}</span>", unsafe_allow_html=True)
_sidebar.markdown('</div>', unsafe_allow_html=True)
223
-
224
- # =========================
225
- # Intro
226
- # =========================
227
# Title and caption are drawn on every step; the welcome text and start
# button only on the "intro" step.
st.title("ST_GeoMech_UCS")
st.caption("Real-Time UCS Tracking While Drilling — Cloud Ready")
if st.session_state.app_step == "intro":
    st.header("Welcome!")
    st.write("Upload Train/Test data, run the model, then go to Prediction.")
    start_clicked = st.button("Start ▶", type="primary")
    if start_clicked:
        # Switch to the development step and rerun so it renders at once.
        st.session_state.app_step = "dev"
        st.rerun()
234
-
235
- # =========================
236
- # Development (Train/Test)
237
- # =========================
238
def _dev_metrics(frame):
    """Return R2 / RMSE / MAE of the UCS_Pred column against the TARGET column."""
    return {
        "R2": r2_score(frame[TARGET], frame["UCS_Pred"]),
        "RMSE": rmse(frame[TARGET], frame["UCS_Pred"]),
        "MAE": mean_absolute_error(frame[TARGET], frame["UCS_Pred"]),
    }


def _dev_show_figure(fig):
    """Render a Matplotlib figure, then close it so pyplot does not keep it alive across reruns."""
    st.pyplot(fig, use_container_width=True)
    plt.close(fig)


def _dev_show_split(tab, frame, metrics, label):
    """Fill one results tab with metric tiles, the cross-plot and the depth/index track."""
    with tab:
        c1, c2, c3 = st.columns(3)
        c1.metric("R²", f"{metrics['R2']:.4f}")
        c2.metric("RMSE", f"{metrics['RMSE']:.4f}")
        c3.metric("MAE", f"{metrics['MAE']:.4f}")
        left, right = st.columns(2)
        with left:
            _dev_show_figure(cross_plot(frame[TARGET], frame["UCS_Pred"], f"{label}: Actual vs Predicted"))
        with right:
            _dev_show_figure(depth_or_index_track(frame, f"{label}: Depth/Index Track", include_actual=True))


# Development step: upload a Train/Test workbook, score both splits with the
# loaded model, show the results and offer them as an Excel download.
if st.session_state.app_step == "dev":
    st.sidebar.markdown('<div class="sidebar-card"><h3>Model Development Data</h3>', unsafe_allow_html=True)
    train_test_file = st.sidebar.file_uploader("Upload Train/Test Excel", type=["xlsx","xls"], key="dev_upload")
    run_btn = st.sidebar.button("Run Model", type="primary", use_container_width=True)
    if "Train" in st.session_state.results or "Test" in st.session_state.results:
        st.sidebar.button("Go to Prediction ▶", use_container_width=True, on_click=lambda: st.session_state.update(app_step="predict"))
    st.sidebar.markdown('</div>', unsafe_allow_html=True)

    if run_btn and train_test_file is None:
        # Give feedback instead of silently ignoring the click.
        st.sidebar.warning("Upload a Train/Test Excel file first.")

    if run_btn and train_test_file is not None:
        with st.status("Processing…", expanded=False) as status:
            book = read_book(train_test_file)
            if not book:
                status.update(label="Failed to read workbook.", state="error")
                st.stop()
            status.update(label="Workbook read ✓")

            sh_train = find_sheet(book, ["Train","Training","training2","train","training"])
            sh_test = find_sheet(book, ["Test","Testing","testing2","test","testing"])
            if sh_train is None or sh_test is None:
                status.update(label="Workbook must include Train/Training/training2 and Test/Testing/testing2.", state="error")
                st.stop()

            # Work on copies so the cached parse result is not modified.
            df_tr = book[sh_train].copy()
            df_te = book[sh_test].copy()
            if not (ensure_cols(df_tr, FEATURES + [TARGET]) and ensure_cols(df_te, FEATURES + [TARGET])):
                status.update(label="Missing required columns.", state="error")
                st.stop()
            status.update(label="Columns validated ✓")
            status.update(label="Predicting…")

            df_tr["UCS_Pred"] = model.predict(df_tr[FEATURES])
            df_te["UCS_Pred"] = model.predict(df_te[FEATURES])
            st.session_state.results["Train"] = df_tr
            st.session_state.results["Test"] = df_te

            st.session_state.results["metrics_train"] = _dev_metrics(df_tr)
            st.session_state.results["metrics_test"] = _dev_metrics(df_te)

            # Per-feature (min, max) of the training split; read later by
            # the prediction step's out-of-range check.
            tr_min = df_tr[FEATURES].min().to_dict()
            tr_max = df_tr[FEATURES].max().to_dict()
            st.session_state.train_ranges = {f: (float(tr_min[f]), float(tr_max[f])) for f in FEATURES}

            status.update(label="Done ✓", state="complete")
            toast("Model run complete 🚀")

    if "Train" in st.session_state.results or "Test" in st.session_state.results:
        tab1, tab2 = st.tabs(["Training", "Testing"])
        if "Train" in st.session_state.results:
            _dev_show_split(tab1, st.session_state.results["Train"], st.session_state.results["metrics_train"], "Training")
        if "Test" in st.session_state.results:
            _dev_show_split(tab2, st.session_state.results["Test"], st.session_state.results["metrics_test"], "Testing")

        # Export Dev results
        st.markdown("---")
        sheets = {}
        rows = []
        for split, sheet_name, metrics_key in (
            ("Train", "Train_with_pred", "metrics_train"),
            ("Test", "Test_with_pred", "metrics_test"),
        ):
            if split in st.session_state.results:
                sheets[sheet_name] = st.session_state.results[split]
                rows.append({"Split": split, **{k: round(v, 6) for k, v in st.session_state.results[metrics_key].items()}})
        summary_df = pd.DataFrame(rows) if rows else None
        try:
            data_bytes = export_workbook(sheets, summary_df)
            st.download_button("Export Train/Test Results to Excel",
                               data=data_bytes, file_name="UCS_Dev_Results.xlsx",
                               mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
        except RuntimeError as e:
            st.warning(str(e))
317
-
318
- # =========================
319
- # Prediction (Validation)
320
- # =========================
321
def _predict_show_figure(fig):
    """Render a Matplotlib figure, then close it so pyplot does not keep it alive across reruns."""
    st.pyplot(fig, use_container_width=True)
    plt.close(fig)


# Prediction step: score a validation workbook, flag rows outside the
# training min-max ranges, show the results and offer an Excel download.
if st.session_state.app_step == "predict":
    st.sidebar.markdown('<div class="sidebar-card"><h3>Prediction (Validation)</h3>', unsafe_allow_html=True)
    validation_file = st.sidebar.file_uploader("Upload Validation Excel", type=["xlsx","xls"], key="val_upload")
    predict_btn = st.sidebar.button("Predict", type="primary", use_container_width=True)
    st.sidebar.button("⬅ Back", on_click=lambda: st.session_state.update(app_step="dev"), use_container_width=True)
    st.sidebar.markdown('</div>', unsafe_allow_html=True)

    if predict_btn and validation_file is None:
        # Give feedback instead of silently ignoring the click.
        st.sidebar.warning("Upload a Validation Excel file first.")

    if predict_btn and validation_file is not None:
        with st.status("Predicting…", expanded=False) as status:
            vbook = read_book(validation_file)
            if not vbook:
                status.update(label="Could not read the Validation Excel.", state="error")
                st.stop()
            status.update(label="Workbook read ✓")

            # Prefer a validation-named sheet; otherwise take the first sheet.
            vname = find_sheet(vbook, ["Validation","Validate","validation2","Val","val"]) or next(iter(vbook))
            df_val = vbook[vname].copy()
            if not ensure_cols(df_val, FEATURES):
                status.update(label="Missing required columns.", state="error")
                st.stop()
            status.update(label="Columns validated ✓")
            df_val["UCS_Pred"] = model.predict(df_val[FEATURES])

            # OOR check: min–max vs training
            ranges = st.session_state.train_ranges
            oor_table = None
            oor_pct = 0.0
            if ranges:
                # One boolean column per feature: True where the value lies
                # outside the training range. Built once and reused below.
                viol = pd.DataFrame({f: (df_val[f] < ranges[f][0]) | (df_val[f] > ranges[f][1]) for f in FEATURES})
                any_viol = viol.any(axis=1)
                oor_pct = float(any_viol.mean()*100.0)
                if any_viol.any():
                    offenders = df_val.loc[any_viol, FEATURES].copy()
                    offenders["Violations"] = viol.loc[any_viol].apply(lambda r: ", ".join([c for c,v in r.items() if v]), axis=1)
                    offenders.index = offenders.index + 1
                    oor_table = offenders

            metrics_val = None
            if TARGET in df_val.columns:
                metrics_val = {
                    "R2": r2_score(df_val[TARGET], df_val["UCS_Pred"]),
                    "RMSE": rmse(df_val[TARGET], df_val["UCS_Pred"]),
                    "MAE": mean_absolute_error(df_val[TARGET], df_val["UCS_Pred"])
                }
            summary_val = {
                "n_points": len(df_val),
                "pred_min": float(df_val["UCS_Pred"].min()),
                "pred_max": float(df_val["UCS_Pred"].max()),
                "oor_pct": oor_pct
            }

            # Store all results together only after every computation has
            # succeeded, so a failure above cannot leave "Validate" present
            # without its "summary_val" for the display code below.
            st.session_state.results.update({
                "Validate": df_val,
                "metrics_val": metrics_val,
                "summary_val": summary_val,
                "oor_table": oor_table,
            })
            status.update(label="Predictions ready ✓", state="complete")

    if "Validate" in st.session_state.results:
        st.subheader("Validation Results")
        validated = st.session_state.results["Validate"]
        sv = st.session_state.results["summary_val"]
        oor_table = st.session_state.results.get("oor_table")
        has_actual = TARGET in validated.columns

        c1, c2, c3, c4 = st.columns(4)
        c1.metric("# points", f"{sv['n_points']}")
        c2.metric("Pred min", f"{sv['pred_min']:.2f}")
        c3.metric("Pred max", f"{sv['pred_max']:.2f}")
        c4.metric("OOR %", f"{sv['oor_pct']:.1f}%")

        left, right = st.columns(2)
        with left:
            if has_actual:
                _predict_show_figure(cross_plot(validated[TARGET], validated["UCS_Pred"], "Validation: Actual vs Predicted"))
            else:
                st.info("Actual UCS values are not available in the validation data. Cross-plot cannot be generated.")
        with right:
            _predict_show_figure(depth_or_index_track(validated, "Validation: Depth/Index Track", include_actual=has_actual))
        if oor_table is not None:
            st.write("*Out-of-range rows (vs. Training min–max):*")
            st.dataframe(oor_table, use_container_width=True)

        # Export
        st.markdown("---")
        sheets = {"Validate_with_pred": validated}
        rows = []
        for name, key in [("Train","metrics_train"), ("Test","metrics_test"), ("Validate","metrics_val")]:
            m = st.session_state.results.get(key)
            if m:
                rows.append({"Split": name, **{k: round(v,6) for k,v in m.items()}})
        summary_df = pd.DataFrame(rows) if rows else None
        try:
            data_bytes = export_workbook(sheets, summary_df)
            st.download_button("Export Validation Results to Excel",
                               data=data_bytes, file_name="UCS_Validation_Results.xlsx",
                               mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
        except RuntimeError as e:
            st.warning(str(e))
400
-
401
- # =========================
402
- # Footer
403
- # =========================
404
# Horizontal rule followed by the centered footer line.
_FOOTER_HTML = "<div style='text-align:center; color:#6b7280;'>ST_GeoMech_UCS • © Smart Thinking</div>"
st.markdown("---")
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)