UCS2014 commited on
Commit
e95c275
·
verified ·
1 Parent(s): 7ee81ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +627 -622
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # app.py — ST_Log_GR (Gamma Ray) app adapted from your UCS app, same flow & design
2
  import io, json, os, base64, math
3
  from pathlib import Path
4
  import streamlit as st
@@ -16,89 +15,128 @@ import plotly.graph_objects as go
16
  from sklearn.metrics import mean_squared_error, mean_absolute_error
17
 
18
  # =========================
19
- # Constants (GR)
20
  # =========================
21
- APP_NAME = "ST_Log_GR"
22
- TAGLINE = "Gamma Ray Prediction Using Drilling Data"
23
- # If meta.json is present, these will be overridden
24
- FEATURES = ["Feat1","Feat2","Feat3","Feat4","Feat5","Feat6"] # 6 inputs (placeholder; meta.json wins)
25
- TARGET = "log_GR" # typical training target; meta.json wins
26
- TARGET_TRANSFORM = "log10" # "log10" | "ln" | "none" (meta.json wins)
27
- ACTUAL_COL = "GR" # if present in sheets; if not, we'll derive from TARGET + transform
28
-
29
  MODELS_DIR = Path("models")
30
  DEFAULT_MODEL = MODELS_DIR / "gr_rf.joblib"
31
  MODEL_FALLBACKS = [MODELS_DIR / "model.joblib", MODELS_DIR / "model.pkl"]
32
-
33
  COLORS = {"pred": "#1f77b4", "actual": "#f2b702", "ref": "#5a5a5a"}
34
 
35
  # ---- Plot sizing controls ----
36
- CROSS_W = 350 # px (matplotlib figure size; Streamlit will still scale)
37
  CROSS_H = 350
38
- TRACK_H = 1000 # px (plotly height)
39
- TRACK_W = 500 # px (plotly width)
40
- FONT_SZ = 13
41
- BOLD_FONT = "Arial Black, Arial, sans-serif"
 
42
 
43
  # =========================
44
  # Page / CSS
45
  # =========================
46
- st.set_page_config(page_title=APP_NAME, page_icon="logo.png", layout="wide")
47
 
48
- # General CSS
49
  st.markdown("""
50
  <style>
51
  .brand-logo { width: 200px; height: auto; object-fit: contain; }
52
  .sidebar-header { display:flex; align-items:center; gap:12px; }
53
  .sidebar-header .text h1 { font-size: 1.05rem; margin:0; line-height:1.1; }
54
  .sidebar-header .text .tag { font-size: .85rem; color:#6b7280; margin:2px 0 0; }
55
- .centered-container { display:flex; flex-direction:column; align-items:center; text-align:center; }
 
 
 
 
 
56
  </style>
57
  """, unsafe_allow_html=True)
58
 
59
- # Allow sticky bits (preview expander header & tabs)
60
  st.markdown("""
61
  <style>
62
- .main .block-container { overflow: unset !important; }
63
- div[data-testid="stVerticalBlock"] { overflow: unset !important; }
 
 
 
 
 
 
 
64
  </style>
65
  """, unsafe_allow_html=True)
66
 
67
  # Hide uploader helper text ("Drag and drop file here", limits, etc.)
68
  st.markdown("""
69
  <style>
 
70
  section[data-testid="stFileUploader"] div[data-testid="stMarkdownContainer"]{display:none !important;}
 
71
  section[data-testid="stFileUploader"] [data-testid="stFileUploaderDropzone"] > div:first-child{display:none !important;}
 
72
  section[data-testid="stFileUploader"] [data-testid="stFileUploaderInstructions"]{display:none !important;}
 
73
  section[data-testid="stFileUploader"] p, section[data-testid="stFileUploader"] small{display:none !important;}
74
  </style>
75
  """, unsafe_allow_html=True)
76
 
77
- # Sticky Preview expander & its tabs
78
  st.markdown("""
79
  <style>
80
  div[data-testid="stExpander"] > details > summary {
81
- position: sticky; top: 0; z-index: 10; background: #fff; border-bottom: 1px solid #eee;
 
 
 
 
82
  }
83
  div[data-testid="stExpander"] div[data-baseweb="tab-list"] {
84
- position: sticky; top: 42px; z-index: 9; background: #fff; padding-top: 6px;
 
 
 
 
85
  }
86
  </style>
87
  """, unsafe_allow_html=True)
88
 
89
  # Center text in all pandas Styler tables (headers + cells)
90
  TABLE_CENTER_CSS = [
91
- dict(selector="th", props=[("text-align", "center")]),
92
- dict(selector="td", props=[("text-align", "center")]),
93
  ]
94
 
95
- # Message box styles
96
  st.markdown("""
97
  <style>
98
- .st-message-box { background:#f0f2f6; color:#333; padding:10px; border-radius:10px; border:1px solid #e6e9ef; }
99
- .st-message-box.st-success { background:#d4edda; color:#155724; border-color:#c3e6cb; }
100
- .st-message-box.st-warning { background:#fff3cd; color:#856404; border-color:#ffeeba; }
101
- .st-message-box.st-error { background:#f8d7da; color:#721c24; border-color:#f5c6cb; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  </style>
103
  """, unsafe_allow_html=True)
104
 
@@ -106,357 +144,307 @@ st.markdown("""
106
  # Password gate
107
  # =========================
108
  def inline_logo(path="logo.png") -> str:
109
- try:
110
- p = Path(path)
111
- if not p.exists(): return ""
112
- return f"data:image/png;base64,{base64.b64encode(p.read_bytes()).decode('ascii')}"
113
- except Exception:
114
- return ""
115
 
116
  def add_password_gate() -> None:
117
- try:
118
- required = st.secrets.get("APP_PASSWORD", "")
119
- except Exception:
120
- required = os.environ.get("APP_PASSWORD", "")
121
-
122
- if not required:
123
- st.warning("Set APP_PASSWORD in Secrets (or environment) and restart.")
124
- st.stop()
125
-
126
- if st.session_state.get("auth_ok", False):
127
- return
128
-
129
- st.sidebar.markdown(f"""
130
- <div class="centered-container">
131
- <img src="{inline_logo('logo.png')}" style="width: 200px; height: auto; object-fit: contain;">
132
- <div style='font-weight:800;font-size:1.2rem;'>{APP_NAME}</div>
133
- <div style='color:#667085;'>Smart Thinking • Secure Access</div>
134
- </div>
135
- """, unsafe_allow_html=True
136
- )
137
- pwd = st.sidebar.text_input("Access key", type="password", placeholder="••••••••")
138
- if st.sidebar.button("Unlock", type="primary"):
139
- if pwd == required:
140
- st.session_state.auth_ok = True
141
- st.rerun()
142
- else:
143
- st.error("Incorrect key.")
144
  st.stop()
145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  add_password_gate()
147
 
148
  # =========================
149
  # Utilities
150
  # =========================
151
  def rmse(y_true, y_pred) -> float:
152
- return float(np.sqrt(mean_squared_error(y_true, y_pred)))
153
 
154
  def pearson_r(y_true, y_pred) -> float:
155
- a = np.asarray(y_true, dtype=float)
156
- p = np.asarray(y_pred, dtype=float)
157
- if a.size < 2: return float("nan")
158
- return float(np.corrcoef(a, p)[0, 1])
159
 
160
  @st.cache_resource(show_spinner=False)
161
  def load_model(model_path: str):
162
- return joblib.load(model_path)
163
 
164
  @st.cache_data(show_spinner=False)
165
  def parse_excel(data_bytes: bytes):
166
- bio = io.BytesIO(data_bytes)
167
- xl = pd.ExcelFile(bio)
168
- return {sh: xl.parse(sh) for sh in xl.sheet_names}
169
 
170
  def read_book_bytes(b: bytes): return parse_excel(b) if b else {}
171
 
172
  def ensure_cols(df, cols):
173
- miss = [c for c in cols if c not in df.columns]
174
- if miss:
175
- st.error(f"Missing columns: {miss}\nFound: {list(df.columns)}")
176
- return False
177
- return True
178
 
179
  def find_sheet(book, names):
180
- low2orig = {k.lower(): k for k in book.keys()}
181
- for nm in names:
182
- if nm.lower() in low2orig: return low2orig[nm.lower()]
183
- return None
184
 
185
- def _nice_tick0(xmin: float, step: int = 5) -> float:
186
- return step * math.floor(xmin / step) if np.isfinite(xmin) else xmin
187
 
188
  def df_centered_rounded(df: pd.DataFrame, hide_index=True):
189
- out = df.copy()
190
- numcols = out.select_dtypes(include=[np.number]).columns
191
- styler = (
192
- out.style
193
- .format({c: "{:.2f}" for c in numcols})
194
- .set_properties(**{"text-align": "center"})
195
- .set_table_styles(TABLE_CENTER_CSS)
196
- )
197
- st.dataframe(styler, use_container_width=True, hide_index=hide_index)
198
- def safe_summary(df: pd.DataFrame, features: list[str]) -> None:
199
- """
200
- Show a centered, 2-decimal summary for whatever expected features
201
- actually exist in `df`. Never crashes if some are missing or non-numeric.
202
- """
203
- # Which expected columns are present / missing in this sheet?
204
- present = [c for c in features if c in df.columns]
205
- missing = [c for c in features if c not in df.columns]
206
-
207
- if not present:
208
- st.info(
209
- "None of the expected feature columns were found in this sheet.\n"
210
- f"Expected any of: {features}\n"
211
- f"Found: {list(df.columns)}"
212
- )
213
- return
214
-
215
- if missing:
216
- st.caption(f"Columns not found in this sheet (omitted): {missing}")
217
-
218
- # Work only with present columns and coerce to numeric (text → NaN)
219
- work = df[present].copy()
220
- for c in present:
221
- work[c] = pd.to_numeric(work[c], errors="coerce")
222
-
223
- # Build stats, 2 decimals, then show centered
224
- tbl = (
225
- work.agg(["min", "max", "mean", "std"])
226
- .T.rename(columns={"min": "Min", "max": "Max", "mean": "Mean", "std": "Std"})
227
- .round(2)
228
- )
229
- df_centered_rounded(tbl.reset_index(names="Feature"))
230
-
231
- # Small note if coercion created NaNs
232
- if work.isna().any().any():
233
- st.caption("Note: non-numeric values were ignored after numeric coercion.")
234
-
235
-
236
- # --- target transform helpers (to support models trained on log(GR)) ---
237
- def inverse_target(x: np.ndarray, transform: str) -> np.ndarray:
238
- t = (transform or "none").lower()
239
- if t in ["log10", "log_10", "log10()"]:
240
- return np.power(10.0, x)
241
- if t in ["ln", "log", "log_e", "natural"]:
242
- return np.exp(x)
243
- return x # "none"
244
-
245
- def to_actual_series(df: pd.DataFrame, target_col: str, actual_col_hint: str, transform: str) -> pd.Series:
246
- """
247
- Return the 'actual GR' series (API).
248
- If an explicit actual column exists, use it; else invert the target.
249
- """
250
- if actual_col_hint and actual_col_hint in df.columns:
251
- return pd.Series(df[actual_col_hint], dtype=float)
252
- # else, if target exists, invert:
253
- if target_col in df.columns:
254
- return pd.Series(inverse_target(np.asarray(df[target_col], dtype=float), transform), dtype=float)
255
- # fallback: if a column named "GR" exists, use it
256
- if "GR" in df.columns:
257
- return pd.Series(df["GR"], dtype=float)
258
- raise ValueError("Cannot find actual GR column or target to invert.")
259
-
260
- # =========================
261
- # Cross plot (Matplotlib) — auto limits for GR
262
- # =========================
263
- def _nice_bounds(arr_min, arr_max, n_ticks=5):
264
- # pick a "nice" range and step for GR (typically 0–200+ API)
265
- if not np.isfinite(arr_min) or not np.isfinite(arr_max):
266
- return 0.0, 100.0, 20.0
267
- span = arr_max - arr_min
268
- if span <= 0:
269
- return max(arr_min-5, 0), arr_max+5, 5.0
270
- raw_step = span / max(n_ticks, 1)
271
- mag = 10 ** math.floor(math.log10(raw_step))
272
- steps = np.array([1, 2, 2.5, 5, 10]) * mag
273
- step = steps[np.argmin(np.abs(steps - raw_step))]
274
- lo = step * math.floor(arr_min / step)
275
- hi = step * math.ceil(arr_max / step)
276
- return float(lo), float(hi), float(step)
277
 
 
 
 
278
  def cross_plot_static(actual, pred):
279
- a = pd.Series(actual, dtype=float)
280
- p = pd.Series(pred, dtype=float)
281
-
282
- # auto bounds & ticks for GR
283
- lo = min(a.min(), p.min())
284
- hi = max(a.max(), p.max())
285
- fixed_min, fixed_max, step = _nice_bounds(lo, hi, n_ticks=6)
286
- ticks = np.arange(fixed_min, fixed_max + step, step)
287
-
288
- dpi = 110
289
- fig, ax = plt.subplots(
290
- figsize=(CROSS_W / dpi, CROSS_H / dpi),
291
- dpi=dpi,
292
- constrained_layout=False
293
- )
294
-
295
- ax.scatter(a, p, s=14, c=COLORS["pred"], alpha=0.9, linewidths=0)
296
- ax.plot([fixed_min, fixed_max], [fixed_min, fixed_max],
297
- linestyle="--", linewidth=1.2, color=COLORS["ref"])
298
-
299
- ax.set_xlim(fixed_min, fixed_max)
300
- ax.set_ylim(fixed_min, fixed_max)
301
- ax.set_xticks(ticks)
302
- ax.set_yticks(ticks)
303
- ax.set_aspect("equal", adjustable="box") # true 1:1
304
-
305
- fmt = FuncFormatter(lambda x, _: f"{int(x):,}")
306
- ax.xaxis.set_major_formatter(fmt)
307
- ax.yaxis.set_major_formatter(fmt)
308
-
309
- ax.set_xlabel("Actual GR (API)", fontweight="bold", fontsize=10, color="black")
310
- ax.set_ylabel("Predicted GR (API)", fontweight="bold", fontsize=10, color="black")
311
- ax.tick_params(labelsize=8, colors="black")
312
-
313
- ax.grid(True, linestyle=":", alpha=0.3)
314
- for spine in ax.spines.values():
315
- spine.set_linewidth(1.1)
316
- spine.set_color("#444")
317
-
318
- fig.subplots_adjust(left=0.16, bottom=0.16, right=0.98, top=0.98)
319
- return fig
320
 
321
  # =========================
322
  # Track plot (Plotly)
323
  # =========================
324
- def track_plot(df, include_actual=True, pred_col="GR_Pred", actual_col="GR"):
325
- depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
326
- if depth_col is not None:
327
- y = pd.Series(df[depth_col]).astype(float)
328
- ylab = depth_col
329
- y_range = [float(y.max()), float(y.min())] # reverse for logs
330
- else:
331
- y = pd.Series(np.arange(1, len(df) + 1))
332
- ylab = "Point Index"
333
- y_range = [float(y.max()), float(y.min())]
334
-
335
- # X (GR) range & ticks
336
- x_series = pd.Series(df.get(pred_col, pd.Series(dtype=float))).astype(float)
337
- if include_actual and actual_col in df.columns:
338
- x_series = pd.concat([x_series, pd.Series(df[actual_col]).astype(float)], ignore_index=True)
339
- x_lo, x_hi = float(x_series.min()), float(x_series.max())
340
- x_pad = 0.03 * (x_hi - x_lo if x_hi > x_lo else 1.0)
341
- xmin, xmax = x_lo - x_pad, x_hi + x_pad
342
- tick0 = _nice_tick0(xmin, step=5)
343
-
344
- fig = go.Figure()
 
 
 
 
 
 
 
345
  fig.add_trace(go.Scatter(
346
- x=df[pred_col], y=y, mode="lines",
347
- line=dict(color=COLORS["pred"], width=1.8),
348
- name="GR_Pred",
349
- hovertemplate="GR_Pred: %{x:.0f}<br>"+ylab+": %{y}<extra></extra>"
350
  ))
351
- if include_actual and actual_col in df.columns:
352
- fig.add_trace(go.Scatter(
353
- x=df[actual_col], y=y, mode="lines",
354
- line=dict(color=COLORS["actual"], width=2.0, dash="dot"),
355
- name="GR (actual)",
356
- hovertemplate="GR (actual): %{x:.0f}<br>"+ylab+": %{y}<extra></extra>"
357
- ))
358
-
359
- fig.update_layout(
360
- height=TRACK_H, width=TRACK_W, autosize=False,
361
- paper_bgcolor="#fff", plot_bgcolor="#fff",
362
- margin=dict(l=64, r=16, t=36, b=48), hovermode="closest",
363
- font=dict(size=FONT_SZ, color="#000"),
364
- legend=dict(
365
- x=0.98, y=0.05, xanchor="right", yanchor="bottom",
366
- bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1
367
- ),
368
- legend_title_text=""
369
- )
370
- fig.update_xaxes(
371
- title_text="GR (API)",
372
- title_font=dict(size=20, family=BOLD_FONT, color="#000"),
373
- tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
374
- side="top", range=[xmin, xmax],
375
- ticks="outside", tickformat=",.0f", tickmode="auto", tick0=tick0,
376
- showline=True, linewidth=1.2, linecolor="#444", mirror=True,
377
- showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
378
- )
379
- fig.update_yaxes(
380
- title_text=f"{ylab}",
381
- title_font=dict(size=20, family=BOLD_FONT, color="#000"),
382
- tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
383
- range=y_range, ticks="outside",
384
- showline=True, linewidth=1.2, linecolor="#444", mirror=True,
385
- showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
386
- )
387
- return fig
 
 
 
388
 
389
  # ---------- Preview modal (matplotlib) ----------
390
  def preview_tracks(df: pd.DataFrame, cols: list[str]):
391
- cols = [c for c in cols if c in df.columns]
392
- n = len(cols)
393
- if n == 0:
394
- fig, ax = plt.subplots(figsize=(4, 2))
395
- ax.text(0.5,0.5,"No selected columns",ha="center",va="center"); ax.axis("off")
396
- return fig
397
- fig, axes = plt.subplots(1, n, figsize=(2.2*n, 7.0), sharey=True, dpi=100)
398
- if n == 1: axes = [axes]
399
- idx = np.arange(1, len(df) + 1)
400
- for ax, col in zip(axes, cols):
401
- ax.plot(df[col], idx, '-', lw=1.4, color="#333")
402
- ax.set_xlabel(col); ax.xaxis.set_label_position('top'); ax.xaxis.tick_top(); ax.invert_yaxis()
403
- ax.grid(True, linestyle=":", alpha=0.3)
404
- for s in ax.spines.values(): s.set_visible(True)
405
- axes[0].set_ylabel("Point Index")
406
  return fig
 
 
 
 
 
 
 
 
 
 
407
 
408
  # Modal wrapper (Streamlit compatibility)
409
  try:
410
- dialog = st.dialog
411
  except AttributeError:
412
- def dialog(title):
413
- def deco(fn):
414
- def wrapper(*args, **kwargs):
415
- with st.expander(title, expanded=True):
416
- return fn(*args, **kwargs)
417
- return wrapper
418
- return deco
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
419
 
420
  # =========================
421
- # Load model + meta
422
  # =========================
423
  def ensure_model() -> Path|None:
424
- for p in [DEFAULT_MODEL, *MODEL_FALLBACKS]:
425
- if p.exists() and p.stat().st_size > 0: return p
426
- url = os.environ.get("MODEL_URL", "")
427
- if not url: return None
428
- try:
429
- import requests
430
- DEFAULT_MODEL.parent.mkdir(parents=True, exist_ok=True)
431
- with requests.get(url, stream=True, timeout=30) as r:
432
- r.raise_for_status()
433
- with open(DEFAULT_MODEL, "wb") as f:
434
- for chunk in r.iter_content(1<<20):
435
- if chunk: f.write(chunk)
436
- return DEFAULT_MODEL
437
- except Exception:
438
- return None
439
 
440
  mpath = ensure_model()
441
  if not mpath:
442
- st.error("Model not found. Upload models/gr_rf.joblib (or set MODEL_URL).")
443
- st.stop()
444
  try:
445
- model = load_model(str(mpath))
446
  except Exception as e:
447
- st.error(f"Failed to load model: {e}")
448
- st.stop()
449
 
450
  meta_path = MODELS_DIR / "meta.json"
451
  if meta_path.exists():
452
- try:
453
- meta = json.loads(meta_path.read_text(encoding="utf-8"))
454
- FEATURES = meta.get("features", FEATURES)
455
- TARGET = meta.get("target", TARGET)
456
- TARGET_TRANSFORM = meta.get("target_transform", TARGET_TRANSFORM)
457
- ACTUAL_COL = meta.get("actual_col", ACTUAL_COL)
458
- except Exception:
459
- pass
460
 
461
  # =========================
462
  # Session state
@@ -468,329 +456,346 @@ st.session_state.setdefault("dev_file_name","")
468
  st.session_state.setdefault("dev_file_bytes",b"")
469
  st.session_state.setdefault("dev_file_loaded",False)
470
  st.session_state.setdefault("dev_preview",False)
471
- st.session_state.setdefault("show_preview_modal", False)
472
 
473
  # =========================
474
  # Branding in Sidebar
475
  # =========================
476
  st.sidebar.markdown(f"""
477
- <div class="centered-container">
478
- <img src="{inline_logo('logo.png')}" style="width: 200px; height: auto; object-fit: contain;">
479
- <div style='font-weight:800;font-size:1.2rem;'>{APP_NAME}</div>
480
- <div style='color:#667085;'>{TAGLINE}</div>
481
- </div>
482
- """, unsafe_allow_html=True
483
  )
484
 
485
- # Reusable sticky header
 
 
486
  def sticky_header(title, message):
487
- st.markdown(
488
- f"""
489
- <style>
490
- .sticky-container {{
491
- position: sticky; top: 0; background-color: white; z-index: 100;
492
- padding-top: 10px; padding-bottom: 10px; border-bottom: 1px solid #eee;
493
- }}
494
- </style>
495
- <div class="sticky-container">
496
- <h3>{title}</h3>
497
- <p>{message}</p>
498
- </div>
499
- """,
500
- unsafe_allow_html=True
501
- )
 
 
 
 
 
502
 
503
  # =========================
504
  # INTRO
505
  # =========================
506
  if st.session_state.app_step == "intro":
507
- st.header("Welcome!")
508
- st.markdown("This software is developed by *Smart Thinking AI-Solutions Team* to estimate Gamma Ray (GR) from input features.")
509
- st.subheader("How It Works")
510
- st.markdown(
511
- "1) **Upload your data to build the case and preview the performance of our model.** \n"
512
- "2) Click **Run Model** to compute metrics and plots. \n"
513
- "3) **Proceed to Validation** (with actual GR) or **Proceed to Prediction** (no GR)."
514
- )
515
- if st.button("Start Showcase", type="primary"):
516
- st.session_state.app_step = "dev"; st.rerun()
517
 
518
  # =========================
519
  # CASE BUILDING
520
  # =========================
521
  if st.session_state.app_step == "dev":
522
- st.sidebar.header("Case Building")
523
- up = st.sidebar.file_uploader("Upload Your Data File", type=["xlsx","xls"])
524
- if up is not None:
525
- st.session_state.dev_file_bytes = up.getvalue()
526
- st.session_state.dev_file_name = up.name
527
- st.session_state.dev_file_loaded = True
528
- st.session_state.dev_preview = False
529
- if st.session_state.dev_file_loaded:
530
- tmp = read_book_bytes(st.session_state.dev_file_bytes)
531
- if tmp:
532
- df0 = next(iter(tmp.values()))
533
- st.sidebar.caption(f"**Data loaded:** {st.session_state.dev_file_name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
534
-
535
- if st.sidebar.button("Preview data", use_container_width=True, disabled=not st.session_state.dev_file_loaded):
536
- st.session_state.show_preview_modal = True
537
- st.session_state.dev_preview = True
538
-
539
- run = st.sidebar.button("Run Model", type="primary", use_container_width=True)
540
- if st.sidebar.button("Proceed to Validation ▶", use_container_width=True): st.session_state.app_step="validate"; st.rerun()
541
- if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
542
-
543
- # Sticky helper
544
- if st.session_state.dev_file_loaded and st.session_state.dev_preview:
545
- sticky_header("Case Building", "Previewed ✓ — now click **Run Model**.")
546
- elif st.session_state.dev_file_loaded:
547
- sticky_header("Case Building", "📄 **Preview uploaded data** using the sidebar button, then click **Run Model**.")
548
- else:
549
- sticky_header("Case Building", "**Upload your data to build a case, then run the model to review development performance.**")
550
-
551
- if run and st.session_state.dev_file_bytes:
552
- book = read_book_bytes(st.session_state.dev_file_bytes)
553
- sh_train = find_sheet(book, ["Train","Training","training2","train","training"])
554
- sh_test = find_sheet(book, ["Test","Testing","testing2","test","testing"])
555
- if sh_train is None or sh_test is None:
556
- st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
557
- st.stop()
558
- tr = book[sh_train].copy(); te = book[sh_test].copy()
559
- if not (ensure_cols(tr, FEATURES) and ensure_cols(te, FEATURES)):
560
- st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True); st.stop()
561
-
562
- # predictions (handle log targets)
563
- tr_pred_raw = model.predict(tr[FEATURES])
564
- te_pred_raw = model.predict(te[FEATURES])
565
- tr["GR_Pred"] = inverse_target(np.asarray(tr_pred_raw, dtype=float), TARGET_TRANSFORM)
566
- te["GR_Pred"] = inverse_target(np.asarray(te_pred_raw, dtype=float), TARGET_TRANSFORM)
567
-
568
- # actual GR (for metrics/plots)
569
- tr["GR_Actual"] = to_actual_series(tr, TARGET, ACTUAL_COL, TARGET_TRANSFORM)
570
- te["GR_Actual"] = to_actual_series(te, TARGET, ACTUAL_COL, TARGET_TRANSFORM)
571
-
572
- st.session_state.results["Train"]=tr; st.session_state.results["Test"]=te
573
- st.session_state.results["m_train"]={
574
- "R": pearson_r(tr["GR_Actual"], tr["GR_Pred"]),
575
- "RMSE": rmse(tr["GR_Actual"], tr["GR_Pred"]),
576
- "MAE": mean_absolute_error(tr["GR_Actual"], tr["GR_Pred"])
577
- }
578
- st.session_state.results["m_test"]={
579
- "R": pearson_r(te["GR_Actual"], te["GR_Pred"]),
580
- "RMSE": rmse(te["GR_Actual"], te["GR_Pred"]),
581
- "MAE": mean_absolute_error(te["GR_Actual"], te["GR_Pred"])
582
- }
583
-
584
- tr_min = tr[FEATURES].min().to_dict(); tr_max = tr[FEATURES].max().to_dict()
585
- st.session_state.train_ranges = {f:(float(tr_min[f]), float(tr_max[f])) for f in FEATURES}
586
- st.markdown('<div class="st-message-box st-success">Case has been built and results are displayed below.</div>', unsafe_allow_html=True)
587
-
588
- def _dev_block(df, m):
589
- c1,c2,c3 = st.columns(3)
590
- c1.metric("R", f"{m['R']:.2f}")
591
- c2.metric("RMSE", f"{m['RMSE']:.2f}")
592
- c3.metric("MAE", f"{m['MAE']:.2f}")
593
- st.markdown("""
594
- <div style='text-align:left;font-size:0.8em;color:#6b7280;margin-top:-16px;margin-bottom:8px;'>
595
- <strong>R:</strong> Pearson Correlation Coefficient<br>
596
- <strong>RMSE:</strong> Root Mean Square Error<br>
597
- <strong>MAE:</strong> Mean Absolute Error
598
- </div>
599
- """, unsafe_allow_html=True)
600
-
601
- col_track, col_cross = st.columns([2, 3], gap="large")
602
- with col_track:
603
- st.plotly_chart(
604
- track_plot(df.rename(columns={"GR_Actual":"GR"}), include_actual=True,
605
- pred_col="GR_Pred", actual_col="GR"),
606
- use_container_width=False,
607
- config={"displayModeBar": False, "scrollZoom": True}
608
- )
609
- with col_cross:
610
- st.pyplot(cross_plot_static(df["GR_Actual"], df["GR_Pred"]), use_container_width=False)
611
-
612
- if "Train" in st.session_state.results or "Test" in st.session_state.results:
613
- tab1, tab2 = st.tabs(["Training", "Testing"])
614
- if "Train" in st.session_state.results:
615
- with tab1: _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
616
- if "Test" in st.session_state.results:
617
- with tab2: _dev_block(st.session_state.results["Test"], st.session_state.results["m_test"])
 
 
 
618
 
619
  # =========================
620
  # VALIDATION (with actual GR)
621
  # =========================
622
  if st.session_state.app_step == "validate":
623
- st.sidebar.header("Validate the Model")
624
- up = st.sidebar.file_uploader("Upload Validation Excel", type=["xlsx","xls"])
625
- if up is not None:
626
- book = read_book_bytes(up.getvalue())
627
- if book:
628
- df0 = next(iter(book.values()))
629
- st.sidebar.caption(f"**Data loaded:** {up.name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
630
- if st.sidebar.button("Preview data", use_container_width=True, disabled=(up is None)):
631
- st.session_state.show_preview_modal = True
632
- go_btn = st.sidebar.button("Predict & Validate", type="primary", use_container_width=True)
633
- if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
634
- if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
635
-
636
- sticky_header("Validate the Model", "Upload a dataset with the same **features** and **GR** to evaluate performance.")
637
-
638
- if go_btn and up is not None:
639
- book = read_book_bytes(up.getvalue())
640
- name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
641
- df = book[name].copy()
642
- if not ensure_cols(df, FEATURES): st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True); st.stop()
643
-
644
- pred_raw = model.predict(df[FEATURES])
645
- df["GR_Pred"] = inverse_target(np.asarray(pred_raw, dtype=float), TARGET_TRANSFORM)
646
- # actual GR
647
- try:
648
- df["GR_Actual"] = to_actual_series(df, TARGET, ACTUAL_COL, TARGET_TRANSFORM)
649
- except Exception:
650
- st.markdown('<div class="st-message-box st-error">Validation sheet must include actual GR (or a target column that can be inverse-transformed).</div>', unsafe_allow_html=True); st.stop()
651
-
652
- st.session_state.results["Validate"]=df
653
-
654
- ranges = st.session_state.train_ranges; oor_pct = 0.0; tbl=None
655
- if ranges:
656
- any_viol = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).any(axis=1)
657
- oor_pct = float(any_viol.mean()*100.0)
658
- if any_viol.any():
659
- tbl = df.loc[any_viol, FEATURES].copy()
660
- for c in FEATURES:
661
- if pd.api.types.is_numeric_dtype(tbl[c]): tbl[c] = tbl[c].round(2)
662
- tbl["Violations"] = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).loc[any_viol].apply(lambda r:", ".join([c for c,v in r.items() if v]), axis=1)
663
-
664
- st.session_state.results["m_val"]={
665
- "R": pearson_r(df["GR_Actual"], df["GR_Pred"]),
666
- "RMSE": rmse(df["GR_Actual"], df["GR_Pred"]),
667
- "MAE": mean_absolute_error(df["GR_Actual"], df["GR_Pred"])
668
- }
669
- st.session_state.results["sv_val"]={"n":len(df),"pred_min":float(df["GR_Pred"].min()),"pred_max":float(df["GR_Pred"].max()),"oor":oor_pct}
670
- st.session_state.results["oor_tbl"]=tbl
671
-
672
- if "Validate" in st.session_state.results:
673
- m = st.session_state.results["m_val"]
674
- c1,c2,c3 = st.columns(3)
675
- c1.metric("R", f"{m['R']:.2f}")
676
- c2.metric("RMSE", f"{m['RMSE']:.2f}")
677
- c3.metric("MAE", f"{m['MAE']:.2f}")
678
- st.markdown("""
679
- <div style='text-align:left;font-size:0.8em;color:#6b7280;margin-top:-16px;margin-bottom:8px;'>
680
- <strong>R:</strong> Pearson Correlation Coefficient<br>
681
- <strong>RMSE:</strong> Root Mean Square Error<br>
682
- <strong>MAE:</strong> Mean Absolute Error
683
- </div>
684
- """, unsafe_allow_html=True)
685
-
686
- col_track, col_cross = st.columns([2, 3], gap="large")
687
- with col_track:
688
- st.plotly_chart(
689
- track_plot(st.session_state.results["Validate"].rename(columns={"GR_Actual":"GR"}),
690
- include_actual=True, pred_col="GR_Pred", actual_col="GR"),
691
- use_container_width=False, config={"displayModeBar": False, "scrollZoom": True}
692
- )
693
- with col_cross:
694
- st.pyplot(
695
- cross_plot_static(st.session_state.results["Validate"]["GR_Actual"],
696
- st.session_state.results["Validate"]["GR_Pred"]),
697
- use_container_width=False
698
- )
699
-
700
- sv = st.session_state.results["sv_val"]
701
- if sv["oor"] > 0: st.markdown('<div class="st-message-box st-warning">Some inputs fall outside **training min–max** ranges.</div>', unsafe_allow_html=True)
702
- if st.session_state.results["oor_tbl"] is not None:
703
- st.write("*Out-of-range rows (vs. Training min–max):*")
704
- df_centered_rounded(st.session_state.results["oor_tbl"])
705
 
706
  # =========================
707
  # PREDICTION (no actual GR)
708
  # =========================
709
  if st.session_state.app_step == "predict":
710
- st.sidebar.header("Prediction (No Actual GR)")
711
- up = st.sidebar.file_uploader("Upload Prediction Excel", type=["xlsx","xls"])
712
- if up is not None:
713
- book = read_book_bytes(up.getvalue())
714
- if book:
715
- df0 = next(iter(book.values()))
716
- st.sidebar.caption(f"**Data loaded:** {up.name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
717
- if st.sidebar.button("Preview data", use_container_width=True, disabled=(up is None)):
718
- st.session_state.show_preview_modal = True
719
- go_btn = st.sidebar.button("Predict", type="primary", use_container_width=True)
720
- if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
721
-
722
- sticky_header("Prediction", "Upload a dataset with the feature columns (no **GR**).")
723
-
724
- if go_btn and up is not None:
725
- book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
726
- df = book[name].copy()
727
- if not ensure_cols(df, FEATURES): st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True); st.stop()
728
-
729
- pred_raw = model.predict(df[FEATURES])
730
- df["GR_Pred"] = inverse_target(np.asarray(pred_raw, dtype=float), TARGET_TRANSFORM)
731
- st.session_state.results["PredictOnly"]=df
732
-
733
- ranges = st.session_state.train_ranges; oor_pct = 0.0
734
- if ranges:
735
- any_viol = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).any(axis=1)
736
- oor_pct = float(any_viol.mean()*100.0)
737
- st.session_state.results["sv_pred"]={
738
- "n":len(df),
739
- "pred_min":float(df["GR_Pred"].min()),
740
- "pred_max":float(df["GR_Pred"].max()),
741
- "pred_mean":float(df["GR_Pred"].mean()),
742
- "pred_std":float(df["GR_Pred"].std(ddof=0)),
743
- "oor":oor_pct
744
- }
745
-
746
- if "PredictOnly" in st.session_state.results:
747
- df = st.session_state.results["PredictOnly"]; sv = st.session_state.results["sv_pred"]
748
-
749
- col_left, col_right = st.columns([2,3], gap="large")
750
- with col_left:
751
- table = pd.DataFrame({
752
- "Metric": ["# points","Pred min","Pred max","Pred mean","Pred std","OOR %"],
753
- "Value": [sv["n"],
754
- round(sv["pred_min"],2),
755
- round(sv["pred_max"],2),
756
- round(sv["pred_mean"],2),
757
- round(sv["pred_std"],2),
758
- f'{sv["oor"]:.1f}%']
759
- })
760
- st.markdown('<div class="st-message-box st-success">Predictions ready ✓</div>', unsafe_allow_html=True)
761
- df_centered_rounded(table, hide_index=True)
762
- st.caption("**★ OOR** = % of rows whose input features fall outside the training min–max range.")
763
- with col_right:
764
- st.plotly_chart(
765
- track_plot(df.rename(columns={"GR_Pred":"GR_Pred"}), include_actual=False,
766
- pred_col="GR_Pred", actual_col="GR"),
767
- use_container_width=False, config={"displayModeBar": False, "scrollZoom": True}
768
- )
769
-
770
- # =========================
771
- # Preview modal (re-usable)
 
 
772
  # =========================
773
  if st.session_state.show_preview_modal:
774
- book_to_preview = {}
775
- if st.session_state.app_step == "dev":
776
- book_to_preview = read_book_bytes(st.session_state.dev_file_bytes)
777
- elif st.session_state.app_step in ["validate", "predict"] and 'up' in locals() and up is not None:
778
- book_to_preview = read_book_bytes(up.getvalue())
779
-
780
- with st.expander("Preview data", expanded=True):
781
- if not book_to_preview:
782
- st.markdown('<div class="st-message-box">No data loaded yet.</div>', unsafe_allow_html=True)
783
- else:
784
- names = list(book_to_preview.keys())
785
- tabs = st.tabs(names)
786
- for t, name in zip(tabs, names):
787
- with t:
788
- df = book_to_preview[name]
789
- t1, t2 = st.tabs(["Tracks", "Summary"])
790
- with t1:
791
- st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)
792
- with t2:
793
- safe_summary(df, FEATURES)
 
 
 
 
 
 
794
  # =========================
795
  # Footer
796
  # =========================
@@ -799,6 +804,6 @@ st.markdown("""
799
  <hr>
800
  <div style='text-align:center;color:#6b7280;font-size:1.0em;'>
801
  © 2024 Smart Thinking AI-Solutions Team. All rights reserved.<br>
802
- Contact: <a href="mailto:smartthinking@smartthinking.com.sa">smartthinking@smartthinking.com.sa</a>
803
  </div>
804
  """, unsafe_allow_html=True)
 
 
1
  import io, json, os, base64, math
2
  from pathlib import Path
3
  import streamlit as st
 
15
  from sklearn.metrics import mean_squared_error, mean_absolute_error
16
 
17
  # =========================
18
+ # Constants
19
  # =========================
20
# FEATURES and TARGET start empty and are overwritten from models/meta.json
# further down in this file when that file exists and can be parsed.
FEATURES = []
TARGET = ""
# Name and unit used for plot axis titles and for the derived "<name>_Pred" columns.
TARGET_DISPLAY_NAME = "GR"
TARGET_UNITS = "API"

# Model locations, tried in order: DEFAULT_MODEL first, then each fallback.
MODELS_DIR = Path("models")
DEFAULT_MODEL = MODELS_DIR / "gr_rf.joblib"
MODEL_FALLBACKS = [MODELS_DIR / "model.joblib", MODELS_DIR / "model.pkl"]

# Line/marker colours: predicted curve, actual curve, 1:1 reference line.
COLORS = {"pred": "#1f77b4", "actual": "#f2b702", "ref": "#5a5a5a"}

# ---- Plot sizing controls ----
CROSS_W = 350 # px (matplotlib figure size; Streamlit will still scale)
CROSS_H = 350
TRACK_H = 1000 # px (plotly height)
# The track plot is drawn with autosize disabled, so this width is honoured as-is.
TRACK_W = 500 # px (plotly width)
FONT_SZ = 13
BOLD_FONT = "Arial Black, Arial, sans-serif" # used for bold axis titles & ticks
38
 
39
  # =========================
40
  # Page / CSS
41
  # =========================
42
+ st.set_page_config(page_title="ST_GeoMech_GR", page_icon="logo.png", layout="wide")
43
 
44
+ # General CSS (logo helpers etc.)
45
  st.markdown("""
46
  <style>
47
  .brand-logo { width: 200px; height: auto; object-fit: contain; }
48
  .sidebar-header { display:flex; align-items:center; gap:12px; }
49
  .sidebar-header .text h1 { font-size: 1.05rem; margin:0; line-height:1.1; }
50
  .sidebar-header .text .tag { font-size: .85rem; color:#6b7280; margin:2px 0 0; }
51
+ .centered-container {
52
+ display: flex;
53
+ flex-direction: column;
54
+ align-items: center;
55
+ text-align: center;
56
+ }
57
  </style>
58
  """, unsafe_allow_html=True)
59
 
60
+ # CSS to make sticky headers work correctly by overriding Streamlit's overflow property
61
  st.markdown("""
62
  <style>
63
+ /* This targets the main content area */
64
+ .main .block-container {
65
+ overflow: unset !important;
66
+ }
67
+
68
+ /* This targets the vertical block that holds all your elements */
69
+ div[data-testid="stVerticalBlock"] {
70
+ overflow: unset !important;
71
+ }
72
  </style>
73
  """, unsafe_allow_html=True)
74
 
75
  # Hide uploader helper text ("Drag and drop file here", limits, etc.)
76
  st.markdown("""
77
  <style>
78
+ /* Older builds (helper wrapped in a Markdown container) */
79
  section[data-testid="stFileUploader"] div[data-testid="stMarkdownContainer"]{display:none !important;}
80
+ /* 1.31–1.34: helper is the first child in the dropzone */
81
  section[data-testid="stFileUploader"] [data-testid="stFileUploaderDropzone"] > div:first-child{display:none !important;}
82
+ /* 1.35+: explicit helper container */
83
  section[data-testid="stFileUploader"] [data-testid="stFileUploaderInstructions"]{display:none !important;}
84
+ /* Fallback: any paragraph/small text inside the uploader */
85
  section[data-testid="stFileUploader"] p, section[data-testid="stFileUploader"] small{display:none !important;}
86
  </style>
87
  """, unsafe_allow_html=True)
88
 
89
+ # Make the Preview expander title & tabs sticky (pinned to the top)
90
  st.markdown("""
91
  <style>
92
  div[data-testid="stExpander"] > details > summary {
93
+ position: sticky;
94
+ top: 0;
95
+ z-index: 10;
96
+ background: #fff;
97
+ border-bottom: 1px solid #eee;
98
  }
99
  div[data-testid="stExpander"] div[data-baseweb="tab-list"] {
100
+ position: sticky;
101
+ top: 42px; /* adjust if your expander header height differs */
102
+ z-index: 9;
103
+ background: #fff;
104
+ padding-top: 6px;
105
  }
106
  </style>
107
  """, unsafe_allow_html=True)
108
 
109
  # Center text in all pandas Styler tables (headers + cells)
110
  TABLE_CENTER_CSS = [
111
+ dict(selector="th", props=[("text-align", "center")]),
112
+ dict(selector="td", props=[("text-align", "center")]),
113
  ]
114
 
115
+ # NEW: CSS for the message box
116
  st.markdown("""
117
  <style>
118
+ .st-message-box {
119
+ background-color: #f0f2f6;
120
+ color: #333333;
121
+ padding: 10px;
122
+ border-radius: 10px;
123
+ border: 1px solid #e6e9ef;
124
+ }
125
+ .st-message-box.st-success {
126
+ background-color: #d4edda;
127
+ color: #155724;
128
+ border-color: #c3e6cb;
129
+ }
130
+ .st-message-box.st-warning {
131
+ background-color: #fff3cd;
132
+ color: #856404;
133
+ border-color: #ffeeba;
134
+ }
135
+ .st-message-box.st-error {
136
+ background-color: #f8d7da;
137
+ color: #721c24;
138
+ border-color: #f5c6cb;
139
+ }
140
  </style>
141
  """, unsafe_allow_html=True)
142
 
 
144
  # Password gate
145
  # =========================
146
  def inline_logo(path="logo.png") -> str:
147
+ try:
148
+ p = Path(path)
149
+ if not p.exists(): return ""
150
+ return f"data:image/png;base64,{base64.b64encode(p.read_bytes()).decode('ascii')}"
151
+ except Exception:
152
+ return ""
153
 
154
  def add_password_gate() -> None:
155
+ try:
156
+ required = st.secrets.get("APP_PASSWORD", "")
157
+ except Exception:
158
+ required = os.environ.get("APP_PASSWORD", "")
159
+
160
+ if not required:
161
+ st.warning("Set APP_PASSWORD in Secrets (or environment) and restart.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  st.stop()
163
 
164
+ if st.session_state.get("auth_ok", False):
165
+ return
166
+
167
+ st.sidebar.markdown(f"""
168
+ <div class="centered-container">
169
+ <img src="{inline_logo('logo.png')}" style="width: 200px; height: auto; object-fit: contain;">
170
+ <div style='font-weight:800;font-size:1.2rem; margin-top: 10px;'>ST_GeoMech_GR</div>
171
+ <div style='color:#667085;'>Smart Thinking • Secure Access</div>
172
+ </div>
173
+ """, unsafe_allow_html=True
174
+ )
175
+ pwd = st.sidebar.text_input("Access key", type="password", placeholder="••••••••")
176
+ if st.sidebar.button("Unlock", type="primary"):
177
+ if pwd == required:
178
+ st.session_state.auth_ok = True
179
+ st.rerun()
180
+ else:
181
+ st.error("Incorrect key.")
182
+ st.stop()
183
+
184
  add_password_gate()
185
 
186
  # =========================
187
  # Utilities
188
  # =========================
189
  def rmse(y_true, y_pred) -> float:
190
+ return float(np.sqrt(mean_squared_error(y_true, y_pred)))
191
 
192
  def pearson_r(y_true, y_pred) -> float:
193
+ a = np.asarray(y_true, dtype=float)
194
+ p = np.asarray(y_pred, dtype=float)
195
+ if a.size < 2: return float("nan")
196
+ return float(np.corrcoef(a, p)[0, 1])
197
 
198
  @st.cache_resource(show_spinner=False)
  def load_model(model_path: str):
      """Load the model stored at *model_path* with joblib and return it.

      Wrapped in ``st.cache_resource`` with the spinner disabled.
      """
      # NOTE(review): joblib files are pickle-based, so loading one can execute
      # arbitrary code. Only load model files from a trusted source; this
      # matters because the file may have been downloaded from MODEL_URL.
      return joblib.load(model_path)
201
 
202
  @st.cache_data(show_spinner=False)
203
  def parse_excel(data_bytes: bytes):
204
+ bio = io.BytesIO(data_bytes)
205
+ xl = pd.ExcelFile(bio)
206
+ return {sh: xl.parse(sh) for sh in xl.sheet_names}
207
 
208
  def read_book_bytes(b: bytes): return parse_excel(b) if b else {}
209
 
210
  def ensure_cols(df, cols):
211
+ miss = [c for c in cols if c not in df.columns]
212
+ if miss:
213
+ st.error(f"Missing columns: {miss}\nFound: {list(df.columns)}")
214
+ return False
215
+ return True
216
 
217
  def find_sheet(book, names):
218
+ low2orig = {k.lower(): k for k in book.keys()}
219
+ for nm in names:
220
+ if nm.lower() in low2orig: return low2orig[nm.lower()]
221
+ return None
222
 
223
def _nice_tick0(xmin: float, step: int = 100) -> float:
    """Round *xmin* down to a multiple of *step*; non-finite input is returned as-is."""
    if not np.isfinite(xmin):
        return xmin
    return step * math.floor(xmin / step)
225
 
226
  def df_centered_rounded(df: pd.DataFrame, hide_index=True):
227
+ """Center headers & cells; format numeric columns to 2 decimals."""
228
+ out = df.copy()
229
+ numcols = out.select_dtypes(include=[np.number]).columns
230
+ styler = (
231
+ out.style
232
+ .format({c: "{:.2f}" for c in numcols})
233
+ .set_properties(**{"text-align": "center"})
234
+ .set_table_styles(TABLE_CENTER_CSS)
235
+ )
236
+ st.dataframe(styler, use_container_width=True, hide_index=hide_index)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
 
238
+ # =========================
239
+ # Cross plot (Matplotlib, fixed limits & ticks)
240
+ # =========================
241
  def cross_plot_static(actual, pred):
242
+ a = pd.Series(actual, dtype=float)
243
+ p = pd.Series(pred, dtype=float)
244
+
245
+ # Dynamic limits based on data
246
+ all_data = pd.concat([a, p])
247
+ fixed_min, fixed_max = all_data.min() - 0.1, all_data.max() + 0.1
248
+ # Dynamic ticks based on data range
249
+ ticks = np.linspace(fixed_min, fixed_max, num=5, endpoint=True)
250
+
251
+ dpi = 110
252
+ fig, ax = plt.subplots(
253
+ figsize=(CROSS_W / dpi, CROSS_H / dpi),
254
+ dpi=dpi,
255
+ constrained_layout=False
256
+ )
257
+
258
+ ax.scatter(a, p, s=14, c=COLORS["pred"], alpha=0.9, linewidths=0)
259
+ ax.plot([fixed_min, fixed_max], [fixed_min, fixed_max],
260
+ linestyle="--", linewidth=1.2, color=COLORS["ref"])
261
+
262
+ ax.set_xlim(fixed_min, fixed_max)
263
+ ax.set_ylim(fixed_min, fixed_max)
264
+ ax.set_xticks(ticks)
265
+ ax.set_yticks(ticks)
266
+ ax.set_aspect("equal", adjustable="box") # true 45°
267
+
268
+ fmt = FuncFormatter(lambda x, _: f"{x:.2f}")
269
+ ax.xaxis.set_major_formatter(fmt)
270
+ ax.yaxis.set_major_formatter(fmt)
271
+
272
+ ax.set_xlabel(f"Actual {TARGET_DISPLAY_NAME} ({TARGET_UNITS})", fontweight="bold", fontsize=10, color="black")
273
+ ax.set_ylabel(f"Predicted {TARGET_DISPLAY_NAME} ({TARGET_UNITS})", fontweight="bold", fontsize=10, color="black")
274
+ ax.tick_params(labelsize=6, colors="black")
275
+
276
+ ax.grid(True, linestyle=":", alpha=0.3)
277
+ for spine in ax.spines.values():
278
+ spine.set_linewidth(1.1)
279
+ spine.set_color("#444")
280
+
281
+ fig.subplots_adjust(left=0.16, bottom=0.16, right=0.98, top=0.98)
282
+ return fig
283
 
284
  # =========================
285
  # Track plot (Plotly)
286
  # =========================
287
def track_plot(df, include_actual=True):
    """Build the Plotly track: predicted (and optionally actual) values vs. depth.

    The y-axis uses the first column whose name contains "depth"
    (case-insensitive), or a 1-based point index when there is none, and is
    drawn reversed so values increase downward.

    Args:
        df: Frame containing the ``<TARGET_DISPLAY_NAME>_Pred`` column and,
            optionally, the ``TARGET_DISPLAY_NAME`` column of actual values.
        include_actual: Draw the actual curve when that column is present.

    Returns:
        The Plotly figure.

    Raises:
        KeyError: If the prediction column is missing from *df*.
    """
    pred_col = f"{TARGET_DISPLAY_NAME}_Pred"
    show_actual = include_actual and TARGET_DISPLAY_NAME in df.columns

    depth_col = next((c for c in df.columns if 'depth' in str(c).lower()), None)
    if depth_col is not None:
        y = pd.Series(df[depth_col]).astype(float)
        ylab = str(depth_col)  # str() so the label can be concatenated below
    else:
        y = pd.Series(np.arange(1, len(df) + 1))
        ylab = "Point Index"
    y_range = [float(y.max()), float(y.min())]  # max first => reversed axis

    # X range from the finite plotted values only, so NaN/inf never reach the
    # axis range; fall back to a unit range when nothing finite remains.
    x_parts = [pd.Series(df[pred_col]).astype(float)]
    if show_actual:
        x_parts.append(pd.Series(df[TARGET_DISPLAY_NAME]).astype(float))
    x_all = pd.concat(x_parts, ignore_index=True).replace([np.inf, -np.inf], np.nan).dropna()
    if x_all.empty:
        x_lo, x_hi = 0.0, 1.0
    else:
        x_lo, x_hi = float(x_all.min()), float(x_all.max())
    x_pad = 0.03 * (x_hi - x_lo if x_hi > x_lo else 1.0)
    xmin, xmax = x_lo - x_pad, x_hi + x_pad
    tick0 = _nice_tick0(xmin, step=10)

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=df[pred_col], y=y, mode="lines",
        line=dict(color=COLORS["pred"], width=1.8),
        name=f"{TARGET_DISPLAY_NAME}_Pred",
        hovertemplate=f"{TARGET_DISPLAY_NAME}_Pred: %{{x:.2f}}<br>"+ylab+": %{y}<extra></extra>"
    ))
    if show_actual:
        fig.add_trace(go.Scatter(
            x=df[TARGET_DISPLAY_NAME], y=y, mode="lines",
            line=dict(color=COLORS["actual"], width=2.0, dash="dot"),
            name=f"{TARGET_DISPLAY_NAME} (actual)",
            hovertemplate=f"{TARGET_DISPLAY_NAME} (actual): %{{x:.2f}}<br>"+ylab+": %{y}<extra></extra>"
        ))

    fig.update_layout(
        height=TRACK_H,
        width=TRACK_W,
        autosize=False,  # keep the explicit width instead of fitting the container
        paper_bgcolor="#fff", plot_bgcolor="#fff",
        margin=dict(l=64, r=16, t=36, b=48), hovermode="closest",
        font=dict(size=FONT_SZ, color="#000"),
        legend=dict(
            x=0.98, y=0.05, xanchor="right", yanchor="bottom",
            bgcolor="rgba(255,255,255,0.75)", bordercolor="#ccc", borderwidth=1
        ),
        legend_title_text=""
    )

    # Bold, black axis titles & ticks.
    # NOTE(review): tick0 is passed together with tickmode="auto"; Plotly may
    # only honour tick0 with tickmode="linear" — confirm.
    fig.update_xaxes(
        title_text=f"{TARGET_DISPLAY_NAME} ({TARGET_UNITS})",
        title_font=dict(size=20, family=BOLD_FONT, color="#000"),
        tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
        side="top",
        range=[xmin, xmax],
        ticks="outside",
        tickformat=",.2f",
        tickmode="auto",
        tick0=tick0,
        showline=True, linewidth=1.2, linecolor="#444", mirror=True,
        showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
    )
    # An empty frame or all-NaN depth gives a NaN range; let Plotly autorange
    # (still reversed) in that case instead of passing NaN limits.
    if all(np.isfinite(v) for v in y_range):
        y_axis_range = dict(range=y_range)
    else:
        y_axis_range = dict(autorange="reversed")
    fig.update_yaxes(
        title_text=ylab,
        title_font=dict(size=20, family=BOLD_FONT, color="#000"),
        tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
        ticks="outside",
        showline=True, linewidth=1.2, linecolor="#444", mirror=True,
        showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True,
        **y_axis_range
    )

    return fig
361
 
362
  # ---------- Preview modal (matplotlib) ----------
363
  def preview_tracks(df: pd.DataFrame, cols: list[str]):
364
+ cols = [c for c in cols if c in df.columns]
365
+ n = len(cols)
366
+ if n == 0:
367
+ fig, ax = plt.subplots(figsize=(4, 2))
368
+ ax.text(0.5,0.5,"No selected columns",ha="center",va="center"); ax.axis("off")
 
 
 
 
 
 
 
 
 
 
369
  return fig
370
+ fig, axes = plt.subplots(1, n, figsize=(2.2*n, 7.0), sharey=True, dpi=100)
371
+ if n == 1: axes = [axes]
372
+ idx = np.arange(1, len(df) + 1)
373
+ for ax, col in zip(axes, cols):
374
+ ax.plot(df[col], idx, '-', lw=1.4, color="#333")
375
+ ax.set_xlabel(col); ax.xaxis.set_label_position('top'); ax.xaxis.tick_top(); ax.invert_yaxis()
376
+ ax.grid(True, linestyle=":", alpha=0.3)
377
+ for s in ax.spines.values(): s.set_visible(True)
378
+ axes[0].set_ylabel("Point Index")
379
+ return fig
380
 
381
  # Modal wrapper (Streamlit compatibility)
382
  try:
383
+ dialog = st.dialog
384
  except AttributeError:
385
+ def dialog(title):
386
+ def deco(fn):
387
+ def wrapper(*args, **kwargs):
388
+ with st.expander(title, expanded=True):
389
+ return fn(*args, **kwargs)
390
+ return wrapper
391
+ return deco
392
+
393
def preview_modal(book: dict[str, pd.DataFrame]):
    """Render a per-sheet preview of *book*: feature tracks and summary statistics.

    Args:
        book: Mapping of sheet name to its DataFrame. An empty mapping shows
            an informational message instead.
    """
    if not book:
        st.info("No data loaded yet.")
        return
    names = list(book.keys())
    for tab, name in zip(st.tabs(names), names):
        with tab:
            df = book[name]
            tracks_tab, summary_tab = st.tabs(["Tracks", "Summary"])
            with tracks_tab:
                st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)
            with summary_tab:
                # Summarise only the feature columns this sheet actually has;
                # indexing with the full FEATURES list raises KeyError when a
                # sheet lacks one of them.
                present = [c for c in FEATURES if c in df.columns]
                if not present:
                    st.info("None of the model features are present in this sheet.")
                else:
                    tbl = (df[present]
                           .agg(['min','max','mean','std'])
                           .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"}))
                    # rename_axis + reset_index gives the same "Feature" column
                    # as reset_index(names=...), without needing pandas >= 1.5.
                    df_centered_rounded(tbl.rename_axis("Feature").reset_index())
409
 
410
  # =========================
411
+ # Load model
412
  # =========================
413
  def ensure_model() -> Path|None:
414
+ for p in [DEFAULT_MODEL, *MODEL_FALLBACKS]:
415
+ if p.exists() and p.stat().st_size > 0: return p
416
+ url = os.environ.get("MODEL_URL", "")
417
+ if not url: return None
418
+ try:
419
+ import requests
420
+ DEFAULT_MODEL.parent.mkdir(parents=True, exist_ok=True)
421
+ with requests.get(url, stream=True, timeout=30) as r:
422
+ r.raise_for_status()
423
+ with open(DEFAULT_MODEL, "wb") as f:
424
+ for chunk in r.iter_content(1<<20):
425
+ if chunk: f.write(chunk)
426
+ return DEFAULT_MODEL
427
+ except Exception:
428
+ return None
429
 
430
  mpath = ensure_model()
431
  if not mpath:
432
+ st.error("Model not found. Upload models/gr_rf.joblib (or set MODEL_URL).")
433
+ st.stop()
434
  try:
435
+ model = load_model(str(mpath))
436
  except Exception as e:
437
+ st.error(f"Failed to load model: {e}")
438
+ st.stop()
439
 
440
  meta_path = MODELS_DIR / "meta.json"
441
  if meta_path.exists():
442
+ try:
443
+ meta = json.loads(meta_path.read_text(encoding="utf-8"))
444
+ FEATURES = meta.get("features", FEATURES)
445
+ TARGET = meta.get("target", TARGET)
446
+ except Exception:
447
+ pass
 
 
448
 
449
  # =========================
450
  # Session state
 
456
  st.session_state.setdefault("dev_file_bytes",b"")
457
  st.session_state.setdefault("dev_file_loaded",False)
458
  st.session_state.setdefault("dev_preview",False)
459
+ st.session_state.setdefault("show_preview_modal", False) # New state variable
460
 
461
  # =========================
462
  # Branding in Sidebar
463
  # =========================
464
  # Sidebar branding: logo (embedded as a data URI via inline_logo), app name
  # and tagline. Rendered as raw HTML, hence unsafe_allow_html=True.
  st.sidebar.markdown(f"""
  <div class="centered-container">
  <img src="{inline_logo('logo.png')}" style="width: 200px; height: auto; object-fit: contain;">
  <div style='font-weight:800;font-size:1.2rem;'>ST_GeoMech_GR</div>
  <div style='color:#667085;'>Real-Time GR Tracking While Drilling</div>
  </div>
  """, unsafe_allow_html=True
  )
472
 
473
+ # =========================
474
+ # Reusable Sticky Header Function
475
+ # =========================
476
  def sticky_header(title, message):
477
+ st.markdown(
478
+ f"""
479
+ <style>
480
+ .sticky-container {{
481
+ position: sticky;
482
+ top: 0;
483
+ background-color: white;
484
+ z-index: 100;
485
+ padding-top: 10px;
486
+ padding-bottom: 10px;
487
+ border-bottom: 1px solid #eee;
488
+ }}
489
+ </style>
490
+ <div class="sticky-container">
491
+ <h3>{title}</h3>
492
+ <p>{message}</p>
493
+ </div>
494
+ """,
495
+ unsafe_allow_html=True
496
+ )
497
 
498
  # =========================
499
  # INTRO
500
  # =========================
501
  if st.session_state.app_step == "intro":
502
+ st.header("Welcome!")
503
+ st.markdown("This software is developed by *Smart Thinking AI-Solutions Team* to estimate GR from drilling data.")
504
+ st.subheader("How It Works")
505
+ st.markdown(
506
+ "1) **Upload your data to build the case and preview the performance of our model.** \n"
507
+ "2) Click **Run Model** to compute metrics and plots. \n"
508
+ "3) **Proceed to Validation** (with actual GR) or **Proceed to Prediction** (no GR)."
509
+ )
510
+ if st.button("Start Showcase", type="primary"):
511
+ st.session_state.app_step = "dev"; st.rerun()
512
 
513
  # =========================
514
  # CASE BUILDING
515
  # =========================
516
  # Case Building step: upload a workbook with training and testing sheets, run
  # the model on both, store predictions/metrics in session state and render
  # them in two tabs. Widget order matters in Streamlit, so statement order
  # here is significant.
  if st.session_state.app_step == "dev":
      st.sidebar.header("Case Building")
      up = st.sidebar.file_uploader("Upload Your Data File", type=["xlsx","xls"])
      if up is not None:
          # Keep the upload in session state so it is still available on reruns.
          st.session_state.dev_file_bytes = up.getvalue()
          st.session_state.dev_file_name = up.name
          st.session_state.dev_file_loaded = True
          st.session_state.dev_preview = False
      if st.session_state.dev_file_loaded:
          tmp = read_book_bytes(st.session_state.dev_file_bytes)
          if tmp:
              # The caption reports the shape of the first sheet only.
              df0 = next(iter(tmp.values()))
              st.sidebar.caption(f"**Data loaded:** {st.session_state.dev_file_name} • {df0.shape[0]} rows × {df0.shape[1]} cols")

      if st.sidebar.button("Preview data", use_container_width=True, disabled=not st.session_state.dev_file_loaded):
          st.session_state.show_preview_modal = True # consumed by the preview section near the end of the script
          st.session_state.dev_preview = True

      run = st.sidebar.button("Run Model", type="primary", use_container_width=True)
      if st.sidebar.button("Proceed to Validation ▶", use_container_width=True): st.session_state.app_step="validate"; st.rerun()
      if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()

      # Header message depends on how far the user has progressed.
      if st.session_state.dev_file_loaded and st.session_state.dev_preview:
          sticky_header("Case Building", "Previewed ✓ — now click **Run Model**.")
      elif st.session_state.dev_file_loaded:
          sticky_header("Case Building", "📄 **Preview uploaded data** using the sidebar button, then click **Run Model**.")
      else:
          sticky_header("Case Building", "**Upload your data to build a case, then run the model to review development performance.**")

      if run and st.session_state.dev_file_bytes:
          book = read_book_bytes(st.session_state.dev_file_bytes)
          sh_train = find_sheet(book, ["Train","Training","training2","train","training"])
          sh_test = find_sheet(book, ["Test","Testing","testing2","test","testing"])
          if sh_train is None or sh_test is None:
              st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training/training2 and Test/Testing/testing2 sheets.</div>', unsafe_allow_html=True)
              st.stop()
          tr = book[sh_train].copy(); te = book[sh_test].copy()
          if not (ensure_cols(tr, FEATURES+[TARGET]) and ensure_cols(te, FEATURES+[TARGET])):
              st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True)
              st.stop()

          # Predict in the model's target space, then map predictions and
          # actuals to display units with 10**x.
          # NOTE(review): this assumes the model was trained on log10 of the
          # displayed quantity — confirm against the training pipeline.
          tr[f"{TARGET}_Pred"] = model.predict(tr[FEATURES])
          te[f"{TARGET}_Pred"] = model.predict(te[FEATURES])
          tr[f"{TARGET_DISPLAY_NAME}_Pred"] = np.power(10, tr[f"{TARGET}_Pred"])
          te[f"{TARGET_DISPLAY_NAME}_Pred"] = np.power(10, te[f"{TARGET}_Pred"])

          tr[TARGET_DISPLAY_NAME] = np.power(10, tr[TARGET])
          te[TARGET_DISPLAY_NAME] = np.power(10, te[TARGET])

          # Metrics are computed on the display-unit values.
          st.session_state.results["Train"]=tr; st.session_state.results["Test"]=te
          st.session_state.results["m_train"]={
              "R": pearson_r(tr[TARGET_DISPLAY_NAME], tr[f"{TARGET_DISPLAY_NAME}_Pred"]),
              "RMSE": rmse(tr[TARGET_DISPLAY_NAME], tr[f"{TARGET_DISPLAY_NAME}_Pred"]),
              "MAE": mean_absolute_error(tr[TARGET_DISPLAY_NAME], tr[f"{TARGET_DISPLAY_NAME}_Pred"])
          }
          st.session_state.results["m_test"]={
              "R": pearson_r(te[TARGET_DISPLAY_NAME], te[f"{TARGET_DISPLAY_NAME}_Pred"]),
              "RMSE": rmse(te[TARGET_DISPLAY_NAME], te[f"{TARGET_DISPLAY_NAME}_Pred"]),
              "MAE": mean_absolute_error(te[TARGET_DISPLAY_NAME], te[f"{TARGET_DISPLAY_NAME}_Pred"])
          }

          # Per-feature (min, max) of the training sheet; the validation and
          # prediction steps use these for their out-of-range checks.
          tr_min = tr[FEATURES].min().to_dict(); tr_max = tr[FEATURES].max().to_dict()
          st.session_state.train_ranges = {f:(float(tr_min[f]), float(tr_max[f])) for f in FEATURES}
          st.markdown('<div class="st-message-box st-success">Case has been built and results are displayed below.</div>', unsafe_allow_html=True)

      def _dev_block(df, m):
          """Render the metrics row, the track plot and the cross plot for one result set."""
          c1,c2,c3 = st.columns(3)
          c1.metric("R", f"{m['R']:.2f}")
          c2.metric("RMSE", f"{m['RMSE']:.2f}")
          c3.metric("MAE", f"{m['MAE']:.2f}")

          # Legend explaining the metric abbreviations.
          st.markdown("""
          <div style='text-align: left; font-size: 0.8em; color: #6b7280; margin-top: -16px; margin-bottom: 8px;'>
          <strong>R:</strong> Pearson Correlation Coefficient<br>
          <strong>RMSE:</strong> Root Mean Square Error<br>
          <strong>MAE:</strong> Mean Absolute Error
          </div>
          """, unsafe_allow_html=True)

          # Two-column layout with a large gap so the plots do not overlap.
          col_track, col_cross = st.columns([2, 3], gap="large")
          with col_track:
              st.plotly_chart(
                  track_plot(df, include_actual=True),
                  use_container_width=False, # keep the explicit width set in track_plot()
                  config={"displayModeBar": False, "scrollZoom": True}
              )
          with col_cross:
              st.pyplot(cross_plot_static(df[TARGET_DISPLAY_NAME], df[f"{TARGET_DISPLAY_NAME}_Pred"]), use_container_width=False)


      # Results persist in session state, so they are re-rendered on every rerun.
      if "Train" in st.session_state.results or "Test" in st.session_state.results:
          tab1, tab2 = st.tabs(["Training", "Testing"])
          if "Train" in st.session_state.results:
              with tab1: _dev_block(st.session_state.results["Train"], st.session_state.results["m_train"])
          if "Test" in st.session_state.results:
              with tab2: _dev_block(st.session_state.results["Test"], st.session_state.results["m_test"])
616
 
617
  # =========================
618
  # VALIDATION (with actual GR)
619
  # =========================
620
  if st.session_state.app_step == "validate":
621
+ st.sidebar.header("Validate the Model")
622
+ up = st.sidebar.file_uploader("Upload Validation Excel", type=["xlsx","xls"])
623
+ if up is not None:
624
+ book = read_book_bytes(up.getvalue())
625
+ if book:
626
+ df0 = next(iter(book.values()))
627
+ st.sidebar.caption(f"**Data loaded:** {up.name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
628
+ if st.sidebar.button("Preview data", use_container_width=True, disabled=(up is None)):
629
+ st.session_state.show_preview_modal = True # Set state to show modal
630
+ go_btn = st.sidebar.button("Predict & Validate", type="primary", use_container_width=True)
631
+ if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()
632
+ if st.sidebar.button("Proceed to Prediction ▶", use_container_width=True): st.session_state.app_step="predict"; st.rerun()
633
+
634
+ sticky_header("Validate the Model", f"Upload a dataset with the same **features** and **{TARGET}** to evaluate performance.")
635
+
636
+ if go_btn and up is not None:
637
+ book = read_book_bytes(up.getvalue())
638
+ name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
639
+ df = book[name].copy()
640
+ if not ensure_cols(df, FEATURES+[TARGET]): st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()
641
+
642
+ # Prediction and inverse transform
643
+ df[f"{TARGET}_Pred"] = model.predict(df[FEATURES])
644
+ df[f"{TARGET_DISPLAY_NAME}_Pred"] = np.power(10, df[f"{TARGET}_Pred"])
645
+
646
+ df[TARGET_DISPLAY_NAME] = np.power(10, df[TARGET])
647
+
648
+ st.session_state.results["Validate"]=df
649
+
650
+ ranges = st.session_state.train_ranges; oor_pct = 0.0; tbl=None
651
+ if ranges:
652
+ any_viol = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).any(axis=1)
653
+ oor_pct = float(any_viol.mean()*100.0)
654
+ if any_viol.any():
655
+ tbl = df.loc[any_viol, FEATURES].copy()
656
+ for c in FEATURES:
657
+ if pd.api.types.is_numeric_dtype(tbl[c]): tbl[c] = tbl[c].round(2)
658
+ tbl["Violations"] = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).loc[any_viol].apply(lambda r:", ".join([c for c,v in r.items() if v]), axis=1)
659
+ st.session_state.results["m_val"]={
660
+ "R": pearson_r(df[TARGET_DISPLAY_NAME], df[f"{TARGET_DISPLAY_NAME}_Pred"]),
661
+ "RMSE": rmse(df[TARGET_DISPLAY_NAME], df[f"{TARGET_DISPLAY_NAME}_Pred"]),
662
+ "MAE": mean_absolute_error(df[TARGET_DISPLAY_NAME], df[f"{TARGET_DISPLAY_NAME}_Pred"])
663
+ }
664
+ st.session_state.results["sv_val"]={"n":len(df),"pred_min":float(df[f"{TARGET_DISPLAY_NAME}_Pred"].min()),"pred_max":float(df[f"{TARGET_DISPLAY_NAME}_Pred"].max()),"oor":oor_pct}
665
+ st.session_state.results["oor_tbl"]=tbl
666
+
667
+ if "Validate" in st.session_state.results:
668
+ m = st.session_state.results["m_val"]
669
+ c1,c2,c3 = st.columns(3)
670
+ c1.metric("R", f"{m['R']:.2f}")
671
+ c2.metric("RMSE", f"{m['RMSE']:.2f}")
672
+ c3.metric("MAE", f"{m['MAE']:.2f}")
673
+
674
+ # NEW: Footer for metric abbreviations
675
+ st.markdown("""
676
+ <div style='text-align: left; font-size: 0.8em; color: #6b7280; margin-top: -16px; margin-bottom: 8px;'>
677
+ <strong>R:</strong> Pearson Correlation Coefficient<br>
678
+ <strong>RMSE:</strong> Root Mean Square Error<br>
679
+ <strong>MAE:</strong> Mean Absolute Error
680
+ </div>
681
+ """, unsafe_allow_html=True)
682
+
683
+ col_track, col_cross = st.columns([2, 3], gap="large")
684
+ with col_track:
685
+ st.plotly_chart(
686
+ track_plot(st.session_state.results["Validate"], include_actual=True),
687
+ use_container_width=False, # Set to False to honor the width in track_plot()
688
+ config={"displayModeBar": False, "scrollZoom": True}
689
+ )
690
+ with col_cross:
691
+ st.pyplot(
692
+ cross_plot_static(st.session_state.results["Validate"][TARGET_DISPLAY_NAME],
693
+ st.session_state.results["Validate"][f"{TARGET_DISPLAY_NAME}_Pred"]),
694
+ use_container_width=False
695
+ )
696
+
697
+ sv = st.session_state.results["sv_val"]
698
+ if sv["oor"] > 0: st.markdown('<div class="st-message-box st-warning">Some inputs fall outside **training min–max** ranges.</div>', unsafe_allow_html=True)
699
+ if st.session_state.results["oor_tbl"] is not None:
700
+ st.write("*Out-of-range rows (vs. Training min–max):*")
701
+ df_centered_rounded(st.session_state.results["oor_tbl"])
 
702
 
703
  # =========================
704
  # PREDICTION (no actual GR)
705
  # =========================
706
  # Prediction step: upload a sheet with the feature columns only, predict,
  # and show summary statistics next to the predicted track.
  if st.session_state.app_step == "predict":
      st.sidebar.header("Prediction (No Actual GR)")
      up = st.sidebar.file_uploader("Upload Prediction Excel", type=["xlsx","xls"])
      if up is not None:
          book = read_book_bytes(up.getvalue())
          if book:
              # The caption reports the shape of the first sheet only.
              df0 = next(iter(book.values()))
              st.sidebar.caption(f"**Data loaded:** {up.name} • {df0.shape[0]} rows × {df0.shape[1]} cols")
      if st.sidebar.button("Preview data", use_container_width=True, disabled=(up is None)):
          st.session_state.show_preview_modal = True # consumed by the preview section near the end of the script
      go_btn = st.sidebar.button("Predict", type="primary", use_container_width=True)
      if st.sidebar.button("⬅ Back to Case Building", use_container_width=True): st.session_state.app_step="dev"; st.rerun()

      sticky_header("Prediction", f"Upload a dataset with the feature columns (no **{TARGET_DISPLAY_NAME}**).")

      if go_btn and up is not None:
          # Always uses the first sheet of the uploaded workbook.
          book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
          df = book[name].copy()
          if not ensure_cols(df, FEATURES): st.markdown('<div class="st-message-box st-error">Missing required columns.</div>', unsafe_allow_html=True); st.stop()

          # Predict in the model's target space, then map to display units with 10**x.
          # NOTE(review): assumes a log10-trained target — confirm.
          df[f"{TARGET}_Pred"] = model.predict(df[FEATURES])
          df[f"{TARGET_DISPLAY_NAME}_Pred"] = np.power(10, df[f"{TARGET}_Pred"])

          st.session_state.results["PredictOnly"]=df

          # Share of rows with at least one feature outside the training
          # min–max range; stays 0.0 when no training ranges were recorded.
          ranges = st.session_state.train_ranges; oor_pct = 0.0
          if ranges:
              any_viol = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in FEATURES}).any(axis=1)
              oor_pct = float(any_viol.mean()*100.0)
          st.session_state.results["sv_pred"]={
              "n":len(df),
              "pred_min":float(df[f"{TARGET_DISPLAY_NAME}_Pred"].min()),
              "pred_max":float(df[f"{TARGET_DISPLAY_NAME}_Pred"].max()),
              "pred_mean":float(df[f"{TARGET_DISPLAY_NAME}_Pred"].mean()),
              "pred_std":float(df[f"{TARGET_DISPLAY_NAME}_Pred"].std(ddof=0)), # ddof=0: population standard deviation
              "oor":oor_pct
          }

      # Results persist in session state, so they are re-rendered on every rerun.
      if "PredictOnly" in st.session_state.results:
          df = st.session_state.results["PredictOnly"]; sv = st.session_state.results["sv_pred"]

          col_left, col_right = st.columns([2,3], gap="large")
          with col_left:
              table = pd.DataFrame({
                  "Metric": ["# points","Pred min","Pred max","Pred mean","Pred std","OOR %"],
                  "Value": [sv["n"],
                            round(sv["pred_min"],2),
                            round(sv["pred_max"],2),
                            round(sv["pred_mean"],2),
                            round(sv["pred_std"],2),
                            f'{sv["oor"]:.1f}%']
              })
              st.markdown('<div class="st-message-box st-success">Predictions ready ✓</div>', unsafe_allow_html=True)
              df_centered_rounded(table, hide_index=True)
              st.caption("**★ OOR** = % of rows whose input features fall outside the training min–max range.")
          with col_right:
              st.plotly_chart(
                  track_plot(df, include_actual=False),
                  use_container_width=False, # keep the explicit width set in track_plot()
                  config={"displayModeBar": False, "scrollZoom": True}
              )
768
+
769
+ # =========================
770
+ # Run preview modal after all other elements
771
  # =========================
772
  if st.session_state.show_preview_modal:
773
+ # Get the correct book based on the current app step
774
+ book_to_preview = {}
775
+ if st.session_state.app_step == "dev":
776
+ book_to_preview = read_book_bytes(st.session_state.dev_file_bytes)
777
+ elif st.session_state.app_step in ["validate", "predict"] and 'up' in locals() and up is not None:
778
+ book_to_preview = read_book_bytes(up.getvalue())
779
+
780
+ with st.expander("Preview data", expanded=True):
781
+ if not book_to_preview:
782
+ st.markdown('<div class="st-message-box">No data loaded yet.</div>', unsafe_allow_html=True)
783
+ else:
784
+ names = list(book_to_preview.keys())
785
+ tabs = st.tabs(names)
786
+ for t, name in zip(tabs, names):
787
+ with t:
788
+ df = book_to_preview[name]
789
+ t1, t2 = st.tabs(["Tracks", "Summary"])
790
+ with t1:
791
+ st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)
792
+ with t2:
793
+ tbl = (df[FEATURES]
794
+ .agg(['min','max','mean','std'])
795
+ .T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"}))
796
+ df_centered_rounded(tbl.reset_index(names="Feature"))
797
+ # Reset the state variable after the modal is displayed
798
+ st.session_state.show_preview_modal = False
799
  # =========================
800
  # Footer
801
  # =========================
 
804
  <hr>
805
  <div style='text-align:center;color:#6b7280;font-size:1.0em;'>
806
  © 2024 Smart Thinking AI-Solutions Team. All rights reserved.<br>
807
+ Contact: <a href="mailto:smartthinking@smartthinking.com.sa">smartthinking@smartthinking.com.sa</a>
808
  </div>
809
  """, unsafe_allow_html=True)