Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -20,10 +20,12 @@ from sklearn.metrics import mean_squared_error, mean_absolute_error
|
|
| 20 |
# =========================
|
| 21 |
APP_NAME = "ST_GR"
|
| 22 |
TAGLINE = "Gamma Ray Prediction"
|
|
|
|
| 23 |
# If meta.json is present, these will be overridden
|
| 24 |
FEATURES = ["GPM", "SPP", "RPM", "WOB", "T", "ROP"]
|
| 25 |
-
TARGET = "log(GR)"
|
| 26 |
-
|
|
|
|
| 27 |
|
| 28 |
MODELS_DIR = Path("models")
|
| 29 |
DEFAULT_MODEL = MODELS_DIR / "gr_rf.joblib"
|
|
@@ -63,7 +65,7 @@ div[data-testid="stVerticalBlock"] { overflow: unset !important; }
|
|
| 63 |
</style>
|
| 64 |
""", unsafe_allow_html=True)
|
| 65 |
|
| 66 |
-
# Hide uploader helper text
|
| 67 |
st.markdown("""
|
| 68 |
<style>
|
| 69 |
section[data-testid="stFileUploader"] div[data-testid="stMarkdownContainer"]{display:none !important;}
|
|
@@ -166,7 +168,13 @@ def parse_excel(data_bytes: bytes):
|
|
| 166 |
xl = pd.ExcelFile(bio)
|
| 167 |
return {sh: xl.parse(sh) for sh in xl.sheet_names}
|
| 168 |
|
| 169 |
-
def read_book_bytes(b: bytes):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
|
| 171 |
def ensure_cols(df, cols):
|
| 172 |
miss = [c for c in cols if c not in df.columns]
|
|
@@ -211,10 +219,8 @@ def to_actual_series(df: pd.DataFrame, target_col: str, actual_col_hint: str, tr
|
|
| 211 |
"""
|
| 212 |
if actual_col_hint and actual_col_hint in df.columns:
|
| 213 |
return pd.Series(df[actual_col_hint], dtype=float)
|
| 214 |
-
# else, if target exists, invert:
|
| 215 |
if target_col in df.columns:
|
| 216 |
return pd.Series(inverse_target(np.asarray(df[target_col], dtype=float), transform), dtype=float)
|
| 217 |
-
# fallback: if a column named "GR" exists, use it
|
| 218 |
if "GR" in df.columns:
|
| 219 |
return pd.Series(df["GR"], dtype=float)
|
| 220 |
raise ValueError("Cannot find actual GR column or target to invert.")
|
|
@@ -222,8 +228,7 @@ def to_actual_series(df: pd.DataFrame, target_col: str, actual_col_hint: str, tr
|
|
| 222 |
# =========================
|
| 223 |
# Cross plot (Matplotlib) — auto limits for GR
|
| 224 |
# =========================
|
| 225 |
-
def _nice_bounds(arr_min, arr_max, n_ticks=
|
| 226 |
-
# pick a "nice" range and step for GR (typically 0–200+ API)
|
| 227 |
if not np.isfinite(arr_min) or not np.isfinite(arr_max):
|
| 228 |
return 0.0, 100.0, 20.0
|
| 229 |
span = arr_max - arr_min
|
|
@@ -241,7 +246,6 @@ def cross_plot_static(actual, pred):
|
|
| 241 |
a = pd.Series(actual, dtype=float)
|
| 242 |
p = pd.Series(pred, dtype=float)
|
| 243 |
|
| 244 |
-
# auto bounds & ticks for GR
|
| 245 |
lo = min(a.min(), p.min())
|
| 246 |
hi = max(a.max(), p.max())
|
| 247 |
fixed_min, fixed_max, step = _nice_bounds(lo, hi, n_ticks=6)
|
|
@@ -262,7 +266,7 @@ def cross_plot_static(actual, pred):
|
|
| 262 |
ax.set_ylim(fixed_min, fixed_max)
|
| 263 |
ax.set_xticks(ticks)
|
| 264 |
ax.set_yticks(ticks)
|
| 265 |
-
ax.set_aspect("equal", adjustable="box")
|
| 266 |
|
| 267 |
fmt = FuncFormatter(lambda x, _: f"{int(x):,}")
|
| 268 |
ax.xaxis.set_major_formatter(fmt)
|
|
@@ -517,7 +521,7 @@ if st.session_state.app_step == "dev":
|
|
| 517 |
if sh_train is None or sh_test is None:
|
| 518 |
st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
|
| 519 |
st.stop()
|
| 520 |
-
tr = book[sh_train].copy(); te = book[sh_test].copy()
|
| 521 |
if not (ensure_cols(tr, FEATURES) and ensure_cols(te, FEATURES)):
|
| 522 |
st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True); st.stop()
|
| 523 |
|
|
@@ -562,9 +566,9 @@ if st.session_state.app_step == "dev":
|
|
| 562 |
|
| 563 |
col_track, col_cross = st.columns([2, 3], gap="large")
|
| 564 |
with col_track:
|
|
|
|
| 565 |
st.plotly_chart(
|
| 566 |
-
track_plot(
|
| 567 |
-
pred_col="GR_Pred", actual_col="GR"),
|
| 568 |
use_container_width=False,
|
| 569 |
config={"displayModeBar": False, "scrollZoom": True}
|
| 570 |
)
|
|
@@ -600,16 +604,18 @@ if st.session_state.app_step == "validate":
|
|
| 600 |
if go_btn and up is not None:
|
| 601 |
book = read_book_bytes(up.getvalue())
|
| 602 |
name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
|
| 603 |
-
df = book[name].copy()
|
| 604 |
-
if not ensure_cols(df, FEATURES):
|
|
|
|
|
|
|
| 605 |
|
| 606 |
pred_raw = model.predict(df[FEATURES])
|
| 607 |
df["GR_Pred"] = inverse_target(np.asarray(pred_raw, dtype=float), TARGET_TRANSFORM)
|
| 608 |
-
# actual GR
|
| 609 |
try:
|
| 610 |
df["GR_Actual"] = to_actual_series(df, TARGET, ACTUAL_COL, TARGET_TRANSFORM)
|
| 611 |
except Exception:
|
| 612 |
-
st.markdown('<div class="st-message-box st-error">Validation sheet must include actual GR (or a target column that can be inverse-transformed).</div>', unsafe_allow_html=True);
|
|
|
|
| 613 |
|
| 614 |
st.session_state.results["Validate"]=df
|
| 615 |
|
|
@@ -685,8 +691,10 @@ if st.session_state.app_step == "predict":
|
|
| 685 |
|
| 686 |
if go_btn and up is not None:
|
| 687 |
book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
|
| 688 |
-
df = book[name].copy()
|
| 689 |
-
if not ensure_cols(df, FEATURES):
|
|
|
|
|
|
|
| 690 |
|
| 691 |
pred_raw = model.predict(df[FEATURES])
|
| 692 |
df["GR_Pred"] = inverse_target(np.asarray(pred_raw, dtype=float), TARGET_TRANSFORM)
|
|
@@ -724,8 +732,7 @@ if st.session_state.app_step == "predict":
|
|
| 724 |
st.caption("**★ OOR** = % of rows whose input features fall outside the training min–max range.")
|
| 725 |
with col_right:
|
| 726 |
st.plotly_chart(
|
| 727 |
-
track_plot(df
|
| 728 |
-
pred_col="GR_Pred", actual_col="GR"),
|
| 729 |
use_container_width=False, config={"displayModeBar": False, "scrollZoom": True}
|
| 730 |
)
|
| 731 |
|
|
@@ -747,15 +754,23 @@ if st.session_state.show_preview_modal:
|
|
| 747 |
tabs = st.tabs(names)
|
| 748 |
for t, name in zip(tabs, names):
|
| 749 |
with t:
|
| 750 |
-
df = book_to_preview[name]
|
| 751 |
t1, t2 = st.tabs(["Tracks", "Summary"])
|
| 752 |
with t1:
|
| 753 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 754 |
with t2:
|
| 755 |
-
|
| 756 |
-
|
| 757 |
-
|
| 758 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 759 |
st.session_state.show_preview_modal = False
|
| 760 |
|
| 761 |
# =========================
|
|
@@ -767,4 +782,4 @@ st.markdown("""
|
|
| 767 |
<div style='text-align:center;color:#6b7280;font-size:0.8em;'>
|
| 768 |
© 2024 Smart Thinking AI-Solutions Team. All rights reserved.
|
| 769 |
</div>
|
| 770 |
-
""", unsafe_allow_html=True)
|
|
|
|
| 20 |
# =========================
|
| 21 |
APP_NAME = "ST_GR"
|
| 22 |
TAGLINE = "Gamma Ray Prediction"
|
| 23 |
+
|
| 24 |
# If meta.json is present, these will be overridden
|
| 25 |
FEATURES = ["GPM", "SPP", "RPM", "WOB", "T", "ROP"]
|
| 26 |
+
TARGET = "log(GR)" # or "GR" if your model predicts GR directly
|
| 27 |
+
TARGET_TRANSFORM = "ln" # "ln" (exp back), "log10" (10**x), or "none"
|
| 28 |
+
ACTUAL_COL = "GR" # where the actual API values live (if present)
|
| 29 |
|
| 30 |
MODELS_DIR = Path("models")
|
| 31 |
DEFAULT_MODEL = MODELS_DIR / "gr_rf.joblib"
|
|
|
|
| 65 |
</style>
|
| 66 |
""", unsafe_allow_html=True)
|
| 67 |
|
| 68 |
+
# Hide uploader helper text
|
| 69 |
st.markdown("""
|
| 70 |
<style>
|
| 71 |
section[data-testid="stFileUploader"] div[data-testid="stMarkdownContainer"]{display:none !important;}
|
|
|
|
| 168 |
xl = pd.ExcelFile(bio)
|
| 169 |
return {sh: xl.parse(sh) for sh in xl.sheet_names}
|
| 170 |
|
| 171 |
+
def read_book_bytes(b: bytes):
|
| 172 |
+
return parse_excel(b) if b else {}
|
| 173 |
+
|
| 174 |
+
def normalize_df(df: pd.DataFrame) -> pd.DataFrame:
|
| 175 |
+
out = df.copy()
|
| 176 |
+
out.columns = [str(c).strip() for c in out.columns]
|
| 177 |
+
return out
|
| 178 |
|
| 179 |
def ensure_cols(df, cols):
|
| 180 |
miss = [c for c in cols if c not in df.columns]
|
|
|
|
| 219 |
"""
|
| 220 |
if actual_col_hint and actual_col_hint in df.columns:
|
| 221 |
return pd.Series(df[actual_col_hint], dtype=float)
|
|
|
|
| 222 |
if target_col in df.columns:
|
| 223 |
return pd.Series(inverse_target(np.asarray(df[target_col], dtype=float), transform), dtype=float)
|
|
|
|
| 224 |
if "GR" in df.columns:
|
| 225 |
return pd.Series(df["GR"], dtype=float)
|
| 226 |
raise ValueError("Cannot find actual GR column or target to invert.")
|
|
|
|
| 228 |
# =========================
|
| 229 |
# Cross plot (Matplotlib) — auto limits for GR
|
| 230 |
# =========================
|
| 231 |
+
def _nice_bounds(arr_min, arr_max, n_ticks=6):
|
|
|
|
| 232 |
if not np.isfinite(arr_min) or not np.isfinite(arr_max):
|
| 233 |
return 0.0, 100.0, 20.0
|
| 234 |
span = arr_max - arr_min
|
|
|
|
| 246 |
a = pd.Series(actual, dtype=float)
|
| 247 |
p = pd.Series(pred, dtype=float)
|
| 248 |
|
|
|
|
| 249 |
lo = min(a.min(), p.min())
|
| 250 |
hi = max(a.max(), p.max())
|
| 251 |
fixed_min, fixed_max, step = _nice_bounds(lo, hi, n_ticks=6)
|
|
|
|
| 266 |
ax.set_ylim(fixed_min, fixed_max)
|
| 267 |
ax.set_xticks(ticks)
|
| 268 |
ax.set_yticks(ticks)
|
| 269 |
+
ax.set_aspect("equal", adjustable="box")
|
| 270 |
|
| 271 |
fmt = FuncFormatter(lambda x, _: f"{int(x):,}")
|
| 272 |
ax.xaxis.set_major_formatter(fmt)
|
|
|
|
| 521 |
if sh_train is None or sh_test is None:
|
| 522 |
st.markdown('<div class="st-message-box st-error">Workbook must include Train/Training and Test/Testing sheets.</div>', unsafe_allow_html=True)
|
| 523 |
st.stop()
|
| 524 |
+
tr = normalize_df(book[sh_train].copy()); te = normalize_df(book[sh_test].copy())
|
| 525 |
if not (ensure_cols(tr, FEATURES) and ensure_cols(te, FEATURES)):
|
| 526 |
st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True); st.stop()
|
| 527 |
|
|
|
|
| 566 |
|
| 567 |
col_track, col_cross = st.columns([2, 3], gap="large")
|
| 568 |
with col_track:
|
| 569 |
+
df_for_plot = df.rename(columns={"GR_Actual":"GR"})
|
| 570 |
st.plotly_chart(
|
| 571 |
+
track_plot(df_for_plot, include_actual=True, pred_col="GR_Pred", actual_col="GR"),
|
|
|
|
| 572 |
use_container_width=False,
|
| 573 |
config={"displayModeBar": False, "scrollZoom": True}
|
| 574 |
)
|
|
|
|
| 604 |
if go_btn and up is not None:
|
| 605 |
book = read_book_bytes(up.getvalue())
|
| 606 |
name = find_sheet(book, ["Validation","Validate","validation2","Val","val"]) or list(book.keys())[0]
|
| 607 |
+
df = normalize_df(book[name].copy())
|
| 608 |
+
if not ensure_cols(df, FEATURES):
|
| 609 |
+
st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True);
|
| 610 |
+
st.stop()
|
| 611 |
|
| 612 |
pred_raw = model.predict(df[FEATURES])
|
| 613 |
df["GR_Pred"] = inverse_target(np.asarray(pred_raw, dtype=float), TARGET_TRANSFORM)
|
|
|
|
| 614 |
try:
|
| 615 |
df["GR_Actual"] = to_actual_series(df, TARGET, ACTUAL_COL, TARGET_TRANSFORM)
|
| 616 |
except Exception:
|
| 617 |
+
st.markdown('<div class="st-message-box st-error">Validation sheet must include actual GR (or a target column that can be inverse-transformed).</div>', unsafe_allow_html=True);
|
| 618 |
+
st.stop()
|
| 619 |
|
| 620 |
st.session_state.results["Validate"]=df
|
| 621 |
|
|
|
|
| 691 |
|
| 692 |
if go_btn and up is not None:
|
| 693 |
book = read_book_bytes(up.getvalue()); name = list(book.keys())[0]
|
| 694 |
+
df = normalize_df(book[name].copy())
|
| 695 |
+
if not ensure_cols(df, FEATURES):
|
| 696 |
+
st.markdown('<div class="st-message-box st-error">Missing required feature columns.</div>', unsafe_allow_html=True);
|
| 697 |
+
st.stop()
|
| 698 |
|
| 699 |
pred_raw = model.predict(df[FEATURES])
|
| 700 |
df["GR_Pred"] = inverse_target(np.asarray(pred_raw, dtype=float), TARGET_TRANSFORM)
|
|
|
|
| 732 |
st.caption("**★ OOR** = % of rows whose input features fall outside the training min–max range.")
|
| 733 |
with col_right:
|
| 734 |
st.plotly_chart(
|
| 735 |
+
track_plot(df, include_actual=False, pred_col="GR_Pred", actual_col="GR"),
|
|
|
|
| 736 |
use_container_width=False, config={"displayModeBar": False, "scrollZoom": True}
|
| 737 |
)
|
| 738 |
|
|
|
|
| 754 |
tabs = st.tabs(names)
|
| 755 |
for t, name in zip(tabs, names):
|
| 756 |
with t:
|
| 757 |
+
df = normalize_df(book_to_preview[name])
|
| 758 |
t1, t2 = st.tabs(["Tracks", "Summary"])
|
| 759 |
with t1:
|
| 760 |
+
if any(c in df.columns for c in FEATURES):
|
| 761 |
+
st.pyplot(preview_tracks(df, FEATURES), use_container_width=True)
|
| 762 |
+
else:
|
| 763 |
+
st.info(f"None of the expected feature columns were found in this sheet. "
|
| 764 |
+
f"Expected any of: {FEATURES}. Found: {list(df.columns)}")
|
| 765 |
with t2:
|
| 766 |
+
present = [c for c in FEATURES if c in df.columns]
|
| 767 |
+
if present:
|
| 768 |
+
tbl = (df[present]
|
| 769 |
+
.agg(['min','max','mean','std'])
|
| 770 |
+
.T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"}))
|
| 771 |
+
df_centered_rounded(tbl.reset_index(names="Feature"))
|
| 772 |
+
else:
|
| 773 |
+
st.info("No expected feature columns found to summarize.")
|
| 774 |
st.session_state.show_preview_modal = False
|
| 775 |
|
| 776 |
# =========================
|
|
|
|
| 782 |
<div style='text-align:center;color:#6b7280;font-size:0.8em;'>
|
| 783 |
© 2024 Smart Thinking AI-Solutions Team. All rights reserved.
|
| 784 |
</div>
|
| 785 |
+
""", unsafe_allow_html=True)
|