Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,7 +14,7 @@ import matplotlib.pyplot as plt
|
|
| 14 |
from matplotlib.ticker import FuncFormatter
|
| 15 |
|
| 16 |
import plotly.graph_objects as go
|
| 17 |
-
from sklearn.metrics import mean_squared_error
|
| 18 |
|
| 19 |
# =========================
|
| 20 |
# Constants / Config
|
|
@@ -145,7 +145,20 @@ add_password_gate()
|
|
| 145 |
# =========================
|
| 146 |
# Utilities
|
| 147 |
# =========================
|
| 148 |
-
def rmse(y_true, y_pred):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
def render_bo_bd_note():
|
| 151 |
st.markdown(
|
|
@@ -368,7 +381,7 @@ def _x_range_for_tracks(df, cols):
|
|
| 368 |
x_series = pd.concat([pd.to_numeric(df[c], errors="coerce") for c in cols if c in df], ignore_index=True)
|
| 369 |
x_lo, x_hi = float(x_series.min()), float(x_series.max())
|
| 370 |
pad = 0.03 * (x_hi - x_lo if x_hi > x_lo else 1.0)
|
| 371 |
-
xmin, xmax = x_lo - pad, x_hi + pad
|
| 372 |
tick0 = _nice_tick0(xmin, step=max((xmax - xmin)/10.0, 0.1))
|
| 373 |
return xmin, xmax, tick0
|
| 374 |
|
|
@@ -660,10 +673,10 @@ if st.session_state.app_step == "dev":
|
|
| 660 |
te[PRED_BD] = model_bd.predict(Xte_bd)
|
| 661 |
|
| 662 |
st.session_state.results["Train"]=tr; st.session_state.results["Test"]=te
|
| 663 |
-
st.session_state.results["m_train_bo"]={"R": pearson_r(tr[TARGET_BO], tr[PRED_BO]), "RMSE": rmse(tr[TARGET_BO], tr[PRED_BO]), "
|
| 664 |
-
st.session_state.results["m_train_bd"]={"R": pearson_r(tr[TARGET_BD], tr[PRED_BD]), "RMSE": rmse(tr[TARGET_BD], tr[PRED_BD]), "
|
| 665 |
-
st.session_state.results["m_test_bo"] ={"R": pearson_r(te[TARGET_BO], te[PRED_BO]), "RMSE": rmse(te[TARGET_BO], te[PRED_BO]), "
|
| 666 |
-
st.session_state.results["m_test_bd"] ={"R": pearson_r(te[TARGET_BD], te[PRED_BD]), "RMSE": rmse(te[TARGET_BD], te[PRED_BD]), "
|
| 667 |
|
| 668 |
tr_min = tr[st.session_state["FEATURES"]].min().to_dict(); tr_max = tr[st.session_state["FEATURES"]].max().to_dict()
|
| 669 |
st.session_state.train_ranges = {f:(float(tr_min[f]), float(tr_max[f])) for f in st.session_state["FEATURES"]}
|
|
@@ -674,11 +687,11 @@ if st.session_state.app_step == "dev":
|
|
| 674 |
c1, c2, c3 = st.columns(3)
|
| 675 |
c1.metric(f"R ({name})", f"{m['R']:.3f}")
|
| 676 |
c2.metric(f"RMSE ({name})", f"{m['RMSE']:.2f}")
|
| 677 |
-
c3.metric(f"
|
| 678 |
|
| 679 |
def _dev_block(df, mbo, mbd):
|
| 680 |
_metrics_block("BO", mbo); _metrics_block("BD", mbd)
|
| 681 |
-
st.markdown("<div style='text-align:left;font-size:0.8em;color:#6b7280;margin-top:-16px;margin-bottom:8px;'><strong>R</strong> = Pearson correlation • <strong>RMSE
|
| 682 |
t1, t2, t3 = st.tabs(["Breakout", "Breakdown", "Combined"])
|
| 683 |
with t1:
|
| 684 |
left, right = st.columns([3,1], gap="large")
|
|
@@ -755,8 +768,8 @@ if st.session_state.app_step == "validate":
|
|
| 755 |
if pd.api.types.is_numeric_dtype(tbl[c]): tbl[c] = tbl[c].round(2)
|
| 756 |
tbl["Violations"] = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in st.session_state["FEATURES"]}).loc[any_viol].apply(lambda r:", ".join([c for c,v in r.items() if v]), axis=1)
|
| 757 |
|
| 758 |
-
st.session_state.results["m_val_bo"]={"R": pearson_r(df[TARGET_BO], df[PRED_BO]), "RMSE": rmse(df[TARGET_BO], df[PRED_BO]), "
|
| 759 |
-
st.session_state.results["m_val_bd"]={"R": pearson_r(df[TARGET_BD], df[PRED_BD]), "RMSE": rmse(df[TARGET_BD], df[PRED_BD]), "
|
| 760 |
st.session_state.results["sv_val"]={"n":len(df), "bo_min":float(df[PRED_BO].min()), "bo_max":float(df[PRED_BO].max()),
|
| 761 |
"bd_min":float(df[PRED_BD].min()), "bd_max":float(df[PRED_BD].max()), "oor":oor_pct}
|
| 762 |
st.session_state.results["oor_tbl"]=tbl
|
|
@@ -767,12 +780,12 @@ if st.session_state.app_step == "validate":
|
|
| 767 |
c1,c2,c3 = st.columns(3)
|
| 768 |
c1.metric("R (Breakout)", f"{m_bo['R']:.3f}")
|
| 769 |
c2.metric("RMSE (Breakout)", f"{m_bo['RMSE']:.2f}")
|
| 770 |
-
c3.metric("
|
| 771 |
c1,c2,c3 = st.columns(3)
|
| 772 |
c1.metric("R (Breakdown)", f"{m_bd['R']:.3f}")
|
| 773 |
c2.metric("RMSE (Breakdown)", f"{m_bd['RMSE']:.2f}")
|
| 774 |
-
c3.metric("
|
| 775 |
-
st.markdown("<div style='text-align:left;font-size:0.8em;color:#6b7280;margin-top:-16px;margin-bottom:8px;'>R = Pearson correlation</div>", unsafe_allow_html=True)
|
| 776 |
|
| 777 |
t1, t2, t3 = st.tabs(["Breakout", "Breakdown", "Combined"])
|
| 778 |
with t1:
|
|
|
|
| 14 |
from matplotlib.ticker import FuncFormatter
|
| 15 |
|
| 16 |
import plotly.graph_objects as go
|
| 17 |
+
from sklearn.metrics import mean_squared_error # MAPE implemented manually
|
| 18 |
|
| 19 |
# =========================
|
| 20 |
# Constants / Config
|
|
|
|
| 145 |
# =========================
|
| 146 |
# Utilities
|
| 147 |
# =========================
|
| 148 |
+
def rmse(y_true, y_pred):
    """Return the root-mean-squared error between *y_true* and *y_pred*.

    The mean squared error is delegated to ``mean_squared_error`` and its
    square root is returned as a plain Python ``float``.
    """
    mean_sq_err = mean_squared_error(y_true, y_pred)
    root = np.sqrt(mean_sq_err)
    return float(root)
|
| 150 |
+
|
| 151 |
+
def mape(y_true, y_pred, eps: float = 1e-8) -> float:
    """Return the Mean Absolute Percentage Error, expressed in PERCENT.

    Args:
        y_true: Array-like of actual values (converted to a float array).
        y_pred: Array-like of predicted values (converted to a float array).
        eps: Rows whose ``|actual|`` is below this threshold are skipped so
            that near-zero actuals do not blow up the percentage.

    Returns:
        The mean of ``|actual - predicted| / |actual| * 100`` over the usable
        rows. Rows that evaluate to NaN (near-zero or NaN actual, NaN
        prediction) are ignored. ``nan`` is returned when no usable row
        remains or when the mean is not finite.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    # NaN/inf operands are expected here and handled below; silence the
    # floating-point warnings they would otherwise raise.
    with np.errstate(invalid="ignore", divide="ignore"):
        magnitude = np.abs(actual)
        denom = np.where(magnitude < eps, np.nan, magnitude)
        pct = np.abs(actual - predicted) / denom * 100.0

    usable = pct[~np.isnan(pct)]
    # Guard explicitly instead of relying on np.nanmean, which emits
    # "RuntimeWarning: Mean of empty slice" when every row was skipped.
    if usable.size == 0:
        return float("nan")

    val = float(usable.mean())
    return val if np.isfinite(val) else float("nan")
|
| 162 |
|
| 163 |
def render_bo_bd_note():
|
| 164 |
st.markdown(
|
|
|
|
| 381 |
x_series = pd.concat([pd.to_numeric(df[c], errors="coerce") for c in cols if c in df], ignore_index=True)
|
| 382 |
x_lo, x_hi = float(x_series.min()), float(x_series.max())
|
| 383 |
pad = 0.03 * (x_hi - x_lo if x_hi > x_lo else 1.0)
|
| 384 |
+
xmin, xmax = x_lo - pad, x_hi + pad  # pad the data range on both sides by `pad`
|
| 385 |
tick0 = _nice_tick0(xmin, step=max((xmax - xmin)/10.0, 0.1))
|
| 386 |
return xmin, xmax, tick0
|
| 387 |
|
|
|
|
| 673 |
te[PRED_BD] = model_bd.predict(Xte_bd)
|
| 674 |
|
| 675 |
st.session_state.results["Train"]=tr; st.session_state.results["Test"]=te
|
| 676 |
+
st.session_state.results["m_train_bo"]={"R": pearson_r(tr[TARGET_BO], tr[PRED_BO]), "RMSE": rmse(tr[TARGET_BO], tr[PRED_BO]), "MAPE": mape(tr[TARGET_BO], tr[PRED_BO])}
|
| 677 |
+
st.session_state.results["m_train_bd"]={"R": pearson_r(tr[TARGET_BD], tr[PRED_BD]), "RMSE": rmse(tr[TARGET_BD], tr[PRED_BD]), "MAPE": mape(tr[TARGET_BD], tr[PRED_BD])}
|
| 678 |
+
st.session_state.results["m_test_bo"] ={"R": pearson_r(te[TARGET_BO], te[PRED_BO]), "RMSE": rmse(te[TARGET_BO], te[PRED_BO]), "MAPE": mape(te[TARGET_BO], te[PRED_BO])}
|
| 679 |
+
st.session_state.results["m_test_bd"] ={"R": pearson_r(te[TARGET_BD], te[PRED_BD]), "RMSE": rmse(te[TARGET_BD], te[PRED_BD]), "MAPE": mape(te[TARGET_BD], te[PRED_BD])}
|
| 680 |
|
| 681 |
tr_min = tr[st.session_state["FEATURES"]].min().to_dict(); tr_max = tr[st.session_state["FEATURES"]].max().to_dict()
|
| 682 |
st.session_state.train_ranges = {f:(float(tr_min[f]), float(tr_max[f])) for f in st.session_state["FEATURES"]}
|
|
|
|
| 687 |
c1, c2, c3 = st.columns(3)
|
| 688 |
c1.metric(f"R ({name})", f"{m['R']:.3f}")
|
| 689 |
c2.metric(f"RMSE ({name})", f"{m['RMSE']:.2f}")
|
| 690 |
+
c3.metric(f"MAPE (%) ({name})", f"{m['MAPE']:.2f}%")
|
| 691 |
|
| 692 |
def _dev_block(df, mbo, mbd):
|
| 693 |
_metrics_block("BO", mbo); _metrics_block("BD", mbd)
|
| 694 |
+
st.markdown("<div style='text-align:left;font-size:0.8em;color:#6b7280;margin-top:-16px;margin-bottom:8px;'><strong>R</strong> = Pearson correlation • <strong>RMSE</strong> in MW (pcf) • <strong>MAPE</strong> in %</div>", unsafe_allow_html=True)
|
| 695 |
t1, t2, t3 = st.tabs(["Breakout", "Breakdown", "Combined"])
|
| 696 |
with t1:
|
| 697 |
left, right = st.columns([3,1], gap="large")
|
|
|
|
| 768 |
if pd.api.types.is_numeric_dtype(tbl[c]): tbl[c] = tbl[c].round(2)
|
| 769 |
tbl["Violations"] = pd.DataFrame({f:(df[f]<ranges[f][0])|(df[f]>ranges[f][1]) for f in st.session_state["FEATURES"]}).loc[any_viol].apply(lambda r:", ".join([c for c,v in r.items() if v]), axis=1)
|
| 770 |
|
| 771 |
+
st.session_state.results["m_val_bo"]={"R": pearson_r(df[TARGET_BO], df[PRED_BO]), "RMSE": rmse(df[TARGET_BO], df[PRED_BO]), "MAPE": mape(df[TARGET_BO], df[PRED_BO])}
|
| 772 |
+
st.session_state.results["m_val_bd"]={"R": pearson_r(df[TARGET_BD], df[PRED_BD]), "RMSE": rmse(df[TARGET_BD], df[PRED_BD]), "MAPE": mape(df[TARGET_BD], df[PRED_BD])}
|
| 773 |
st.session_state.results["sv_val"]={"n":len(df), "bo_min":float(df[PRED_BO].min()), "bo_max":float(df[PRED_BO].max()),
|
| 774 |
"bd_min":float(df[PRED_BD].min()), "bd_max":float(df[PRED_BD].max()), "oor":oor_pct}
|
| 775 |
st.session_state.results["oor_tbl"]=tbl
|
|
|
|
| 780 |
c1,c2,c3 = st.columns(3)
|
| 781 |
c1.metric("R (Breakout)", f"{m_bo['R']:.3f}")
|
| 782 |
c2.metric("RMSE (Breakout)", f"{m_bo['RMSE']:.2f}")
|
| 783 |
+
c3.metric("MAPE (%) (Breakout)", f"{m_bo['MAPE']:.2f}%")
|
| 784 |
c1,c2,c3 = st.columns(3)
|
| 785 |
c1.metric("R (Breakdown)", f"{m_bd['R']:.3f}")
|
| 786 |
c2.metric("RMSE (Breakdown)", f"{m_bd['RMSE']:.2f}")
|
| 787 |
+
c3.metric("MAPE (%) (Breakdown)", f"{m_bd['MAPE']:.2f}%")
|
| 788 |
+
st.markdown("<div style='text-align:left;font-size:0.8em;color:#6b7280;margin-top:-16px;margin-bottom:8px;'>R = Pearson correlation • RMSE in MW (pcf) • MAPE in %</div>", unsafe_allow_html=True)
|
| 789 |
|
| 790 |
t1, t2, t3 = st.tabs(["Breakout", "Breakdown", "Combined"])
|
| 791 |
with t1:
|