Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -200,7 +200,7 @@ def df_centered_rounded(df: pd.DataFrame, hide_index=True):
|
|
| 200 |
)
|
| 201 |
st.dataframe(styler, use_container_width=True, hide_index=hide_index)
|
| 202 |
|
| 203 |
-
# === Excel export helpers
|
| 204 |
def _excel_engine() -> str:
|
| 205 |
try:
|
| 206 |
import xlsxwriter # noqa: F401
|
|
@@ -228,16 +228,50 @@ def _summary_table(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
|
|
| 228 |
.agg(['min','max','mean','std'])
|
| 229 |
.T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
|
| 230 |
.reset_index(names="Field"))
|
| 231 |
-
return
|
| 232 |
|
| 233 |
def _train_ranges_df(ranges: dict[str, tuple[float, float]]) -> pd.DataFrame:
|
| 234 |
if not ranges:
|
| 235 |
return pd.DataFrame()
|
| 236 |
df = pd.DataFrame(ranges).T.reset_index()
|
| 237 |
df.columns = ["Feature", "Min", "Max"]
|
| 238 |
-
return
|
| 239 |
|
| 240 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
res = st.session_state.get("results", {})
|
| 242 |
if not res:
|
| 243 |
return None, None, []
|
|
@@ -246,93 +280,116 @@ def build_export_workbook() -> tuple[bytes|None, str|None, list[str]]:
|
|
| 246 |
order: list[str] = []
|
| 247 |
|
| 248 |
# Training
|
| 249 |
-
if "Train" in res:
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
s = _summary_table(tr, tr_cols)
|
| 257 |
-
if not s.empty:
|
| 258 |
-
sheets["Training_Summary"] = s; order.append("Training_Summary")
|
| 259 |
|
| 260 |
# Testing
|
| 261 |
-
if "Test" in res:
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
s = _summary_table(te, te_cols)
|
| 269 |
-
if not s.empty:
|
| 270 |
-
sheets["Testing_Summary"] = s; order.append("Testing_Summary")
|
| 271 |
|
| 272 |
# Validation
|
| 273 |
-
if "Validate" in res:
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
if "
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
if
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
|
|
|
|
|
|
| 310 |
bio = io.BytesIO()
|
| 311 |
-
|
|
|
|
| 312 |
for name in order:
|
| 313 |
df = sheets[name]
|
| 314 |
df.to_excel(writer, sheet_name=_excel_safe_name(name), index=False)
|
|
|
|
| 315 |
bio.seek(0)
|
| 316 |
|
| 317 |
-
fname = f"
|
| 318 |
return bio.getvalue(), fname, order
|
| 319 |
|
| 320 |
def render_export_button(key: str = "export_main") -> None:
|
| 321 |
-
data, fname, names = build_export_workbook()
|
| 322 |
st.divider()
|
| 323 |
st.markdown("### Export to Excel")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
if names:
|
| 325 |
-
st.caption("
|
| 326 |
st.download_button(
|
| 327 |
label="⬇️ Export Excel",
|
| 328 |
data=(data or b""),
|
| 329 |
-
file_name=(
|
| 330 |
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
| 331 |
disabled=(data is None),
|
| 332 |
-
help="Exports
|
| 333 |
key=key,
|
| 334 |
)
|
| 335 |
-
|
| 336 |
# =========================
|
| 337 |
# Cross plot (Matplotlib) — auto-scaled for Ym
|
| 338 |
# =========================
|
|
@@ -441,12 +498,17 @@ def track_plot(df, include_actual=True):
|
|
| 441 |
showline=True, linewidth=1.2, linecolor="#444", mirror=True,
|
| 442 |
showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
|
| 443 |
)
|
| 444 |
-
|
| 445 |
-
|
|
|
|
| 446 |
title_font=dict(size=20, family=BOLD_FONT, color="#000"),
|
| 447 |
tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
|
| 448 |
-
|
|
|
|
| 449 |
ticks="outside",
|
|
|
|
|
|
|
|
|
|
| 450 |
showline=True, linewidth=1.2, linecolor="#444", mirror=True,
|
| 451 |
showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
|
| 452 |
)
|
|
|
|
| 200 |
)
|
| 201 |
st.dataframe(styler, use_container_width=True, hide_index=hide_index)
|
| 202 |
|
| 203 |
+
# === Excel export helpers =================================================
|
| 204 |
def _excel_engine() -> str:
|
| 205 |
try:
|
| 206 |
import xlsxwriter # noqa: F401
|
|
|
|
| 228 |
.agg(['min','max','mean','std'])
|
| 229 |
.T.rename(columns={"min":"Min","max":"Max","mean":"Mean","std":"Std"})
|
| 230 |
.reset_index(names="Field"))
|
| 231 |
+
return tbl
|
| 232 |
|
| 233 |
def _train_ranges_df(ranges: dict[str, tuple[float, float]]) -> pd.DataFrame:
    """Turn the per-feature training ranges into a three-column table.

    Args:
        ranges: Mapping of feature name to a two-item ``(min, max)`` tuple.

    Returns:
        A frame with columns ``Feature``, ``Min`` and ``Max`` (one row per
        key of ``ranges``), or a completely empty frame when ``ranges`` is
        empty or None.
    """
    if ranges:
        # Keys become columns first; transposing puts one feature per row and
        # reset_index moves the feature names into a regular column.
        table = pd.DataFrame(ranges).transpose().reset_index()
        table.columns = ["Feature", "Min", "Max"]
        return table
    return pd.DataFrame()
|
| 239 |
|
| 240 |
+
def _excel_autofit(writer, sheet_name: str, df: pd.DataFrame, min_w: int = 8, max_w: int = 40):
    """Size each column to its content and freeze the header row.

    This is a best-effort cosmetic step: it silently does nothing when the
    sheet is missing from ``writer.sheets`` or when the worksheet object does
    not offer ``set_column`` (i.e. it is not an xlsxwriter worksheet).

    Args:
        writer: Excel writer exposing a ``sheets`` mapping of sheet name to
            worksheet object.
        sheet_name: Name of the sheet that ``df`` was written to.
        df: The frame that was written; its headers and stringified cell
            values determine the widths.
        min_w: Lower bound for any column width.
        max_w: Upper bound for any column width.
    """
    ws = writer.sheets.get(sheet_name)
    # Check the worksheet's capabilities rather than whether xlsxwriter can be
    # imported: the package may be installed while the writer uses another
    # engine whose worksheets have no set_column().
    if ws is None or not hasattr(ws, "set_column"):
        return

    for i, col in enumerate(df.columns):
        # Positional access keeps this working when column labels repeat
        # (label access would then return a DataFrame, not a Series).
        cell_lengths = df.iloc[:, i].astype(str).map(len).tolist()
        max_len = max([len(str(col)), *cell_lengths])
        # +2 leaves a little padding; the result is clamped to [min_w, max_w].
        ws.set_column(i, i, max(min_w, min(max_len + 2, max_w)))

    # Keep the header row visible while scrolling.
    ws.freeze_panes(1, 0)
|
| 254 |
+
|
| 255 |
+
def _add_sheet(sheets: dict, order: list, name: str, df: pd.DataFrame, ndigits: int):
    """Register ``df`` as an export sheet unless it is None or empty.

    Args:
        sheets: Mapping of sheet name to frame; updated in place.
        order: List of sheet names in insertion order; appended to in place.
        name: Sheet name to register.
        df: Frame to export; None or an empty frame is ignored.
        ndigits: Passed to ``_round_numeric`` together with ``df``.
    """
    if df is not None and not df.empty:
        rounded = _round_numeric(df, ndigits)
        sheets[name] = rounded
        order.append(name)
|
| 260 |
+
|
| 261 |
+
def _available_sections():
    """Return the sheet names the current session state can offer by default.

    Sheet groups are added for each of the keys ``Train``, ``Test``,
    ``Validate`` and ``PredictOnly`` found in ``st.session_state["results"]``;
    ``Training_Ranges`` is added when ``st.session_state["train_ranges"]`` is
    truthy, and ``Info`` is always included last.
    """
    results = st.session_state.get("results", {})

    # Result key -> sheets it unlocks; dict order fixes the output order.
    sheets_by_result = {
        "Train": ["Training", "Training_Metrics", "Training_Summary"],
        "Test": ["Testing", "Testing_Metrics", "Testing_Summary"],
        "Validate": ["Validation", "Validation_Metrics", "Validation_Summary", "Validation_OOR"],
        "PredictOnly": ["Prediction", "Prediction_Summary", "Prediction_OOR"],
    }

    available = []
    for result_key, sheet_names in sheets_by_result.items():
        if result_key in results:
            available.extend(sheet_names)

    if st.session_state.get("train_ranges"):
        available.append("Training_Ranges")
    available.append("Info")
    return available
|
| 272 |
+
|
| 273 |
+
def build_export_workbook(selected: list[str], ndigits: int = 2) -> tuple[bytes|None, str|None, list[str]]:
|
| 274 |
+
"""Builds an in-memory Excel workbook based on selected sheet names."""
|
| 275 |
res = st.session_state.get("results", {})
|
| 276 |
if not res:
|
| 277 |
return None, None, []
|
|
|
|
| 280 |
order: list[str] = []
|
| 281 |
|
| 282 |
# Training
|
| 283 |
+
if "Training" in selected and "Train" in res:
|
| 284 |
+
_add_sheet(sheets, order, "Training", res["Train"], ndigits)
|
| 285 |
+
if "Training_Metrics" in selected and res.get("m_train"):
|
| 286 |
+
_add_sheet(sheets, order, "Training_Metrics", pd.DataFrame([res["m_train"]]), ndigits)
|
| 287 |
+
if "Training_Summary" in selected and "Train" in res:
|
| 288 |
+
tr_cols = FEATURES + [c for c in ["GR_Actual","GR_Pred"] if c in res["Train"].columns]
|
| 289 |
+
_add_sheet(sheets, order, "Training_Summary", _summary_table(res["Train"], tr_cols), ndigits)
|
|
|
|
|
|
|
|
|
|
| 290 |
|
| 291 |
# Testing
|
| 292 |
+
if "Testing" in selected and "Test" in res:
|
| 293 |
+
_add_sheet(sheets, order, "Testing", res["Test"], ndigits)
|
| 294 |
+
if "Testing_Metrics" in selected and res.get("m_test"):
|
| 295 |
+
_add_sheet(sheets, order, "Testing_Metrics", pd.DataFrame([res["m_test"]]), ndigits)
|
| 296 |
+
if "Testing_Summary" in selected and "Test" in res:
|
| 297 |
+
te_cols = FEATURES + [c for c in ["GR_Actual","GR_Pred"] if c in res["Test"].columns]
|
| 298 |
+
_add_sheet(sheets, order, "Testing_Summary", _summary_table(res["Test"], te_cols), ndigits)
|
|
|
|
|
|
|
|
|
|
| 299 |
|
| 300 |
# Validation
|
| 301 |
+
if "Validation" in selected and "Validate" in res:
|
| 302 |
+
_add_sheet(sheets, order, "Validation", res["Validate"], ndigits)
|
| 303 |
+
if "Validation_Metrics" in selected and res.get("m_val"):
|
| 304 |
+
_add_sheet(sheets, order, "Validation_Metrics", pd.DataFrame([res["m_val"]]), ndigits)
|
| 305 |
+
if "Validation_Summary" in selected and res.get("sv_val"):
|
| 306 |
+
_add_sheet(sheets, order, "Validation_Summary", pd.DataFrame([res["sv_val"]]), ndigits)
|
| 307 |
+
if "Validation_OOR" in selected and isinstance(res.get("oor_tbl"), pd.DataFrame) and not res["oor_tbl"].empty:
|
| 308 |
+
_add_sheet(sheets, order, "Validation_OOR", res["oor_tbl"].reset_index(drop=True), ndigits)
|
| 309 |
+
|
| 310 |
+
# Prediction
|
| 311 |
+
if "Prediction" in selected and "PredictOnly" in res:
|
| 312 |
+
_add_sheet(sheets, order, "Prediction", res["PredictOnly"], ndigits)
|
| 313 |
+
if "Prediction_Summary" in selected and res.get("sv_pred"):
|
| 314 |
+
_add_sheet(sheets, order, "Prediction_Summary", pd.DataFrame([res["sv_pred"]]), ndigits)
|
| 315 |
+
if "Prediction_OOR" in selected and isinstance(res.get("oor_tbl_pred"), pd.DataFrame) and not res["oor_tbl_pred"].empty:
|
| 316 |
+
_add_sheet(sheets, order, "Prediction_OOR", res["oor_tbl_pred"].reset_index(drop=True), ndigits)
|
| 317 |
+
|
| 318 |
+
# Training ranges
|
| 319 |
+
if "Training_Ranges" in selected and st.session_state.get("train_ranges"):
|
| 320 |
+
rr = _train_ranges_df(st.session_state["train_ranges"])
|
| 321 |
+
_add_sheet(sheets, order, "Training_Ranges", rr, ndigits)
|
| 322 |
+
|
| 323 |
+
# Info
|
| 324 |
+
if "Info" in selected:
|
| 325 |
+
info = pd.DataFrame([
|
| 326 |
+
{"Key": "AppName", "Value": APP_NAME},
|
| 327 |
+
{"Key": "Tagline", "Value": TAGLINE},
|
| 328 |
+
{"Key": "Target", "Value": TARGET},
|
| 329 |
+
{"Key": "TargetTransform", "Value": TARGET_TRANSFORM},
|
| 330 |
+
{"Key": "ActualColumn", "Value": ACTUAL_COL},
|
| 331 |
+
{"Key": "Features", "Value": ", ".join(FEATURES)},
|
| 332 |
+
{"Key": "ExportedAt", "Value": datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
|
| 333 |
+
])
|
| 334 |
+
_add_sheet(sheets, order, "Info", info, ndigits)
|
| 335 |
+
|
| 336 |
+
if not order:
|
| 337 |
+
return None, None, []
|
| 338 |
+
|
| 339 |
+
# Write workbook to memory
|
| 340 |
bio = io.BytesIO()
|
| 341 |
+
engine = _excel_engine()
|
| 342 |
+
with pd.ExcelWriter(bio, engine=engine) as writer:
|
| 343 |
for name in order:
|
| 344 |
df = sheets[name]
|
| 345 |
df.to_excel(writer, sheet_name=_excel_safe_name(name), index=False)
|
| 346 |
+
_excel_autofit(writer, _excel_safe_name(name), df)
|
| 347 |
bio.seek(0)
|
| 348 |
|
| 349 |
+
fname = f"GR_Export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
|
| 350 |
return bio.getvalue(), fname, order
|
| 351 |
|
| 352 |
def render_export_button(key: str = "export_main") -> None:
    """Render the "Export to Excel" controls and the download button.

    Shows a sheet multiselect, a rounding input and a base-filename input,
    builds the workbook via ``build_export_workbook`` from those choices and
    offers it through ``st.download_button``. The button is disabled when no
    workbook could be built.

    Args:
        key: Widget key of the download button. It is also used as the prefix
            for the keys of the other widgets so that the export UI can be
            rendered more than once in a run without widget-ID collisions.
    """
    import re  # local import: only needed for filename cleanup

    st.divider()
    st.markdown("### Export to Excel")

    # Defaults: include everything that currently exists
    default_sections = _available_sections()
    all_sections = [
        "Training", "Training_Metrics", "Training_Summary",
        "Testing", "Testing_Metrics", "Testing_Summary",
        "Validation", "Validation_Metrics", "Validation_Summary", "Validation_OOR",
        "Prediction", "Prediction_Summary", "Prediction_OOR",
        "Training_Ranges", "Info",
    ]
    selected = st.multiselect(
        "Sheets to include",
        options=all_sections,
        default=default_sections,
        help="Choose which sheets to include in the Excel export.",
        # The available sections are folded into the key so the selection
        # still resets to the new defaults when more results become available.
        key=f"{key}_sheets_" + "-".join(default_sections),
    )

    c1, c2, c3 = st.columns([1, 1, 2])
    with c1:
        ndigits = st.number_input(
            "Rounding (decimals)",
            min_value=0, max_value=6, value=2, step=1,
            key=f"{key}_ndigits",
        )
    with c2:
        base_name = st.text_input("Base filename", value="GR_Export", key=f"{key}_basename")
    with c3:
        st.caption("• Columns auto-fit & header row frozen (if xlsxwriter is available).")

    # The filename returned by the builder is not used; the name is derived
    # from the user's base name below.
    data, _default_fname, names = build_export_workbook(selected=selected, ndigits=int(ndigits))

    if names:
        st.caption("Will include: " + ", ".join(names))

    if data:
        # Drop surrounding whitespace and characters that are invalid in file
        # names on common platforms; fall back to the default base name when
        # nothing usable is left.
        clean_base = re.sub(r'[\\/:*?"<>|]+', "_", (base_name or "").strip()).strip("_ ")
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        file_name = (clean_base or "GR_Export") + "_" + stamp + ".xlsx"
    else:
        file_name = "GR_Export.xlsx"

    st.download_button(
        label="⬇️ Export Excel",
        data=(data or b""),
        file_name=file_name,
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        disabled=(data is None),
        help="Exports selected sheets with optional rounding, auto-fit columns, and frozen headers.",
        key=key,
    )
|
|
|
|
| 393 |
# =========================
|
| 394 |
# Cross plot (Matplotlib) — auto-scaled for Ym
|
| 395 |
# =========================
|
|
|
|
| 498 |
showline=True, linewidth=1.2, linecolor="#444", mirror=True,
|
| 499 |
showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
|
| 500 |
)
|
| 501 |
+
|
| 502 |
+
fig.update_xaxes(
|
| 503 |
+
title_text="Ym",
|
| 504 |
title_font=dict(size=20, family=BOLD_FONT, color="#000"),
|
| 505 |
tickfont=dict(size=15, family=BOLD_FONT, color="#000"),
|
| 506 |
+
side="top",
|
| 507 |
+
range=[xmin, xmax],
|
| 508 |
ticks="outside",
|
| 509 |
+
tickformat=",.0f", # ← integer, thousands separated, no decimals
|
| 510 |
+
tickmode="auto",
|
| 511 |
+
tick0=tick0,
|
| 512 |
showline=True, linewidth=1.2, linecolor="#444", mirror=True,
|
| 513 |
showgrid=True, gridcolor="rgba(0,0,0,0.12)", automargin=True
|
| 514 |
)
|