Spaces:

HuggingAI4Engineering
/

cadgenbench-leaderboard

Running

Michael Rabinovich committed 4 days ago

Commit

a662bfa

1 Parent(s): c78e980

leaderboard: drop silent fallback; boot resilient on Hub read failure

The leaderboard previously fell back to a bundled/local results.jsonl on
any Hub error, which let an under-scoped Space HF_TOKEN silently serve
stale baked-in rows that looked up-to-date. Remove the fallback: the live
results.jsonl is the single source of truth and any read failure raises
LeaderboardDataError.

To stay robust rather than crash the Space at construction time, app.py
wraps the readers (_safe_load_split / _safe_load_admin): a failed read
yields empty, correctly-shaped tables plus a loud, persistent banner and
a gr.Warning toast on refresh / Timer tick, and is logged via
logger.exception. No stale or cached data is ever shown in place of a
failed read.

Also git rm the bundled results.jsonl and add regression tests for the
no-fallback contract and the boot-resilience wrappers.

Files changed (5) hide show

app.py +148 -20
leaderboard.py +39 -34
results.jsonl +0 -0
tests/test_leaderboard.py +13 -4
tests/test_proxy.py +82 -0

app.py CHANGED Viewed

@@ -43,9 +43,12 @@ from leaderboard import (
     ADMIN_SELECT_COL,
     HF_DATA_REPO,
     HF_SUBMISSIONS_REPO,
     LEADERBOARD_DATATYPES,
     LEADERBOARD_HIDE_COLUMNS,
     VALIDATED_LEADERBOARD_DATATYPES,
     _fmt_timestamp,
     build_combined_csv,
     load_admin_table,
@@ -154,15 +157,100 @@ def _build_report_iframe(html_bytes: bytes) -> str:
     )
 def _refresh_leaderboard_with_toast():
-    """Manual Refresh button handler: toast + fresh DataFrames.
-    The Timer auto-refresh wires straight to ``load_leaderboard_split``
-    so it stays silent (a toast every 10s would be noise). Only the
-    explicit click goes through this wrapper.
     """
-    gr.Info("Leaderboard refreshed.")
-    return load_leaderboard_split()
 def _enable_submit_when_logged_in(
@@ -238,6 +326,19 @@ def _arm_delete(
     return gr.Button(interactive=bool(confirm) and is_admin(profile))
 def _admin_promote(
     table_df: pd.DataFrame | None,
     method: str | None,
@@ -260,8 +361,9 @@ def _admin_promote(
     except (LookupError, ValueError) as e:
         raise gr.Error(str(e))
     gr.Info(f"Promoted {len(ids)} row(s) to validated ({method}).")
-    validated, unvalidated = load_leaderboard_split()
-    return load_admin_table(), validated, unvalidated
 def _admin_demote(
@@ -279,8 +381,9 @@ def _admin_demote(
     except (LookupError, ValueError) as e:
         raise gr.Error(str(e))
     gr.Info(f"Demoted {len(ids)} row(s) to unvalidated.")
-    validated, unvalidated = load_leaderboard_split()
-    return load_admin_table(), validated, unvalidated
 def _admin_delete(
@@ -305,9 +408,10 @@ def _admin_delete(
     except ValueError as e:
         raise gr.Error(str(e))
     gr.Info(f"Deleted {len(ids)} submission(s).")
-    validated, unvalidated = load_leaderboard_split()
     return (
-        load_admin_table(),
         validated,
         unvalidated,
         gr.Checkbox(value=False),
@@ -416,6 +520,23 @@ with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as block
     )
     with gr.Tab("Leaderboard"):
         # Collapsed accordions above the tables. Validation guidelines
         # gives the short two-tier story + link to the full policy
         # doc; Citation carries the verbatim BibTeX entry. Both start
@@ -436,7 +557,8 @@ with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as block
         # on top so the curated results are above the fold; unvalidated
         # below carries every other row (auto-published, awaiting
         # methodology review). See decisions/validation-policy.md.
-        initial_validated, initial_unvalidated = load_leaderboard_split()
         validated_view = Leaderboard(
             value=initial_validated,
             datatype=VALIDATED_LEADERBOARD_DATATYPES,
@@ -464,7 +586,7 @@ with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as block
             )
         refresh_btn.click(
             fn=_refresh_leaderboard_with_toast,
-            outputs=[validated_view, unvalidated_view],
         )
         download_btn.click(fn=build_combined_csv, outputs=download_btn)
@@ -558,8 +680,11 @@ to publish the resulting row on the public leaderboard.
         )
         # Only the leading `select` column is editable; the rest is
         # read-only context. Click-to-tick drives every action below.
         admin_table = gr.Dataframe(
-            value=load_admin_table(),
             datatype=[
                 "bool", "str", "str", "str", "str", "str", "str", "number",
                 "str",
@@ -626,16 +751,19 @@ to publish the resulting row on the public leaderboard.
                 delete_confirm, delete_btn,
             ],
         )
-        admin_refresh_btn.click(fn=load_admin_table, outputs=admin_table)
     # gradio_leaderboard.Leaderboard handles its own update path
     # cleanly; bind a Timer to push fresh dataframes every 10 seconds.
-    # Single tick runs `load_leaderboard_split` once and pushes the
-    # tuple's two halves into the validated / unvalidated widgets.
     auto_refresh_timer = gr.Timer(10)
     auto_refresh_timer.tick(
-        fn=load_leaderboard_split,
-        outputs=[validated_view, unvalidated_view],
     )
     # On page load, read the visitor's OAuth profile (None if not

     ADMIN_SELECT_COL,
     HF_DATA_REPO,
     HF_SUBMISSIONS_REPO,
+    LEADERBOARD_COLS,
     LEADERBOARD_DATATYPES,
     LEADERBOARD_HIDE_COLUMNS,
+    VALIDATED_LEADERBOARD_COLS,
     VALIDATED_LEADERBOARD_DATATYPES,
+    LeaderboardDataError,
     _fmt_timestamp,
     build_combined_csv,
     load_admin_table,
     )
+def _data_error_banner_md(message: str | None) -> str:
+    """Markdown for the top-of-tab data-unavailable banner.
+    Empty string when there's no error (the banner is also hidden via
+    ``visible=False`` in that case). When the live ``results.jsonl``
+    can't be read, the banner is the loud, persistent signal that the
+    tables below are empty *by design* (we never fall back to stale or
+    bundled data) rather than because the leaderboard is genuinely
+    empty.
+    """
+    # Truthiness test: both ``None`` and ``""`` are treated as "no error".
+    if not message:
+        return ""
+    # NOTE(review): ``message`` is interpolated verbatim into an inline
+    # code span on a blockquote line. A backtick or a newline inside it
+    # would end the span / the quote early -- confirm callers only pass
+    # single-line text without backticks, or sanitise it here.
+    return (
+        "> ⚠️ **Leaderboard data unavailable.** The live results could not "
+        "be read from the Hub, so the tables below are empty. No stale or "
+        "cached data is ever shown in its place.\n>\n"
+        f"> Details: `{message}`"
+    )
+def _safe_load_split() -> tuple[pd.DataFrame, pd.DataFrame, str | None]:
+    """Load both tiers, turning a Hub failure into empty frames + a message.
+    The reader (:func:`load_leaderboard_split`) deliberately *raises*
+    on any read failure (no silent fallback). The Space, however, must
+    stay up and loudly surface the failure rather than crash, so this
+    wrapper converts :class:`LeaderboardDataError` into empty,
+    correctly-shaped DataFrames plus an error string the caller renders
+    in the banner / a toast. Returns ``(validated, unvalidated, error)``
+    with ``error`` ``None`` on success.
+    """
+    try:
+        validated, unvalidated = load_leaderboard_split()
+        return validated, unvalidated, None
+    # Only LeaderboardDataError is converted; any other exception type
+    # raised by the loader still propagates to the caller.
+    except LeaderboardDataError as e:
+        # logger.exception records the active traceback with the message.
+        logger.exception("Leaderboard data load failed")
+        # Zero-row frames that still carry the declared column lists.
+        return (
+            pd.DataFrame(columns=VALIDATED_LEADERBOARD_COLS),
+            pd.DataFrame(columns=LEADERBOARD_COLS),
+            str(e),
+        )
+def _safe_load_admin() -> tuple[pd.DataFrame, str | None]:
+    """Admin-table counterpart to :func:`_safe_load_split`.
+    Same no-crash contract: a Hub read failure yields an empty,
+    correctly-shaped admin frame plus the error string instead of
+    propagating the exception (which would take the whole Space down at
+    boot, since the admin table loads at module-construction time).
+    """
+    try:
+        return load_admin_table(), None
+    # Only LeaderboardDataError is converted; other exception types
+    # still propagate.
+    except LeaderboardDataError as e:
+        logger.exception("Admin table load failed")
+        # NOTE(review): ADMIN_COLUMNS is not among the names visible in
+        # the import hunk of this diff -- confirm it is imported from
+        # `leaderboard` above the shown lines.
+        return pd.DataFrame(columns=ADMIN_COLUMNS), str(e)
 def _refresh_leaderboard_with_toast():
+    """Manual Refresh button handler: toast + fresh DataFrames + banner.
+    Surfaces the outcome loudly either way: ``gr.Info`` on success,
+    ``gr.Warning`` when the live read failed. The third output keeps
+    the data-unavailable banner in sync (shown with the error,
+    cleared on success).
+    """
+    validated, unvalidated, error = _safe_load_split()
+    # NOTE(review): the toast branch tests truthiness while the banner
+    # below tests ``is not None``. An empty-string error would therefore
+    # produce the success toast next to a visible but empty banner --
+    # confirm the error text can never be empty, or align the two tests.
+    if error:
+        gr.Warning(f"Leaderboard data unavailable: {error}")
+    else:
+        gr.Info("Leaderboard refreshed.")
+    # Three values, in order: validated frame, unvalidated frame, banner.
+    return (
+        validated,
+        unvalidated,
+        gr.Markdown(value=_data_error_banner_md(error), visible=error is not None),
+    )
+def _auto_refresh_leaderboard():
+    """Timer-tick handler: fresh DataFrames + banner, no success toast.
+    Mirrors :func:`_refresh_leaderboard_with_toast` but stays silent on
+    success (a toast every 10s would be noise). A read failure still
+    fires a loud ``gr.Warning`` and updates the banner so a degraded
+    Hub read can't quietly leave the tables blank.
     """
+    validated, unvalidated, error = _safe_load_split()
+    # NOTE(review): the warning is emitted on every failing invocation.
+    # Bound to a periodic timer, the toast repeats for as long as the
+    # read keeps failing, in addition to the persistent banner --
+    # confirm that repetition is intended.
+    if error:
+        gr.Warning(f"Leaderboard data unavailable: {error}")
+    # NOTE(review): visibility uses ``is not None`` while the toast uses
+    # truthiness; an empty-string error would show an empty banner with
+    # no toast.
+    return (
+        validated,
+        unvalidated,
+        gr.Markdown(value=_data_error_banner_md(error), visible=error is not None),
+    )
 def _enable_submit_when_logged_in(
     return gr.Button(interactive=bool(confirm) and is_admin(profile))
+def _refresh_admin_table() -> pd.DataFrame:
+    """Admin Refresh button handler: reload the admin table, toast on failure.
+    Uses the no-crash :func:`_safe_load_admin` so a Hub read failure
+    surfaces as a loud ``gr.Warning`` plus an empty table rather than an
+    uncaught exception.
+    """
+    admin_df, error = _safe_load_admin()
+    if error:
+        gr.Warning(f"Admin table unavailable: {error}")
+    # Only the frame is returned: unlike the leaderboard handlers, this
+    # one has no banner output, so the toast is the sole failure signal.
+    return admin_df
 def _admin_promote(
     table_df: pd.DataFrame | None,
     method: str | None,
     except (LookupError, ValueError) as e:
         raise gr.Error(str(e))
     gr.Info(f"Promoted {len(ids)} row(s) to validated ({method}).")
+    validated, unvalidated, _ = _safe_load_split()
+    admin_df, _ = _safe_load_admin()
+    return admin_df, validated, unvalidated
 def _admin_demote(
     except (LookupError, ValueError) as e:
         raise gr.Error(str(e))
     gr.Info(f"Demoted {len(ids)} row(s) to unvalidated.")
+    validated, unvalidated, _ = _safe_load_split()
+    admin_df, _ = _safe_load_admin()
+    return admin_df, validated, unvalidated
 def _admin_delete(
     except ValueError as e:
         raise gr.Error(str(e))
     gr.Info(f"Deleted {len(ids)} submission(s).")
+    validated, unvalidated, _ = _safe_load_split()
+    admin_df, _ = _safe_load_admin()
     return (
+        admin_df,
         validated,
         unvalidated,
         gr.Checkbox(value=False),
     )
     with gr.Tab("Leaderboard"):
+        # Load both tiers once at boot. `_safe_load_split` keeps a Hub
+        # read failure from crashing the Space: on failure the frames
+        # come up empty and `initial_error` carries the message the
+        # banner renders.
+        initial_validated, initial_unvalidated, initial_error = _safe_load_split()
+        # Loud, persistent banner shown only when the live results
+        # can't be read from the Hub (e.g. an under-scoped Space
+        # HF_TOKEN). Kept in sync by the refresh / Timer handlers. The
+        # leaderboard never falls back to stale/bundled data, so this
+        # banner is the signal that empty tables are a read failure,
+        # not a genuinely empty leaderboard.
+        data_error_banner = gr.Markdown(
+            value=_data_error_banner_md(initial_error),
+            visible=initial_error is not None,
+        )
         # Collapsed accordions above the tables. Validation guidelines
         # gives the short two-tier story + link to the full policy
         # doc; Citation carries the verbatim BibTeX entry. Both start
         # on top so the curated results are above the fold; unvalidated
         # below carries every other row (auto-published, awaiting
         # methodology review). See decisions/validation-policy.md.
+        # Initial values come from the boot-time `_safe_load_split`
+        # above (empty + banner on a Hub read failure).
         validated_view = Leaderboard(
             value=initial_validated,
             datatype=VALIDATED_LEADERBOARD_DATATYPES,
             )
         refresh_btn.click(
             fn=_refresh_leaderboard_with_toast,
+            outputs=[validated_view, unvalidated_view, data_error_banner],
         )
         download_btn.click(fn=build_combined_csv, outputs=download_btn)
         )
         # Only the leading `select` column is editable; the rest is
         # read-only context. Click-to-tick drives every action below.
+        # `_safe_load_admin` keeps a Hub read failure from crashing the
+        # Space at boot (the admin table loads at construction time).
+        initial_admin_table, _ = _safe_load_admin()
         admin_table = gr.Dataframe(
+            value=initial_admin_table,
             datatype=[
                 "bool", "str", "str", "str", "str", "str", "str", "number",
                 "str",
                 delete_confirm, delete_btn,
             ],
         )
+        admin_refresh_btn.click(fn=_refresh_admin_table, outputs=admin_table)
     # gradio_leaderboard.Leaderboard handles its own update path
     # cleanly; bind a Timer to push fresh dataframes every 10 seconds.
+    # Single tick runs `_auto_refresh_leaderboard` once and pushes the
+    # two halves into the validated / unvalidated widgets plus the
+    # data-unavailable banner. The handler swallows a Hub read failure
+    # into empty frames + a loud warning toast so a degraded read never
+    # crashes the tick loop or silently blanks the tables.
     auto_refresh_timer = gr.Timer(10)
     auto_refresh_timer.tick(
+        fn=_auto_refresh_leaderboard,
+        outputs=[validated_view, unvalidated_view, data_error_banner],
     )
     # On page load, read the visitor's OAuth profile (None if not

leaderboard.py CHANGED Viewed

@@ -14,10 +14,13 @@
 """Leaderboard read path.
-Loads `results.jsonl` from the submissions dataset on the Hub (or falls
-back to the local mirror on any Hub error) and shapes the rows into the
-dataframe shown on the Leaderboard tab. Module-level constants describe
-the env-var-driven repo identities that the submit path also consumes.
 """
 from __future__ import annotations
@@ -42,10 +45,19 @@ HF_SUBMISSIONS_REPO = os.getenv(
 )
 HF_DATA_REPO = os.getenv("HF_DATA_REPO", f"{HF_ORG}/cadgenbench-data")
-LOCAL_RESULTS_PATH = Path(__file__).parent / "results.jsonl"
 RESULTS_FILENAME = "results.jsonl"
 HUB_FETCH_TIMEOUT_SECONDS = 30
 # Columns visible in the rendered table, in left-to-right order, followed
 # by hidden-but-data-present columns the row-click detail panel pulls from.
 # Hidden columns ride along in the DataFrame so `Leaderboard.select(...)`
@@ -148,7 +160,7 @@ def _fmt_timestamp(ts) -> str:
     return s
-def _load_rows_from_hub() -> list[dict] | None:
     """Pull results.jsonl from the submissions dataset via raw HTTPS.
     Avoids :func:`huggingface_hub.hf_hub_download` because its layered
@@ -159,8 +171,12 @@ def _load_rows_from_hub() -> list[dict] | None:
     query param and ``Cache-Control: no-cache`` consistently sees the
     latest commit on the dataset's ``main`` branch within seconds.
-    Returns None on any failure so callers can fall back to the local
-    mirror.
     """
     url = (
         f"https://huggingface.co/datasets/{HF_SUBMISSIONS_REPO}"
@@ -178,22 +194,22 @@ def _load_rows_from_hub() -> list[dict] | None:
             timeout=HUB_FETCH_TIMEOUT_SECONDS,
         )
         r.raise_for_status()
         rows = [json.loads(line) for line in r.text.splitlines() if line.strip()]
-        logger.info("Loaded %d rows from Hub", len(rows))
-        return rows
-    except Exception as e:  # noqa: BLE001 - any failure should fall back
-        logger.warning("Hub fetch failed (%s: %s)", type(e).__name__, e)
-        return None
-def _load_rows_from_local() -> list[dict]:
-    if not LOCAL_RESULTS_PATH.exists():
-        return []
-    return [
-        json.loads(line)
-        for line in LOCAL_RESULTS_PATH.read_text().splitlines()
-        if line.strip()
-    ]
 def _fmt_pct(x: float | None, status: str) -> str:
@@ -318,9 +334,6 @@ def load_leaderboard_split() -> tuple[pd.DataFrame, pd.DataFrame]:
     on both tiers via :func:`_project_and_format`.
     """
     rows = _load_rows_from_hub()
-    if rows is None:
-        logger.warning("Hub read failed; falling back to local results.jsonl")
-        rows = _load_rows_from_local()
     if not rows:
         return (
             pd.DataFrame(columns=VALIDATED_LEADERBOARD_COLS),
@@ -455,10 +468,6 @@ def build_combined_csv() -> str:
     readers diffing the CSV against the UI see the same ordering.
     """
     rows = _load_rows_from_hub()
-    if rows is None:
-        logger.info("CSV build falling back to local results.jsonl")
-        rows = _load_rows_from_local()
-    rows = rows or []
     for row in rows:
         if row.get("status") is None:
             row["status"] = "completed"
@@ -512,10 +521,6 @@ def load_admin_table() -> pd.DataFrame:
     pre-schema-bump rows still show up and are actionable.
     """
     rows = _load_rows_from_hub()
-    if rows is None:
-        logger.info("Admin table build falling back to local results.jsonl")
-        rows = _load_rows_from_local()
-    rows = rows or []
     for row in rows:
         if row.get("status") is None:
             row["status"] = "completed"

 """Leaderboard read path.
+Loads `results.jsonl` from the submissions dataset on the Hub and
+shapes the rows into the dataframe shown on the Leaderboard tab. The
+live file is the single source of truth: there is **no fallback** to
+bundled/stale data, so any read failure raises
+:class:`LeaderboardDataError` rather than silently serving wrong rows.
+Module-level constants describe the env-var-driven repo identities
+that the submit path also consumes.
 """
 from __future__ import annotations
 )
 HF_DATA_REPO = os.getenv("HF_DATA_REPO", f"{HF_ORG}/cadgenbench-data")
 RESULTS_FILENAME = "results.jsonl"
 HUB_FETCH_TIMEOUT_SECONDS = 30
+class LeaderboardDataError(RuntimeError):
+    """Raised when the live ``results.jsonl`` cannot be read from the Hub.
+    The leaderboard has **no fallback**: rather than silently serving
+    stale or bundled data (which can make a broken Hub read, e.g. an
+    under-scoped Space ``HF_TOKEN``, look like an up-to-date but wrong
+    leaderboard), every read failure surfaces loudly here.
+    """
+    # Docstring-only body: no attributes or behaviour are added on top
+    # of RuntimeError; the distinct type lets callers catch read
+    # failures specifically.
 # Columns visible in the rendered table, in left-to-right order, followed
 # by hidden-but-data-present columns the row-click detail panel pulls from.
 # Hidden columns ride along in the DataFrame so `Leaderboard.select(...)`
     return s
+def _load_rows_from_hub() -> list[dict]:
     """Pull results.jsonl from the submissions dataset via raw HTTPS.
     Avoids :func:`huggingface_hub.hf_hub_download` because its layered
     query param and ``Cache-Control: no-cache`` consistently sees the
     latest commit on the dataset's ``main`` branch within seconds.
+    The live ``results.jsonl`` is the single source of truth. Any
+    failure (network, auth, malformed JSON) raises
+    :class:`LeaderboardDataError`: there is deliberately **no fallback**
+    to bundled/stale data, so a broken read fails loudly instead of
+    silently serving wrong rows. An empty file is a valid result (an
+    empty leaderboard), not a failure.
     """
     url = (
         f"https://huggingface.co/datasets/{HF_SUBMISSIONS_REPO}"
             timeout=HUB_FETCH_TIMEOUT_SECONDS,
         )
         r.raise_for_status()
+    except Exception as e:
+        raise LeaderboardDataError(
+            f"Could not fetch {RESULTS_FILENAME} from {HF_SUBMISSIONS_REPO}: "
+            f"{type(e).__name__}: {e}. Verify the Space's HF_TOKEN has read "
+            f"access to the (private) submissions dataset. The leaderboard "
+            f"serves no fallback data."
+        ) from e
+    try:
         rows = [json.loads(line) for line in r.text.splitlines() if line.strip()]
+    except json.JSONDecodeError as e:
+        raise LeaderboardDataError(
+            f"Malformed {RESULTS_FILENAME} from {HF_SUBMISSIONS_REPO}: "
+            f"{type(e).__name__}: {e}."
+        ) from e
+    logger.info("Loaded %d rows from Hub", len(rows))
+    return rows
 def _fmt_pct(x: float | None, status: str) -> str:
     on both tiers via :func:`_project_and_format`.
     """
     rows = _load_rows_from_hub()
     if not rows:
         return (
             pd.DataFrame(columns=VALIDATED_LEADERBOARD_COLS),
     readers diffing the CSV against the UI see the same ordering.
     """
     rows = _load_rows_from_hub()
     for row in rows:
         if row.get("status") is None:
             row["status"] = "completed"
     pre-schema-bump rows still show up and are actionable.
     """
     rows = _load_rows_from_hub()
     for row in rows:
         if row.get("status") is None:
             row["status"] = "completed"

results.jsonl DELETED Viewed

File without changes

tests/test_leaderboard.py CHANGED Viewed

@@ -8,6 +8,8 @@ Tests stub the Hub fetcher via ``monkeypatch`` so no network I/O runs.
 """
 from __future__ import annotations
 import leaderboard
@@ -127,7 +129,6 @@ def test_field_passthrough(monkeypatch):
 def test_empty_input_returns_two_empty_frames(monkeypatch):
     """Empty input yields two empty DataFrames carrying the expected columns."""
     monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: [])
-    monkeypatch.setattr(leaderboard, "_load_rows_from_local", lambda: [])
     validated, unvalidated = leaderboard.load_leaderboard_split()
     assert validated.empty
     assert unvalidated.empty
@@ -135,6 +136,17 @@ def test_empty_input_returns_two_empty_frames(monkeypatch):
     assert list(unvalidated.columns) == leaderboard.LEADERBOARD_COLS
 def test_submission_name_is_plain_text(monkeypatch):
     """`submission_name` cells render as plain text on both tables.
@@ -216,7 +228,6 @@ def test_build_combined_csv_has_discriminator_and_both_tiers(monkeypatch, tmp_pa
 def test_build_combined_csv_handles_empty_input(monkeypatch):
     """Empty source rows -> empty CSV with the declared columns + header."""
     monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: [])
-    monkeypatch.setattr(leaderboard, "_load_rows_from_local", lambda: [])
     path = leaderboard.build_combined_csv()
     import pandas as pd
     df = pd.read_csv(path)
@@ -247,8 +258,6 @@ def test_fmt_timestamp_formats_iso_and_passes_through_garbage():
     Empty / None / NaN render as the empty string (the cell is
     rendered blank rather than as a literal placeholder).
     """
-    import math
     assert leaderboard._fmt_timestamp("2026-05-28T07:13:16Z") == "2026-05-28 07:13 UTC"
     assert leaderboard._fmt_timestamp(None) == ""
     assert leaderboard._fmt_timestamp("") == ""

 """
 from __future__ import annotations
+import pytest
 import leaderboard
 def test_empty_input_returns_two_empty_frames(monkeypatch):
     """Empty input yields two empty DataFrames carrying the expected columns."""
     monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: [])
     validated, unvalidated = leaderboard.load_leaderboard_split()
     assert validated.empty
     assert unvalidated.empty
     assert list(unvalidated.columns) == leaderboard.LEADERBOARD_COLS
+def test_hub_read_failure_raises_no_silent_fallback(monkeypatch):
+    """A failed Hub read surfaces loudly; the leaderboard never serves
+    bundled/stale fallback data in its place."""
+    # The stub raises LeaderboardDataError itself, so this pins only
+    # that load_leaderboard_split lets the error propagate. The
+    # fetch/parse error wrapping inside the real _load_rows_from_hub is
+    # not exercised here.
+    def _boom():
+        raise leaderboard.LeaderboardDataError("simulated hub failure")
+    monkeypatch.setattr(leaderboard, "_load_rows_from_hub", _boom)
+    with pytest.raises(leaderboard.LeaderboardDataError):
+        leaderboard.load_leaderboard_split()
 def test_submission_name_is_plain_text(monkeypatch):
     """`submission_name` cells render as plain text on both tables.
 def test_build_combined_csv_handles_empty_input(monkeypatch):
     """Empty source rows -> empty CSV with the declared columns + header."""
     monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: [])
     path = leaderboard.build_combined_csv()
     import pandas as pd
     df = pd.read_csv(path)
     Empty / None / NaN render as the empty string (the cell is
     rendered blank rather than as a literal placeholder).
     """
     assert leaderboard._fmt_timestamp("2026-05-28T07:13:16Z") == "2026-05-28 07:13 UTC"
     assert leaderboard._fmt_timestamp(None) == ""
     assert leaderboard._fmt_timestamp("") == ""

tests/test_proxy.py CHANGED Viewed

@@ -24,6 +24,7 @@ import types
 import pandas as pd
 import app
 def test_serve_report_returns_html_when_file_exists(monkeypatch):
@@ -161,6 +162,87 @@ def test_iframe_viewer_returns_placeholder_on_null_event():
     assert iframe == ""
 def test_iframe_escape_is_attribute_safe(monkeypatch):
     """Quotes / ampersands inside the report HTML are escaped properly.

 import pandas as pd
 import app
+import leaderboard
 def test_serve_report_returns_html_when_file_exists(monkeypatch):
     assert iframe == ""
+# --- Boot resilience: no silent fallback, but no crash either -------
+#
+# leaderboard.load_leaderboard_split / load_admin_table *raise*
+# LeaderboardDataError on any Hub read failure (no fallback to stale
+# or bundled data). app.py must turn that into empty tables + a loud
+# banner / toast rather than crash at construction time (which would
+# take the whole Space down on an under-scoped HF_TOKEN).
+def test_safe_load_split_returns_empty_and_error_on_hub_failure(monkeypatch):
+    """A failed Hub read yields empty, correctly-shaped frames + a message."""
+    def boom():
+        raise leaderboard.LeaderboardDataError("simulated hub failure")
+    # Patch the name as bound in `app`: `_safe_load_split` calls it
+    # unqualified, so that binding is the one it resolves.
+    monkeypatch.setattr(app, "load_leaderboard_split", boom)
+    validated, unvalidated, error = app._safe_load_split()
+    assert error is not None
+    assert "simulated hub failure" in error
+    assert len(validated) == 0
+    assert len(unvalidated) == 0
+    # Empty frames keep the declared column shape so the widgets stay
+    # consistent with their datatypes.
+    assert list(validated.columns) == leaderboard.VALIDATED_LEADERBOARD_COLS
+    assert list(unvalidated.columns) == leaderboard.LEADERBOARD_COLS
+def test_safe_load_split_passes_through_on_success(monkeypatch):
+    """On success the wrapper returns the frames untouched with no error."""
+    v = pd.DataFrame(columns=leaderboard.VALIDATED_LEADERBOARD_COLS)
+    u = pd.DataFrame(columns=leaderboard.LEADERBOARD_COLS)
+    monkeypatch.setattr(app, "load_leaderboard_split", lambda: (v, u))
+    validated, unvalidated, error = app._safe_load_split()
+    assert error is None
+    # Identity, not equality: the wrapper must hand back the very same
+    # objects the loader produced, without copying them.
+    assert validated is v
+    assert unvalidated is u
+def test_safe_load_admin_returns_empty_and_error_on_hub_failure(monkeypatch):
+    """Admin counterpart: empty admin frame + message, no exception."""
+    def boom():
+        raise leaderboard.LeaderboardDataError("simulated admin hub failure")
+    monkeypatch.setattr(app, "load_admin_table", boom)
+    admin_df, error = app._safe_load_admin()
+    # Only the presence of an error string is asserted; its text is not
+    # compared against the simulated message.
+    assert error is not None
+    assert len(admin_df) == 0
+    assert list(admin_df.columns) == leaderboard.ADMIN_COLUMNS
+def test_data_error_banner_md_present_on_error_empty_otherwise():
+    """Banner markdown is non-empty (and names the cause) only on error."""
+    # Both "no error" spellings, None and the empty string, render nothing.
+    assert app._data_error_banner_md(None) == ""
+    assert app._data_error_banner_md("") == ""
+    banner = app._data_error_banner_md("boom: 403 Forbidden")
+    # The caller's message must appear verbatim in the rendered banner.
+    assert "boom: 403 Forbidden" in banner
+    assert "unavailable" in banner.lower()
+def test_refresh_handler_shows_banner_and_warns_on_error(monkeypatch):
+    """Manual refresh surfaces the failure loudly: visible banner + warning toast.
+    ``gr.Warning`` / ``gr.Info`` are stubbed so the test runs outside a
+    Gradio request context; the assertion is that a failure path fires
+    a warning (not an info) and flips the banner visible.
+    """
+    def boom():
+        raise leaderboard.LeaderboardDataError("simulated hub failure")
+    monkeypatch.setattr(app, "load_leaderboard_split", boom)
+    # Call counters: each stub accepts any arguments and bumps its own
+    # tally, so the assertions below can tell which toast kind fired.
+    toasts = {"warning": 0, "info": 0}
+    monkeypatch.setattr(app.gr, "Warning", lambda *a, **k: toasts.__setitem__("warning", toasts["warning"] + 1))
+    monkeypatch.setattr(app.gr, "Info", lambda *a, **k: toasts.__setitem__("info", toasts["info"] + 1))
+    validated, unvalidated, banner = app._refresh_leaderboard_with_toast()
+    assert toasts["warning"] == 1
+    assert toasts["info"] == 0
+    assert len(validated) == 0 and len(unvalidated) == 0
+    # The banner output is a gr.Markdown update flipped visible.
+    # getattr with a None default turns a missing attribute into a
+    # failed assertion rather than an AttributeError.
+    assert getattr(banner, "visible", None) is True
 def test_iframe_escape_is_attribute_safe(monkeypatch):
     """Quotes / ampersands inside the report HTML are escaped properly.