Michael Rabinovich committed on
Commit
a662bfa
·
1 Parent(s): c78e980

leaderboard: drop silent fallback; boot resilient on Hub read failure

Browse files

The leaderboard previously fell back to a bundled/local results.jsonl on
any Hub error, which let an under-scoped Space HF_TOKEN silently serve
stale baked-in rows that looked up-to-date. Remove the fallback: the live
results.jsonl is the single source of truth and any read failure raises
LeaderboardDataError.

To stay robust rather than crash the Space at construction time, app.py
wraps the readers (_safe_load_split / _safe_load_admin): a failed read
yields empty, correctly-shaped tables plus a loud, persistent banner and
a gr.Warning toast on refresh / Timer tick, and is logged via
logger.exception. No stale or cached data is ever shown in place of a
failed read.

Also git rm the bundled results.jsonl and add regression tests for the
no-fallback contract and the boot-resilience wrappers.

Files changed (5) hide show
  1. app.py +148 -20
  2. leaderboard.py +39 -34
  3. results.jsonl +0 -0
  4. tests/test_leaderboard.py +13 -4
  5. tests/test_proxy.py +82 -0
app.py CHANGED
@@ -43,9 +43,12 @@ from leaderboard import (
43
  ADMIN_SELECT_COL,
44
  HF_DATA_REPO,
45
  HF_SUBMISSIONS_REPO,
 
46
  LEADERBOARD_DATATYPES,
47
  LEADERBOARD_HIDE_COLUMNS,
 
48
  VALIDATED_LEADERBOARD_DATATYPES,
 
49
  _fmt_timestamp,
50
  build_combined_csv,
51
  load_admin_table,
@@ -154,15 +157,100 @@ def _build_report_iframe(html_bytes: bytes) -> str:
154
  )
155
 
156
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  def _refresh_leaderboard_with_toast():
158
- """Manual Refresh button handler: toast + fresh DataFrames.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
- The Timer auto-refresh wires straight to ``load_leaderboard_split``
161
- so it stays silent (a toast every 10s would be noise). Only the
162
- explicit click goes through this wrapper.
 
 
 
 
163
  """
164
- gr.Info("Leaderboard refreshed.")
165
- return load_leaderboard_split()
 
 
 
 
 
 
166
 
167
 
168
  def _enable_submit_when_logged_in(
@@ -238,6 +326,19 @@ def _arm_delete(
238
  return gr.Button(interactive=bool(confirm) and is_admin(profile))
239
 
240
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  def _admin_promote(
242
  table_df: pd.DataFrame | None,
243
  method: str | None,
@@ -260,8 +361,9 @@ def _admin_promote(
260
  except (LookupError, ValueError) as e:
261
  raise gr.Error(str(e))
262
  gr.Info(f"Promoted {len(ids)} row(s) to validated ({method}).")
263
- validated, unvalidated = load_leaderboard_split()
264
- return load_admin_table(), validated, unvalidated
 
265
 
266
 
267
  def _admin_demote(
@@ -279,8 +381,9 @@ def _admin_demote(
279
  except (LookupError, ValueError) as e:
280
  raise gr.Error(str(e))
281
  gr.Info(f"Demoted {len(ids)} row(s) to unvalidated.")
282
- validated, unvalidated = load_leaderboard_split()
283
- return load_admin_table(), validated, unvalidated
 
284
 
285
 
286
  def _admin_delete(
@@ -305,9 +408,10 @@ def _admin_delete(
305
  except ValueError as e:
306
  raise gr.Error(str(e))
307
  gr.Info(f"Deleted {len(ids)} submission(s).")
308
- validated, unvalidated = load_leaderboard_split()
 
309
  return (
310
- load_admin_table(),
311
  validated,
312
  unvalidated,
313
  gr.Checkbox(value=False),
@@ -416,6 +520,23 @@ with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as block
416
  )
417
 
418
  with gr.Tab("Leaderboard"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
419
  # Collapsed accordions above the tables. Validation guidelines
420
  # gives the short two-tier story + link to the full policy
421
  # doc; Citation carries the verbatim BibTeX entry. Both start
@@ -436,7 +557,8 @@ with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as block
436
  # on top so the curated results are above the fold; unvalidated
437
  # below carries every other row (auto-published, awaiting
438
  # methodology review). See decisions/validation-policy.md.
439
- initial_validated, initial_unvalidated = load_leaderboard_split()
 
440
  validated_view = Leaderboard(
441
  value=initial_validated,
442
  datatype=VALIDATED_LEADERBOARD_DATATYPES,
@@ -464,7 +586,7 @@ with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as block
464
  )
465
  refresh_btn.click(
466
  fn=_refresh_leaderboard_with_toast,
467
- outputs=[validated_view, unvalidated_view],
468
  )
469
  download_btn.click(fn=build_combined_csv, outputs=download_btn)
470
 
@@ -558,8 +680,11 @@ to publish the resulting row on the public leaderboard.
558
  )
559
  # Only the leading `select` column is editable; the rest is
560
  # read-only context. Click-to-tick drives every action below.
 
 
 
561
  admin_table = gr.Dataframe(
562
- value=load_admin_table(),
563
  datatype=[
564
  "bool", "str", "str", "str", "str", "str", "str", "number",
565
  "str",
@@ -626,16 +751,19 @@ to publish the resulting row on the public leaderboard.
626
  delete_confirm, delete_btn,
627
  ],
628
  )
629
- admin_refresh_btn.click(fn=load_admin_table, outputs=admin_table)
630
 
631
  # gradio_leaderboard.Leaderboard handles its own update path
632
  # cleanly; bind a Timer to push fresh dataframes every 10 seconds.
633
- # Single tick runs `load_leaderboard_split` once and pushes the
634
- # tuple's two halves into the validated / unvalidated widgets.
 
 
 
635
  auto_refresh_timer = gr.Timer(10)
636
  auto_refresh_timer.tick(
637
- fn=load_leaderboard_split,
638
- outputs=[validated_view, unvalidated_view],
639
  )
640
 
641
  # On page load, read the visitor's OAuth profile (None if not
 
43
  ADMIN_SELECT_COL,
44
  HF_DATA_REPO,
45
  HF_SUBMISSIONS_REPO,
46
+ LEADERBOARD_COLS,
47
  LEADERBOARD_DATATYPES,
48
  LEADERBOARD_HIDE_COLUMNS,
49
+ VALIDATED_LEADERBOARD_COLS,
50
  VALIDATED_LEADERBOARD_DATATYPES,
51
+ LeaderboardDataError,
52
  _fmt_timestamp,
53
  build_combined_csv,
54
  load_admin_table,
 
157
  )
158
 
159
 
160
+ def _data_error_banner_md(message: str | None) -> str:
161
+ """Markdown for the top-of-tab data-unavailable banner.
162
+
163
+ Empty string when there's no error (the banner is also hidden via
164
+ ``visible=False`` in that case). When the live ``results.jsonl``
165
+ can't be read, the banner is the loud, persistent signal that the
166
+ tables below are empty *by design* (we never fall back to stale or
167
+ bundled data) rather than because the leaderboard is genuinely
168
+ empty.
169
+ """
170
+ if not message:
171
+ return ""
172
+ return (
173
+ "> ⚠️ **Leaderboard data unavailable.** The live results could not "
174
+ "be read from the Hub, so the tables below are empty. No stale or "
175
+ "cached data is ever shown in its place.\n>\n"
176
+ f"> Details: `{message}`"
177
+ )
178
+
179
+
180
+ def _safe_load_split() -> tuple[pd.DataFrame, pd.DataFrame, str | None]:
181
+ """Load both tiers, turning a Hub failure into empty frames + a message.
182
+
183
+ The reader (:func:`load_leaderboard_split`) deliberately *raises*
184
+ on any read failure (no silent fallback). The Space, however, must
185
+ stay up and loudly surface the failure rather than crash, so this
186
+ wrapper converts :class:`LeaderboardDataError` into empty,
187
+ correctly-shaped DataFrames plus an error string the caller renders
188
+ in the banner / a toast. Returns ``(validated, unvalidated, error)``
189
+ with ``error`` ``None`` on success.
190
+ """
191
+ try:
192
+ validated, unvalidated = load_leaderboard_split()
193
+ return validated, unvalidated, None
194
+ except LeaderboardDataError as e:
195
+ logger.exception("Leaderboard data load failed")
196
+ return (
197
+ pd.DataFrame(columns=VALIDATED_LEADERBOARD_COLS),
198
+ pd.DataFrame(columns=LEADERBOARD_COLS),
199
+ str(e),
200
+ )
201
+
202
+
203
+ def _safe_load_admin() -> tuple[pd.DataFrame, str | None]:
204
+ """Admin-table counterpart to :func:`_safe_load_split`.
205
+
206
+ Same no-crash contract: a Hub read failure yields an empty,
207
+ correctly-shaped admin frame plus the error string instead of
208
+ propagating the exception (which would take the whole Space down at
209
+ boot, since the admin table loads at module-construction time).
210
+ """
211
+ try:
212
+ return load_admin_table(), None
213
+ except LeaderboardDataError as e:
214
+ logger.exception("Admin table load failed")
215
+ return pd.DataFrame(columns=ADMIN_COLUMNS), str(e)
216
+
217
+
218
  def _refresh_leaderboard_with_toast():
219
+ """Manual Refresh button handler: toast + fresh DataFrames + banner.
220
+
221
+ Surfaces the outcome loudly either way: ``gr.Info`` on success,
222
+ ``gr.Warning`` when the live read failed. The third output keeps
223
+ the data-unavailable banner in sync (shown with the error,
224
+ cleared on success).
225
+ """
226
+ validated, unvalidated, error = _safe_load_split()
227
+ if error:
228
+ gr.Warning(f"Leaderboard data unavailable: {error}")
229
+ else:
230
+ gr.Info("Leaderboard refreshed.")
231
+ return (
232
+ validated,
233
+ unvalidated,
234
+ gr.Markdown(value=_data_error_banner_md(error), visible=error is not None),
235
+ )
236
+
237
 
238
+ def _auto_refresh_leaderboard():
239
+ """Timer-tick handler: fresh DataFrames + banner, no success toast.
240
+
241
+ Mirrors :func:`_refresh_leaderboard_with_toast` but stays silent on
242
+ success (a toast every 10s would be noise). A read failure still
243
+ fires a loud ``gr.Warning`` and updates the banner so a degraded
244
+ Hub read can't quietly leave the tables blank.
245
  """
246
+ validated, unvalidated, error = _safe_load_split()
247
+ if error:
248
+ gr.Warning(f"Leaderboard data unavailable: {error}")
249
+ return (
250
+ validated,
251
+ unvalidated,
252
+ gr.Markdown(value=_data_error_banner_md(error), visible=error is not None),
253
+ )
254
 
255
 
256
  def _enable_submit_when_logged_in(
 
326
  return gr.Button(interactive=bool(confirm) and is_admin(profile))
327
 
328
 
329
+ def _refresh_admin_table() -> pd.DataFrame:
330
+ """Admin Refresh button handler: reload the admin table, toast on failure.
331
+
332
+ Uses the no-crash :func:`_safe_load_admin` so a Hub read failure
333
+ surfaces as a loud ``gr.Warning`` plus an empty table rather than an
334
+ uncaught exception.
335
+ """
336
+ admin_df, error = _safe_load_admin()
337
+ if error:
338
+ gr.Warning(f"Admin table unavailable: {error}")
339
+ return admin_df
340
+
341
+
342
  def _admin_promote(
343
  table_df: pd.DataFrame | None,
344
  method: str | None,
 
361
  except (LookupError, ValueError) as e:
362
  raise gr.Error(str(e))
363
  gr.Info(f"Promoted {len(ids)} row(s) to validated ({method}).")
364
+ validated, unvalidated, _ = _safe_load_split()
365
+ admin_df, _ = _safe_load_admin()
366
+ return admin_df, validated, unvalidated
367
 
368
 
369
  def _admin_demote(
 
381
  except (LookupError, ValueError) as e:
382
  raise gr.Error(str(e))
383
  gr.Info(f"Demoted {len(ids)} row(s) to unvalidated.")
384
+ validated, unvalidated, _ = _safe_load_split()
385
+ admin_df, _ = _safe_load_admin()
386
+ return admin_df, validated, unvalidated
387
 
388
 
389
  def _admin_delete(
 
408
  except ValueError as e:
409
  raise gr.Error(str(e))
410
  gr.Info(f"Deleted {len(ids)} submission(s).")
411
+ validated, unvalidated, _ = _safe_load_split()
412
+ admin_df, _ = _safe_load_admin()
413
  return (
414
+ admin_df,
415
  validated,
416
  unvalidated,
417
  gr.Checkbox(value=False),
 
520
  )
521
 
522
  with gr.Tab("Leaderboard"):
523
+ # Load both tiers once at boot. `_safe_load_split` keeps a Hub
524
+ # read failure from crashing the Space: on failure the frames
525
+ # come up empty and `initial_error` carries the message the
526
+ # banner renders.
527
+ initial_validated, initial_unvalidated, initial_error = _safe_load_split()
528
+
529
+ # Loud, persistent banner shown only when the live results
530
+ # can't be read from the Hub (e.g. an under-scoped Space
531
+ # HF_TOKEN). Kept in sync by the refresh / Timer handlers. The
532
+ # leaderboard never falls back to stale/bundled data, so this
533
+ # banner is the signal that empty tables are a read failure,
534
+ # not a genuinely empty leaderboard.
535
+ data_error_banner = gr.Markdown(
536
+ value=_data_error_banner_md(initial_error),
537
+ visible=initial_error is not None,
538
+ )
539
+
540
  # Collapsed accordions above the tables. Validation guidelines
541
  # gives the short two-tier story + link to the full policy
542
  # doc; Citation carries the verbatim BibTeX entry. Both start
 
557
  # on top so the curated results are above the fold; unvalidated
558
  # below carries every other row (auto-published, awaiting
559
  # methodology review). See decisions/validation-policy.md.
560
+ # Initial values come from the boot-time `_safe_load_split`
561
+ # above (empty + banner on a Hub read failure).
562
  validated_view = Leaderboard(
563
  value=initial_validated,
564
  datatype=VALIDATED_LEADERBOARD_DATATYPES,
 
586
  )
587
  refresh_btn.click(
588
  fn=_refresh_leaderboard_with_toast,
589
+ outputs=[validated_view, unvalidated_view, data_error_banner],
590
  )
591
  download_btn.click(fn=build_combined_csv, outputs=download_btn)
592
 
 
680
  )
681
  # Only the leading `select` column is editable; the rest is
682
  # read-only context. Click-to-tick drives every action below.
683
+ # `_safe_load_admin` keeps a Hub read failure from crashing the
684
+ # Space at boot (the admin table loads at construction time).
685
+ initial_admin_table, _ = _safe_load_admin()
686
  admin_table = gr.Dataframe(
687
+ value=initial_admin_table,
688
  datatype=[
689
  "bool", "str", "str", "str", "str", "str", "str", "number",
690
  "str",
 
751
  delete_confirm, delete_btn,
752
  ],
753
  )
754
+ admin_refresh_btn.click(fn=_refresh_admin_table, outputs=admin_table)
755
 
756
  # gradio_leaderboard.Leaderboard handles its own update path
757
  # cleanly; bind a Timer to push fresh dataframes every 10 seconds.
758
+ # Single tick runs `_auto_refresh_leaderboard` once and pushes the
759
+ # two halves into the validated / unvalidated widgets plus the
760
+ # data-unavailable banner. The handler swallows a Hub read failure
761
+ # into empty frames + a loud warning toast so a degraded read never
762
+ # crashes the tick loop or silently blanks the tables.
763
  auto_refresh_timer = gr.Timer(10)
764
  auto_refresh_timer.tick(
765
+ fn=_auto_refresh_leaderboard,
766
+ outputs=[validated_view, unvalidated_view, data_error_banner],
767
  )
768
 
769
  # On page load, read the visitor's OAuth profile (None if not
leaderboard.py CHANGED
@@ -14,10 +14,13 @@
14
 
15
  """Leaderboard read path.
16
 
17
- Loads `results.jsonl` from the submissions dataset on the Hub (or falls
18
- back to the local mirror on any Hub error) and shapes the rows into the
19
- dataframe shown on the Leaderboard tab. Module-level constants describe
20
- the env-var-driven repo identities that the submit path also consumes.
 
 
 
21
  """
22
  from __future__ import annotations
23
 
@@ -42,10 +45,19 @@ HF_SUBMISSIONS_REPO = os.getenv(
42
  )
43
  HF_DATA_REPO = os.getenv("HF_DATA_REPO", f"{HF_ORG}/cadgenbench-data")
44
 
45
- LOCAL_RESULTS_PATH = Path(__file__).parent / "results.jsonl"
46
  RESULTS_FILENAME = "results.jsonl"
47
  HUB_FETCH_TIMEOUT_SECONDS = 30
48
 
 
 
 
 
 
 
 
 
 
 
49
  # Columns visible in the rendered table, in left-to-right order, followed
50
  # by hidden-but-data-present columns the row-click detail panel pulls from.
51
  # Hidden columns ride along in the DataFrame so `Leaderboard.select(...)`
@@ -148,7 +160,7 @@ def _fmt_timestamp(ts) -> str:
148
  return s
149
 
150
 
151
- def _load_rows_from_hub() -> list[dict] | None:
152
  """Pull results.jsonl from the submissions dataset via raw HTTPS.
153
 
154
  Avoids :func:`huggingface_hub.hf_hub_download` because its layered
@@ -159,8 +171,12 @@ def _load_rows_from_hub() -> list[dict] | None:
159
  query param and ``Cache-Control: no-cache`` consistently sees the
160
  latest commit on the dataset's ``main`` branch within seconds.
161
 
162
- Returns None on any failure so callers can fall back to the local
163
- mirror.
 
 
 
 
164
  """
165
  url = (
166
  f"https://huggingface.co/datasets/{HF_SUBMISSIONS_REPO}"
@@ -178,22 +194,22 @@ def _load_rows_from_hub() -> list[dict] | None:
178
  timeout=HUB_FETCH_TIMEOUT_SECONDS,
179
  )
180
  r.raise_for_status()
 
 
 
 
 
 
 
 
181
  rows = [json.loads(line) for line in r.text.splitlines() if line.strip()]
182
- logger.info("Loaded %d rows from Hub", len(rows))
183
- return rows
184
- except Exception as e: # noqa: BLE001 - any failure should fall back
185
- logger.warning("Hub fetch failed (%s: %s)", type(e).__name__, e)
186
- return None
187
-
188
-
189
- def _load_rows_from_local() -> list[dict]:
190
- if not LOCAL_RESULTS_PATH.exists():
191
- return []
192
- return [
193
- json.loads(line)
194
- for line in LOCAL_RESULTS_PATH.read_text().splitlines()
195
- if line.strip()
196
- ]
197
 
198
 
199
  def _fmt_pct(x: float | None, status: str) -> str:
@@ -318,9 +334,6 @@ def load_leaderboard_split() -> tuple[pd.DataFrame, pd.DataFrame]:
318
  on both tiers via :func:`_project_and_format`.
319
  """
320
  rows = _load_rows_from_hub()
321
- if rows is None:
322
- logger.warning("Hub read failed; falling back to local results.jsonl")
323
- rows = _load_rows_from_local()
324
  if not rows:
325
  return (
326
  pd.DataFrame(columns=VALIDATED_LEADERBOARD_COLS),
@@ -455,10 +468,6 @@ def build_combined_csv() -> str:
455
  readers diffing the CSV against the UI see the same ordering.
456
  """
457
  rows = _load_rows_from_hub()
458
- if rows is None:
459
- logger.info("CSV build falling back to local results.jsonl")
460
- rows = _load_rows_from_local()
461
- rows = rows or []
462
  for row in rows:
463
  if row.get("status") is None:
464
  row["status"] = "completed"
@@ -512,10 +521,6 @@ def load_admin_table() -> pd.DataFrame:
512
  pre-schema-bump rows still show up and are actionable.
513
  """
514
  rows = _load_rows_from_hub()
515
- if rows is None:
516
- logger.info("Admin table build falling back to local results.jsonl")
517
- rows = _load_rows_from_local()
518
- rows = rows or []
519
  for row in rows:
520
  if row.get("status") is None:
521
  row["status"] = "completed"
 
14
 
15
  """Leaderboard read path.
16
 
17
+ Loads `results.jsonl` from the submissions dataset on the Hub and
18
+ shapes the rows into the dataframe shown on the Leaderboard tab. The
19
+ live file is the single source of truth: there is **no fallback** to
20
+ bundled/stale data, so any read failure raises
21
+ :class:`LeaderboardDataError` rather than silently serving wrong rows.
22
+ Module-level constants describe the env-var-driven repo identities
23
+ that the submit path also consumes.
24
  """
25
  from __future__ import annotations
26
 
 
45
  )
46
  HF_DATA_REPO = os.getenv("HF_DATA_REPO", f"{HF_ORG}/cadgenbench-data")
47
 
 
48
  RESULTS_FILENAME = "results.jsonl"
49
  HUB_FETCH_TIMEOUT_SECONDS = 30
50
 
51
+
52
+ class LeaderboardDataError(RuntimeError):
53
+ """Raised when the live ``results.jsonl`` cannot be read from the Hub.
54
+
55
+ The leaderboard has **no fallback**: rather than silently serving
56
+ stale or bundled data (which can make a broken Hub read, e.g. an
57
+ under-scoped Space ``HF_TOKEN``, look like an up-to-date but wrong
58
+ leaderboard), every read failure surfaces loudly here.
59
+ """
60
+
61
  # Columns visible in the rendered table, in left-to-right order, followed
62
  # by hidden-but-data-present columns the row-click detail panel pulls from.
63
  # Hidden columns ride along in the DataFrame so `Leaderboard.select(...)`
 
160
  return s
161
 
162
 
163
+ def _load_rows_from_hub() -> list[dict]:
164
  """Pull results.jsonl from the submissions dataset via raw HTTPS.
165
 
166
  Avoids :func:`huggingface_hub.hf_hub_download` because its layered
 
171
  query param and ``Cache-Control: no-cache`` consistently sees the
172
  latest commit on the dataset's ``main`` branch within seconds.
173
 
174
+ The live ``results.jsonl`` is the single source of truth. Any
175
+ failure (network, auth, malformed JSON) raises
176
+ :class:`LeaderboardDataError`: there is deliberately **no fallback**
177
+ to bundled/stale data, so a broken read fails loudly instead of
178
+ silently serving wrong rows. An empty file is a valid result (an
179
+ empty leaderboard), not a failure.
180
  """
181
  url = (
182
  f"https://huggingface.co/datasets/{HF_SUBMISSIONS_REPO}"
 
194
  timeout=HUB_FETCH_TIMEOUT_SECONDS,
195
  )
196
  r.raise_for_status()
197
+ except Exception as e:
198
+ raise LeaderboardDataError(
199
+ f"Could not fetch {RESULTS_FILENAME} from {HF_SUBMISSIONS_REPO}: "
200
+ f"{type(e).__name__}: {e}. Verify the Space's HF_TOKEN has read "
201
+ f"access to the (private) submissions dataset. The leaderboard "
202
+ f"serves no fallback data."
203
+ ) from e
204
+ try:
205
  rows = [json.loads(line) for line in r.text.splitlines() if line.strip()]
206
+ except json.JSONDecodeError as e:
207
+ raise LeaderboardDataError(
208
+ f"Malformed {RESULTS_FILENAME} from {HF_SUBMISSIONS_REPO}: "
209
+ f"{type(e).__name__}: {e}."
210
+ ) from e
211
+ logger.info("Loaded %d rows from Hub", len(rows))
212
+ return rows
 
 
 
 
 
 
 
 
213
 
214
 
215
  def _fmt_pct(x: float | None, status: str) -> str:
 
334
  on both tiers via :func:`_project_and_format`.
335
  """
336
  rows = _load_rows_from_hub()
 
 
 
337
  if not rows:
338
  return (
339
  pd.DataFrame(columns=VALIDATED_LEADERBOARD_COLS),
 
468
  readers diffing the CSV against the UI see the same ordering.
469
  """
470
  rows = _load_rows_from_hub()
 
 
 
 
471
  for row in rows:
472
  if row.get("status") is None:
473
  row["status"] = "completed"
 
521
  pre-schema-bump rows still show up and are actionable.
522
  """
523
  rows = _load_rows_from_hub()
 
 
 
 
524
  for row in rows:
525
  if row.get("status") is None:
526
  row["status"] = "completed"
results.jsonl DELETED
File without changes
tests/test_leaderboard.py CHANGED
@@ -8,6 +8,8 @@ Tests stub the Hub fetcher via ``monkeypatch`` so no network I/O runs.
8
  """
9
  from __future__ import annotations
10
 
 
 
11
  import leaderboard
12
 
13
 
@@ -127,7 +129,6 @@ def test_field_passthrough(monkeypatch):
127
  def test_empty_input_returns_two_empty_frames(monkeypatch):
128
  """Empty input yields two empty DataFrames carrying the expected columns."""
129
  monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: [])
130
- monkeypatch.setattr(leaderboard, "_load_rows_from_local", lambda: [])
131
  validated, unvalidated = leaderboard.load_leaderboard_split()
132
  assert validated.empty
133
  assert unvalidated.empty
@@ -135,6 +136,17 @@ def test_empty_input_returns_two_empty_frames(monkeypatch):
135
  assert list(unvalidated.columns) == leaderboard.LEADERBOARD_COLS
136
 
137
 
 
 
 
 
 
 
 
 
 
 
 
138
  def test_submission_name_is_plain_text(monkeypatch):
139
  """`submission_name` cells render as plain text on both tables.
140
 
@@ -216,7 +228,6 @@ def test_build_combined_csv_has_discriminator_and_both_tiers(monkeypatch, tmp_pa
216
  def test_build_combined_csv_handles_empty_input(monkeypatch):
217
  """Empty source rows -> empty CSV with the declared columns + header."""
218
  monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: [])
219
- monkeypatch.setattr(leaderboard, "_load_rows_from_local", lambda: [])
220
  path = leaderboard.build_combined_csv()
221
  import pandas as pd
222
  df = pd.read_csv(path)
@@ -247,8 +258,6 @@ def test_fmt_timestamp_formats_iso_and_passes_through_garbage():
247
  Empty / None / NaN render as the empty string (the cell is
248
  rendered blank rather than as a literal placeholder).
249
  """
250
- import math
251
-
252
  assert leaderboard._fmt_timestamp("2026-05-28T07:13:16Z") == "2026-05-28 07:13 UTC"
253
  assert leaderboard._fmt_timestamp(None) == ""
254
  assert leaderboard._fmt_timestamp("") == ""
 
8
  """
9
  from __future__ import annotations
10
 
11
+ import pytest
12
+
13
  import leaderboard
14
 
15
 
 
129
  def test_empty_input_returns_two_empty_frames(monkeypatch):
130
  """Empty input yields two empty DataFrames carrying the expected columns."""
131
  monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: [])
 
132
  validated, unvalidated = leaderboard.load_leaderboard_split()
133
  assert validated.empty
134
  assert unvalidated.empty
 
136
  assert list(unvalidated.columns) == leaderboard.LEADERBOARD_COLS
137
 
138
 
139
+ def test_hub_read_failure_raises_no_silent_fallback(monkeypatch):
140
+ """A failed Hub read surfaces loudly; the leaderboard never serves
141
+ bundled/stale fallback data in its place."""
142
+ def _boom():
143
+ raise leaderboard.LeaderboardDataError("simulated hub failure")
144
+
145
+ monkeypatch.setattr(leaderboard, "_load_rows_from_hub", _boom)
146
+ with pytest.raises(leaderboard.LeaderboardDataError):
147
+ leaderboard.load_leaderboard_split()
148
+
149
+
150
  def test_submission_name_is_plain_text(monkeypatch):
151
  """`submission_name` cells render as plain text on both tables.
152
 
 
228
  def test_build_combined_csv_handles_empty_input(monkeypatch):
229
  """Empty source rows -> empty CSV with the declared columns + header."""
230
  monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: [])
 
231
  path = leaderboard.build_combined_csv()
232
  import pandas as pd
233
  df = pd.read_csv(path)
 
258
  Empty / None / NaN render as the empty string (the cell is
259
  rendered blank rather than as a literal placeholder).
260
  """
 
 
261
  assert leaderboard._fmt_timestamp("2026-05-28T07:13:16Z") == "2026-05-28 07:13 UTC"
262
  assert leaderboard._fmt_timestamp(None) == ""
263
  assert leaderboard._fmt_timestamp("") == ""
tests/test_proxy.py CHANGED
@@ -24,6 +24,7 @@ import types
24
  import pandas as pd
25
 
26
  import app
 
27
 
28
 
29
  def test_serve_report_returns_html_when_file_exists(monkeypatch):
@@ -161,6 +162,87 @@ def test_iframe_viewer_returns_placeholder_on_null_event():
161
  assert iframe == ""
162
 
163
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  def test_iframe_escape_is_attribute_safe(monkeypatch):
165
  """Quotes / ampersands inside the report HTML are escaped properly.
166
 
 
24
  import pandas as pd
25
 
26
  import app
27
+ import leaderboard
28
 
29
 
30
  def test_serve_report_returns_html_when_file_exists(monkeypatch):
 
162
  assert iframe == ""
163
 
164
 
165
+ # --- Boot resilience: no silent fallback, but no crash either -------
166
+ #
167
+ # leaderboard.load_leaderboard_split / load_admin_table *raise*
168
+ # LeaderboardDataError on any Hub read failure (no fallback to stale
169
+ # or bundled data). app.py must turn that into empty tables + a loud
170
+ # banner / toast rather than crash at construction time (which would
171
+ # take the whole Space down on an under-scoped HF_TOKEN).
172
+
173
+
174
+ def test_safe_load_split_returns_empty_and_error_on_hub_failure(monkeypatch):
175
+ """A failed Hub read yields empty, correctly-shaped frames + a message."""
176
+ def boom():
177
+ raise leaderboard.LeaderboardDataError("simulated hub failure")
178
+
179
+ monkeypatch.setattr(app, "load_leaderboard_split", boom)
180
+ validated, unvalidated, error = app._safe_load_split()
181
+ assert error is not None
182
+ assert "simulated hub failure" in error
183
+ assert len(validated) == 0
184
+ assert len(unvalidated) == 0
185
+ # Empty frames keep the declared column shape so the widgets stay
186
+ # consistent with their datatypes.
187
+ assert list(validated.columns) == leaderboard.VALIDATED_LEADERBOARD_COLS
188
+ assert list(unvalidated.columns) == leaderboard.LEADERBOARD_COLS
189
+
190
+
191
+ def test_safe_load_split_passes_through_on_success(monkeypatch):
192
+ """On success the wrapper returns the frames untouched with no error."""
193
+ v = pd.DataFrame(columns=leaderboard.VALIDATED_LEADERBOARD_COLS)
194
+ u = pd.DataFrame(columns=leaderboard.LEADERBOARD_COLS)
195
+ monkeypatch.setattr(app, "load_leaderboard_split", lambda: (v, u))
196
+ validated, unvalidated, error = app._safe_load_split()
197
+ assert error is None
198
+ assert validated is v
199
+ assert unvalidated is u
200
+
201
+
202
+ def test_safe_load_admin_returns_empty_and_error_on_hub_failure(monkeypatch):
203
+ """Admin counterpart: empty admin frame + message, no exception."""
204
+ def boom():
205
+ raise leaderboard.LeaderboardDataError("simulated admin hub failure")
206
+
207
+ monkeypatch.setattr(app, "load_admin_table", boom)
208
+ admin_df, error = app._safe_load_admin()
209
+ assert error is not None
210
+ assert len(admin_df) == 0
211
+ assert list(admin_df.columns) == leaderboard.ADMIN_COLUMNS
212
+
213
+
214
+ def test_data_error_banner_md_present_on_error_empty_otherwise():
215
+ """Banner markdown is non-empty (and names the cause) only on error."""
216
+ assert app._data_error_banner_md(None) == ""
217
+ assert app._data_error_banner_md("") == ""
218
+ banner = app._data_error_banner_md("boom: 403 Forbidden")
219
+ assert "boom: 403 Forbidden" in banner
220
+ assert "unavailable" in banner.lower()
221
+
222
+
223
+ def test_refresh_handler_shows_banner_and_warns_on_error(monkeypatch):
224
+ """Manual refresh surfaces the failure loudly: visible banner + warning toast.
225
+
226
+ ``gr.Warning`` / ``gr.Info`` are stubbed so the test runs outside a
227
+ Gradio request context; the assertion is that a failure path fires
228
+ a warning (not an info) and flips the banner visible.
229
+ """
230
+ def boom():
231
+ raise leaderboard.LeaderboardDataError("simulated hub failure")
232
+
233
+ monkeypatch.setattr(app, "load_leaderboard_split", boom)
234
+ toasts = {"warning": 0, "info": 0}
235
+ monkeypatch.setattr(app.gr, "Warning", lambda *a, **k: toasts.__setitem__("warning", toasts["warning"] + 1))
236
+ monkeypatch.setattr(app.gr, "Info", lambda *a, **k: toasts.__setitem__("info", toasts["info"] + 1))
237
+
238
+ validated, unvalidated, banner = app._refresh_leaderboard_with_toast()
239
+ assert toasts["warning"] == 1
240
+ assert toasts["info"] == 0
241
+ assert len(validated) == 0 and len(unvalidated) == 0
242
+ # The banner output is a gr.Markdown update flipped visible.
243
+ assert getattr(banner, "visible", None) is True
244
+
245
+
246
  def test_iframe_escape_is_attribute_safe(monkeypatch):
247
  """Quotes / ampersands inside the report HTML are escaped properly.
248