Michael Rabinovich committed on
Commit
b5ad973
·
1 Parent(s): 628bc9e

refactor: split leaderboard read path into its own module

Browse files

Mirrors the shape adyen/DABstep uses on their reference Space (UI
assembly in app.py, business logic in a sibling module). app.py drops
from 224 lines to 158 by extracting the env-driven repo constants,
the LOCAL_RESULTS_PATH + LEADERBOARD_COLS schema, the Hub/local row
loaders, and load_leaderboard() into leaderboard.py. The submit
handler stays in app.py for now; it'll move to a third module
(submit.py) when Step 6 (E) lands the real validation + async eval.

No behaviour change: load_leaderboard is imported back into app.py
and wired into the same gr.Dataframe / refresh button pair as before.
Smoke-tested locally (cadgenbench-space env): app module imports
cleanly, load_leaderboard() returns a dataframe with the expected
columns.

Files changed (2) hide show
  1. app.py +8 -85
  2. leaderboard.py +91 -0
app.py CHANGED
@@ -1,97 +1,20 @@
1
- """CADGenBench Leaderboard Space.
2
 
3
- Step 3 prototype: a hand-crafted ``results.jsonl`` drives the leaderboard
4
- table, and the Submit tab is a UI-only stub. The read path (Step 5) will
5
- swap the JSONL for ``datasets.load_dataset(HF_SUBMISSIONS_REPO, 'results')``
6
- and the write path (Step 6) will run ``cadgenbench evaluate`` and push a
7
- result row back to the submissions dataset via ``HfApi``.
8
  """
9
-
10
  from __future__ import annotations
11
 
12
- import json
13
- import os
14
  from pathlib import Path
15
 
16
  import gradio as gr
17
- import pandas as pd
18
- from huggingface_hub import hf_hub_download
19
 
20
- HF_ORG = os.getenv("HF_ORG", "michaelr27")
21
- HF_SUBMISSIONS_REPO = os.getenv(
22
- "HF_SUBMISSIONS_REPO", f"{HF_ORG}/cadgenbench-submissions"
 
23
  )
24
- HF_DATA_REPO = os.getenv("HF_DATA_REPO", f"{HF_ORG}/cadgenbench-data")
25
-
26
- LOCAL_RESULTS_PATH = Path(__file__).parent / "results.jsonl"
27
-
28
- LEADERBOARD_COLS = [
29
- "submission_name",
30
- "submitter_name",
31
- "aggregate_score",
32
- "validity_rate",
33
- "submitted_at",
34
- "cadgenbench_version",
35
- ]
36
-
37
-
38
- def _load_rows_from_hub() -> list[dict] | None:
39
- """Pull results.jsonl from the submissions dataset.
40
-
41
- Returns None on any failure so callers can fall back to the local file.
42
- """
43
- try:
44
- path = hf_hub_download(
45
- repo_id=HF_SUBMISSIONS_REPO,
46
- filename="results.jsonl",
47
- repo_type="dataset",
48
- force_download=True,
49
- )
50
- return [
51
- json.loads(line)
52
- for line in Path(path).read_text().splitlines()
53
- if line.strip()
54
- ]
55
- except Exception as e: # noqa: BLE001 — any failure should fall back
56
- print(f"[load_leaderboard] Hub fetch failed ({type(e).__name__}: {e})")
57
- return None
58
-
59
-
60
- def _load_rows_from_local() -> list[dict]:
61
- if not LOCAL_RESULTS_PATH.exists():
62
- return []
63
- return [
64
- json.loads(line)
65
- for line in LOCAL_RESULTS_PATH.read_text().splitlines()
66
- if line.strip()
67
- ]
68
-
69
-
70
- def _fmt_pct(x: float | None) -> str:
71
- """Render a 0-1 fraction as 'NN%' (or 'NN.N%' for non-whole values)."""
72
- if x is None:
73
- return ""
74
- pct = float(x) * 100
75
- return f"{pct:.0f}%" if pct == int(pct) else f"{pct:.1f}%"
76
-
77
-
78
- def load_leaderboard() -> pd.DataFrame:
79
- rows = _load_rows_from_hub()
80
- if rows is None:
81
- print("[load_leaderboard] falling back to local results.jsonl")
82
- rows = _load_rows_from_local()
83
- if not rows:
84
- return pd.DataFrame(columns=LEADERBOARD_COLS)
85
- df = pd.DataFrame(rows)
86
- cols = [c for c in LEADERBOARD_COLS if c in df.columns]
87
- df = (
88
- df[cols]
89
- .sort_values("aggregate_score", ascending=False, na_position="last")
90
- .reset_index(drop=True)
91
- )
92
- if "validity_rate" in df.columns:
93
- df["validity_rate"] = df["validity_rate"].map(_fmt_pct)
94
- return df
95
 
96
 
97
  def handle_submit(
 
1
+ """CADGenBench Leaderboard Space - Gradio UI assembly.
2
 
3
+ Read path lives in :mod:`leaderboard`. The submit handler is a UI-only
4
+ stub here; the real validation + async eval lands in :mod:`submit` as
5
+ part of Step 6 (E).
 
 
6
  """
 
7
  from __future__ import annotations
8
 
 
 
9
  from pathlib import Path
10
 
11
  import gradio as gr
 
 
12
 
13
+ from leaderboard import (
14
+ HF_DATA_REPO,
15
+ HF_SUBMISSIONS_REPO,
16
+ load_leaderboard,
17
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
 
20
  def handle_submit(
leaderboard.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Leaderboard read path.
2
+
3
+ Loads `results.jsonl` from the submissions dataset on the Hub (or falls
4
+ back to the local mirror on any Hub error) and shapes the rows into the
5
+ dataframe shown on the Leaderboard tab. Module-level constants describe
6
+ the env-var-driven repo identities that the submit path also consumes.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import os
12
+ from pathlib import Path
13
+
14
+ import pandas as pd
15
+ from huggingface_hub import hf_hub_download
16
+
17
+ HF_ORG = os.getenv("HF_ORG", "michaelr27")
18
+ HF_SUBMISSIONS_REPO = os.getenv(
19
+ "HF_SUBMISSIONS_REPO", f"{HF_ORG}/cadgenbench-submissions"
20
+ )
21
+ HF_DATA_REPO = os.getenv("HF_DATA_REPO", f"{HF_ORG}/cadgenbench-data")
22
+
23
+ LOCAL_RESULTS_PATH = Path(__file__).parent / "results.jsonl"
24
+
25
+ LEADERBOARD_COLS = [
26
+ "submission_name",
27
+ "submitter_name",
28
+ "aggregate_score",
29
+ "validity_rate",
30
+ "submitted_at",
31
+ "cadgenbench_version",
32
+ ]
33
+
34
+
35
+ def _load_rows_from_hub() -> list[dict] | None:
36
+ """Pull results.jsonl from the submissions dataset.
37
+
38
+ Returns None on any failure so callers can fall back to the local file.
39
+ """
40
+ try:
41
+ path = hf_hub_download(
42
+ repo_id=HF_SUBMISSIONS_REPO,
43
+ filename="results.jsonl",
44
+ repo_type="dataset",
45
+ force_download=True,
46
+ )
47
+ return [
48
+ json.loads(line)
49
+ for line in Path(path).read_text().splitlines()
50
+ if line.strip()
51
+ ]
52
+ except Exception as e: # noqa: BLE001 - any failure should fall back
53
+ print(f"[load_leaderboard] Hub fetch failed ({type(e).__name__}: {e})")
54
+ return None
55
+
56
+
57
+ def _load_rows_from_local() -> list[dict]:
58
+ if not LOCAL_RESULTS_PATH.exists():
59
+ return []
60
+ return [
61
+ json.loads(line)
62
+ for line in LOCAL_RESULTS_PATH.read_text().splitlines()
63
+ if line.strip()
64
+ ]
65
+
66
+
67
+ def _fmt_pct(x: float | None) -> str:
68
+ """Render a 0-1 fraction as 'NN%' (or 'NN.N%' for non-whole values)."""
69
+ if x is None:
70
+ return ""
71
+ pct = float(x) * 100
72
+ return f"{pct:.0f}%" if pct == int(pct) else f"{pct:.1f}%"
73
+
74
+
75
+ def load_leaderboard() -> pd.DataFrame:
76
+ rows = _load_rows_from_hub()
77
+ if rows is None:
78
+ print("[load_leaderboard] falling back to local results.jsonl")
79
+ rows = _load_rows_from_local()
80
+ if not rows:
81
+ return pd.DataFrame(columns=LEADERBOARD_COLS)
82
+ df = pd.DataFrame(rows)
83
+ cols = [c for c in LEADERBOARD_COLS if c in df.columns]
84
+ df = (
85
+ df[cols]
86
+ .sort_values("aggregate_score", ascending=False, na_position="last")
87
+ .reset_index(drop=True)
88
+ )
89
+ if "validity_rate" in df.columns:
90
+ df["validity_rate"] = df["validity_rate"].map(_fmt_pct)
91
+ return df