Michael Rabinovich Cursor committed on
Commit
afed315
·
1 Parent(s): c040324

step 5: leaderboard reads from submissions dataset on hub

Browse files

Replace the local-only JSONL read with hf_hub_download(force_download=True)
against michaelr27/cadgenbench-submissions. force_download avoids the
stale-snapshot trap that datasets.load_dataset would create per process.

Falls back to AI4Engineering/results.jsonl on any Hub error (broad except)
so the table stays up during a transient outage or a mid-migration rename.

HF_TOKEN secret already set on the Space via HfApi.add_space_secret to
read the private dataset. Secret becomes write-only at launch when the
dataset goes public; rotation TODO documented in space-setup/migration.md.

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (1) hide show
  1. app.py +33 -3
app.py CHANGED
@@ -15,6 +15,7 @@ from pathlib import Path
15
 
16
  import gradio as gr
17
  import pandas as pd
 
18
 
19
  HF_ORG = os.getenv("HF_ORG", "michaelr27")
20
  HF_SUBMISSIONS_REPO = os.getenv(
@@ -34,14 +35,43 @@ LEADERBOARD_COLS = [
34
  ]
35
 
36
 
37
- def load_leaderboard() -> pd.DataFrame:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  if not LOCAL_RESULTS_PATH.exists():
39
- return pd.DataFrame(columns=LEADERBOARD_COLS)
40
- rows = [
41
  json.loads(line)
42
  for line in LOCAL_RESULTS_PATH.read_text().splitlines()
43
  if line.strip()
44
  ]
 
 
 
 
 
 
 
45
  if not rows:
46
  return pd.DataFrame(columns=LEADERBOARD_COLS)
47
  df = pd.DataFrame(rows)
 
15
 
16
  import gradio as gr
17
  import pandas as pd
18
+ from huggingface_hub import hf_hub_download
19
 
20
  HF_ORG = os.getenv("HF_ORG", "michaelr27")
21
  HF_SUBMISSIONS_REPO = os.getenv(
 
35
  ]
36
 
37
 
38
def _load_rows_from_hub() -> list[dict] | None:
    """Pull results.jsonl from the submissions dataset on the Hub.

    Downloads ``results.jsonl`` from the ``HF_SUBMISSIONS_REPO`` dataset repo
    with ``force_download=True`` so each call re-fetches the file rather than
    reusing a previously cached copy, then parses every non-blank line as JSON.

    Returns:
        One parsed JSON value per non-blank line, or ``None`` on any failure
        (download error, unreadable file, malformed JSON) so callers can fall
        back to the local file.
    """
    try:
        path = hf_hub_download(
            repo_id=HF_SUBMISSIONS_REPO,
            filename="results.jsonl",
            repo_type="dataset",
            force_download=True,
        )
        # Decode explicitly as UTF-8: the locale default encoding may differ
        # between hosts and would mis-decode (or reject) non-ASCII rows.
        text = Path(path).read_text(encoding="utf-8")
        return [json.loads(line) for line in text.splitlines() if line.strip()]
    except Exception as e:  # noqa: BLE001 — any failure should fall back
        print(f"[load_leaderboard] Hub fetch failed ({type(e).__name__}: {e})")
        return None
58
+
59
+
60
def _load_rows_from_local() -> list[dict]:
    """Read rows from the local results file at ``LOCAL_RESULTS_PATH``.

    Returns:
        One parsed JSON value per non-blank line of the file, or an empty
        list when the file does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    if not LOCAL_RESULTS_PATH.exists():
        return []
    # Decode explicitly as UTF-8 instead of relying on the locale default,
    # which varies between hosts and can break on non-ASCII rows.
    text = LOCAL_RESULTS_PATH.read_text(encoding="utf-8")
    return [json.loads(line) for line in text.splitlines() if line.strip()]
68
+
69
+
70
+ def load_leaderboard() -> pd.DataFrame:
71
+ rows = _load_rows_from_hub()
72
+ if rows is None:
73
+ print("[load_leaderboard] falling back to local results.jsonl")
74
+ rows = _load_rows_from_local()
75
  if not rows:
76
  return pd.DataFrame(columns=LEADERBOARD_COLS)
77
  df = pd.DataFrame(rows)