Michael Rabinovich committed on
Commit
5aee3e5
·
1 Parent(s): 00d091d

leaderboard: render pending / failed score cells with a status tag

Browse files

Blank score cells on a pending row left the status column as the
only "this is in progress" cue, which read as broken to a user
expecting at least *some* signal in the score columns.

_fmt_pct and _fmt_score are now status-aware: pending rows render
the tag "evaluating..." (hourglass prefix) in both score columns;
failed rows render "failed" (cross prefix). Rows with any other
status render the score number as before (blank when it is null).

Matches schema.md's "table renders the three states distinctly
(spinner / score / error tag)" description and lines up with the
visual pattern other HF leaderboards use while a row is queued.

Files changed (1) hide show
  1. leaderboard.py +23 -8
leaderboard.py CHANGED
@@ -32,6 +32,9 @@ LEADERBOARD_COLS = [
32
  "cadgenbench_version",
33
  ]
34
 
 
 
 
35
 
36
  def _load_rows_from_hub() -> list[dict] | None:
37
  """Pull results.jsonl from the submissions dataset.
@@ -65,21 +68,29 @@ def _load_rows_from_local() -> list[dict]:
65
  ]
66
 
67
 
68
- def _fmt_pct(x: float | None) -> str:
69
  """Render a 0-1 fraction as 'NN%' (or 'NN.N%' for non-whole values).
70
 
71
- ``pandas`` coerces JSON ``null`` to ``NaN`` on column construction,
72
- so ``pd.isna`` is the safe gate (catches both ``None`` and ``NaN``).
73
- Returns ``""`` so pending / failed rows render with blank cells.
74
  """
 
 
 
 
75
  if pd.isna(x):
76
  return ""
77
  pct = float(x) * 100
78
  return f"{pct:.0f}%" if pct == int(pct) else f"{pct:.1f}%"
79
 
80
 
81
- def _fmt_score(x: float | None) -> str:
82
- """Render an aggregate CAD score as a 4-decimal float, blank on null."""
 
 
 
 
83
  if pd.isna(x):
84
  return ""
85
  return f"{float(x):.4f}"
@@ -107,7 +118,11 @@ def load_leaderboard() -> pd.DataFrame:
107
  .reset_index(drop=True)
108
  )
109
  if "validity_rate" in df.columns:
110
- df["validity_rate"] = df["validity_rate"].map(_fmt_pct)
 
 
111
  if "aggregate_score" in df.columns:
112
- df["aggregate_score"] = df["aggregate_score"].map(_fmt_score)
 
 
113
  return df
 
32
  "cadgenbench_version",
33
  ]
34
 
35
+ PENDING_CELL_TAG = "⏳ evaluating..."
36
+ FAILED_CELL_TAG = "✗ failed"
37
+
38
 
39
  def _load_rows_from_hub() -> list[dict] | None:
40
  """Pull results.jsonl from the submissions dataset.
 
68
  ]
69
 
70
 
71
+ def _fmt_pct(x: float | None, status: str) -> str:
72
  """Render a 0-1 fraction as 'NN%' (or 'NN.N%' for non-whole values).
73
 
74
+ Status-aware: pending / failed rows render a tag in place of the
75
+ number (the row's eventual score is not yet known or never will
76
+ be). ``pd.isna`` covers both ``None`` and pandas-coerced ``NaN``.
77
  """
78
+ if status == "pending":
79
+ return PENDING_CELL_TAG
80
+ if status == "failed":
81
+ return FAILED_CELL_TAG
82
  if pd.isna(x):
83
  return ""
84
  pct = float(x) * 100
85
  return f"{pct:.0f}%" if pct == int(pct) else f"{pct:.1f}%"
86
 
87
 
88
+ def _fmt_score(x: float | None, status: str) -> str:
89
+ """Render an aggregate CAD score, status-aware tag on pending / failed."""
90
+ if status == "pending":
91
+ return PENDING_CELL_TAG
92
+ if status == "failed":
93
+ return FAILED_CELL_TAG
94
  if pd.isna(x):
95
  return ""
96
  return f"{float(x):.4f}"
 
118
  .reset_index(drop=True)
119
  )
120
  if "validity_rate" in df.columns:
121
+ df["validity_rate"] = df.apply(
122
+ lambda r: _fmt_pct(r["validity_rate"], r["status"]), axis=1,
123
+ )
124
  if "aggregate_score" in df.columns:
125
+ df["aggregate_score"] = df.apply(
126
+ lambda r: _fmt_score(r["aggregate_score"], r["status"]), axis=1,
127
+ )
128
  return df