Spaces:

HuggingAI4Engineering
/

cadgenbench-leaderboard

Running

Michael Rabinovich committed on 9 days ago

Commit

5aee3e5

1 Parent(s): 00d091d

leaderboard: render pending / failed score cells with a status tag

Blank score cells on a pending row left the only "this is in
progress" cue in the status column, which read as broken to a
user expecting at least *some* signal in the score columns.

_fmt_pct and _fmt_score are now status-aware: pending rows render
the tag "evaluating..." (hourglass prefix) in both score columns,
failed rows render "failed" (cross prefix). Completed rows render
the score number as before.

Matches schema.md's "table renders the three states distinctly
(spinner / score / error tag)" description and lines up with the
visual pattern other HF leaderboards use while a row is queued.

Files changed (1) hide show

leaderboard.py +23 -8

leaderboard.py CHANGED Viewed

@@ -32,6 +32,9 @@ LEADERBOARD_COLS = [
     "cadgenbench_version",
 ]
 def _load_rows_from_hub() -> list[dict] | None:
     """Pull results.jsonl from the submissions dataset.
@@ -65,21 +68,29 @@ def _load_rows_from_local() -> list[dict]:
     ]
-def _fmt_pct(x: float | None) -> str:
     """Render a 0-1 fraction as 'NN%' (or 'NN.N%' for non-whole values).
-    ``pandas`` coerces JSON ``null`` to ``NaN`` on column construction,
-    so ``pd.isna`` is the safe gate (catches both ``None`` and ``NaN``).
-    Returns ``""`` so pending / failed rows render with blank cells.
     """
     if pd.isna(x):
         return ""
     pct = float(x) * 100
     return f"{pct:.0f}%" if pct == int(pct) else f"{pct:.1f}%"
-def _fmt_score(x: float | None) -> str:
-    """Render an aggregate CAD score as a 4-decimal float, blank on null."""
     if pd.isna(x):
         return ""
     return f"{float(x):.4f}"
@@ -107,7 +118,11 @@ def load_leaderboard() -> pd.DataFrame:
         .reset_index(drop=True)
     )
     if "validity_rate" in df.columns:
-        df["validity_rate"] = df["validity_rate"].map(_fmt_pct)
     if "aggregate_score" in df.columns:
-        df["aggregate_score"] = df["aggregate_score"].map(_fmt_score)
     return df

     "cadgenbench_version",
 ]
+PENDING_CELL_TAG = "⏳ evaluating..."
+FAILED_CELL_TAG = "✗ failed"
 def _load_rows_from_hub() -> list[dict] | None:
     """Pull results.jsonl from the submissions dataset.
     ]
+def _fmt_pct(x: float | None, status: str) -> str:
     """Render a 0-1 fraction as 'NN%' (or 'NN.N%' for non-whole values).
+    Status-aware: pending / failed rows render a tag in place of the
+    number (the row's eventual score is not yet known or never will
+    be). ``pd.isna`` covers both ``None`` and pandas-coerced ``NaN``.
     """
+    if status == "pending":
+        return PENDING_CELL_TAG
+    if status == "failed":
+        return FAILED_CELL_TAG
     if pd.isna(x):
         return ""
     pct = float(x) * 100
     return f"{pct:.0f}%" if pct == int(pct) else f"{pct:.1f}%"
+def _fmt_score(x: float | None, status: str) -> str:
+    """Render an aggregate CAD score, status-aware tag on pending / failed."""
+    if status == "pending":
+        return PENDING_CELL_TAG
+    if status == "failed":
+        return FAILED_CELL_TAG
     if pd.isna(x):
         return ""
     return f"{float(x):.4f}"
         .reset_index(drop=True)
     )
     if "validity_rate" in df.columns:
+        df["validity_rate"] = df.apply(
+            lambda r: _fmt_pct(r["validity_rate"], r["status"]), axis=1,
+        )
     if "aggregate_score" in df.columns:
+        df["aggregate_score"] = df.apply(
+            lambda r: _fmt_score(r["aggregate_score"], r["status"]), axis=1,
+        )
     return df