Michael Rabinovich committed on
Commit ·
0957a56
1
Parent(s): 007a50e
admin: bulk promote/demote/delete + admin table loader
Browse files
adds load_admin_table to the leaderboard reader: a single flat frame of
every row (both tiers) with a leading editable select column, for the
admin tab to act on. reworks admin.py around bulk helpers
(promote_rows, demote_rows, delete_rows) that each do one results.jsonl
write for the whole selection; delete also best-effort removes the
companion zip and report artifacts. the singular promote_row/demote_row
stay as thin wrappers. tests cover the bulk paths, a missing-id abort,
empty-selection guards, and artifact deletion, all with the hub mocked.
- admin.py +112 -26
- leaderboard.py +61 -0
- tests/test_admin.py +64 -1
admin.py
CHANGED
|
@@ -14,11 +14,18 @@ from __future__ import annotations
|
|
| 14 |
|
| 15 |
import logging
|
| 16 |
import os
|
| 17 |
-
from typing import Any
|
| 18 |
|
| 19 |
import gradio as gr
|
|
|
|
| 20 |
|
| 21 |
-
from submit import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
logger = logging.getLogger(__name__)
|
| 24 |
|
|
@@ -54,59 +61,138 @@ def is_admin(profile: gr.OAuthProfile | None) -> bool:
|
|
| 54 |
return profile.username in admin_usernames()
|
| 55 |
|
| 56 |
|
| 57 |
-
def
|
| 58 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
|
|
|
|
|
|
| 63 |
|
| 64 |
Raises:
|
| 65 |
-
ValueError: *method* is
|
| 66 |
-
LookupError:
|
|
|
|
|
|
|
| 67 |
"""
|
| 68 |
if method not in VALID_METHODS:
|
| 69 |
raise ValueError(
|
| 70 |
f"Unknown validation_method {method!r}; expected one of "
|
| 71 |
f"{', '.join(VALID_METHODS)}."
|
| 72 |
)
|
|
|
|
| 73 |
|
| 74 |
def mutate(rows: list[dict[str, Any]]) -> None:
|
|
|
|
| 75 |
for row in rows:
|
| 76 |
-
if row.get("submission_id")
|
| 77 |
row["validation_status"] = "validated"
|
| 78 |
row["validation_method"] = method
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
f"No row with submission_id={submission_id!r} in results.jsonl."
|
| 82 |
-
)
|
| 83 |
|
| 84 |
_hub_rmw_results(
|
| 85 |
mutate,
|
| 86 |
-
commit_message=f"promote {
|
| 87 |
)
|
| 88 |
|
| 89 |
|
| 90 |
-
def
|
| 91 |
-
"""Return
|
| 92 |
|
| 93 |
-
|
| 94 |
-
|
| 95 |
|
| 96 |
Raises:
|
| 97 |
-
|
|
|
|
| 98 |
"""
|
|
|
|
|
|
|
| 99 |
def mutate(rows: list[dict[str, Any]]) -> None:
|
|
|
|
| 100 |
for row in rows:
|
| 101 |
-
if row.get("submission_id")
|
| 102 |
row["validation_status"] = "unvalidated"
|
| 103 |
row["validation_method"] = None
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
f"No row with submission_id={submission_id!r} in results.jsonl."
|
| 107 |
-
)
|
| 108 |
|
| 109 |
_hub_rmw_results(
|
| 110 |
mutate,
|
| 111 |
-
commit_message=f"demote {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
import logging
|
| 16 |
import os
|
| 17 |
+
from typing import Any, Iterable
|
| 18 |
|
| 19 |
import gradio as gr
|
| 20 |
+
from huggingface_hub.errors import EntryNotFoundError
|
| 21 |
|
| 22 |
+
from submit import (
|
| 23 |
+
HF_SUBMISSIONS_REPO,
|
| 24 |
+
REPORTS_DIR,
|
| 25 |
+
SUBMISSIONS_DIR,
|
| 26 |
+
_HF_API,
|
| 27 |
+
_hub_rmw_results,
|
| 28 |
+
)
|
| 29 |
|
| 30 |
logger = logging.getLogger(__name__)
|
| 31 |
|
|
|
|
| 61 |
return profile.username in admin_usernames()
|
| 62 |
|
| 63 |
|
| 64 |
+
def _clean_id_set(submission_ids: Iterable[str]) -> set[str]:
|
| 65 |
+
"""Normalise an id iterable to a non-empty set, else raise.
|
| 66 |
+
|
| 67 |
+
Guards every bulk helper: a no-op call (nothing selected) is a
|
| 68 |
+
caller error, surfaced as ``ValueError`` rather than a silent
|
| 69 |
+
empty write.
|
| 70 |
+
"""
|
| 71 |
+
ids = {str(s) for s in submission_ids if s}
|
| 72 |
+
if not ids:
|
| 73 |
+
raise ValueError("No submissions selected.")
|
| 74 |
+
return ids
|
| 75 |
+
|
| 76 |
|
| 77 |
+
def promote_rows(submission_ids: Iterable[str], method: str) -> None:
    """Mark every selected row as validated, recording *method*.

    The whole selection goes through a single ``_hub_rmw_results``
    call. Rows that are already validated are simply rewritten with
    the same status and the given *method*.

    Raises:
        ValueError: *method* is not in ``VALID_METHODS``, or the
            selection is empty.
        LookupError: at least one selected id has no row. It is raised
            from inside the mutate callback, i.e. while the
            read-modify-write is still in progress.
    """
    if method not in VALID_METHODS:
        raise ValueError(
            f"Unknown validation_method {method!r}; expected one of "
            f"{', '.join(VALID_METHODS)}."
        )
    targets = _clean_id_set(submission_ids)

    def mutate(rows: list[dict[str, Any]]) -> None:
        hits = [r for r in rows if r.get("submission_id") in targets]
        for hit in hits:
            hit["validation_status"] = "validated"
            hit["validation_method"] = method
        # Any selected id without a matching row aborts the batch.
        _raise_for_missing(targets, {hit["submission_id"] for hit in hits})

    _hub_rmw_results(
        mutate,
        commit_message=f"promote {len(targets)} row(s) to validated ({method})",
    )
|
| 109 |
|
| 110 |
|
| 111 |
+
def demote_rows(submission_ids: Iterable[str]) -> None:
    """Send every selected row back to the unvalidated tier.

    Clears ``validation_method`` on each matched row and performs a
    single ``_hub_rmw_results`` call for the whole selection. Rows
    already unvalidated are rewritten with the same values.

    Raises:
        ValueError: the selection is empty.
        LookupError: at least one selected id has no row in
            ``results.jsonl``.
    """
    targets = _clean_id_set(submission_ids)

    def mutate(rows: list[dict[str, Any]]) -> None:
        hits = [r for r in rows if r.get("submission_id") in targets]
        for hit in hits:
            hit["validation_status"] = "unvalidated"
            hit["validation_method"] = None
        # Any selected id without a matching row aborts the batch.
        _raise_for_missing(targets, {hit["submission_id"] for hit in hits})

    _hub_rmw_results(
        mutate,
        commit_message=f"demote {len(targets)} row(s) to unvalidated",
    )
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def delete_rows(submission_ids: Iterable[str]) -> None:
    """Irreversibly remove the selected submissions: blobs first, rows last.

    For each id (in sorted order) three companion files are targeted:
    ``<SUBMISSIONS_DIR>/<id>.zip`` and ``<REPORTS_DIR>/<id>.{html,json}``.
    A file that is not there (``EntryNotFoundError``) is skipped
    silently; any other failure is logged as a warning and the loop
    carries on. Afterwards the matching rows are filtered out of
    ``results.jsonl`` in one ``_hub_rmw_results`` call. Ids with no row
    are not an error here, so re-running after a partial failure works.

    Raises:
        ValueError: the selection is empty.
    """
    doomed = _clean_id_set(submission_ids)

    artifact_paths = [
        path
        for sid in sorted(doomed)
        for path in (
            f"{SUBMISSIONS_DIR}/{sid}.zip",
            f"{REPORTS_DIR}/{sid}.html",
            f"{REPORTS_DIR}/{sid}.json",
        )
    ]
    for path in artifact_paths:
        try:
            _HF_API.delete_file(
                path_in_repo=path,
                repo_id=HF_SUBMISSIONS_REPO,
                repo_type="dataset",
                commit_message=f"delete artifact {path}",
            )
        except EntryNotFoundError:
            # Nothing to remove for this path; move on to the next one.
            continue
        except Exception as exc:  # noqa: BLE001 - keep deleting the rest
            logger.warning(
                "Failed to delete artifact %s (%s: %s)",
                path, type(exc).__name__, exc,
            )

    def mutate(rows: list[dict[str, Any]]) -> None:
        survivors = [r for r in rows if r.get("submission_id") not in doomed]
        # Slice-assign so the caller's list object itself is updated.
        rows[:] = survivors

    _hub_rmw_results(
        mutate, commit_message=f"delete {len(doomed)} submission(s)",
    )
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
def _raise_for_missing(requested: set[str], seen: set[str]) -> None:
|
| 183 |
+
"""Raise ``LookupError`` if any requested id was not found in the rows."""
|
| 184 |
+
missing = requested - seen
|
| 185 |
+
if missing:
|
| 186 |
+
raise LookupError(
|
| 187 |
+
f"submission_id(s) not in results.jsonl: {', '.join(sorted(missing))}."
|
| 188 |
+
)
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
def promote_row(submission_id: str, method: str) -> None:
    """Promote one submission by delegating to :func:`promote_rows`.

    *submission_id* is wrapped in a one-element list and *method* is
    forwarded unchanged.
    """
    single_selection = [submission_id]
    promote_rows(single_selection, method)
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
def demote_row(submission_id: str) -> None:
    """Demote one submission by delegating to :func:`demote_rows`.

    *submission_id* is wrapped in a one-element list.
    """
    single_selection = [submission_id]
    demote_rows(single_selection)
|
leaderboard.py
CHANGED
|
@@ -467,3 +467,64 @@ def build_combined_csv() -> str:
|
|
| 467 |
path = out_dir / f"cadgenbench-leaderboard-{uuid.uuid4().hex[:8]}.csv"
|
| 468 |
df.to_csv(path, index=False)
|
| 469 |
return str(path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
path = out_dir / f"cadgenbench-leaderboard-{uuid.uuid4().hex[:8]}.csv"
|
| 468 |
df.to_csv(path, index=False)
|
| 469 |
return str(path)
|
| 470 |
+
|
| 471 |
+
|
| 472 |
+
# Column layout of the Admin-tab table: one flat view over every row
# (both tiers). The first column is an editable ``select`` checkbox;
# everything after it is read-only context for the maintainer. Values
# are the raw row values, not the display-formatted leaderboard
# strings, so what is shown is exactly what is stored.
ADMIN_SELECT_COL = "select"
ADMIN_COLUMNS = [
    ADMIN_SELECT_COL,  # leading checkbox column
    "validation_status",
    "validation_method",
    "submission_name",
    "submitter_name",
    "submitted_at",
    "status",
    "aggregate_score",
    "submission_id",  # kept last: the key the admin actions operate on
]
|
| 489 |
+
|
| 490 |
+
|
| 491 |
+
def load_admin_table() -> pd.DataFrame:
    """Assemble the Admin tab's table, one row per submission.

    Rows come from the hub, or from the local ``results.jsonl`` when
    the hub loader returns ``None``. A row whose ``status`` is missing
    or ``None`` is treated as ``"completed"``, and one whose
    ``validation_status`` is missing or ``None`` as ``"unvalidated"``.
    The frame is projected onto ``ADMIN_COLUMNS``, sorted by
    ``validation_status`` then ``aggregate_score`` (both descending,
    missing values last), and given an all-``False`` ``select`` column.
    With no rows at all, an empty frame with those columns is returned.
    """
    records = _load_rows_from_hub()
    if records is None:
        logger.info("Admin table build falling back to local results.jsonl")
        records = _load_rows_from_local()
    if not records:
        return pd.DataFrame(columns=ADMIN_COLUMNS)

    # Back-fill the two fields that older rows may lack.
    legacy_defaults = (
        ("status", "completed"),
        ("validation_status", "unvalidated"),
    )
    for record in records:
        for key, fallback in legacy_defaults:
            if record.get(key) is None:
                record[key] = fallback

    table = pd.DataFrame(records)
    for column in ADMIN_COLUMNS:
        if column not in table.columns:
            table[column] = None
    if "submitted_at" in table.columns:
        table["submitted_at"] = table["submitted_at"].apply(_fmt_timestamp)

    projected = table[ADMIN_COLUMNS]
    ordered = projected.sort_values(
        ["validation_status", "aggregate_score"],
        ascending=[False, False],
        na_position="last",
    )
    result = ordered.reset_index(drop=True)
    # Assigned last so the checkbox column is uniformly False no matter
    # what a source row may have carried under the same key.
    result[ADMIN_SELECT_COL] = False
    return result
|
tests/test_admin.py
CHANGED
|
@@ -55,7 +55,11 @@ def hub(monkeypatch):
|
|
| 55 |
re-parsed back into dicts. ``state["uploads"]`` counts the writes
|
| 56 |
so a test can assert how many commits a call produced.
|
| 57 |
"""
|
| 58 |
-
state: dict = {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
def fake_download() -> str:
|
| 61 |
return _jsonl(state["rows"])
|
|
@@ -71,8 +75,12 @@ def hub(monkeypatch):
|
|
| 71 |
]
|
| 72 |
state["uploads"] += 1
|
| 73 |
|
|
|
|
|
|
|
|
|
|
| 74 |
monkeypatch.setattr(submit, "_download_results_jsonl", fake_download)
|
| 75 |
monkeypatch.setattr(submit._HF_API, "upload_file", fake_upload)
|
|
|
|
| 76 |
return state
|
| 77 |
|
| 78 |
|
|
@@ -118,3 +126,58 @@ def test_promote_idempotent(hub):
|
|
| 118 |
# Second identical promotion produces an identical row.
|
| 119 |
admin.promote_row("beta", "code")
|
| 120 |
assert _row(hub["rows"], "beta") == once
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
re-parsed back into dicts. ``state["uploads"]`` counts the writes
|
| 56 |
so a test can assert how many commits a call produced.
|
| 57 |
"""
|
| 58 |
+
state: dict = {
|
| 59 |
+
"rows": [dict(r) for r in SEED_ROWS],
|
| 60 |
+
"uploads": 0,
|
| 61 |
+
"deleted_paths": [],
|
| 62 |
+
}
|
| 63 |
|
| 64 |
def fake_download() -> str:
|
| 65 |
return _jsonl(state["rows"])
|
|
|
|
| 75 |
]
|
| 76 |
state["uploads"] += 1
|
| 77 |
|
| 78 |
+
def fake_delete_file(*, path_in_repo, **kwargs) -> None:
|
| 79 |
+
state["deleted_paths"].append(path_in_repo)
|
| 80 |
+
|
| 81 |
monkeypatch.setattr(submit, "_download_results_jsonl", fake_download)
|
| 82 |
monkeypatch.setattr(submit._HF_API, "upload_file", fake_upload)
|
| 83 |
+
monkeypatch.setattr(submit._HF_API, "delete_file", fake_delete_file)
|
| 84 |
return state
|
| 85 |
|
| 86 |
|
|
|
|
| 126 |
# Second identical promotion produces an identical row.
|
| 127 |
admin.promote_row("beta", "code")
|
| 128 |
assert _row(hub["rows"], "beta") == once
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
def test_promote_rows_bulk(hub):
    """Promoting several ids at once updates each row with a single upload."""
    targets = ("alpha", "beta")
    admin.promote_rows(list(targets), "traces")
    for sid in targets:
        promoted = _row(hub["rows"], sid)
        assert promoted["validation_status"] == "validated"
        assert promoted["validation_method"] == "traces"
    assert hub["uploads"] == 1
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def test_demote_rows_bulk(hub):
    """Demoting several ids at once resets each row with a single upload."""
    targets = ("alpha", "beta")
    admin.demote_rows(list(targets))
    for sid in targets:
        demoted = _row(hub["rows"], sid)
        assert demoted["validation_status"] == "unvalidated"
        assert demoted["validation_method"] is None
    assert hub["uploads"] == 1
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def test_promote_rows_missing_id_raises_without_write(hub):
    """One unknown id in the batch raises and nothing is uploaded."""
    selection = ["alpha", "ghost"]
    with pytest.raises(LookupError):
        admin.promote_rows(selection, "code")
    assert hub["uploads"] == 0
    # No upload happened, so the stored alpha row keeps its old status.
    alpha = _row(hub["rows"], "alpha")
    assert alpha["validation_status"] == "unvalidated"
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def test_empty_selection_raises(hub):
    """An empty or all-falsy selection is rejected by every bulk helper."""
    attempts = (
        (admin.promote_rows, ([], "code")),
        (admin.demote_rows, ([None, ""],)),
        (admin.delete_rows, ([],)),
    )
    for helper, args in attempts:
        with pytest.raises(ValueError):
            helper(*args)
    assert hub["uploads"] == 0
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def test_delete_rows_removes_rows_and_artifacts(hub):
    """Deleting an id drops its row and targets its three artifacts."""
    admin.delete_rows(["alpha"])
    assert {r["submission_id"] for r in hub["rows"]} == {"beta"}
    # The zip and both report files were each passed to delete_file.
    expected_paths = [
        "submissions/alpha.zip",
        "reports/alpha.html",
        "reports/alpha.json",
    ]
    assert hub["deleted_paths"] == expected_paths
    assert hub["uploads"] == 1
|