Michael Rabinovich committed on
Commit
0957a56
·
1 Parent(s): 007a50e

admin: bulk promote/demote/delete + admin table loader

Browse files

adds load_admin_table to the leaderboard reader: a single flat frame of
every row (both tiers) with a leading editable select column, for the
admin tab to act on. reworks admin.py around bulk helpers
(promote_rows, demote_rows, delete_rows) that each do one results.jsonl
write for the whole selection; delete also best-effort removes the
companion zip and report artifacts. the singular promote_row/demote_row
stay as thin wrappers. tests cover the bulk paths, a missing-id abort,
empty-selection guards, and artifact deletion, all with the hub mocked.

Files changed (3) hide show
  1. admin.py +112 -26
  2. leaderboard.py +61 -0
  3. tests/test_admin.py +64 -1
admin.py CHANGED
@@ -14,11 +14,18 @@ from __future__ import annotations
14
 
15
  import logging
16
  import os
17
- from typing import Any
18
 
19
  import gradio as gr
 
20
 
21
- from submit import _hub_rmw_results
 
 
 
 
 
 
22
 
23
  logger = logging.getLogger(__name__)
24
 
@@ -54,59 +61,138 @@ def is_admin(profile: gr.OAuthProfile | None) -> bool:
54
  return profile.username in admin_usernames()
55
 
56
 
57
- def promote_row(submission_id: str, method: str) -> None:
58
- """Move a row into the validated tier with the given evidence type.
 
 
 
 
 
 
 
 
 
 
59
 
60
- Sets ``validation_status`` to ``"validated"`` and
61
- ``validation_method`` to *method*. Idempotent: re-promoting an
62
- already-validated row to the same method lands the same values.
 
 
63
 
64
  Raises:
65
- ValueError: *method* is not one of :data:`VALID_METHODS`.
66
- LookupError: no row in ``results.jsonl`` carries *submission_id*.
 
 
67
  """
68
  if method not in VALID_METHODS:
69
  raise ValueError(
70
  f"Unknown validation_method {method!r}; expected one of "
71
  f"{', '.join(VALID_METHODS)}."
72
  )
 
73
 
74
  def mutate(rows: list[dict[str, Any]]) -> None:
 
75
  for row in rows:
76
- if row.get("submission_id") == submission_id:
77
  row["validation_status"] = "validated"
78
  row["validation_method"] = method
79
- return
80
- raise LookupError(
81
- f"No row with submission_id={submission_id!r} in results.jsonl."
82
- )
83
 
84
  _hub_rmw_results(
85
  mutate,
86
- commit_message=f"promote {submission_id} to validated ({method})",
87
  )
88
 
89
 
90
- def demote_row(submission_id: str) -> None:
91
- """Return a row to the unvalidated tier, clearing ``validation_method``.
92
 
93
- Sets ``validation_status`` to ``"unvalidated"`` and nulls
94
- ``validation_method``. Idempotent on an already-unvalidated row.
95
 
96
  Raises:
97
- LookupError: no row in ``results.jsonl`` carries *submission_id*.
 
98
  """
 
 
99
  def mutate(rows: list[dict[str, Any]]) -> None:
 
100
  for row in rows:
101
- if row.get("submission_id") == submission_id:
102
  row["validation_status"] = "unvalidated"
103
  row["validation_method"] = None
104
- return
105
- raise LookupError(
106
- f"No row with submission_id={submission_id!r} in results.jsonl."
107
- )
108
 
109
  _hub_rmw_results(
110
  mutate,
111
- commit_message=f"demote {submission_id} to unvalidated",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  import logging
16
  import os
17
+ from typing import Any, Iterable
18
 
19
  import gradio as gr
20
+ from huggingface_hub.errors import EntryNotFoundError
21
 
22
+ from submit import (
23
+ HF_SUBMISSIONS_REPO,
24
+ REPORTS_DIR,
25
+ SUBMISSIONS_DIR,
26
+ _HF_API,
27
+ _hub_rmw_results,
28
+ )
29
 
30
  logger = logging.getLogger(__name__)
31
 
 
61
  return profile.username in admin_usernames()
62
 
63
 
64
+ def _clean_id_set(submission_ids: Iterable[str]) -> set[str]:
65
+ """Normalise an id iterable to a non-empty set, else raise.
66
+
67
+ Guards every bulk helper: a no-op call (nothing selected) is a
68
+ caller error, surfaced as ``ValueError`` rather than a silent
69
+ empty write.
70
+ """
71
+ ids = {str(s) for s in submission_ids if s}
72
+ if not ids:
73
+ raise ValueError("No submissions selected.")
74
+ return ids
75
+
76
 
77
def promote_rows(submission_ids: Iterable[str], method: str) -> None:
    """Mark every selected submission as validated, citing *method*.

    The whole selection goes through a single ``_hub_rmw_results`` call.
    Rows that are already validated simply have ``validation_method``
    overwritten with *method*.

    Raises:
        ValueError: *method* is not in :data:`VALID_METHODS`, or the
            selection is empty.
        LookupError: at least one selected id has no row; raised from
            inside the mutate callback by ``_raise_for_missing``.
    """
    # Validate the method first so a bad method wins over an empty selection.
    if method not in VALID_METHODS:
        raise ValueError(
            f"Unknown validation_method {method!r}; expected one of "
            f"{', '.join(VALID_METHODS)}."
        )
    wanted = _clean_id_set(submission_ids)

    def mutate(rows: list[dict[str, Any]]) -> None:
        matched: set[str] = set()
        for entry in rows:
            sid = entry.get("submission_id")
            if sid not in wanted:
                continue
            entry["validation_status"] = "validated"
            entry["validation_method"] = method
            matched.add(sid)
        # Any id that matched no row aborts the callback.
        _raise_for_missing(wanted, matched)

    message = f"promote {len(wanted)} row(s) to validated ({method})"
    _hub_rmw_results(mutate, commit_message=message)
109
 
110
 
111
def demote_rows(submission_ids: Iterable[str]) -> None:
    """Send every selected submission back to the unvalidated tier.

    Each matching row gets ``validation_status = "unvalidated"`` and a
    ``None`` ``validation_method``. The whole selection goes through a
    single ``_hub_rmw_results`` call; rows that are already unvalidated
    end up with the same values.

    Raises:
        ValueError: the selection is empty.
        LookupError: at least one selected id has no row; raised from
            inside the mutate callback by ``_raise_for_missing``.
    """
    wanted = _clean_id_set(submission_ids)

    def mutate(rows: list[dict[str, Any]]) -> None:
        matched: set[str] = set()
        for entry in rows:
            sid = entry.get("submission_id")
            if sid not in wanted:
                continue
            entry["validation_status"] = "unvalidated"
            entry["validation_method"] = None
            matched.add(sid)
        # Any id that matched no row aborts the callback.
        _raise_for_missing(wanted, matched)

    message = f"demote {len(wanted)} row(s) to unvalidated"
    _hub_rmw_results(mutate, commit_message=message)
136
+
137
+
138
def delete_rows(submission_ids: Iterable[str]) -> None:
    """Irreversibly remove the selected submissions: artifacts, then rows.

    For each id (in sorted order) a delete is attempted on
    ``<SUBMISSIONS_DIR>/<id>.zip``, ``<REPORTS_DIR>/<id>.html`` and
    ``<REPORTS_DIR>/<id>.json``. An ``EntryNotFoundError`` is ignored;
    any other failure is logged as a warning and the loop carries on.
    Afterwards all matching rows are dropped through one
    ``_hub_rmw_results`` call. Ids with no row are not an error here.

    Raises:
        ValueError: the selection is empty.
    """
    doomed = _clean_id_set(submission_ids)

    artifact_paths = [
        artifact
        for sid in sorted(doomed)
        for artifact in (
            f"{SUBMISSIONS_DIR}/{sid}.zip",
            f"{REPORTS_DIR}/{sid}.html",
            f"{REPORTS_DIR}/{sid}.json",
        )
    ]
    for artifact in artifact_paths:
        try:
            _HF_API.delete_file(
                path_in_repo=artifact,
                repo_id=HF_SUBMISSIONS_REPO,
                repo_type="dataset",
                commit_message=f"delete artifact {artifact}",
            )
        except EntryNotFoundError:
            # Nothing stored under this path; nothing to clean up.
            continue
        except Exception as exc:  # noqa: BLE001 - keep deleting the rest
            logger.warning(
                "Failed to delete artifact %s (%s: %s)",
                artifact, type(exc).__name__, exc,
            )

    def mutate(rows: list[dict[str, Any]]) -> None:
        # Slice-assign so the caller's list object is edited in place.
        survivors = [
            entry for entry in rows
            if entry.get("submission_id") not in doomed
        ]
        rows[:] = survivors

    _hub_rmw_results(
        mutate,
        commit_message=f"delete {len(doomed)} submission(s)",
    )
180
+
181
+
182
+ def _raise_for_missing(requested: set[str], seen: set[str]) -> None:
183
+ """Raise ``LookupError`` if any requested id was not found in the rows."""
184
+ missing = requested - seen
185
+ if missing:
186
+ raise LookupError(
187
+ f"submission_id(s) not in results.jsonl: {', '.join(sorted(missing))}."
188
+ )
189
+
190
+
191
def promote_row(submission_id: str, method: str) -> None:
    """Promote one submission by delegating to :func:`promote_rows`.

    The id is wrapped in a one-element list; validation and errors are
    whatever :func:`promote_rows` raises.
    """
    selection = [submission_id]
    promote_rows(selection, method)
194
+
195
+
196
def demote_row(submission_id: str) -> None:
    """Demote one submission by delegating to :func:`demote_rows`.

    The id is wrapped in a one-element list; validation and errors are
    whatever :func:`demote_rows` raises.
    """
    selection = [submission_id]
    demote_rows(selection)
leaderboard.py CHANGED
@@ -467,3 +467,64 @@ def build_combined_csv() -> str:
467
  path = out_dir / f"cadgenbench-leaderboard-{uuid.uuid4().hex[:8]}.csv"
468
  df.to_csv(path, index=False)
469
  return str(path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
467
  path = out_dir / f"cadgenbench-leaderboard-{uuid.uuid4().hex[:8]}.csv"
468
  df.to_csv(path, index=False)
469
  return str(path)
470
+
471
+
472
# Column layout for the Admin tab's table: one flat view covering every
# row in both tiers. The first column is the editable ``select``
# checkbox; every other column is read-only context for the maintainer.
# Values are shown raw rather than as the display-formatted leaderboard
# strings, so the table reflects exactly what is stored on the row.
ADMIN_SELECT_COL = "select"
ADMIN_COLUMNS = [
    # Editable checkbox column comes first.
    ADMIN_SELECT_COL,
    # Read-only context columns.
    "validation_status",
    "validation_method",
    "submission_name",
    "submitter_name",
    "submitted_at",
    "status",
    "aggregate_score",
    # Action key, kept last.
    "submission_id",
]
489
+
490
+
491
def load_admin_table() -> pd.DataFrame:
    """Assemble the Admin tab's table, one row per submission.

    Rows come from ``_load_rows_from_hub()``; when that returns ``None``
    the local loader is used instead. Rows whose ``status`` or
    ``validation_status`` is missing or ``None`` are given
    ``"completed"`` / ``"unvalidated"`` respectively. The frame is
    limited to :data:`ADMIN_COLUMNS`, sorted descending on
    ``validation_status`` then ``aggregate_score`` (missing values
    last), and its ``select`` column is reset to ``False`` on every row.

    Returns:
        The table, or an empty frame carrying just the
        :data:`ADMIN_COLUMNS` headers when there are no rows.
    """
    records = _load_rows_from_hub()
    if records is None:
        logger.info("Admin table build falling back to local results.jsonl")
        records = _load_rows_from_local()
    if not records:
        return pd.DataFrame(columns=ADMIN_COLUMNS)

    # Backfill fields that legacy rows may lack.
    legacy_defaults = {
        "status": "completed",
        "validation_status": "unvalidated",
    }
    for record in records:
        for field, fallback in legacy_defaults.items():
            if record.get(field) is None:
                record[field] = fallback

    frame = pd.DataFrame(records)
    absent = [name for name in ADMIN_COLUMNS if name not in frame.columns]
    for name in absent:
        frame[name] = None
    # ``submitted_at`` always exists by now: it is one of ADMIN_COLUMNS.
    frame["submitted_at"] = frame["submitted_at"].apply(_fmt_timestamp)

    projected = frame[ADMIN_COLUMNS]
    ordered = projected.sort_values(
        by=["validation_status", "aggregate_score"],
        ascending=[False, False],
        na_position="last",
    )
    table = ordered.reset_index(drop=True)
    # Assigned last so the checkbox column is uniformly False regardless
    # of any ``select`` key present on the source rows.
    table[ADMIN_SELECT_COL] = False
    return table
tests/test_admin.py CHANGED
@@ -55,7 +55,11 @@ def hub(monkeypatch):
55
  re-parsed back into dicts. ``state["uploads"]`` counts the writes
56
  so a test can assert how many commits a call produced.
57
  """
58
- state: dict = {"rows": [dict(r) for r in SEED_ROWS], "uploads": 0}
 
 
 
 
59
 
60
  def fake_download() -> str:
61
  return _jsonl(state["rows"])
@@ -71,8 +75,12 @@ def hub(monkeypatch):
71
  ]
72
  state["uploads"] += 1
73
 
 
 
 
74
  monkeypatch.setattr(submit, "_download_results_jsonl", fake_download)
75
  monkeypatch.setattr(submit._HF_API, "upload_file", fake_upload)
 
76
  return state
77
 
78
 
@@ -118,3 +126,58 @@ def test_promote_idempotent(hub):
118
  # Second identical promotion produces an identical row.
119
  admin.promote_row("beta", "code")
120
  assert _row(hub["rows"], "beta") == once
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  re-parsed back into dicts. ``state["uploads"]`` counts the writes
56
  so a test can assert how many commits a call produced.
57
  """
58
+ state: dict = {
59
+ "rows": [dict(r) for r in SEED_ROWS],
60
+ "uploads": 0,
61
+ "deleted_paths": [],
62
+ }
63
 
64
  def fake_download() -> str:
65
  return _jsonl(state["rows"])
 
75
  ]
76
  state["uploads"] += 1
77
 
78
+ def fake_delete_file(*, path_in_repo, **kwargs) -> None:
79
+ state["deleted_paths"].append(path_in_repo)
80
+
81
  monkeypatch.setattr(submit, "_download_results_jsonl", fake_download)
82
  monkeypatch.setattr(submit._HF_API, "upload_file", fake_upload)
83
+ monkeypatch.setattr(submit._HF_API, "delete_file", fake_delete_file)
84
  return state
85
 
86
 
 
126
  # Second identical promotion produces an identical row.
127
  admin.promote_row("beta", "code")
128
  assert _row(hub["rows"], "beta") == once
129
+
130
+
131
def test_promote_rows_bulk(hub):
    """Promoting two ids at once updates both rows with a single upload."""
    targets = ("alpha", "beta")
    admin.promote_rows(list(targets), "traces")
    for sid in targets:
        promoted = _row(hub["rows"], sid)
        assert promoted["validation_status"] == "validated"
        assert promoted["validation_method"] == "traces"
    assert hub["uploads"] == 1
139
+
140
+
141
def test_demote_rows_bulk(hub):
    """Demoting two ids at once resets both rows with a single upload."""
    targets = ("alpha", "beta")
    admin.demote_rows(list(targets))
    for sid in targets:
        demoted = _row(hub["rows"], sid)
        assert demoted["validation_status"] == "unvalidated"
        assert demoted["validation_method"] is None
    assert hub["uploads"] == 1
149
+
150
+
151
def test_promote_rows_missing_id_raises_without_write(hub):
    """One unknown id fails the whole batch and nothing is uploaded."""
    with pytest.raises(LookupError):
        admin.promote_rows(["alpha", "ghost"], "code")
    # No upload happened, so the stored alpha row keeps its old status.
    assert hub["uploads"] == 0
    alpha = _row(hub["rows"], "alpha")
    assert alpha["validation_status"] == "unvalidated"
158
+
159
+
160
def test_empty_selection_raises(hub):
    """Each bulk helper raises ValueError when given nothing usable."""
    with pytest.raises(ValueError):
        admin.promote_rows([], "code")
    with pytest.raises(ValueError):
        admin.demote_rows([None, ""])
    with pytest.raises(ValueError):
        admin.delete_rows([])
    assert hub["uploads"] == 0
170
+
171
+
172
def test_delete_rows_removes_rows_and_artifacts(hub):
    """Deleting alpha drops its row and requests removal of its three blobs."""
    admin.delete_rows(["alpha"])
    surviving_ids = {entry["submission_id"] for entry in hub["rows"]}
    assert surviving_ids == {"beta"}
    # The zip and both report files were each passed to delete_file, in order.
    expected_paths = [
        "submissions/alpha.zip",
        "reports/alpha.html",
        "reports/alpha.json",
    ]
    assert hub["deleted_paths"] == expected_paths
    assert hub["uploads"] == 1