Michael Rabinovich Cursor committed on
Commit
335cdfa
·
1 Parent(s): 2e44157

feat(admin): hide the entire admin panel from non-admins

Browse files

The Admin tab previously showed everyone the table + (disabled) controls.
Actions were already gated server-side (is_admin re-check in every handler),
but the UI and the read-only table were visible to all, which looked
unprofessional and meant submission rows were still streamed to non-admins.

Wrap all admin UI in a Column that stays hidden unless the logged-in user
is in CADGENBENCH_ADMINS; non-admins (and logged-out visitors) now see only
the login/logout button and a status line. The table starts empty and the
load/refresh/timer handlers return an empty frame for non-admins, so no
submission data is sent behind the hidden panel. Server-side gating on the
mutating handlers is unchanged (defense in depth).

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (2) hide show
  1. app.py +158 -122
  2. tests/test_proxy.py +33 -1
app.py CHANGED
@@ -267,34 +267,41 @@ def _admin_selection_status(table_df: pd.DataFrame | None) -> str:
267
  def _gate_admin_controls(
268
  profile: gr.OAuthProfile | None,
269
  ) -> tuple[
270
- gr.Dataframe, gr.Radio, gr.Button, gr.Button, gr.Checkbox, gr.Button,
271
- gr.Button, gr.Checkbox, gr.Button, gr.Textbox, gr.Button, str,
272
  ]:
273
- """Enable the admin controls only for a logged-in user in the admin set.
274
-
275
- Runs on every page load and re-runs on LoginButton auth events, so
276
- the table value is also refreshed from the live Hub data instead of
277
- staying pinned to whatever rows existed when the Space process
278
- booted. Non-admins and logged-out visitors get the tab with the
279
- table read-only and every control disabled, mirroring the server-side
280
- re-check in each handler. The armed-by-confirmation buttons (delete,
281
- stop-and-delete, rescore-selected, rescore-all) always load disarmed:
282
- they only enable once their confirm box is ticked / phrase typed.
 
 
 
283
  """
284
- admin_df, error = _safe_load_admin()
285
- if error:
286
- gr.Warning(f"Admin table unavailable: {error}")
287
  admin = is_admin(profile)
 
 
 
 
 
 
288
  if profile is None:
289
- status = "Log in with an admin account to enable the controls below."
290
  elif admin:
291
  status = f"Signed in as `{profile.username}`. Admin controls enabled."
292
  else:
293
  status = (
294
  f"Signed in as `{profile.username}`, which is not in the admin "
295
- "set. Controls are disabled."
296
  )
297
  return (
 
298
  gr.Dataframe(value=admin_df, interactive=admin),
299
  gr.Radio(interactive=admin),
300
  gr.Button(interactive=admin),
@@ -322,13 +329,26 @@ def _arm_delete(
322
  return gr.Button(interactive=armed), gr.Button(interactive=armed)
323
 
324
 
325
- def _refresh_admin_table() -> pd.DataFrame:
 
 
 
 
 
 
 
 
 
 
326
  """Admin Refresh button handler: reload the admin table, toast on failure.
327
 
328
  Uses the no-crash :func:`_safe_load_admin` so a Hub read failure
329
  surfaces as a loud ``gr.Warning`` plus an empty table rather than an
330
- uncaught exception.
 
331
  """
 
 
332
  admin_df, error = _safe_load_admin()
333
  if error:
334
  gr.Warning(f"Admin table unavailable: {error}")
@@ -356,7 +376,10 @@ def _reapply_selection(
356
  return fresh
357
 
358
 
359
- def _auto_refresh_admin_table(current_df: pd.DataFrame | None) -> pd.DataFrame:
 
 
 
360
  """Timer-tick handler: reload the admin table, preserving ticked rows.
361
 
362
  The leaderboard tables auto-refresh every 10s but the admin table did
@@ -365,7 +388,10 @@ def _auto_refresh_admin_table(current_df: pd.DataFrame | None) -> pd.DataFrame:
365
  Unlike the leaderboard handler it stays silent (no per-tick toast)
366
  and, on a Hub read failure, returns the current frame unchanged so a
367
  transient blip never blanks the table or drops the user's selection.
 
368
  """
 
 
369
  admin_df, error = _safe_load_admin()
370
  if error:
371
  return current_df if current_df is not None else admin_df
@@ -1123,116 +1149,124 @@ to publish the resulting row on the public leaderboard.
1123
  gr.Markdown(ABOUT_MD)
1124
 
1125
  with gr.Tab("Admin"):
1126
- # Maintainer-only controls. The tab is visible to everyone (a
1127
- # hint the path exists); the table + buttons are gated to OAuth
1128
- # users in the CADGENBENCH_ADMINS set via the `blocks.load`
1129
- # handler below + a server-side re-check in every handler. See
1130
- # decisions/validation-policy.md.
1131
- gr.Markdown(
1132
- "## Admin\n"
1133
- "Tick rows in the **select** column, then promote them into the "
1134
- "**Validated** tier (recording an evidence type), demote them back "
1135
- "to **Unvalidated**, delete them, or rescore them against the "
1136
- "current ground truth. Actions apply to every ticked row at once. "
1137
- "Limited to maintainers in the admin set; everyone else sees the "
1138
- "tab with the controls disabled."
1139
- )
1140
  admin_login_btn = gr.LoginButton()
1141
  admin_status = gr.Markdown(
1142
- "Log in with an admin account to enable the controls below."
1143
- )
1144
- # Only the leading `select` column is editable; the rest is
1145
- # read-only context. Click-to-tick drives every action below.
1146
- # `_safe_load_admin` keeps a Hub read failure from crashing the
1147
- # Space at boot (the admin table loads at construction time).
1148
- initial_admin_table, _ = _safe_load_admin()
1149
- admin_table = gr.Dataframe(
1150
- value=initial_admin_table,
1151
- datatype=[
1152
- "bool", "str", "str", "str", "str", "str", "str", "number",
1153
- "str",
1154
- ],
1155
- static_columns=list(range(1, len(ADMIN_COLUMNS))),
1156
- interactive=False,
1157
- label="Submissions (tick select to choose rows)",
1158
- wrap=True,
1159
  )
1160
- admin_selection_md = gr.Markdown("_No rows selected._")
1161
- admin_method_radio = gr.Radio(
1162
- choices=list(VALID_METHODS),
1163
- value="manual",
1164
- label="validation_method (applied to all rows on promote)",
1165
- interactive=False,
1166
- )
1167
- with gr.Row():
1168
- promote_btn = gr.Button(
1169
- "Mark validated", variant="primary", interactive=False,
1170
- )
1171
- demote_btn = gr.Button("Mark unvalidated", interactive=False)
1172
- with gr.Accordion("Danger zone: delete", open=False):
1173
  gr.Markdown(
1174
- "Permanently deletes the ticked rows **and** their uploaded "
1175
- "zip + report files from the submissions dataset. This cannot "
1176
- "be undone (only a manual revert of the dataset commit).\n\n"
1177
- "**Stop & delete** additionally cancels any still-running "
1178
- "evaluation job(s) for the ticked rows before deleting use "
1179
- "it for pending submissions whose GPU eval is in flight."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1180
  )
1181
- delete_confirm = gr.Checkbox(
1182
- label=(
1183
- "I understand this permanently deletes the selected "
1184
- "submissions and their files."
1185
- ),
1186
- value=False,
1187
  interactive=False,
1188
  )
1189
  with gr.Row():
1190
- delete_btn = gr.Button(
1191
- "Delete selected", variant="stop", interactive=False,
1192
  )
1193
- stop_delete_btn = gr.Button(
1194
- "Stop & delete selected", variant="stop",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1195
  interactive=False,
1196
  )
1197
- with gr.Accordion("Danger zone: rescore", open=False):
1198
- gr.Markdown(
1199
- "Re-evaluates submissions against the **current** "
1200
- "ground truth + data: each row flips back to pending, the "
1201
- "gallery renders and the per-submission report HTML are "
1202
- "regenerated, and the score is recomputed. Use after a "
1203
- "ground-truth swap or a metric change that invalidates the "
1204
- "existing scores.\n\n"
1205
- "Rescoring is **re-runnable**: if a row's eval fails, mark it "
1206
- "and rescore again (or rescore all) — each run is "
1207
- "independent and converges.\n\n"
1208
- "- **Rescore selected** re-evaluates the ticked rows.\n"
1209
- f"- **Rescore all** re-evaluates every submission that has a "
1210
- f"stored zip and isn't already pending type "
1211
- f"`{RESCORE_ALL_PHRASE}` to arm it."
1212
- )
1213
- rescore_confirm = gr.Checkbox(
1214
- label=(
1215
- "I understand this flips the selected rows to pending and "
1216
- "recomputes their scores."
1217
- ),
1218
- value=False,
1219
- interactive=False,
1220
- )
1221
- rescore_selected_btn = gr.Button(
1222
- "Rescore selected", variant="stop", interactive=False,
1223
- )
1224
- rescore_all_phrase = gr.Textbox(
1225
- label=(
1226
- f"Type '{RESCORE_ALL_PHRASE}' to arm the board-wide "
1227
- f"rescore"
1228
- ),
1229
- placeholder=RESCORE_ALL_PHRASE,
1230
- interactive=False,
1231
- )
1232
- rescore_all_btn = gr.Button(
1233
- "Rescore ALL submissions", variant="stop", interactive=False,
1234
- )
1235
- admin_refresh_btn = gr.Button("Refresh", size="sm")
 
 
 
 
 
 
 
 
 
1236
 
1237
  admin_table.change(
1238
  fn=_admin_selection_status,
@@ -1331,12 +1365,14 @@ to publish the resulting row on the public leaderboard.
1331
  blocks.load(fn=_gallery_iframe_html, outputs=gallery_html)
1332
  blocks.load(fn=_tasks_iframe_html, outputs=tasks_html)
1333
 
1334
- # Same per-load OAuth read, gating the Admin tab's controls on
1335
- # membership in the CADGENBENCH_ADMINS set. Logged-out / non-admin
1336
- # visitors get the tab with everything disabled.
 
1337
  blocks.load(
1338
  fn=_gate_admin_controls,
1339
  outputs=[
 
1340
  admin_table,
1341
  admin_method_radio,
1342
  promote_btn,
 
267
  def _gate_admin_controls(
268
  profile: gr.OAuthProfile | None,
269
  ) -> tuple[
270
+ gr.Column, gr.Dataframe, gr.Radio, gr.Button, gr.Button, gr.Checkbox,
271
+ gr.Button, gr.Button, gr.Checkbox, gr.Button, gr.Textbox, gr.Button, str,
272
  ]:
273
+ """Reveal the admin panel only for a logged-in user in the admin set.
274
+
275
+ Runs on every page load and re-runs on LoginButton auth events. The
276
+ entire admin panel (table + every control) lives in a column that
277
+ stays hidden unless the visitor is logged in AND in the admin set, so
278
+ non-admins and logged-out visitors see only the login/logout button
279
+ and a status line -- no table, no buttons. For admins the panel is
280
+ shown, its controls enabled, and the table refreshed from live Hub
281
+ data. Data is only loaded into the table for admins, and a server-side
282
+ ``is_admin`` re-check still guards every handler. The armed-by-
283
+ confirmation buttons (delete, stop-and-delete, rescore-selected,
284
+ rescore-all) always load disarmed: they only enable once their confirm
285
+ box is ticked / phrase typed.
286
  """
 
 
 
287
  admin = is_admin(profile)
288
+ if admin:
289
+ admin_df, error = _safe_load_admin()
290
+ if error:
291
+ gr.Warning(f"Admin table unavailable: {error}")
292
+ else:
293
+ admin_df = _empty_admin_table()
294
  if profile is None:
295
+ status = "Log in with an admin account to access the controls."
296
  elif admin:
297
  status = f"Signed in as `{profile.username}`. Admin controls enabled."
298
  else:
299
  status = (
300
  f"Signed in as `{profile.username}`, which is not in the admin "
301
+ "set. You can log out with the button above."
302
  )
303
  return (
304
+ gr.Column(visible=admin),
305
  gr.Dataframe(value=admin_df, interactive=admin),
306
  gr.Radio(interactive=admin),
307
  gr.Button(interactive=admin),
 
329
  return gr.Button(interactive=armed), gr.Button(interactive=armed)
330
 
331
 
332
+ def _empty_admin_table() -> pd.DataFrame:
333
+ """An admin frame with headers but no rows -- what non-admins get.
334
+
335
+ The admin panel is hidden from non-admins, but the table refreshers
336
+ still run server-side; returning an empty frame ensures no submission
337
+ data is ever streamed into a non-admin's (hidden) table.
338
+ """
339
+ return pd.DataFrame(columns=list(ADMIN_COLUMNS))
340
+
341
+
342
+ def _refresh_admin_table(profile: gr.OAuthProfile | None) -> pd.DataFrame:
343
  """Admin Refresh button handler: reload the admin table, toast on failure.
344
 
345
  Uses the no-crash :func:`_safe_load_admin` so a Hub read failure
346
  surfaces as a loud ``gr.Warning`` plus an empty table rather than an
347
+ uncaught exception. Returns an empty frame to non-admins so a tampered
348
+ client can't pull the table out from behind the hidden panel.
349
  """
350
+ if not is_admin(profile):
351
+ return _empty_admin_table()
352
  admin_df, error = _safe_load_admin()
353
  if error:
354
  gr.Warning(f"Admin table unavailable: {error}")
 
376
  return fresh
377
 
378
 
379
+ def _auto_refresh_admin_table(
380
+ current_df: pd.DataFrame | None,
381
+ profile: gr.OAuthProfile | None,
382
+ ) -> pd.DataFrame:
383
  """Timer-tick handler: reload the admin table, preserving ticked rows.
384
 
385
  The leaderboard tables auto-refresh every 10s but the admin table did
 
388
  Unlike the leaderboard handler it stays silent (no per-tick toast)
389
  and, on a Hub read failure, returns the current frame unchanged so a
390
  transient blip never blanks the table or drops the user's selection.
391
+ Non-admins get an empty frame so the (hidden) table is never fed data.
392
  """
393
+ if not is_admin(profile):
394
+ return _empty_admin_table()
395
  admin_df, error = _safe_load_admin()
396
  if error:
397
  return current_df if current_df is not None else admin_df
 
1149
  gr.Markdown(ABOUT_MD)
1150
 
1151
  with gr.Tab("Admin"):
1152
+ # Maintainer-only controls. The Admin *tab* is visible to everyone
1153
+ # (a hint the path exists), but ALL admin UI -- the table, the
1154
+ # actions, the danger zones -- lives in `admin_panel`, a column that
1155
+ # stays hidden unless the logged-in user is in CADGENBENCH_ADMINS.
1156
+ # The `blocks.load` handler below flips that column's visibility and
1157
+ # only loads table data for admins; a server-side `is_admin` re-check
1158
+ # still guards every handler. Non-admins (and logged-out visitors)
1159
+ # see only the login/logout button + a status line, nothing else.
1160
+ # See decisions/validation-policy.md.
 
 
 
 
 
1161
  admin_login_btn = gr.LoginButton()
1162
  admin_status = gr.Markdown(
1163
+ "Log in with an admin account to access the controls."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1164
  )
1165
+ # Everything below is admin-only: hidden by default, revealed by
1166
+ # `_gate_admin_controls` only for a logged-in user in the admin set.
1167
+ with gr.Column(visible=False) as admin_panel:
 
 
 
 
 
 
 
 
 
 
1168
  gr.Markdown(
1169
+ "## Admin\n"
1170
+ "Tick rows in the **select** column, then promote them into "
1171
+ "the **Validated** tier (recording an evidence type), demote "
1172
+ "them back to **Unvalidated**, delete them, or rescore them "
1173
+ "against the current ground truth. Actions apply to every "
1174
+ "ticked row at once."
1175
+ )
1176
+ # Only the leading `select` column is editable; the rest is
1177
+ # read-only context. Click-to-tick drives every action below.
1178
+ # Starts empty; `_gate_admin_controls` loads rows on page load
1179
+ # for admins only, so non-admins never receive the data.
1180
+ admin_table = gr.Dataframe(
1181
+ value=_empty_admin_table(),
1182
+ datatype=[
1183
+ "bool", "str", "str", "str", "str", "str", "str",
1184
+ "number", "str",
1185
+ ],
1186
+ static_columns=list(range(1, len(ADMIN_COLUMNS))),
1187
+ interactive=False,
1188
+ label="Submissions (tick select to choose rows)",
1189
+ wrap=True,
1190
  )
1191
+ admin_selection_md = gr.Markdown("_No rows selected._")
1192
+ admin_method_radio = gr.Radio(
1193
+ choices=list(VALID_METHODS),
1194
+ value="manual",
1195
+ label="validation_method (applied to all rows on promote)",
 
1196
  interactive=False,
1197
  )
1198
  with gr.Row():
1199
+ promote_btn = gr.Button(
1200
+ "Mark validated", variant="primary", interactive=False,
1201
  )
1202
+ demote_btn = gr.Button("Mark unvalidated", interactive=False)
1203
+ with gr.Accordion("Danger zone: delete", open=False):
1204
+ gr.Markdown(
1205
+ "Permanently deletes the ticked rows **and** their "
1206
+ "uploaded zip + report files from the submissions "
1207
+ "dataset. This cannot be undone (only a manual revert of "
1208
+ "the dataset commit).\n\n"
1209
+ "**Stop & delete** additionally cancels any still-running "
1210
+ "evaluation job(s) for the ticked rows before deleting — "
1211
+ "use it for pending submissions whose GPU eval is in "
1212
+ "flight."
1213
+ )
1214
+ delete_confirm = gr.Checkbox(
1215
+ label=(
1216
+ "I understand this permanently deletes the selected "
1217
+ "submissions and their files."
1218
+ ),
1219
+ value=False,
1220
  interactive=False,
1221
  )
1222
+ with gr.Row():
1223
+ delete_btn = gr.Button(
1224
+ "Delete selected", variant="stop", interactive=False,
1225
+ )
1226
+ stop_delete_btn = gr.Button(
1227
+ "Stop & delete selected", variant="stop",
1228
+ interactive=False,
1229
+ )
1230
+ with gr.Accordion("Danger zone: rescore", open=False):
1231
+ gr.Markdown(
1232
+ "Re-evaluates submissions against the **current** "
1233
+ "ground truth + data: each row flips back to pending, the "
1234
+ "gallery renders and the per-submission report HTML are "
1235
+ "regenerated, and the score is recomputed. Use after a "
1236
+ "ground-truth swap or a metric change that invalidates "
1237
+ "the existing scores.\n\n"
1238
+ "Rescoring is **re-runnable**: if a row's eval fails, "
1239
+ "mark it and rescore again (or rescore all) — each run is "
1240
+ "independent and converges.\n\n"
1241
+ "- **Rescore selected** re-evaluates the ticked rows.\n"
1242
+ f"- **Rescore all** re-evaluates every submission that "
1243
+ f"has a stored zip and isn't already pending — type "
1244
+ f"`{RESCORE_ALL_PHRASE}` to arm it."
1245
+ )
1246
+ rescore_confirm = gr.Checkbox(
1247
+ label=(
1248
+ "I understand this flips the selected rows to pending "
1249
+ "and recomputes their scores."
1250
+ ),
1251
+ value=False,
1252
+ interactive=False,
1253
+ )
1254
+ rescore_selected_btn = gr.Button(
1255
+ "Rescore selected", variant="stop", interactive=False,
1256
+ )
1257
+ rescore_all_phrase = gr.Textbox(
1258
+ label=(
1259
+ f"Type '{RESCORE_ALL_PHRASE}' to arm the board-wide "
1260
+ f"rescore"
1261
+ ),
1262
+ placeholder=RESCORE_ALL_PHRASE,
1263
+ interactive=False,
1264
+ )
1265
+ rescore_all_btn = gr.Button(
1266
+ "Rescore ALL submissions", variant="stop",
1267
+ interactive=False,
1268
+ )
1269
+ admin_refresh_btn = gr.Button("Refresh", size="sm")
1270
 
1271
  admin_table.change(
1272
  fn=_admin_selection_status,
 
1365
  blocks.load(fn=_gallery_iframe_html, outputs=gallery_html)
1366
  blocks.load(fn=_tasks_iframe_html, outputs=tasks_html)
1367
 
1368
+ # Same per-load OAuth read, gating the Admin tab on membership in the
1369
+ # CADGENBENCH_ADMINS set. Logged-out / non-admin visitors get the
1370
+ # admin_panel hidden entirely (no table, no controls) -- just the
1371
+ # login/logout button and a status line.
1372
  blocks.load(
1373
  fn=_gate_admin_controls,
1374
  outputs=[
1375
+ admin_panel,
1376
  admin_table,
1377
  admin_method_radio,
1378
  promote_btn,
tests/test_proxy.py CHANGED
@@ -185,13 +185,45 @@ def test_gate_admin_controls_refreshes_live_table(monkeypatch):
185
  monkeypatch.setattr(app, "_safe_load_admin", lambda: (live_df, None))
186
  monkeypatch.setattr(app, "is_admin", lambda profile: True)
187
 
188
- table_update = app._gate_admin_controls(types.SimpleNamespace(username="michaelr27"))[0]
 
 
189
 
 
190
  assert table_update.value["headers"] == leaderboard.ADMIN_COLUMNS
191
  assert table_update.value["data"][0][3] == "UC3 e2e test 20260602-205316"
192
  assert table_update.interactive is True
193
 
194
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  def test_admin_delete_refreshes_gallery(monkeypatch):
196
  """Deleting rows also replaces the Gallery iframe srcdoc."""
197
  table_df = pd.DataFrame(
 
185
  monkeypatch.setattr(app, "_safe_load_admin", lambda: (live_df, None))
186
  monkeypatch.setattr(app, "is_admin", lambda profile: True)
187
 
188
+ updates = app._gate_admin_controls(types.SimpleNamespace(username="michaelr27"))
189
+ # [0] is now the admin_panel Column (visible for admins); [1] is the table.
190
+ panel_update, table_update = updates[0], updates[1]
191
 
192
+ assert panel_update.visible is True
193
  assert table_update.value["headers"] == leaderboard.ADMIN_COLUMNS
194
  assert table_update.value["data"][0][3] == "UC3 e2e test 20260602-205316"
195
  assert table_update.interactive is True
196
 
197
 
198
+ def test_gate_admin_controls_hides_panel_for_non_admin(monkeypatch):
199
+ """Non-admins get the panel hidden and no submission data in the table."""
200
+ live_df = pd.DataFrame(
201
+ [{"select": False, "submission_id": "secret-row"}],
202
+ columns=leaderboard.ADMIN_COLUMNS,
203
+ )
204
+
205
+ def _boom():
206
+ raise AssertionError("_safe_load_admin must not run for non-admins")
207
+
208
+ monkeypatch.setattr(app, "_safe_load_admin", _boom)
209
+ monkeypatch.setattr(app, "is_admin", lambda profile: False)
210
+
211
+ updates = app._gate_admin_controls(types.SimpleNamespace(username="someone"))
212
+ panel_update, table_update = updates[0], updates[1]
213
+
214
+ assert panel_update.visible is False
215
+ assert table_update.interactive is False
216
+ # No rows are streamed to a non-admin (empty frame, headers only).
217
+ assert table_update.value["data"] == []
218
+
219
+
220
+ def test_gate_admin_controls_logged_out_hides_panel(monkeypatch):
221
+ """A logged-out visitor (profile=None) also gets the panel hidden."""
222
+ monkeypatch.setattr(app, "is_admin", lambda profile: False)
223
+ updates = app._gate_admin_controls(None)
224
+ assert updates[0].visible is False
225
+
226
+
227
  def test_admin_delete_refreshes_gallery(monkeypatch):
228
  """Deleting rows also replaces the Gallery iframe srcdoc."""
229
  table_df = pd.DataFrame(