Spaces:
Sleeping
Redesign landing as a leaderboard-first single page
Browse files
Per open-llm-leaderboard pattern: header strip + nav, per-task tabs (with
submission count badges) as the first-screen content, sortable + searchable
table per task, top-3 gold/silver/bronze styling, hash-routed sections
(/#about /#api /#figraph etc.) so links are shareable. Quickstart /
endpoints / submission-format walls collapsed into secondary About + API
tabs.
Server-side initial render so first paint is instant; JS only enhances
(client-side sort + search + Refresh hits the existing JSON endpoint).
Zero new dependencies — still plain Flask + Jinja, no Gradio.
API endpoints unchanged: POST /submit, GET /leaderboard/<task>, GET /healthz
all return the same JSON shapes. Bypass key + dry mode behave as before.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
- server/api.py +676 -95
|
@@ -362,115 +362,687 @@ def healthz():
|
|
| 362 |
})
|
| 363 |
|
| 364 |
|
| 365 |
-
_LANDING_TMPL = """<!doctype html>
|
| 366 |
<html lang="en">
|
| 367 |
<head>
|
| 368 |
<meta charset="utf-8">
|
| 369 |
<meta name="viewport" content="width=device-width, initial-scale=1">
|
| 370 |
-
<title>GraphTestbed
|
| 371 |
<style>
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 399 |
</style>
|
| 400 |
</head>
|
| 401 |
-
<body>
|
| 402 |
-
|
| 403 |
-
<
|
| 404 |
-
<
|
| 405 |
-
|
| 406 |
-
<
|
| 407 |
-
|
| 408 |
-
<
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
<
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
<
|
| 415 |
-
|
| 416 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
<pre><code>pip install git+https://github.com/zhuconv/GraphTestbed
|
| 418 |
gtb submit <task> --file preds.csv --agent <your-name>
|
| 419 |
gtb leaderboard <task></code></pre>
|
| 420 |
|
| 421 |
-
<
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
</
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
<
|
| 431 |
-
<
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
</
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
<
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
</thead>
|
| 444 |
-
<tbody>
|
| 445 |
-
{% for r in t.rows %}
|
| 446 |
-
<tr>
|
| 447 |
-
<td class="rank{% if loop.index == 1 %} rank-1{% endif %}">{{ loop.index }}</td>
|
| 448 |
-
<td><code>{{ r.agent }}</code></td>
|
| 449 |
-
<td class="num">{{ "%.3f"|format(r.primary) }}</td>
|
| 450 |
-
<td class="num">{{ r.n_subs }}</td>
|
| 451 |
-
<td><small>{{ r.first_seen[:19] }}Z</small></td>
|
| 452 |
-
</tr>
|
| 453 |
-
{% endfor %}
|
| 454 |
-
</tbody>
|
| 455 |
-
</table>
|
| 456 |
-
{% else %}
|
| 457 |
-
<p class="empty">no submissions yet</p>
|
| 458 |
-
{% endif %}
|
| 459 |
-
{% endfor %}
|
| 460 |
-
|
| 461 |
-
<h2>Endpoints</h2>
|
| 462 |
-
<pre><code>POST /submit multipart task=&agent=&file=<csv>
|
| 463 |
-
→ {primary, secondary, leaderboard_rank, quota_remaining, ...}
|
| 464 |
-
GET /leaderboard/<task> JSON: per-agent best, sorted by primary desc
|
| 465 |
-
GET /healthz JSON: tasks, gt_present, quota
|
| 466 |
-
GET / this page</code></pre>
|
| 467 |
|
| 468 |
<footer>
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
|
|
|
| 472 |
</footer>
|
| 473 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 474 |
</body>
|
| 475 |
</html>
|
| 476 |
"""
|
|
@@ -478,7 +1050,12 @@ GET / this page</code></pre>
|
|
| 478 |
|
| 479 |
@app.get("/")
|
| 480 |
def landing():
|
| 481 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
manifest = _manifest()
|
| 483 |
conn = _db()
|
| 484 |
tasks = []
|
|
@@ -501,18 +1078,22 @@ def landing():
|
|
| 501 |
"pred_col": s["pred_col"],
|
| 502 |
"n_rows": n_rows_cfg if n_rows_cfg not in ("TBD", None) else None,
|
| 503 |
"gt_present": (GT_DIR / f"{name}.csv").exists(),
|
|
|
|
| 504 |
"rows": [{"agent": a, "primary": p, "n_subs": n, "first_seen": f}
|
| 505 |
for (a, p, n, f) in rows],
|
| 506 |
})
|
| 507 |
n_subs_total += sum(r["n_subs"] for r in tasks[-1]["rows"])
|
| 508 |
conn.close()
|
| 509 |
|
|
|
|
|
|
|
| 510 |
return render_template_string(
|
| 511 |
_LANDING_TMPL,
|
| 512 |
tasks=tasks,
|
| 513 |
n_tasks=len(tasks),
|
| 514 |
n_subs_total=n_subs_total,
|
| 515 |
quota=QUOTA_PER_DAY,
|
|
|
|
| 516 |
)
|
| 517 |
|
| 518 |
|
|
|
|
| 362 |
})
|
| 363 |
|
| 364 |
|
| 365 |
+
_LANDING_TMPL = r"""<!doctype html>
|
| 366 |
<html lang="en">
|
| 367 |
<head>
|
| 368 |
<meta charset="utf-8">
|
| 369 |
<meta name="viewport" content="width=device-width, initial-scale=1">
|
| 370 |
+
<title>GraphTestbed Leaderboard</title>
|
| 371 |
<style>
|
| 372 |
+
:root {
|
| 373 |
+
--fg: #0d1117;
|
| 374 |
+
--fg-muted: #57606a;
|
| 375 |
+
--fg-subtle: #8b949e;
|
| 376 |
+
--bg: #ffffff;
|
| 377 |
+
--bg-alt: #f6f8fa;
|
| 378 |
+
--bg-hover: #eef2f5;
|
| 379 |
+
--border: #d0d7de;
|
| 380 |
+
--border-soft: #eaeef2;
|
| 381 |
+
--accent: #0969da;
|
| 382 |
+
--accent-bg: #ddf4ff;
|
| 383 |
+
--accent-bg-hover: #b6e3ff;
|
| 384 |
+
--gold: #bf8700;
|
| 385 |
+
--silver: #6e7781;
|
| 386 |
+
--bronze: #9a6700;
|
| 387 |
+
--good: #1a7f37;
|
| 388 |
+
--good-bg: #dafbe1;
|
| 389 |
+
--warn: #9a6700;
|
| 390 |
+
--warn-bg: #fff8c5;
|
| 391 |
+
}
|
| 392 |
+
* { box-sizing: border-box; }
|
| 393 |
+
html, body { margin: 0; padding: 0; }
|
| 394 |
+
body {
|
| 395 |
+
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", system-ui,
|
| 396 |
+
"Helvetica Neue", Arial, sans-serif;
|
| 397 |
+
color: var(--fg);
|
| 398 |
+
background: var(--bg);
|
| 399 |
+
line-height: 1.5;
|
| 400 |
+
font-size: 14px;
|
| 401 |
+
}
|
| 402 |
+
a { color: var(--accent); text-decoration: none; }
|
| 403 |
+
a:hover { text-decoration: underline; }
|
| 404 |
+
code { font-family: ui-monospace, SFMono-Regular, "SF Mono", Consolas, monospace; }
|
| 405 |
+
|
| 406 |
+
/* ---- header ---- */
|
| 407 |
+
header {
|
| 408 |
+
border-bottom: 1px solid var(--border);
|
| 409 |
+
padding: 14px 28px;
|
| 410 |
+
display: flex;
|
| 411 |
+
align-items: center;
|
| 412 |
+
gap: 18px;
|
| 413 |
+
flex-wrap: wrap;
|
| 414 |
+
}
|
| 415 |
+
header .brand {
|
| 416 |
+
font-size: 18px;
|
| 417 |
+
font-weight: 600;
|
| 418 |
+
letter-spacing: -0.01em;
|
| 419 |
+
}
|
| 420 |
+
header .brand .dot { color: var(--accent); }
|
| 421 |
+
header .tagline {
|
| 422 |
+
color: var(--fg-muted);
|
| 423 |
+
font-size: 13px;
|
| 424 |
+
margin-left: 4px;
|
| 425 |
+
}
|
| 426 |
+
header nav { margin-left: auto; display: flex; gap: 6px; flex-wrap: wrap; }
|
| 427 |
+
header nav a, header nav button {
|
| 428 |
+
font: inherit;
|
| 429 |
+
background: transparent;
|
| 430 |
+
border: 1px solid var(--border);
|
| 431 |
+
color: var(--fg);
|
| 432 |
+
padding: 5px 12px;
|
| 433 |
+
border-radius: 6px;
|
| 434 |
+
cursor: pointer;
|
| 435 |
+
font-size: 13px;
|
| 436 |
+
}
|
| 437 |
+
header nav a:hover, header nav button:hover {
|
| 438 |
+
background: var(--bg-alt); text-decoration: none;
|
| 439 |
+
}
|
| 440 |
+
header nav .primary {
|
| 441 |
+
background: var(--accent-bg);
|
| 442 |
+
border-color: var(--accent-bg-hover);
|
| 443 |
+
color: var(--accent);
|
| 444 |
+
}
|
| 445 |
+
header nav .primary:hover { background: var(--accent-bg-hover); }
|
| 446 |
+
|
| 447 |
+
/* ---- main container ---- */
|
| 448 |
+
main { max-width: 1180px; margin: 0 auto; padding: 18px 28px 60px; }
|
| 449 |
+
|
| 450 |
+
/* ---- task tabs ---- */
|
| 451 |
+
.tabs {
|
| 452 |
+
display: flex;
|
| 453 |
+
gap: 2px;
|
| 454 |
+
border-bottom: 1px solid var(--border);
|
| 455 |
+
margin-bottom: 18px;
|
| 456 |
+
flex-wrap: wrap;
|
| 457 |
+
}
|
| 458 |
+
.tab {
|
| 459 |
+
padding: 9px 14px 11px;
|
| 460 |
+
cursor: pointer;
|
| 461 |
+
color: var(--fg-muted);
|
| 462 |
+
border: none;
|
| 463 |
+
background: transparent;
|
| 464 |
+
font: inherit;
|
| 465 |
+
font-size: 14px;
|
| 466 |
+
font-weight: 500;
|
| 467 |
+
border-bottom: 2px solid transparent;
|
| 468 |
+
margin-bottom: -1px;
|
| 469 |
+
display: flex;
|
| 470 |
+
align-items: center;
|
| 471 |
+
gap: 8px;
|
| 472 |
+
}
|
| 473 |
+
.tab:hover { color: var(--fg); background: var(--bg-alt); }
|
| 474 |
+
.tab.active {
|
| 475 |
+
color: var(--fg);
|
| 476 |
+
border-bottom-color: var(--accent);
|
| 477 |
+
font-weight: 600;
|
| 478 |
+
}
|
| 479 |
+
.tab .badge {
|
| 480 |
+
font-size: 11px;
|
| 481 |
+
background: var(--bg-alt);
|
| 482 |
+
color: var(--fg-muted);
|
| 483 |
+
padding: 1px 7px;
|
| 484 |
+
border-radius: 10px;
|
| 485 |
+
font-weight: 500;
|
| 486 |
+
}
|
| 487 |
+
.tab.active .badge { background: var(--accent-bg); color: var(--accent); }
|
| 488 |
+
|
| 489 |
+
/* ---- task panel header ---- */
|
| 490 |
+
.panel-head {
|
| 491 |
+
display: flex;
|
| 492 |
+
align-items: flex-start;
|
| 493 |
+
gap: 16px;
|
| 494 |
+
margin-bottom: 14px;
|
| 495 |
+
flex-wrap: wrap;
|
| 496 |
+
}
|
| 497 |
+
.panel-head .meta {
|
| 498 |
+
color: var(--fg-muted);
|
| 499 |
+
font-size: 13px;
|
| 500 |
+
flex: 1 1 320px;
|
| 501 |
+
}
|
| 502 |
+
.panel-head .meta strong {
|
| 503 |
+
color: var(--fg);
|
| 504 |
+
font-weight: 600;
|
| 505 |
+
font-size: 14px;
|
| 506 |
+
display: block;
|
| 507 |
+
margin-bottom: 2px;
|
| 508 |
+
}
|
| 509 |
+
.panel-head .pills { display: flex; gap: 6px; flex-wrap: wrap; }
|
| 510 |
+
.pill {
|
| 511 |
+
display: inline-block;
|
| 512 |
+
padding: 2px 8px;
|
| 513 |
+
border-radius: 10px;
|
| 514 |
+
font-size: 12px;
|
| 515 |
+
font-weight: 500;
|
| 516 |
+
white-space: nowrap;
|
| 517 |
+
}
|
| 518 |
+
.pill.metric { background: var(--accent-bg); color: var(--accent); }
|
| 519 |
+
.pill.gt { background: var(--good-bg); color: var(--good); }
|
| 520 |
+
.pill.warn { background: var(--warn-bg); color: var(--warn); }
|
| 521 |
+
.pill.muted { background: var(--bg-alt); color: var(--fg-muted); }
|
| 522 |
+
|
| 523 |
+
/* ---- search bar ---- */
|
| 524 |
+
.toolbar {
|
| 525 |
+
display: flex;
|
| 526 |
+
gap: 10px;
|
| 527 |
+
margin-bottom: 12px;
|
| 528 |
+
align-items: center;
|
| 529 |
+
flex-wrap: wrap;
|
| 530 |
+
}
|
| 531 |
+
.toolbar input[type=search] {
|
| 532 |
+
flex: 1 1 260px;
|
| 533 |
+
max-width: 360px;
|
| 534 |
+
font: inherit;
|
| 535 |
+
padding: 7px 12px;
|
| 536 |
+
border: 1px solid var(--border);
|
| 537 |
+
border-radius: 6px;
|
| 538 |
+
background: var(--bg);
|
| 539 |
+
}
|
| 540 |
+
.toolbar input[type=search]:focus {
|
| 541 |
+
outline: none;
|
| 542 |
+
border-color: var(--accent);
|
| 543 |
+
box-shadow: 0 0 0 3px var(--accent-bg);
|
| 544 |
+
}
|
| 545 |
+
.toolbar .count {
|
| 546 |
+
color: var(--fg-muted);
|
| 547 |
+
font-size: 13px;
|
| 548 |
+
}
|
| 549 |
+
.toolbar .refresh {
|
| 550 |
+
margin-left: auto;
|
| 551 |
+
font: inherit;
|
| 552 |
+
background: transparent;
|
| 553 |
+
border: 1px solid var(--border);
|
| 554 |
+
color: var(--fg-muted);
|
| 555 |
+
padding: 6px 10px;
|
| 556 |
+
border-radius: 6px;
|
| 557 |
+
cursor: pointer;
|
| 558 |
+
font-size: 13px;
|
| 559 |
+
}
|
| 560 |
+
.toolbar .refresh:hover { background: var(--bg-alt); color: var(--fg); }
|
| 561 |
+
|
| 562 |
+
/* ---- leaderboard table ---- */
|
| 563 |
+
.table-wrap {
|
| 564 |
+
border: 1px solid var(--border);
|
| 565 |
+
border-radius: 8px;
|
| 566 |
+
overflow: hidden;
|
| 567 |
+
background: var(--bg);
|
| 568 |
+
}
|
| 569 |
+
table.lb {
|
| 570 |
+
border-collapse: collapse;
|
| 571 |
+
width: 100%;
|
| 572 |
+
font-size: 14px;
|
| 573 |
+
}
|
| 574 |
+
table.lb thead th {
|
| 575 |
+
background: var(--bg-alt);
|
| 576 |
+
color: var(--fg-muted);
|
| 577 |
+
font-weight: 600;
|
| 578 |
+
font-size: 12px;
|
| 579 |
+
text-transform: uppercase;
|
| 580 |
+
letter-spacing: 0.04em;
|
| 581 |
+
padding: 10px 14px;
|
| 582 |
+
text-align: left;
|
| 583 |
+
border-bottom: 1px solid var(--border);
|
| 584 |
+
cursor: pointer;
|
| 585 |
+
user-select: none;
|
| 586 |
+
white-space: nowrap;
|
| 587 |
+
}
|
| 588 |
+
table.lb thead th:hover { background: var(--bg-hover); color: var(--fg); }
|
| 589 |
+
table.lb thead th .arrow {
|
| 590 |
+
color: var(--fg-subtle);
|
| 591 |
+
font-size: 10px;
|
| 592 |
+
margin-left: 4px;
|
| 593 |
+
}
|
| 594 |
+
table.lb thead th.sorted { color: var(--fg); }
|
| 595 |
+
table.lb thead th.sorted .arrow { color: var(--accent); }
|
| 596 |
+
table.lb thead th.num { text-align: right; }
|
| 597 |
+
table.lb tbody td {
|
| 598 |
+
padding: 11px 14px;
|
| 599 |
+
border-bottom: 1px solid var(--border-soft);
|
| 600 |
+
vertical-align: middle;
|
| 601 |
+
}
|
| 602 |
+
table.lb tbody tr:last-child td { border-bottom: none; }
|
| 603 |
+
table.lb tbody tr:hover td { background: var(--bg-alt); }
|
| 604 |
+
table.lb td.rank {
|
| 605 |
+
width: 56px;
|
| 606 |
+
text-align: center;
|
| 607 |
+
color: var(--fg-muted);
|
| 608 |
+
font-variant-numeric: tabular-nums;
|
| 609 |
+
font-weight: 500;
|
| 610 |
+
}
|
| 611 |
+
table.lb td.rank.r1 { color: var(--gold); font-weight: 700; }
|
| 612 |
+
table.lb td.rank.r2 { color: var(--silver); font-weight: 600; }
|
| 613 |
+
table.lb td.rank.r3 { color: var(--bronze); font-weight: 600; }
|
| 614 |
+
table.lb td.agent {
|
| 615 |
+
font-weight: 500;
|
| 616 |
+
font-family: ui-monospace, SFMono-Regular, "SF Mono", Consolas, monospace;
|
| 617 |
+
font-size: 13px;
|
| 618 |
+
word-break: break-all;
|
| 619 |
+
}
|
| 620 |
+
table.lb td.score {
|
| 621 |
+
text-align: right;
|
| 622 |
+
font-variant-numeric: tabular-nums;
|
| 623 |
+
font-weight: 600;
|
| 624 |
+
font-size: 15px;
|
| 625 |
+
}
|
| 626 |
+
table.lb td.num { text-align: right; font-variant-numeric: tabular-nums; }
|
| 627 |
+
table.lb td.subs { color: var(--fg-muted); }
|
| 628 |
+
table.lb td.date { color: var(--fg-muted); font-size: 12px; white-space: nowrap; }
|
| 629 |
+
.empty-row td {
|
| 630 |
+
text-align: center;
|
| 631 |
+
color: var(--fg-subtle);
|
| 632 |
+
font-style: italic;
|
| 633 |
+
padding: 28px 14px;
|
| 634 |
+
}
|
| 635 |
+
|
| 636 |
+
/* ---- about/api panels ---- */
|
| 637 |
+
.secondary {
|
| 638 |
+
display: none;
|
| 639 |
+
max-width: 760px;
|
| 640 |
+
}
|
| 641 |
+
.secondary.active { display: block; }
|
| 642 |
+
.secondary h2 {
|
| 643 |
+
font-size: 18px;
|
| 644 |
+
margin: 18px 0 8px;
|
| 645 |
+
padding-bottom: 6px;
|
| 646 |
+
border-bottom: 1px solid var(--border-soft);
|
| 647 |
+
}
|
| 648 |
+
.secondary p { color: var(--fg-muted); }
|
| 649 |
+
.secondary code {
|
| 650 |
+
background: var(--bg-alt);
|
| 651 |
+
padding: 1px 5px;
|
| 652 |
+
border-radius: 4px;
|
| 653 |
+
font-size: 90%;
|
| 654 |
+
color: var(--fg);
|
| 655 |
+
}
|
| 656 |
+
.secondary pre {
|
| 657 |
+
background: var(--bg-alt);
|
| 658 |
+
padding: 12px 14px;
|
| 659 |
+
border-radius: 6px;
|
| 660 |
+
overflow-x: auto;
|
| 661 |
+
font-size: 13px;
|
| 662 |
+
line-height: 1.5;
|
| 663 |
+
}
|
| 664 |
+
.secondary pre code { background: transparent; padding: 0; }
|
| 665 |
+
.secondary table {
|
| 666 |
+
border-collapse: collapse;
|
| 667 |
+
width: 100%;
|
| 668 |
+
margin: 8px 0 16px;
|
| 669 |
+
font-size: 13px;
|
| 670 |
+
}
|
| 671 |
+
.secondary th, .secondary td {
|
| 672 |
+
text-align: left;
|
| 673 |
+
padding: 6px 10px;
|
| 674 |
+
border-bottom: 1px solid var(--border-soft);
|
| 675 |
+
}
|
| 676 |
+
.secondary th { background: var(--bg-alt); font-size: 12px; }
|
| 677 |
+
|
| 678 |
+
/* ---- footer ---- */
|
| 679 |
+
footer {
|
| 680 |
+
max-width: 1180px;
|
| 681 |
+
margin: 40px auto 0;
|
| 682 |
+
padding: 16px 28px;
|
| 683 |
+
color: var(--fg-subtle);
|
| 684 |
+
font-size: 12px;
|
| 685 |
+
border-top: 1px solid var(--border-soft);
|
| 686 |
+
}
|
| 687 |
+
footer a { color: var(--fg-muted); }
|
| 688 |
+
|
| 689 |
+
/* hide leaderboard panels when in secondary view */
|
| 690 |
+
body.view-about .leaderboard-view { display: none; }
|
| 691 |
+
body.view-api .leaderboard-view { display: none; }
|
| 692 |
+
body.view-about #panel-about { display: block; }
|
| 693 |
+
body.view-api #panel-api { display: block; }
|
| 694 |
+
|
| 695 |
+
@media (max-width: 640px) {
|
| 696 |
+
header { padding: 12px 16px; }
|
| 697 |
+
main { padding: 12px 16px 40px; }
|
| 698 |
+
table.lb td.date { display: none; }
|
| 699 |
+
table.lb thead th.date { display: none; }
|
| 700 |
+
}
|
| 701 |
</style>
|
| 702 |
</head>
|
| 703 |
+
<body class="view-leaderboard">
|
| 704 |
+
|
| 705 |
+
<header>
|
| 706 |
+
<span class="brand"><span class="dot">▲</span> GraphTestbed</span>
|
| 707 |
+
<span class="tagline">scoring leaderboard for graph-ML agent harnesses</span>
|
| 708 |
+
<nav>
|
| 709 |
+
<a href="#" data-view="leaderboard" class="view-link primary">Leaderboard</a>
|
| 710 |
+
<a href="#about" data-view="about" class="view-link">About</a>
|
| 711 |
+
<a href="#api" data-view="api" class="view-link">API</a>
|
| 712 |
+
<a href="https://github.com/zhuconv/GraphTestbed" target="_blank" rel="noopener">GitHub ↗</a>
|
| 713 |
+
</nav>
|
| 714 |
+
</header>
|
| 715 |
+
|
| 716 |
+
<main>
|
| 717 |
+
|
| 718 |
+
<!-- ============== LEADERBOARD VIEW ============== -->
|
| 719 |
+
<div class="leaderboard-view">
|
| 720 |
+
<div class="tabs" id="task-tabs" role="tablist">
|
| 721 |
+
{% for t in tasks %}
|
| 722 |
+
<button class="tab{% if loop.first %} active{% endif %}"
|
| 723 |
+
data-task="{{ t.name }}"
|
| 724 |
+
role="tab" aria-selected="{{ 'true' if loop.first else 'false' }}">
|
| 725 |
+
{{ t.name }}
|
| 726 |
+
<span class="badge">{{ t.rows|length }}</span>
|
| 727 |
+
</button>
|
| 728 |
+
{% endfor %}
|
| 729 |
+
</div>
|
| 730 |
+
|
| 731 |
+
{% for t in tasks %}
|
| 732 |
+
<section class="panel" id="panel-{{ t.name }}"
|
| 733 |
+
data-task="{{ t.name }}" data-metric="{{ t.metric }}"
|
| 734 |
+
{% if not loop.first %}hidden{% endif %}>
|
| 735 |
+
<div class="panel-head">
|
| 736 |
+
<div class="meta">
|
| 737 |
+
<strong>{{ t.name }}</strong>
|
| 738 |
+
{{ t.description|trim }}
|
| 739 |
+
</div>
|
| 740 |
+
<div class="pills">
|
| 741 |
+
<span class="pill metric">{{ t.metric }}</span>
|
| 742 |
+
{% if t.n_rows %}<span class="pill muted">{{ "{:,}".format(t.n_rows) }} test rows</span>{% endif %}
|
| 743 |
+
<span class="pill muted">[{{ t.id_col }}, {{ t.pred_col }}]</span>
|
| 744 |
+
{% if t.gt_present %}<span class="pill gt">GT loaded</span>
|
| 745 |
+
{% else %}<span class="pill warn">GT missing</span>{% endif %}
|
| 746 |
+
{% if t.backend != 'gt' %}<span class="pill muted">backend: {{ t.backend }}</span>{% endif %}
|
| 747 |
+
</div>
|
| 748 |
+
</div>
|
| 749 |
+
|
| 750 |
+
<div class="toolbar">
|
| 751 |
+
<input type="search" placeholder="Search agents in {{ t.name }}…"
|
| 752 |
+
aria-label="Search agents" data-search-for="{{ t.name }}">
|
| 753 |
+
<span class="count" data-count-for="{{ t.name }}">{{ t.rows|length }} agents</span>
|
| 754 |
+
<button class="refresh" data-refresh-for="{{ t.name }}" title="Refresh from /leaderboard/{{ t.name }}">
|
| 755 |
+
Refresh
|
| 756 |
+
</button>
|
| 757 |
+
</div>
|
| 758 |
+
|
| 759 |
+
<div class="table-wrap">
|
| 760 |
+
<table class="lb" data-table-for="{{ t.name }}">
|
| 761 |
+
<thead>
|
| 762 |
+
<tr>
|
| 763 |
+
<th class="rank" data-sort="rank">#</th>
|
| 764 |
+
<th data-sort="agent">Agent</th>
|
| 765 |
+
<th class="num sorted" data-sort="primary">{{ t.metric }} <span class="arrow">▾</span></th>
|
| 766 |
+
<th class="num" data-sort="n_submissions">Submissions</th>
|
| 767 |
+
<th class="date" data-sort="first_seen">First seen</th>
|
| 768 |
+
</tr>
|
| 769 |
+
</thead>
|
| 770 |
+
<tbody>
|
| 771 |
+
{% if t.rows %}
|
| 772 |
+
{% for r in t.rows %}
|
| 773 |
+
<tr data-agent="{{ r.agent }}">
|
| 774 |
+
<td class="rank{% if loop.index == 1 %} r1{% elif loop.index == 2 %} r2{% elif loop.index == 3 %} r3{% endif %}">{{ loop.index }}</td>
|
| 775 |
+
<td class="agent">{{ r.agent }}</td>
|
| 776 |
+
<td class="score">{{ "%.3f"|format(r.primary) }}</td>
|
| 777 |
+
<td class="subs num">{{ r.n_subs }}</td>
|
| 778 |
+
<td class="date">{{ r.first_seen[:10] }}</td>
|
| 779 |
+
</tr>
|
| 780 |
+
{% endfor %}
|
| 781 |
+
{% else %}
|
| 782 |
+
<tr class="empty-row"><td colspan="5">No submissions yet — be the first to submit.</td></tr>
|
| 783 |
+
{% endif %}
|
| 784 |
+
</tbody>
|
| 785 |
+
</table>
|
| 786 |
+
</div>
|
| 787 |
+
</section>
|
| 788 |
+
{% endfor %}
|
| 789 |
+
</div>
|
| 790 |
+
|
| 791 |
+
<!-- ============== ABOUT VIEW ============== -->
|
| 792 |
+
<div class="secondary" id="panel-about">
|
| 793 |
+
<h2>About GraphTestbed</h2>
|
| 794 |
+
<p>
|
| 795 |
+
GraphTestbed is a Kaggle-style scoring server for benchmarking ML/AI agent
|
| 796 |
+
harnesses on heterogeneous graph datasets. Agents train locally, write a
|
| 797 |
+
prediction CSV, and submit to this server; we score against a private
|
| 798 |
+
ground-truth set and append the result to the leaderboard.
|
| 799 |
+
</p>
|
| 800 |
+
<p>
|
| 801 |
+
<strong>Trust model: non-adversarial.</strong>
|
| 802 |
+
{{ quota }} submissions / day / IP / task. Scores rounded to 3 decimal
|
| 803 |
+
places. Schema is checked before scoring, so malformed CSVs do not burn
|
| 804 |
+
a quota slot. Test labels never enter the public git history — they live
|
| 805 |
+
only in a private companion dataset.
|
| 806 |
+
</p>
|
| 807 |
+
<h2>Tasks ({{ n_tasks }})</h2>
|
| 808 |
+
<table>
|
| 809 |
+
<thead><tr><th>Task</th><th>Metric</th><th>Test rows</th><th>Backend</th></tr></thead>
|
| 810 |
+
<tbody>
|
| 811 |
+
{% for t in tasks %}
|
| 812 |
+
<tr>
|
| 813 |
+
<td><code>{{ t.name }}</code></td>
|
| 814 |
+
<td>{{ t.metric }}</td>
|
| 815 |
+
<td>{% if t.n_rows %}{{ "{:,}".format(t.n_rows) }}{% else %}TBD{% endif %}</td>
|
| 816 |
+
<td>{{ t.backend }}</td>
|
| 817 |
+
</tr>
|
| 818 |
+
{% endfor %}
|
| 819 |
+
</tbody>
|
| 820 |
+
</table>
|
| 821 |
+
<p>
|
| 822 |
+
Full documentation, CLI install, protocol spec, and how to add new tasks:
|
| 823 |
+
<a href="https://github.com/zhuconv/GraphTestbed" target="_blank" rel="noopener">github.com/zhuconv/GraphTestbed</a>.
|
| 824 |
+
</p>
|
| 825 |
+
</div>
|
| 826 |
+
|
| 827 |
+
<!-- ============== API VIEW ============== -->
|
| 828 |
+
<div class="secondary" id="panel-api">
|
| 829 |
+
<h2>Submit from the CLI</h2>
|
| 830 |
<pre><code>pip install git+https://github.com/zhuconv/GraphTestbed
|
| 831 |
gtb submit <task> --file preds.csv --agent <your-name>
|
| 832 |
gtb leaderboard <task></code></pre>
|
| 833 |
|
| 834 |
+
<h2>Submit via raw HTTP</h2>
|
| 835 |
+
<pre><code>curl -F task=<task> -F agent=<name> -F file=@preds.csv \
|
| 836 |
+
{{ base_url }}/submit</code></pre>
|
| 837 |
+
|
| 838 |
+
<h2>JSON endpoints</h2>
|
| 839 |
+
<table>
|
| 840 |
+
<thead><tr><th>Method</th><th>Path</th><th>Returns</th></tr></thead>
|
| 841 |
+
<tbody>
|
| 842 |
+
<tr><td>POST</td><td><code>/submit</code></td><td>multipart task=, agent=, file= → primary, secondary, leaderboard_rank, quota_remaining</td></tr>
|
| 843 |
+
<tr><td>GET</td><td><code>/leaderboard/<task></code></td><td>JSON list of {agent, primary, n_submissions, first_seen}</td></tr>
|
| 844 |
+
<tr><td>GET</td><td><code>/healthz</code></td><td>tasks, gt_present, quota, uptime</td></tr>
|
| 845 |
+
</tbody>
|
| 846 |
+
</table>
|
| 847 |
+
<p>
|
| 848 |
+
Submission CSV must contain exactly two columns
|
| 849 |
+
(<code>id_col</code>, <code>pred_col</code> per the per-task schema)
|
| 850 |
+
and exactly <code>n_rows</code> data rows. Full contract:
|
| 851 |
+
<a href="https://github.com/zhuconv/GraphTestbed/blob/main/PROTOCOL.md" target="_blank" rel="noopener">PROTOCOL.md</a>.
|
| 852 |
+
</p>
|
| 853 |
+
</div>
|
| 854 |
+
|
| 855 |
+
</main>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 856 |
|
| 857 |
<footer>
|
| 858 |
+
{{ n_subs_total }} total submissions across {{ n_tasks }} tasks ·
|
| 859 |
+
Flask + sqlite, snapshotted to a private HF dataset every 60s ·
|
| 860 |
+
<a href="/healthz">/healthz</a> ·
|
| 861 |
+
<a href="https://github.com/zhuconv/GraphTestbed" target="_blank" rel="noopener">GitHub</a>
|
| 862 |
</footer>
|
| 863 |
|
| 864 |
+
<script>
|
| 865 |
+
(function () {
|
| 866 |
+
// ---- view (leaderboard / about / api) routing via URL hash ----
|
| 867 |
+
/**
 * Route the page from the current URL hash.
 *
 * "#about" and "#api" select the matching secondary view; any other hash
 * (including none) selects the leaderboard view. The body class is set to
 * "view-<name>" and the nav link whose data-view matches gets the "primary"
 * class. In the leaderboard view, a hash that names an existing task tab
 * activates that tab.
 */
function applyView() {
  var hash = (location.hash || '').replace(/^#/, '');
  var view = (hash === 'about' || hash === 'api') ? hash : 'leaderboard';
  document.body.className = 'view-' + view;
  document.querySelectorAll('.view-link').forEach(function (a) {
    a.classList.toggle('primary', a.dataset.view === view);
  });
  if (view !== 'leaderboard' || !hash) return;

  // The hash is user-controlled. Compare it against each tab's data-task
  // instead of splicing it into a selector string: a hash containing a quote
  // or backslash would make querySelector throw and abort routing.
  var candidates = [hash];
  try {
    // location.hash is percent-encoded, so also try the decoded form.
    var decoded = decodeURIComponent(hash);
    if (decoded !== hash) candidates.push(decoded);
  } catch (e) {
    // Malformed escape sequence: fall back to the raw hash only.
  }
  var match = null;
  document.querySelectorAll('.tab').forEach(function (b) {
    if (match === null && candidates.indexOf(b.dataset.task) !== -1) {
      match = b.dataset.task;
    }
  });
  if (match !== null) activateTab(match);
}
|
| 880 |
+
|
| 881 |
+
/**
 * Show one task's leaderboard panel and mark its tab as selected.
 *
 * Every ".tab" gets its "active" class and aria-selected attribute set
 * according to whether its data-task equals taskName; every ".panel" is
 * hidden unless its data-task equals taskName.
 */
function activateTab(taskName) {
  var tabs = document.querySelectorAll('.tab');
  for (var i = 0; i < tabs.length; i++) {
    var selected = (tabs[i].dataset.task === taskName);
    tabs[i].classList.toggle('active', selected);
    tabs[i].setAttribute('aria-selected', String(selected));
  }

  var panels = document.querySelectorAll('.panel');
  for (var j = 0; j < panels.length; j++) {
    panels[j].hidden = !(panels[j].dataset.task === taskName);
  }
}
|
| 891 |
+
|
| 892 |
+
// Clicking a task tab shows that task's panel and records the task name in
// the URL hash so the selected tab can be shared as a link.
document.querySelectorAll('.tab').forEach(function (btn) {
  btn.addEventListener('click', function () {
    var t = btn.dataset.task;
    activateTab(t);
    // Leave the hash alone only when it names a secondary view exactly.
    // A prefix test would also skip task names that merely start with
    // "about" or "api", so those tabs would never become linkable.
    // replaceState is used so tab switching adds no history entries.
    if (!/^#(?:about|api)$/.test(location.hash)) {
      history.replaceState(null, '', '#' + t);
    }
  });
});
|
| 902 |
+
// Header nav links switch views purely by changing the URL hash; the
// hashchange listener below then re-runs the router.
document.querySelectorAll('.view-link').forEach(function (link) {
  link.addEventListener('click', function (evt) {
    evt.preventDefault();
    var target = link.dataset.view;
    if (target === 'leaderboard') {
      // The leaderboard is the default view, so it is addressed by an empty hash.
      location.hash = '';
    } else {
      location.hash = target;
    }
  });
});

// Re-route whenever the hash changes (nav click, back/forward, manual edit).
window.addEventListener('hashchange', applyView);

// Route once on load so a shared link opens on the right view.
applyView();
|
| 911 |
+
|
| 912 |
+
// ---- search-as-you-type ----
|
| 913 |
+
// Each search box filters the rows of the leaderboard table for its task
// (named by data-search-for) by case-insensitive substring match on the
// row's data-agent, then updates the matching "N agents" counter.
document.querySelectorAll('input[data-search-for]').forEach(function (box) {
  box.addEventListener('input', function () {
    var taskName = box.dataset.searchFor;
    var needle = box.value.trim().toLowerCase();
    var board = document.querySelector('table[data-table-for="' + taskName + '"]');
    if (!board) return;

    // The "no submissions" placeholder row is never counted or hidden.
    var dataRows = Array.prototype.filter.call(
      board.querySelectorAll('tbody tr'),
      function (tr) { return !tr.classList.contains('empty-row'); }
    );

    var shown = 0;
    dataRows.forEach(function (tr) {
      var agentName = (tr.dataset.agent || '').toLowerCase();
      var matches = needle === '' || agentName.indexOf(needle) !== -1;
      tr.style.display = matches ? '' : 'none';
      if (matches) shown += 1;
    });

    var label = document.querySelector('[data-count-for="' + taskName + '"]');
    if (!label) return;
    // With an active query show "visible / total"; otherwise just the total.
    var summary = needle ? (shown + ' / ' + dataRows.length) : dataRows.length;
    label.textContent = summary + ' agents';
  });
});
|
| 934 |
+
|
| 935 |
+
// ---- sortable columns ----
|
| 936 |
+
/**
 * Reorder the data rows of a leaderboard table in place.
 *
 * @param {HTMLTableElement} table  table whose first tbody is sorted
 * @param {string} key  one of 'rank', 'agent', 'primary', 'n_submissions',
 *                      'first_seen'; any other key compares all rows equal
 * @param {string} dir  'asc' for ascending; anything else sorts descending
 *
 * The "empty-row" placeholder is ignored, and a table with no data rows is
 * left untouched. Rank numbers and the r1/r2/r3 medal classes are rewritten
 * only for the canonical ordering (primary, descending); for every other
 * ordering the rank cell keeps its text and its first-seen value is stored
 * in data-orig-rank.
 */
function sortTable(table, key, dir) {
  var body = table.tBodies[0];
  var dataRows = Array.from(body.querySelectorAll('tr')).filter(function (tr) {
    return !tr.classList.contains('empty-row');
  });
  if (dataRows.length === 0) return;

  // One value extractor per sortable column; numeric columns fall back to 0
  // when the cell text does not parse.
  var readers = {
    rank: function (tr) { return parseInt(tr.cells[0].textContent, 10) || 0; },
    agent: function (tr) { return (tr.dataset.agent || '').toLowerCase(); },
    primary: function (tr) { return parseFloat(tr.cells[2].textContent) || 0; },
    n_submissions: function (tr) { return parseInt(tr.cells[3].textContent, 10) || 0; },
    first_seen: function (tr) { return tr.cells[4].textContent; }
  };
  var read = Object.prototype.hasOwnProperty.call(readers, key)
    ? readers[key]
    : function () { return 0; };
  var sign = (dir === 'asc') ? 1 : -1;

  dataRows.sort(function (left, right) {
    var lv = read(left);
    var rv = read(right);
    if (lv < rv) return -sign;
    if (lv > rv) return sign;
    return 0;
  });

  var canonical = (key === 'primary' && dir === 'desc');
  var medals = ['r1', 'r2', 'r3'];
  dataRows.forEach(function (tr, pos) {
    // Re-appending an attached row moves it, which yields the new order.
    body.appendChild(tr);
    var rankCell = tr.cells[0];
    if (canonical) {
      rankCell.textContent = (pos + 1);
      rankCell.className = pos < medals.length ? 'rank ' + medals[pos] : 'rank';
    } else if (!rankCell.dataset.origRank) {
      rankCell.dataset.origRank = rankCell.textContent.trim();
    }
  });
}
|
| 971 |
+
|
| 972 |
+
// Wire every leaderboard column header: a click sorts the table by that
// header's data-sort key and moves the direction arrow onto it.
document.querySelectorAll('table.lb thead th').forEach(function (th) {
  th.addEventListener('click', function () {
    var key = th.dataset.sort;
    // A header without data-sort has nothing to sort by; leave the current
    // indicator alone instead of showing an arrow for a sort that never ran.
    if (!key) return;

    var table = th.closest('table');
    var current = th.classList.contains('sorted')
      ? (th.dataset.dir === 'asc' ? 'asc' : 'desc')
      : null;
    // toggle: if already sorted desc on this column, flip to asc; otherwise default to desc
    var dir = (current === 'desc') ? 'asc' : 'desc';

    // Clear the indicator (class, arrow, aria-sort) from every header first.
    table.querySelectorAll('thead th').forEach(function (h) {
      h.classList.remove('sorted');
      h.removeAttribute('aria-sort');
      h.querySelectorAll('.arrow').forEach(function (a) { a.remove(); });
    });

    th.classList.add('sorted');
    th.dataset.dir = dir;
    // Expose the sort state to assistive technology, not just visually.
    th.setAttribute('aria-sort', dir === 'asc' ? 'ascending' : 'descending');

    var arrow = document.createElement('span');
    arrow.className = 'arrow';
    arrow.setAttribute('aria-hidden', 'true');
    arrow.textContent = (dir === 'asc') ? '\u25B4' : '\u25BE';
    th.appendChild(arrow);

    sortTable(table, key, dir);
  });
});
|
| 994 |
+
|
| 995 |
+
// ---- refresh from JSON endpoint ----
|
| 996 |
+
/**
 * Build the <tr> markup for one leaderboard entry.
 *
 * @param {Object} r    entry with agent, primary, n_submissions, first_seen
 * @param {number} idx  zero-based position; shown as rank idx + 1, and the
 *                      first three positions get the r1/r2/r3 class
 * @returns {string} HTML for a single table row
 *
 * The result is assigned to innerHTML by the caller, so every value taken
 * from the entry is HTML-escaped before being concatenated.
 */
function rowHTML(r, idx) {
  var ENTITIES = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;'
  };
  // Escapes the five characters that are significant in HTML text and in
  // quoted attribute values.
  function esc(value) {
    return String(value).replace(/[&<>"']/g, function (c) { return ENTITIES[c]; });
  }

  var rankCls = 'rank' + (idx === 0 ? ' r1' : idx === 1 ? ' r2' : idx === 2 ? ' r3' : '');
  // Keep only the leading 10 characters of first_seen (the date part when
  // the value is an ISO timestamp); String() guards against non-string input.
  var firstSeen = esc(String(r.first_seen || '').slice(0, 10));
  var safe = esc(r.agent || '');
  var subs = esc(r.n_submissions || 0);

  return '<tr data-agent="' + safe + '">'
    + '<td class="' + rankCls + '">' + (idx + 1) + '</td>'
    + '<td class="agent">' + safe + '</td>'
    + '<td class="score">' + Number(r.primary).toFixed(3) + '</td>'
    + '<td class="subs num">' + subs + '</td>'
    + '<td class="date">' + firstSeen + '</td>'
    + '</tr>';
}
|
| 1011 |
+
// Wire every Refresh button: a click re-fetches /leaderboard/<task> and
// rebuilds that task's table body, tab badge and agent counter from the
// returned JSON array.
document.querySelectorAll('button[data-refresh-for]').forEach(function (btn) {
  btn.addEventListener('click', function () {
    var task = btn.dataset.refreshFor;
    btn.disabled = true; btn.textContent = 'Refreshing…';
    fetch('/leaderboard/' + encodeURIComponent(task))
      .then(function (r) {
        // fetch() only rejects on network failure; an HTTP error status
        // must be turned into a rejection explicitly.
        if (!r.ok) throw new Error('HTTP ' + r.status);
        return r.json();
      })
      .then(function (data) {
        // Everything below relies on an array (length, map). Bail out
        // before touching the DOM so a malformed or error payload cannot
        // blank an already-rendered table.
        if (!Array.isArray(data)) throw new Error('unexpected leaderboard payload');

        var table = document.querySelector('table[data-table-for="' + task + '"]');
        if (!table) return;
        var tbody = table.tBodies[0];
        if (!data.length) {
          tbody.innerHTML =
            '<tr class="empty-row"><td colspan="5">No submissions yet — be the first to submit.</td></tr>';
        } else {
          tbody.innerHTML = data.map(rowHTML).join('');
        }
        var tab = document.querySelector('.tab[data-task="' + task + '"] .badge');
        if (tab) tab.textContent = data.length;
        var counter = document.querySelector('[data-count-for="' + task + '"]');
        if (counter) counter.textContent = data.length + ' agents';
        // reset search: the fresh rows are all visible, so clear the filter text
        var input = document.querySelector('input[data-search-for="' + task + '"]');
        if (input) input.value = '';
        // NOTE(review): any sort indicator already on the header is left as
        // is, while the rows are now in the order the endpoint returned.
      })
      .catch(function (e) {
        console.error('refresh failed', e);
      })
      .finally(function () {
        // Re-enable the button whether the refresh succeeded or failed.
        btn.disabled = false; btn.textContent = 'Refresh';
      });
  });
});
|
| 1043 |
+
})();
|
| 1044 |
+
</script>
|
| 1045 |
+
|
| 1046 |
</body>
|
| 1047 |
</html>
|
| 1048 |
"""
|
|
|
|
| 1050 |
|
| 1051 |
@app.get("/")
|
| 1052 |
def landing():
|
| 1053 |
+
"""Leaderboard-first single-page UI.
|
| 1054 |
+
|
| 1055 |
+
Server-side renders the per-task tables for instant first paint; a tiny
|
| 1056 |
+
inline JS layer adds search, sort, tab-switching and refresh-from-JSON
|
| 1057 |
+
on top, all consuming the existing /leaderboard/<task> endpoint.
|
| 1058 |
+
"""
|
| 1059 |
manifest = _manifest()
|
| 1060 |
conn = _db()
|
| 1061 |
tasks = []
|
|
|
|
| 1078 |
"pred_col": s["pred_col"],
|
| 1079 |
"n_rows": n_rows_cfg if n_rows_cfg not in ("TBD", None) else None,
|
| 1080 |
"gt_present": (GT_DIR / f"{name}.csv").exists(),
|
| 1081 |
+
"backend": cfg.get("backend", "gt"),
|
| 1082 |
"rows": [{"agent": a, "primary": p, "n_subs": n, "first_seen": f}
|
| 1083 |
for (a, p, n, f) in rows],
|
| 1084 |
})
|
| 1085 |
n_subs_total += sum(r["n_subs"] for r in tasks[-1]["rows"])
|
| 1086 |
conn.close()
|
| 1087 |
|
| 1088 |
+
base_url = request.url_root.rstrip("/")
|
| 1089 |
+
|
| 1090 |
return render_template_string(
|
| 1091 |
_LANDING_TMPL,
|
| 1092 |
tasks=tasks,
|
| 1093 |
n_tasks=len(tasks),
|
| 1094 |
n_subs_total=n_subs_total,
|
| 1095 |
quota=QUOTA_PER_DAY,
|
| 1096 |
+
base_url=base_url,
|
| 1097 |
)
|
| 1098 |
|
| 1099 |
|