Spaces:

lanczos
/

graphtestbed

Running

Zhu Jiajun (jz28583) Claude Opus 4.7 (1M context) committed on Apr 20

Commit

464248e

1 Parent(s): 5ead61d

Add Flask + Jinja2 landing page at GET /

Single-page UI rendered server-side: title + status pills (task count,
submission count, quota, healthz/github links), three-line Quickstart,
then per-task sections with description, schema/metric metadata, GT-loaded
indicator, and a rank/agent/primary/subs/first-seen leaderboard table.
Empty state for tasks with no entries. Inline CSS, no external assets, no JS.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (1) hide show

server/api.py +155 -1

server/api.py CHANGED Viewed

@@ -36,7 +36,7 @@ from pathlib import Path
 import pandas as pd
 import yaml
-from flask import Flask, jsonify, request
 GT_DIR = Path(os.environ.get("GT_DIR", "/var/graphtestbed/gt"))
@@ -282,6 +282,160 @@ def healthz():
     })
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", "8080"))
     app.run(host="0.0.0.0", port=port)

 import pandas as pd
 import yaml
+from flask import Flask, jsonify, render_template_string, request
 GT_DIR = Path(os.environ.get("GT_DIR", "/var/graphtestbed/gt"))
     })
+# Jinja2 source for the landing page served at GET /, rendered by landing()
+# through render_template_string.
+#
+# Context variables the template reads:
+#   n_tasks, n_subs_total, quota  - values shown in the status pills
+#   tasks                         - iterable of per-task entries exposing
+#       name, metric, n_rows, id_col, pred_col, gt_present, description, rows
+#   tasks[i].rows                 - leaderboard entries exposing
+#       agent, primary, n_subs, first_seen
+#
+# Notes:
+#   * The row-count snippet is emitted only when t.n_rows is truthy.
+#   * r.primary is formatted with "%.3f" and r.first_seen is sliced to its
+#     first 19 characters with a literal "Z" appended.
+#     NOTE(review): this assumes primary is numeric and first_seen is an
+#     ISO-8601 UTC string - confirm against the submissions table.
+#   * All CSS is inline; the page references no scripts or external assets.
+_LANDING_TMPL = """<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <title>GraphTestbed Scoring</title>
+  <style>
+    body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", system-ui, sans-serif;
+           max-width: 920px; margin: 2em auto; padding: 0 1em; color: #1f2328; line-height: 1.55; }
+    h1 { margin: 0 0 .15em; }
+    h2 { margin-top: 1.8em; padding-bottom: .25em; border-bottom: 1px solid #d0d7de; }
+    .subtitle { color: #57606a; margin: 0 0 1em; }
+    pre { background: #f6f8fa; padding: 12px 14px; border-radius: 6px;
+          overflow-x: auto; font-size: 13px; line-height: 1.45; }
+    code { background: #eaeef2; padding: 1px 5px; border-radius: 4px; font-size: 90%; }
+    pre code { background: transparent; padding: 0; font-size: inherit; }
+    table { border-collapse: collapse; width: 100%; margin: .4em 0 1.4em; font-size: 14px; }
+    th, td { padding: .5em .75em; text-align: left; border-bottom: 1px solid #eaeef2; }
+    th { background: #f6f8fa; font-weight: 600; font-size: 13px; }
+    td.num { text-align: right; font-variant-numeric: tabular-nums; }
+    td.rank { text-align: right; color: #57606a; width: 3em; }
+    td.rank-1 { color: #bf8700; font-weight: 700; }
+    .empty { color: #8b949e; font-style: italic; padding: .4em 0; font-size: 14px; }
+    .meta { color: #57606a; font-size: 13px; margin: .25em 0 .8em; }
+    .meta code { font-size: 90%; }
+    .pills { display: flex; gap: .5em; flex-wrap: wrap; margin: .5em 0 1.5em; }
+    .pill { background: #ddf4ff; color: #0969da; padding: .2em .65em; border-radius: 12px;
+            font-size: 12px; font-weight: 500; text-decoration: none; }
+    .pill:hover { background: #b6e3ff; }
+    .pill.gt { background: #dafbe1; color: #1a7f37; }
+    .pill.warn { background: #fff8c5; color: #9a6700; }
+    a { color: #0969da; }
+    footer { margin-top: 3em; padding-top: 1em; border-top: 1px solid #d0d7de;
+             color: #8b949e; font-size: 13px; }
+  </style>
+</head>
+<body>
+<h1>📊 GraphTestbed Scoring</h1>
+<p class="subtitle">
+  Public leaderboard for benchmarking ML/AI agent harnesses on heterogeneous graph datasets.
+</p>
+<div class="pills">
+  <span class="pill">{{ n_tasks }} tasks</span>
+  <span class="pill">{{ n_subs_total }} submissions</span>
+  <span class="pill">quota: {{ quota }}/day/IP/task</span>
+  <a class="pill" href="/healthz">healthz</a>
+  <a class="pill" href="https://github.com/zhuconv/GraphTestbed">github</a>
+</div>
+<h2>Quickstart</h2>
+<pre><code>pip install git+https://github.com/zhuconv/GraphTestbed
+gtb submit &lt;task&gt; --file preds.csv --agent &lt;your-name&gt;
+gtb leaderboard &lt;task&gt;</code></pre>
+<p>
+  Submission CSV must have exactly two columns
+  (<code>id_col</code>, <code>pred_col</code> per the per-task schema below)
+  and exactly <code>n_rows</code> data rows. Schema is checked client-side
+  first, so a malformed file never burns a quota slot.
+  Full contract: <a href="https://github.com/zhuconv/GraphTestbed/blob/main/PROTOCOL.md">PROTOCOL.md</a>.
+</p>
+{% for t in tasks %}
+<h2 id="{{ t.name }}">{{ t.name }}</h2>
+<p class="meta">
+  metric: <code>{{ t.metric }}</code>
+  {%- if t.n_rows %} · <code>{{ t.n_rows }}</code> test rows{% endif %} ·
+  columns: <code>[{{ t.id_col }}, {{ t.pred_col }}]</code>
+  {% if t.gt_present %}<span class="pill gt">GT loaded</span>
+  {%- else %}<span class="pill warn">GT missing</span>{% endif %}
+</p>
+{% if t.description %}<p>{{ t.description|trim }}</p>{% endif %}
+{% if t.rows %}
+<table>
+  <thead>
+    <tr><th>#</th><th>agent</th><th class="num">{{ t.metric }}</th><th class="num">subs</th><th>first&nbsp;seen</th></tr>
+  </thead>
+  <tbody>
+    {% for r in t.rows %}
+    <tr>
+      <td class="rank{% if loop.index == 1 %} rank-1{% endif %}">{{ loop.index }}</td>
+      <td><code>{{ r.agent }}</code></td>
+      <td class="num">{{ "%.3f"|format(r.primary) }}</td>
+      <td class="num">{{ r.n_subs }}</td>
+      <td><small>{{ r.first_seen[:19] }}Z</small></td>
+    </tr>
+    {% endfor %}
+  </tbody>
+</table>
+{% else %}
+<p class="empty">no submissions yet</p>
+{% endif %}
+{% endfor %}
+<h2>Endpoints</h2>
+<pre><code>POST /submit                          multipart task=&amp;agent=&amp;file=&lt;csv&gt;
+                                      → {primary, secondary, leaderboard_rank, quota_remaining, ...}
+GET  /leaderboard/&lt;task&gt;              JSON: per-agent best, sorted by primary desc
+GET  /healthz                         JSON: tasks, gt_present, quota
+GET  /                                this page</code></pre>
+<footer>
+  Backed by Flask + sqlite at <code>/data/leaderboard.db</code>; periodic
+  snapshot to a private HF dataset for durability. Non-adversarial trust
+  model — see PROTOCOL.md.
+</footer>
+</body>
+</html>
+"""
+@app.get("/")
+def landing():
+    """Single-page UI: quickstart + per-task leaderboard tables."""
+    manifest = _manifest()
+    conn = _db()
+    tasks = []
+    n_subs_total = 0
+    for name in sorted(manifest):
+        cfg = manifest[name]
+        s = cfg["submission_schema"]
+        rows = conn.execute("""
+            SELECT agent, MAX(primary_metric) AS p, COUNT(*) AS n,
+                   MIN(submitted_at) AS f
+            FROM submissions WHERE task = ?
+            GROUP BY agent ORDER BY p DESC
+        """, (name,)).fetchall()
+        n_rows_cfg = s.get("n_rows")
+        tasks.append({
+            "name": name,
+            "description": str(cfg.get("description", "")),
+            "metric": cfg["metric"]["primary"],
+            "id_col": s["id_col"],
+            "pred_col": s["pred_col"],
+            "n_rows": n_rows_cfg if n_rows_cfg not in ("TBD", None) else None,
+            "gt_present": (GT_DIR / f"{name}.csv").exists(),
+            "rows": [{"agent": a, "primary": p, "n_subs": n, "first_seen": f}
+                     for (a, p, n, f) in rows],
+        })
+        n_subs_total += sum(r["n_subs"] for r in tasks[-1]["rows"])
+    conn.close()
+    return render_template_string(
+        _LANDING_TMPL,
+        tasks=tasks,
+        n_tasks=len(tasks),
+        n_subs_total=n_subs_total,
+        quota=QUOTA_PER_DAY,
+    )
 # Script entry point: serve the app on all interfaces, taking the port from
 # the PORT environment variable (default 8080).
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", "8080"))
     app.run(host="0.0.0.0", port=port)