Spaces:
Running
Running
Commit ·
f0a2927
0
Parent(s):
Initialize SourceBench leaderboard Space
Browse files
- README.md +21 -0
- index.html +1308 -0
- leaderboard_data.js +0 -0
- leaderboard_data.json +0 -0
- wuklab_logo.png +0 -0
README.md
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: SourceBench Leaderboard
|
| 3 |
+
emoji: 📚
|
| 4 |
+
colorFrom: yellow
|
| 5 |
+
colorTo: red
|
| 6 |
+
sdk: static
|
| 7 |
+
app_file: index.html
|
| 8 |
+
pinned: false
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# SourceBench Leaderboard
|
| 12 |
+
|
| 13 |
+
This Hugging Face Space hosts the public-facing SourceBench leaderboard frontend.
|
| 14 |
+
|
| 15 |
+
Official leaderboard entries are validated and judged by the SourceBench team.
|
| 16 |
+
|
| 17 |
+
Resources:
|
| 18 |
+
|
| 19 |
+
- Benchmark repo: https://github.com/WukLab/SourceBench
|
| 20 |
+
- arXiv: https://arxiv.org/abs/2602.16942
|
| 21 |
+
- Blog: https://mlsys.wuklab.io/posts/sourcebench/
|
index.html
ADDED
|
@@ -0,0 +1,1308 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>SourceBench Leaderboard</title>
|
| 7 |
+
<style>
|
| 8 |
+
:root {
|
| 9 |
+
--bg: #f6efe4;
|
| 10 |
+
--paper: rgba(255, 251, 244, 0.92);
|
| 11 |
+
--paper-solid: #fff8ef;
|
| 12 |
+
--ink: #172033;
|
| 13 |
+
--muted: #6d7382;
|
| 14 |
+
--line: rgba(23, 32, 51, 0.12);
|
| 15 |
+
--accent: #b24a1f;
|
| 16 |
+
--accent-deep: #7c2d12;
|
| 17 |
+
--gold: #e9a23b;
|
| 18 |
+
--green: #206a5d;
|
| 19 |
+
--blue: #255a8f;
|
| 20 |
+
--shadow: 0 18px 40px rgba(23, 32, 51, 0.09);
|
| 21 |
+
--radius: 22px;
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
* {
|
| 25 |
+
box-sizing: border-box;
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
html {
|
| 29 |
+
scroll-behavior: smooth;
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
body {
|
| 33 |
+
margin: 0;
|
| 34 |
+
color: var(--ink);
|
| 35 |
+
background:
|
| 36 |
+
radial-gradient(circle at 0% 0%, rgba(233, 162, 59, 0.18), transparent 28%),
|
| 37 |
+
radial-gradient(circle at 100% 10%, rgba(178, 74, 31, 0.14), transparent 22%),
|
| 38 |
+
radial-gradient(circle at 80% 100%, rgba(37, 90, 143, 0.09), transparent 22%),
|
| 39 |
+
linear-gradient(180deg, #f8f2e8 0%, #f4ebdf 100%);
|
| 40 |
+
font-family: "Avenir Next", "Segoe UI", "Helvetica Neue", sans-serif;
|
| 41 |
+
font-size: 17px;
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
.shell {
|
| 45 |
+
max-width: 1280px;
|
| 46 |
+
margin: 0 auto;
|
| 47 |
+
padding: 26px 32px 72px;
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
.topbar {
|
| 51 |
+
display: flex;
|
| 52 |
+
justify-content: space-between;
|
| 53 |
+
align-items: center;
|
| 54 |
+
gap: 16px;
|
| 55 |
+
margin-bottom: 18px;
|
| 56 |
+
flex-wrap: wrap;
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
.brand {
|
| 60 |
+
display: flex;
|
| 61 |
+
align-items: center;
|
| 62 |
+
gap: 12px;
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
.brand-mark {
|
| 66 |
+
width: 84px;
|
| 67 |
+
height: 66px;
|
| 68 |
+
border-radius: 0;
|
| 69 |
+
display: flex;
|
| 70 |
+
align-items: center;
|
| 71 |
+
justify-content: center;
|
| 72 |
+
background: transparent;
|
| 73 |
+
color: var(--ink);
|
| 74 |
+
font-weight: 700;
|
| 75 |
+
overflow: hidden;
|
| 76 |
+
border: none;
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
.brand-mark img {
|
| 80 |
+
width: 84px;
|
| 81 |
+
height: auto;
|
| 82 |
+
display: block;
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
.brand-copy strong {
|
| 86 |
+
display: block;
|
| 87 |
+
font-size: 1rem;
|
| 88 |
+
letter-spacing: 0.06em;
|
| 89 |
+
text-transform: uppercase;
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
.brand-copy span {
|
| 93 |
+
color: var(--muted);
|
| 94 |
+
font-size: 0.92rem;
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
.nav {
|
| 98 |
+
display: flex;
|
| 99 |
+
flex-wrap: wrap;
|
| 100 |
+
gap: 10px;
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
.nav a,
|
| 104 |
+
button {
|
| 105 |
+
appearance: none;
|
| 106 |
+
border: 1px solid rgba(23, 32, 51, 0.18);
|
| 107 |
+
background: rgba(255, 255, 255, 0.65);
|
| 108 |
+
color: var(--ink);
|
| 109 |
+
padding: 10px 14px;
|
| 110 |
+
border-radius: 999px;
|
| 111 |
+
text-decoration: none;
|
| 112 |
+
font: inherit;
|
| 113 |
+
cursor: pointer;
|
| 114 |
+
transition: transform 120ms ease, background 120ms ease, border-color 120ms ease;
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
.nav a:hover,
|
| 118 |
+
button:hover {
|
| 119 |
+
transform: translateY(-1px);
|
| 120 |
+
background: white;
|
| 121 |
+
border-color: rgba(23, 32, 51, 0.28);
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
.hero {
|
| 125 |
+
display: block;
|
| 126 |
+
margin-bottom: 18px;
|
| 127 |
+
}
|
| 128 |
+
|
| 129 |
+
.panel {
|
| 130 |
+
background: var(--paper);
|
| 131 |
+
border: 1px solid var(--line);
|
| 132 |
+
border-radius: var(--radius);
|
| 133 |
+
box-shadow: var(--shadow);
|
| 134 |
+
backdrop-filter: blur(8px);
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
+
.plain-section {
|
| 138 |
+
padding: 8px 8px 2px;
|
| 139 |
+
margin-bottom: 22px;
|
| 140 |
+
}
|
| 141 |
+
|
| 142 |
+
.hero-main {
|
| 143 |
+
padding: 34px 30px 30px;
|
| 144 |
+
min-height: 300px;
|
| 145 |
+
border: none;
|
| 146 |
+
border-radius: 0;
|
| 147 |
+
box-shadow: none;
|
| 148 |
+
backdrop-filter: none;
|
| 149 |
+
background:
|
| 150 |
+
linear-gradient(150deg, rgba(255,255,255,0.42), rgba(255,248,239,0.2)),
|
| 151 |
+
linear-gradient(135deg, rgba(178,74,31,0.035), rgba(37,90,143,0.02));
|
| 152 |
+
}
|
| 153 |
+
|
| 154 |
+
h1, h2, h3 {
|
| 155 |
+
margin: 0;
|
| 156 |
+
line-height: 1.05;
|
| 157 |
+
}
|
| 158 |
+
|
| 159 |
+
.hero-label {
|
| 160 |
+
display: block;
|
| 161 |
+
margin-bottom: 12px;
|
| 162 |
+
color: var(--accent);
|
| 163 |
+
font-size: 1.52rem;
|
| 164 |
+
font-weight: 700;
|
| 165 |
+
letter-spacing: 0.01em;
|
| 166 |
+
text-align: center;
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
h1 {
|
| 170 |
+
font-family: Georgia, "Times New Roman", serif;
|
| 171 |
+
font-size: clamp(2rem, 3.1vw, 3.2rem);
|
| 172 |
+
letter-spacing: -0.05em;
|
| 173 |
+
margin-bottom: 18px;
|
| 174 |
+
max-width: none;
|
| 175 |
+
white-space: nowrap;
|
| 176 |
+
text-align: center;
|
| 177 |
+
}
|
| 178 |
+
|
| 179 |
+
.hero-text {
|
| 180 |
+
max-width: 1040px;
|
| 181 |
+
font-size: 1.1rem;
|
| 182 |
+
line-height: 1.72;
|
| 183 |
+
color: #374151;
|
| 184 |
+
margin-bottom: 24px;
|
| 185 |
+
margin-left: auto;
|
| 186 |
+
margin-right: auto;
|
| 187 |
+
}
|
| 188 |
+
|
| 189 |
+
.resource-links {
|
| 190 |
+
display: flex;
|
| 191 |
+
gap: 10px;
|
| 192 |
+
flex-wrap: wrap;
|
| 193 |
+
margin-bottom: 20px;
|
| 194 |
+
justify-content: center;
|
| 195 |
+
}
|
| 196 |
+
|
| 197 |
+
.resource-links a {
|
| 198 |
+
display: inline-flex;
|
| 199 |
+
align-items: center;
|
| 200 |
+
gap: 7px;
|
| 201 |
+
color: var(--blue);
|
| 202 |
+
text-decoration: none;
|
| 203 |
+
font-size: 0.96rem;
|
| 204 |
+
border-bottom: 1px solid rgba(37, 90, 143, 0.25);
|
| 205 |
+
padding-bottom: 1px;
|
| 206 |
+
}
|
| 207 |
+
|
| 208 |
+
.resource-links a:hover {
|
| 209 |
+
color: var(--accent);
|
| 210 |
+
border-bottom-color: rgba(178, 74, 31, 0.35);
|
| 211 |
+
}
|
| 212 |
+
|
| 213 |
+
.resource-links svg {
|
| 214 |
+
width: 15px;
|
| 215 |
+
height: 15px;
|
| 216 |
+
display: block;
|
| 217 |
+
flex: 0 0 auto;
|
| 218 |
+
fill: currentColor;
|
| 219 |
+
}
|
| 220 |
+
|
| 221 |
+
.hero-points {
|
| 222 |
+
display: grid;
|
| 223 |
+
grid-template-columns: repeat(3, minmax(0, 1fr));
|
| 224 |
+
gap: 14px;
|
| 225 |
+
padding-top: 10px;
|
| 226 |
+
border-top: 1px solid rgba(23, 32, 51, 0.08);
|
| 227 |
+
}
|
| 228 |
+
|
| 229 |
+
.study-section {
|
| 230 |
+
margin: 24px 0 28px;
|
| 231 |
+
}
|
| 232 |
+
|
| 233 |
+
.study-copy {
|
| 234 |
+
max-width: 860px;
|
| 235 |
+
color: var(--muted);
|
| 236 |
+
line-height: 1.65;
|
| 237 |
+
margin: 0 0 14px;
|
| 238 |
+
}
|
| 239 |
+
|
| 240 |
+
.study-list {
|
| 241 |
+
display: grid;
|
| 242 |
+
gap: 10px;
|
| 243 |
+
margin: 0 0 16px;
|
| 244 |
+
}
|
| 245 |
+
|
| 246 |
+
.study-item {
|
| 247 |
+
padding-left: 18px;
|
| 248 |
+
position: relative;
|
| 249 |
+
color: var(--muted);
|
| 250 |
+
line-height: 1.6;
|
| 251 |
+
}
|
| 252 |
+
|
| 253 |
+
.study-item::before {
|
| 254 |
+
content: "";
|
| 255 |
+
position: absolute;
|
| 256 |
+
left: 0;
|
| 257 |
+
top: 0.55rem;
|
| 258 |
+
width: 8px;
|
| 259 |
+
height: 8px;
|
| 260 |
+
border-radius: 999px;
|
| 261 |
+
background: var(--accent);
|
| 262 |
+
}
|
| 263 |
+
|
| 264 |
+
.study-item strong {
|
| 265 |
+
color: var(--ink);
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
.hero-point {
|
| 269 |
+
padding: 10px 6px 0 0;
|
| 270 |
+
}
|
| 271 |
+
|
| 272 |
+
.hero-point strong {
|
| 273 |
+
display: block;
|
| 274 |
+
margin-bottom: 6px;
|
| 275 |
+
font-size: 1rem;
|
| 276 |
+
}
|
| 277 |
+
|
| 278 |
+
.hero-point span {
|
| 279 |
+
color: var(--muted);
|
| 280 |
+
font-size: 0.98rem;
|
| 281 |
+
line-height: 1.55;
|
| 282 |
+
}
|
| 283 |
+
|
| 284 |
+
.section-kicker {
|
| 285 |
+
font-size: 0.88rem;
|
| 286 |
+
text-transform: uppercase;
|
| 287 |
+
letter-spacing: 0.08em;
|
| 288 |
+
color: var(--accent);
|
| 289 |
+
margin-bottom: 8px;
|
| 290 |
+
}
|
| 291 |
+
|
| 292 |
+
.micro {
|
| 293 |
+
color: var(--muted);
|
| 294 |
+
font-size: 0.98rem;
|
| 295 |
+
line-height: 1.58;
|
| 296 |
+
}
|
| 297 |
+
|
| 298 |
+
.stats {
|
| 299 |
+
display: grid;
|
| 300 |
+
grid-template-columns: repeat(5, minmax(0, 1fr));
|
| 301 |
+
gap: 12px;
|
| 302 |
+
margin: 18px 0;
|
| 303 |
+
}
|
| 304 |
+
|
| 305 |
+
.stat {
|
| 306 |
+
padding: 20px;
|
| 307 |
+
}
|
| 308 |
+
|
| 309 |
+
.stat-label {
|
| 310 |
+
display: block;
|
| 311 |
+
color: var(--muted);
|
| 312 |
+
font-size: 0.84rem;
|
| 313 |
+
text-transform: uppercase;
|
| 314 |
+
letter-spacing: 0.08em;
|
| 315 |
+
margin-bottom: 8px;
|
| 316 |
+
}
|
| 317 |
+
|
| 318 |
+
.stat-value {
|
| 319 |
+
font-family: Georgia, "Times New Roman", serif;
|
| 320 |
+
font-size: 2rem;
|
| 321 |
+
line-height: 1;
|
| 322 |
+
margin-bottom: 6px;
|
| 323 |
+
}
|
| 324 |
+
|
| 325 |
+
.stat-sub {
|
| 326 |
+
color: var(--muted);
|
| 327 |
+
font-size: 0.96rem;
|
| 328 |
+
}
|
| 329 |
+
|
| 330 |
+
.grid-2 {
|
| 331 |
+
display: grid;
|
| 332 |
+
grid-template-columns: 1fr 1fr;
|
| 333 |
+
gap: 18px;
|
| 334 |
+
margin-bottom: 18px;
|
| 335 |
+
}
|
| 336 |
+
|
| 337 |
+
.section {
|
| 338 |
+
padding: 24px;
|
| 339 |
+
}
|
| 340 |
+
|
| 341 |
+
.section-head {
|
| 342 |
+
display: flex;
|
| 343 |
+
justify-content: space-between;
|
| 344 |
+
align-items: baseline;
|
| 345 |
+
gap: 12px;
|
| 346 |
+
margin-bottom: 16px;
|
| 347 |
+
flex-wrap: wrap;
|
| 348 |
+
}
|
| 349 |
+
|
| 350 |
+
.section-head p {
|
| 351 |
+
margin: 0;
|
| 352 |
+
color: var(--muted);
|
| 353 |
+
max-width: 720px;
|
| 354 |
+
line-height: 1.55;
|
| 355 |
+
}
|
| 356 |
+
|
| 357 |
+
.metric-intro {
|
| 358 |
+
margin: 0 0 16px;
|
| 359 |
+
color: var(--muted);
|
| 360 |
+
max-width: none;
|
| 361 |
+
line-height: 1.65;
|
| 362 |
+
}
|
| 363 |
+
|
| 364 |
+
.metric-core {
|
| 365 |
+
display: grid;
|
| 366 |
+
grid-template-columns: repeat(3, minmax(0, 1fr));
|
| 367 |
+
gap: 10px 18px;
|
| 368 |
+
margin: 0 0 14px;
|
| 369 |
+
padding: 0;
|
| 370 |
+
list-style: none;
|
| 371 |
+
}
|
| 372 |
+
|
| 373 |
+
.metric-core li {
|
| 374 |
+
padding-left: 16px;
|
| 375 |
+
position: relative;
|
| 376 |
+
color: var(--muted);
|
| 377 |
+
line-height: 1.55;
|
| 378 |
+
font-size: 0.97rem;
|
| 379 |
+
}
|
| 380 |
+
|
| 381 |
+
.metric-core li::before {
|
| 382 |
+
content: "";
|
| 383 |
+
position: absolute;
|
| 384 |
+
left: 0;
|
| 385 |
+
top: 0.58rem;
|
| 386 |
+
width: 7px;
|
| 387 |
+
height: 7px;
|
| 388 |
+
border-radius: 999px;
|
| 389 |
+
background: var(--blue);
|
| 390 |
+
}
|
| 391 |
+
|
| 392 |
+
.metric-core strong {
|
| 393 |
+
color: var(--ink);
|
| 394 |
+
}
|
| 395 |
+
|
| 396 |
+
.metric-defs {
|
| 397 |
+
display: grid;
|
| 398 |
+
grid-template-columns: repeat(4, minmax(0, 1fr));
|
| 399 |
+
gap: 12px 18px;
|
| 400 |
+
margin: 0 0 18px;
|
| 401 |
+
padding: 0;
|
| 402 |
+
list-style: none;
|
| 403 |
+
}
|
| 404 |
+
|
| 405 |
+
.metric-defs li {
|
| 406 |
+
padding-left: 16px;
|
| 407 |
+
position: relative;
|
| 408 |
+
color: var(--muted);
|
| 409 |
+
line-height: 1.55;
|
| 410 |
+
font-size: 0.97rem;
|
| 411 |
+
}
|
| 412 |
+
|
| 413 |
+
.metric-defs li::before {
|
| 414 |
+
content: "";
|
| 415 |
+
position: absolute;
|
| 416 |
+
left: 0;
|
| 417 |
+
top: 0.58rem;
|
| 418 |
+
width: 7px;
|
| 419 |
+
height: 7px;
|
| 420 |
+
border-radius: 999px;
|
| 421 |
+
background: var(--accent);
|
| 422 |
+
}
|
| 423 |
+
|
| 424 |
+
.metric-defs strong {
|
| 425 |
+
color: var(--ink);
|
| 426 |
+
}
|
| 427 |
+
|
| 428 |
+
.findings,
|
| 429 |
+
.plain-list {
|
| 430 |
+
display: grid;
|
| 431 |
+
gap: 12px;
|
| 432 |
+
}
|
| 433 |
+
|
| 434 |
+
.finding,
|
| 435 |
+
.plain-item {
|
| 436 |
+
padding: 0 0 0 18px;
|
| 437 |
+
position: relative;
|
| 438 |
+
}
|
| 439 |
+
|
| 440 |
+
.finding::before,
|
| 441 |
+
.plain-item::before {
|
| 442 |
+
content: "";
|
| 443 |
+
position: absolute;
|
| 444 |
+
left: 0;
|
| 445 |
+
top: 0.5rem;
|
| 446 |
+
width: 8px;
|
| 447 |
+
height: 8px;
|
| 448 |
+
border-radius: 999px;
|
| 449 |
+
background: var(--accent);
|
| 450 |
+
}
|
| 451 |
+
|
| 452 |
+
.finding strong,
|
| 453 |
+
.plain-item strong {
|
| 454 |
+
display: block;
|
| 455 |
+
margin-bottom: 6px;
|
| 456 |
+
font-size: 1.06rem;
|
| 457 |
+
}
|
| 458 |
+
|
| 459 |
+
.finding span,
|
| 460 |
+
.plain-item span {
|
| 461 |
+
color: var(--muted);
|
| 462 |
+
line-height: 1.55;
|
| 463 |
+
font-size: 0.98rem;
|
| 464 |
+
}
|
| 465 |
+
|
| 466 |
+
.mini-table {
|
| 467 |
+
overflow-x: auto;
|
| 468 |
+
border-radius: 18px;
|
| 469 |
+
border: 1px solid var(--line);
|
| 470 |
+
background: rgba(255,255,255,0.72);
|
| 471 |
+
}
|
| 472 |
+
|
| 473 |
+
table {
|
| 474 |
+
width: 100%;
|
| 475 |
+
border-collapse: collapse;
|
| 476 |
+
min-width: 760px;
|
| 477 |
+
}
|
| 478 |
+
|
| 479 |
+
th, td {
|
| 480 |
+
text-align: left;
|
| 481 |
+
padding: 12px 14px;
|
| 482 |
+
border-bottom: 1px solid rgba(23, 32, 51, 0.08);
|
| 483 |
+
white-space: nowrap;
|
| 484 |
+
font-size: 0.93rem;
|
| 485 |
+
}
|
| 486 |
+
|
| 487 |
+
th {
|
| 488 |
+
background: rgba(255,248,239,0.97);
|
| 489 |
+
color: var(--accent);
|
| 490 |
+
font-size: 0.8rem;
|
| 491 |
+
text-transform: uppercase;
|
| 492 |
+
letter-spacing: 0.08em;
|
| 493 |
+
}
|
| 494 |
+
|
| 495 |
+
.stage-grid {
|
| 496 |
+
display: grid;
|
| 497 |
+
grid-template-columns: repeat(4, minmax(0, 1fr));
|
| 498 |
+
gap: 12px;
|
| 499 |
+
}
|
| 500 |
+
|
| 501 |
+
.stage {
|
| 502 |
+
padding: 16px;
|
| 503 |
+
border-radius: 18px;
|
| 504 |
+
background: rgba(255,255,255,0.72);
|
| 505 |
+
border: 1px solid var(--line);
|
| 506 |
+
position: relative;
|
| 507 |
+
overflow: hidden;
|
| 508 |
+
}
|
| 509 |
+
|
| 510 |
+
.stage::before {
|
| 511 |
+
content: "";
|
| 512 |
+
position: absolute;
|
| 513 |
+
inset: 0 auto 0 0;
|
| 514 |
+
width: 5px;
|
| 515 |
+
background: linear-gradient(180deg, var(--gold), var(--accent));
|
| 516 |
+
}
|
| 517 |
+
|
| 518 |
+
.stage small {
|
| 519 |
+
display: block;
|
| 520 |
+
color: var(--accent);
|
| 521 |
+
text-transform: uppercase;
|
| 522 |
+
letter-spacing: 0.08em;
|
| 523 |
+
margin-bottom: 8px;
|
| 524 |
+
font-size: 0.78rem;
|
| 525 |
+
}
|
| 526 |
+
|
| 527 |
+
.stage h3 {
|
| 528 |
+
font-size: 1.18rem;
|
| 529 |
+
margin-bottom: 8px;
|
| 530 |
+
}
|
| 531 |
+
|
| 532 |
+
.stage p {
|
| 533 |
+
margin: 0 0 10px;
|
| 534 |
+
color: var(--muted);
|
| 535 |
+
line-height: 1.58;
|
| 536 |
+
font-size: 1rem;
|
| 537 |
+
}
|
| 538 |
+
|
| 539 |
+
.stage code {
|
| 540 |
+
display: block;
|
| 541 |
+
white-space: pre-wrap;
|
| 542 |
+
font-size: 0.84rem;
|
| 543 |
+
color: #2b3240;
|
| 544 |
+
background: rgba(23,32,51,0.04);
|
| 545 |
+
padding: 10px;
|
| 546 |
+
border-radius: 12px;
|
| 547 |
+
}
|
| 548 |
+
|
| 549 |
+
.two-col {
|
| 550 |
+
display: grid;
|
| 551 |
+
grid-template-columns: 1.1fr 0.9fr;
|
| 552 |
+
gap: 18px;
|
| 553 |
+
margin-bottom: 18px;
|
| 554 |
+
}
|
| 555 |
+
|
| 556 |
+
.policy-list,
|
| 557 |
+
.guide-list {
|
| 558 |
+
display: grid;
|
| 559 |
+
gap: 12px;
|
| 560 |
+
}
|
| 561 |
+
|
| 562 |
+
.policy-item,
|
| 563 |
+
.guide-item {
|
| 564 |
+
padding-left: 18px;
|
| 565 |
+
position: relative;
|
| 566 |
+
}
|
| 567 |
+
|
| 568 |
+
.policy-item strong,
|
| 569 |
+
.guide-item strong {
|
| 570 |
+
display: block;
|
| 571 |
+
margin-bottom: 6px;
|
| 572 |
+
}
|
| 573 |
+
|
| 574 |
+
.policy-item::before,
|
| 575 |
+
.guide-item::before {
|
| 576 |
+
content: "";
|
| 577 |
+
position: absolute;
|
| 578 |
+
left: 0;
|
| 579 |
+
top: 0.5rem;
|
| 580 |
+
width: 8px;
|
| 581 |
+
height: 8px;
|
| 582 |
+
border-radius: 999px;
|
| 583 |
+
background: var(--accent);
|
| 584 |
+
}
|
| 585 |
+
|
| 586 |
+
.policy-item span,
|
| 587 |
+
.guide-item span {
|
| 588 |
+
color: var(--muted);
|
| 589 |
+
line-height: 1.55;
|
| 590 |
+
font-size: 0.98rem;
|
| 591 |
+
}
|
| 592 |
+
|
| 593 |
+
.controls {
|
| 594 |
+
display: grid;
|
| 595 |
+
gap: 12px;
|
| 596 |
+
grid-template-columns: repeat(3, minmax(0, 1fr));
|
| 597 |
+
padding: 16px;
|
| 598 |
+
margin-bottom: 14px;
|
| 599 |
+
}
|
| 600 |
+
|
| 601 |
+
.toggle-row {
|
| 602 |
+
display: flex;
|
| 603 |
+
justify-content: space-between;
|
| 604 |
+
align-items: center;
|
| 605 |
+
gap: 12px;
|
| 606 |
+
margin: 4px 0 14px;
|
| 607 |
+
flex-wrap: wrap;
|
| 608 |
+
}
|
| 609 |
+
|
| 610 |
+
.toggle {
|
| 611 |
+
display: inline-flex;
|
| 612 |
+
align-items: center;
|
| 613 |
+
gap: 10px;
|
| 614 |
+
color: var(--muted);
|
| 615 |
+
font-size: 0.97rem;
|
| 616 |
+
}
|
| 617 |
+
|
| 618 |
+
.toggle input {
|
| 619 |
+
width: 18px;
|
| 620 |
+
height: 18px;
|
| 621 |
+
margin: 0;
|
| 622 |
+
accent-color: var(--ink);
|
| 623 |
+
}
|
| 624 |
+
|
| 625 |
+
label {
|
| 626 |
+
display: grid;
|
| 627 |
+
gap: 6px;
|
| 628 |
+
color: var(--muted);
|
| 629 |
+
font-size: 0.98rem;
|
| 630 |
+
}
|
| 631 |
+
|
| 632 |
+
select,
|
| 633 |
+
input[type="search"] {
|
| 634 |
+
width: 100%;
|
| 635 |
+
padding: 11px 12px;
|
| 636 |
+
border-radius: 12px;
|
| 637 |
+
border: 1px solid var(--line);
|
| 638 |
+
background: rgba(255,255,255,0.94);
|
| 639 |
+
color: var(--ink);
|
| 640 |
+
font: inherit;
|
| 641 |
+
}
|
| 642 |
+
|
| 643 |
+
.tabs {
|
| 644 |
+
display: inline-flex;
|
| 645 |
+
gap: 8px;
|
| 646 |
+
margin-bottom: 14px;
|
| 647 |
+
flex-wrap: wrap;
|
| 648 |
+
}
|
| 649 |
+
|
| 650 |
+
.tab.active {
|
| 651 |
+
background: var(--ink);
|
| 652 |
+
color: white;
|
| 653 |
+
border-color: var(--ink);
|
| 654 |
+
}
|
| 655 |
+
|
| 656 |
+
.table-wrap {
|
| 657 |
+
overflow-x: auto;
|
| 658 |
+
border: 1px solid var(--line);
|
| 659 |
+
border-radius: 20px;
|
| 660 |
+
background: rgba(255,255,255,0.72);
|
| 661 |
+
}
|
| 662 |
+
|
| 663 |
+
.table-wrap table {
|
| 664 |
+
min-width: 980px;
|
| 665 |
+
}
|
| 666 |
+
|
| 667 |
+
tbody tr:hover {
|
| 668 |
+
background: rgba(233, 162, 59, 0.09);
|
| 669 |
+
}
|
| 670 |
+
|
| 671 |
+
code {
|
| 672 |
+
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;
|
| 673 |
+
}
|
| 674 |
+
|
| 675 |
+
.footer-note {
|
| 676 |
+
color: var(--muted);
|
| 677 |
+
font-size: 1rem;
|
| 678 |
+
line-height: 1.55;
|
| 679 |
+
margin-top: 14px;
|
| 680 |
+
}
|
| 681 |
+
|
| 682 |
+
.load-status {
|
| 683 |
+
color: var(--muted);
|
| 684 |
+
font-size: 0.95rem;
|
| 685 |
+
line-height: 1.5;
|
| 686 |
+
margin-top: 10px;
|
| 687 |
+
}
|
| 688 |
+
|
| 689 |
+
.markdown-note {
|
| 690 |
+
color: var(--muted);
|
| 691 |
+
font-size: 1rem;
|
| 692 |
+
line-height: 1.7;
|
| 693 |
+
}
|
| 694 |
+
|
| 695 |
+
.markdown-note p {
|
| 696 |
+
margin: 0 0 10px;
|
| 697 |
+
}
|
| 698 |
+
|
| 699 |
+
.markdown-note ul {
|
| 700 |
+
margin: 0;
|
| 701 |
+
padding-left: 20px;
|
| 702 |
+
}
|
| 703 |
+
|
| 704 |
+
.markdown-note li {
|
| 705 |
+
margin: 0 0 8px;
|
| 706 |
+
}
|
| 707 |
+
|
| 708 |
+
.markdown-note pre {
|
| 709 |
+
margin: 12px 0 16px;
|
| 710 |
+
padding: 14px 16px;
|
| 711 |
+
overflow-x: auto;
|
| 712 |
+
border-radius: 14px;
|
| 713 |
+
border: 1px solid rgba(23, 32, 51, 0.08);
|
| 714 |
+
background: rgba(255, 255, 255, 0.68);
|
| 715 |
+
color: #223049;
|
| 716 |
+
font-size: 0.92rem;
|
| 717 |
+
line-height: 1.55;
|
| 718 |
+
}
|
| 719 |
+
|
| 720 |
+
.markdown-note code {
|
| 721 |
+
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;
|
| 722 |
+
}
|
| 723 |
+
|
| 724 |
+
@media (max-width: 1120px) {
|
| 725 |
+
.hero,
|
| 726 |
+
.grid-2,
|
| 727 |
+
.two-col,
|
| 728 |
+
.stats,
|
| 729 |
+
.controls,
|
| 730 |
+
.stage-grid,
|
| 731 |
+
.hero-points,
|
| 732 |
+
.metric-core,
|
| 733 |
+
.metric-defs {
|
| 734 |
+
grid-template-columns: 1fr;
|
| 735 |
+
}
|
| 736 |
+
|
| 737 |
+
h1 {
|
| 738 |
+
white-space: normal;
|
| 739 |
+
}
|
| 740 |
+
}
|
| 741 |
+
</style>
|
| 742 |
+
</head>
|
| 743 |
+
<body>
|
| 744 |
+
<main class="shell">
|
| 745 |
+
<header class="topbar">
|
| 746 |
+
<div class="brand">
|
| 747 |
+
<div class="brand-mark" aria-hidden="true">
|
| 748 |
+
<img src="./wuklab_logo.png" alt="WukLab logo">
|
| 749 |
+
</div>
|
| 750 |
+
<div class="brand-copy">
|
| 751 |
+
<strong>SourceBench</strong>
|
| 752 |
+
<span>Leaderboard for cited source quality in generative engines</span>
|
| 753 |
+
</div>
|
| 754 |
+
</div>
|
| 755 |
+
<nav class="nav">
|
| 756 |
+
<a href="#leaderboard">Leaderboard</a>
|
| 757 |
+
<a href="#findings">Findings</a>
|
| 758 |
+
<a href="#deepseek-study">Study</a>
|
| 759 |
+
<a href="#policy">Policy</a>
|
| 760 |
+
</nav>
|
| 761 |
+
</header>
|
| 762 |
+
|
| 763 |
+
<section class="hero">
|
| 764 |
+
<article class="panel hero-main">
|
| 765 |
+
<div class="hero-label">SourceBench</div>
|
| 766 |
+
<h1>Can AI answers cite high-quality web sources?</h1>
|
| 767 |
+
<div class="resource-links">
|
| 768 |
+
<a href="https://arxiv.org/abs/2602.16942" target="_blank" rel="noopener noreferrer">
|
| 769 |
+
<svg viewBox="0 0 24 24" aria-hidden="true"><path d="M12 2 1.8 7.5v9L12 22l10.2-5.5v-9L12 2Zm0 2.3 7.8 4.2-2.4 1.3L12 6.8 6.6 9.8 4.2 8.5 12 4.3Zm-6 5.5 4.8 2.6v5.5L6 15.3V9.8Zm6 9.1v-5.5l4.8-2.6v5.5L12 18.9Z"/></svg>
|
| 770 |
+
<span>arXiv</span>
|
| 771 |
+
</a>
|
| 772 |
+
<a href="https://github.com/WukLab/SourceBench" target="_blank" rel="noopener noreferrer">
|
| 773 |
+
<svg viewBox="0 0 24 24" aria-hidden="true"><path d="M12 2C6.48 2 2 6.6 2 12.26c0 4.52 2.87 8.36 6.84 9.71.5.1.68-.22.68-.49 0-.24-.01-1.05-.01-1.9-2.78.62-3.37-1.22-3.37-1.22-.45-1.19-1.11-1.5-1.11-1.5-.91-.64.07-.63.07-.63 1 .08 1.53 1.06 1.53 1.06.9 1.57 2.35 1.12 2.92.86.09-.67.35-1.12.64-1.38-2.22-.26-4.55-1.14-4.55-5.08 0-1.12.39-2.04 1.03-2.76-.1-.26-.45-1.3.1-2.71 0 0 .84-.28 2.75 1.05A9.3 9.3 0 0 1 12 7.64c.85 0 1.71.12 2.51.35 1.91-1.33 2.75-1.05 2.75-1.05.55 1.41.2 2.45.1 2.71.64.72 1.03 1.64 1.03 2.76 0 3.95-2.33 4.82-4.56 5.07.36.32.68.95.68 1.92 0 1.39-.01 2.5-.01 2.84 0 .27.18.6.69.49A10.27 10.27 0 0 0 22 12.26C22 6.6 17.52 2 12 2Z"/></svg>
|
| 774 |
+
<span>GitHub</span>
|
| 775 |
+
</a>
|
| 776 |
+
<a href="https://mlsys.wuklab.io/posts/sourcebench/" target="_blank" rel="noopener noreferrer">
|
| 777 |
+
<svg viewBox="0 0 24 24" aria-hidden="true"><path d="M4 4h10l6 6v10H4V4Zm10 1.5V10h4.5L14 5.5ZM7 13h10v1.5H7V13Zm0 3h10v1.5H7V16Zm0-6h5v1.5H7V10Z"/></svg>
|
| 778 |
+
<span>Blog</span>
|
| 779 |
+
</a>
|
| 780 |
+
</div>
|
| 781 |
+
<p class="hero-text">
|
| 782 |
+
SourceBench focuses on a different evaluation target from standard answer-quality benchmarks.
|
| 783 |
+
Instead of only asking whether a model answered well, it asks whether the model cited sources that are
|
| 784 |
+
relevant, accurate, fresh, transparent, authoritative, and usable. This Space hosts the public-facing
|
| 785 |
+
leaderboard frontend. Official leaderboard entries are validated and judged by the SourceBench team.
|
| 786 |
+
</p>
|
| 787 |
+
<div class="hero-points">
|
| 788 |
+
<div class="hero-point">
|
| 789 |
+
<strong>Benchmark target</strong>
|
| 790 |
+
<span>Quality of cited sources, not just final answer correctness.</span>
|
| 791 |
+
</div>
|
| 792 |
+
<div class="hero-point">
|
| 793 |
+
<strong>Current scope</strong>
|
| 794 |
+
<span>Generative engines with built-in web search, plus official validation for leaderboard inclusion.</span>
|
| 795 |
+
</div>
|
| 796 |
+
<div class="hero-point">
|
| 797 |
+
<strong>Official policy</strong>
|
| 798 |
+
<span>Official leaderboard entries are validated and judged by the SourceBench team using fixed hidden evaluation settings.</span>
|
| 799 |
+
</div>
|
| 800 |
+
</div>
|
| 801 |
+
</article>
|
| 802 |
+
</section>
|
| 803 |
+
|
| 804 |
+
<section class="stats" id="summary">
|
| 805 |
+
<article class="panel stat">
|
| 806 |
+
<span class="stat-label">Models in current board</span>
|
| 807 |
+
<div class="stat-value" id="stat-models">0</div>
|
| 808 |
+
<div class="stat-sub">Current leaderboard snapshot</div>
|
| 809 |
+
</article>
|
| 810 |
+
<article class="panel stat">
|
| 811 |
+
<span class="stat-label">Query types</span>
|
| 812 |
+
<div class="stat-value" id="stat-query-types">0</div>
|
| 813 |
+
<div class="stat-sub">Benchmark query slices</div>
|
| 814 |
+
</article>
|
| 815 |
+
<article class="panel stat">
|
| 816 |
+
<span class="stat-label">Top model</span>
|
| 817 |
+
<div class="stat-value" id="stat-top-model">-</div>
|
| 818 |
+
<div class="stat-sub">Highest weighted content score</div>
|
| 819 |
+
</article>
|
| 820 |
+
<article class="panel stat">
|
| 821 |
+
<span class="stat-label">Top weighted score</span>
|
| 822 |
+
<div class="stat-value" id="stat-top-score">-</div>
|
| 823 |
+
<div class="stat-sub">Weighted source-quality metric</div>
|
| 824 |
+
</article>
|
| 825 |
+
<article class="panel stat">
|
| 826 |
+
<span class="stat-label">Generated at</span>
|
| 827 |
+
<div class="stat-value" id="stat-generated">-</div>
|
| 828 |
+
<div class="stat-sub">Artifact timestamp in Pacific Time</div>
|
| 829 |
+
</article>
|
| 830 |
+
</section>
|
| 831 |
+
|
| 832 |
+
<section class="grid-2 plain-section" id="findings">
|
| 833 |
+
<article>
|
| 834 |
+
<div class="section-head">
|
| 835 |
+
<div>
|
| 836 |
+
<div class="section-kicker">Work overview</div>
|
| 837 |
+
<h2 style="font-size:1.45rem;">What this work measures</h2>
|
| 838 |
+
</div>
|
| 839 |
+
</div>
|
| 840 |
+
<div class="plain-list">
|
| 841 |
+
<div class="plain-item">
|
| 842 |
+
<strong>Source quality dimensions</strong>
|
| 843 |
+
<span>Each cited source is judged on semantic relevance, factual accuracy, freshness, objectivity, layout/ad density, accountability, transparency, and authority.</span>
|
| 844 |
+
</div>
|
| 845 |
+
<div class="plain-item">
|
| 846 |
+
<strong>Main leaderboard target</strong>
|
| 847 |
+
<span>The leaderboard emphasizes quality of referenced web sources, because a model can produce fluent answers while still relying on weak or misleading citations.</span>
|
| 848 |
+
</div>
|
| 849 |
+
<div class="plain-item">
|
| 850 |
+
<strong>Why this matters</strong>
|
| 851 |
+
<span>For web-grounded assistants, citation quality is a first-class property. A stronger source profile should make answers more trustworthy and auditable.</span>
|
| 852 |
+
</div>
|
| 853 |
+
</div>
|
| 854 |
+
</article>
|
| 855 |
+
|
| 856 |
+
<article>
|
| 857 |
+
<div class="section-head">
|
| 858 |
+
<div>
|
| 859 |
+
<div class="section-kicker">Simple conclusions</div>
|
| 860 |
+
<h2 style="font-size:1.45rem;">Current board highlights</h2>
|
| 861 |
+
</div>
|
| 862 |
+
</div>
|
| 863 |
+
<div class="findings" id="findings-list">
|
| 864 |
+
<div class="finding">
|
| 865 |
+
<strong>Load leaderboard data to see findings</strong>
|
| 866 |
+
<span>The page will summarize the top models, hardest query types, and search-overlap patterns from the current artifact.</span>
|
| 867 |
+
</div>
|
| 868 |
+
</div>
|
| 869 |
+
</article>
|
| 870 |
+
</section>
|
| 871 |
+
|
| 872 |
+
<section class="panel section" id="leaderboard">
|
| 873 |
+
<div class="section-head">
|
| 874 |
+
<div>
|
| 875 |
+
<div class="section-kicker">Leaderboard</div>
|
| 876 |
+
<h2 style="font-size:1.6rem;">Ranking Table</h2>
|
| 877 |
+
</div>
|
| 878 |
+
</div>
|
| 879 |
+
|
| 880 |
+
<p class="metric-intro">
|
| 881 |
+
SourceBench reports one overall score together with eight source-quality dimensions used by the judge model.
|
| 882 |
+
The overall weighted score is the main ranking target; the dimension columns below make it easier to see
|
| 883 |
+
whether a system is strong because it cites more relevant, more accurate, fresher, more transparent, or more authoritative sources.
|
| 884 |
+
</p>
|
| 885 |
+
<ul class="metric-core">
|
| 886 |
+
<li><strong>Weighted Score.</strong> The main leaderboard score, combining the judged dimensions into one overall source-quality metric.</li>
|
| 887 |
+
<li><strong>Unweighted Mean.</strong> The simple average across the judged dimension scores, without weighting.</li>
|
| 888 |
+
<li><strong>% In SE.</strong> The percentage of sources cited by the generative engine (GE) that also appear in the matched search-engine (SE) source set.</li>
|
| 889 |
+
</ul>
|
| 890 |
+
<ul class="metric-defs">
|
| 891 |
+
<li><strong>Semantic Relevance.</strong> Whether the cited source is directly relevant to the query and answer.</li>
|
| 892 |
+
<li><strong>Factual Accuracy.</strong> Whether the cited source appears reliable and factually correct.</li>
|
| 893 |
+
<li><strong>Freshness.</strong> Whether the source is timely for the topic being answered.</li>
|
| 894 |
+
<li><strong>Objectivity / Tone.</strong> Whether the source is balanced rather than sensational or biased.</li>
|
| 895 |
+
<li><strong>Layout / Ad Density.</strong> Whether the page is usable and not overwhelmed by ads or clutter.</li>
|
| 896 |
+
<li><strong>Accountability.</strong> Whether the source clearly indicates ownership, responsibility, or editorial control.</li>
|
| 897 |
+
<li><strong>Transparency.</strong> Whether the source clearly presents provenance, disclosure, or supporting context.</li>
|
| 898 |
+
<li><strong>Authority.</strong> Whether the source appears credible and institutionally trustworthy for the topic.</li>
|
| 899 |
+
</ul>
|
| 900 |
+
|
| 901 |
+
<div class="tabs">
|
| 902 |
+
<button class="tab active" data-view="overall" type="button">Overall</button>
|
| 903 |
+
<button class="tab" data-view="by_query_type" type="button">By Query Type</button>
|
| 904 |
+
</div>
|
| 905 |
+
|
| 906 |
+
<section class="panel controls">
|
| 907 |
+
<label>
|
| 908 |
+
View
|
| 909 |
+
<select id="view-select">
|
| 910 |
+
<option value="overall">Overall</option>
|
| 911 |
+
<option value="by_query_type">By query type</option>
|
| 912 |
+
</select>
|
| 913 |
+
</label>
|
| 914 |
+
<label>
|
| 915 |
+
Sort by
|
| 916 |
+
<select id="sort-key">
|
| 917 |
+
<option value="weighted_total_content_score">Weighted score</option>
|
| 918 |
+
<option value="unweighted_mean_score">Unweighted mean</option>
|
| 919 |
+
<option value="percentage_ge_sources_in_se_sources">% In SE</option>
|
| 920 |
+
</select>
|
| 921 |
+
</label>
|
| 922 |
+
<label>
|
| 923 |
+
Filter model
|
| 924 |
+
<input id="search-input" type="search" placeholder="Search model name">
|
| 925 |
+
</label>
|
| 926 |
+
</section>
|
| 927 |
+
|
| 928 |
+
<div class="toggle-row">
|
| 929 |
+
<label class="toggle">
|
| 930 |
+
<input id="show-dimensions" type="checkbox">
|
| 931 |
+
<span>Show dimension scores</span>
|
| 932 |
+
</label>
|
| 933 |
+
</div>
|
| 934 |
+
|
| 935 |
+
<div class="section-head" style="margin-top:10px;">
|
| 936 |
+
<h3 id="table-title" style="font-size:1.1rem;">Overall ranking</h3>
|
| 937 |
+
<div class="micro" id="table-count">0 rows</div>
|
| 938 |
+
</div>
|
| 939 |
+
|
| 940 |
+
<div class="table-wrap">
|
| 941 |
+
<table id="leaderboard-table">
|
| 942 |
+
<thead></thead>
|
| 943 |
+
<tbody></tbody>
|
| 944 |
+
</table>
|
| 945 |
+
</div>
|
| 946 |
+
|
| 947 |
+
<div class="footer-note" id="board-note">
|
| 948 |
+
Official leaderboard entries are validated and judged by the SourceBench team. The benchmark codebase and public evaluation pipeline are maintained separately in the SourceBench repository.
|
| 949 |
+
</div>
|
| 950 |
+
<div class="load-status" id="load-status">Loading leaderboard data...</div>
|
| 951 |
+
</section>
|
| 952 |
+
|
| 953 |
+
<section class="plain-section study-section" id="deepseek-study">
|
| 954 |
+
<div class="section-head">
|
| 955 |
+
<div>
|
| 956 |
+
<div class="section-kicker">DeepSeek Tool Study</div>
|
| 957 |
+
<h2 style="font-size:1.5rem;">DeepSeek variants with different search backends</h2>
|
| 958 |
+
</div>
|
| 959 |
+
</div>
|
| 960 |
+
<p class="study-copy">
|
| 961 |
+
SourceBench also includes a focused comparison of DeepSeek variants paired with different retrieval setups.
|
| 962 |
+
This is a separate study rather than part of the main model family ranking: the purpose is to isolate how
|
| 963 |
+
search backend choice and reasoning mode change citation quality, overlap with search results, and the final weighted source score.
|
| 964 |
+
</p>
|
| 965 |
+
<div class="study-list" id="deepseek-findings">
|
| 966 |
+
<div class="study-item"><strong>Load leaderboard data to see the DeepSeek tool study.</strong></div>
|
| 967 |
+
</div>
|
| 968 |
+
<div class="table-wrap">
|
| 969 |
+
<table id="deepseek-table">
|
| 970 |
+
<thead></thead>
|
| 971 |
+
<tbody></tbody>
|
| 972 |
+
</table>
|
| 973 |
+
</div>
|
| 974 |
+
</section>
|
| 975 |
+
|
| 976 |
+
<section class="plain-section" id="policy">
|
| 977 |
+
<div class="section-head">
|
| 978 |
+
<div>
|
| 979 |
+
<div class="section-kicker">Official Policy</div>
|
| 980 |
+
<h2 style="font-size:1.7rem;">How official leaderboard evaluation works</h2>
|
| 981 |
+
</div>
|
| 982 |
+
</div>
|
| 983 |
+
<div class="markdown-note">
|
| 984 |
+
<p><strong>Local self-check</strong> can be run with the public SourceBench benchmark code and the fixed public query split.</p>
|
| 985 |
+
<p><strong>Official leaderboard entries</strong> are not accepted from participant-computed final scores alone. Instead, entries are validated and judged by the SourceBench team.</p>
|
| 986 |
+
<p>For official evaluation, SourceBench uses hidden holdout queries, the fixed judging setup, and the fixed metric computation pipeline so that leaderboard rows remain comparable across systems.</p>
|
| 987 |
+
</div>
|
| 988 |
+
</section>
|
| 989 |
+
|
| 990 |
+
<section class="plain-section">
|
| 991 |
+
<div class="section-head">
|
| 992 |
+
<div>
|
| 993 |
+
<div class="section-kicker">Submission</div>
|
| 994 |
+
<h2 style="font-size:1.65rem;">What participants should submit</h2>
|
| 995 |
+
</div>
|
| 996 |
+
</div>
|
| 997 |
+
<div class="markdown-note">
|
| 998 |
+
<p><strong>Preferred submission: endpoint access.</strong> Submit the model endpoint, API key, model name, API format, and optional generation settings. The SourceBench team will run hidden queries, source collection, scraping, judging, and metric computation on our side.</p>
|
| 999 |
+
<p><strong>Fallback submission: answer + cited URL bundle.</strong> If endpoint access cannot be shared, submit per-query answer text together with cited URLs. The SourceBench team will run scraping, post-processing, judging, and metric computation server-side.</p>
|
| 1000 |
+
<p><strong>Why these boundaries?</strong> They keep the standardized parts of the benchmark under SourceBench control. Official leaderboard entries are validated and judged by the SourceBench team, rather than accepted from participant-provided final scores.</p>
|
| 1001 |
+
<p><strong>Benchmark repository and submission examples:</strong></p>
|
| 1002 |
+
<ul>
|
| 1003 |
+
<li><a href="https://github.com/WukLab/SourceBench" target="_blank" rel="noopener noreferrer">SourceBench benchmark repository</a></li>
|
| 1004 |
+
<li><code>leaderboard/examples/endpoint_submission.example.json</code> in the benchmark repo</li>
|
| 1005 |
+
<li><code>leaderboard/examples/answer_url_bundle.example.json</code> in the benchmark repo</li>
|
| 1006 |
+
</ul>
|
| 1007 |
+
</div>
|
| 1008 |
+
</section>
|
| 1009 |
+
|
| 1010 |
+
</main>
|
| 1011 |
+
|
| 1012 |
+
<script src="./leaderboard_data.js"></script>
|
| 1013 |
+
<script>
|
| 1014 |
+
/**
 * Mutable UI state. `payload` holds the loaded leaderboard artifact,
 * `currentView` is either "overall" or "by_query_type", and
 * `showDimensions` mirrors the "Show dimension scores" checkbox.
 */
const state = { payload: null, currentView: "overall", showDimensions: false };

/**
 * Columns that are always candidates for the ranking table. `renderTable`
 * drops "query_type" when the overall view is active.
 */
const baseColumns = [
  "model_name",
  "query_type",
  "weighted_total_content_score",
  "unweighted_mean_score",
  "percentage_ge_sources_in_se_sources",
];

/** Per-dimension score columns, appended when `state.showDimensions` is true. */
const dimensionColumns = [
  "semantic_relevance", "factual_accuracy", "freshness", "objectivity_tone",
  "layout_ad_density", "accountability", "transparency", "authority",
];

/** Human-readable table headers keyed by payload field name. */
const labels = {
  // Identity and summary columns.
  model_name: "Model",
  query_type: "Query Type",
  weighted_total_content_score: "Weighted Score",
  unweighted_mean_score: "Unweighted Mean",
  percentage_ge_sources_in_se_sources: "% In SE",
  // Judged source-quality dimensions.
  semantic_relevance: "Semantic Relevance",
  factual_accuracy: "Factual Accuracy",
  freshness: "Freshness",
  objectivity_tone: "Objectivity / Tone",
  layout_ad_density: "Layout / Ad Density",
  accountability: "Accountability",
  transparency: "Transparency",
  authority: "Authority",
};
|
| 1054 |
+
|
| 1055 |
+
// Cached DOM handles, resolved once when the script runs.

// Leaderboard controls.
const viewSelect = document.getElementById("view-select");
const sortKey = document.getElementById("sort-key");
const searchInput = document.getElementById("search-input");
const showDimensionsInput = document.getElementById("show-dimensions");

// Main ranking table: heading, row counter, and the two table sections.
const tableTitle = document.getElementById("table-title");
const tableCount = document.getElementById("table-count");
const tableHead = document.querySelector("#leaderboard-table thead");
const tableBody = document.querySelector("#leaderboard-table tbody");

// "Current board highlights" list.
const findingsList = document.getElementById("findings-list");

// DeepSeek tool study: summary list and its table sections.
const deepseekFindings = document.getElementById("deepseek-findings");
const deepseekTableHead = document.querySelector("#deepseek-table thead");
const deepseekTableBody = document.querySelector("#deepseek-table tbody");

// Footer elements. `boardNote` is not read by any function in this script.
const boardNote = document.getElementById("board-note");
const loadStatus = document.getElementById("load-status");
|
| 1069 |
+
|
| 1070 |
+
/**
 * Format a score-like value for display.
 *
 * @param {*} value - A number, a numeric string, or a missing value.
 * @param {number} [digits=2] - Digits to keep after the decimal point.
 * @returns {string} Fixed-point text for numeric input, "-" for missing
 *   input (null, undefined, blank string, NaN), otherwise `String(value)`.
 */
function formatNumber(value, digits = 2) {
  if (value === null || value === undefined) return "-";

  // Number("   ") is 0, so a blank string must be caught before coercion or
  // it would be shown as a real score of 0.00.
  if (typeof value === "string" && value.trim() === "") return "-";

  if (typeof value === "number") {
    // NaN has nothing meaningful to show; use the same placeholder as missing data.
    return Number.isNaN(value) ? "-" : value.toFixed(digits);
  }

  const parsed = Number(value);
  return Number.isNaN(parsed) ? String(value) : parsed.toFixed(digits);
}
|
| 1077 |
+
|
| 1078 |
+
/**
 * Render a timestamp in the America/Los_Angeles time zone using en-US
 * formatting (short month, numeric day/year, hour, 2-digit minute, short
 * zone name).
 *
 * @param {*} value - Anything accepted by the `Date` constructor.
 * @returns {*} "-" when `value` is falsy, the untouched `value` when it does
 *   not parse to a valid date, otherwise the formatted string.
 */
function formatPacificTimestamp(value) {
  if (!value) {
    return "-";
  }

  const parsedDate = new Date(value);
  const isValidDate = !Number.isNaN(parsedDate.getTime());
  if (!isValidDate) {
    // Hand back the raw input so an unparseable value is still visible.
    return value;
  }

  const pacificOptions = {
    timeZone: "America/Los_Angeles",
    year: "numeric",
    month: "short",
    day: "numeric",
    hour: "numeric",
    minute: "2-digit",
    timeZoneName: "short",
  };
  const pacificFormatter = new Intl.DateTimeFormat("en-US", pacificOptions);
  return pacificFormatter.format(parsedDate);
}
|
| 1092 |
+
|
| 1093 |
+
/**
 * Show a message in the load-status line under the leaderboard.
 *
 * @param {string} message - Text to display.
 * @param {boolean} [isError=false] - When true the text is shown in red
 *   (#b91c1c); otherwise the muted theme colour is used.
 */
function setLoadStatus(message, isError = false) {
  const statusColor = isError ? "#b91c1c" : "var(--muted)";
  loadStatus.textContent = message;
  loadStatus.style.color = statusColor;
}
|
| 1097 |
+
|
| 1098 |
+
/**
 * Fill the summary stat cards (model count, query-type count, top model,
 * top weighted score, generation timestamp) from a leaderboard payload.
 *
 * @param {object} payload - Leaderboard artifact; `overall`,
 *   `by_query_type` and `metadata.generated_at` are all optional.
 */
function updateTopStats(payload) {
  const overall = payload.overall || [];
  const byType = payload.by_query_type || [];

  // Missing or non-numeric scores rank below every real score.
  const scoreOf = (row) => {
    const score = Number(row.weighted_total_content_score);
    return Number.isNaN(score) ? -Infinity : score;
  };

  // The cards are labelled "highest weighted score", so pick the maximum
  // explicitly instead of trusting the artifact's row order. The earliest
  // row wins ties, which matches the old behaviour for pre-sorted input.
  let top;
  for (const row of overall) {
    if (top === undefined || scoreOf(row) > scoreOf(top)) top = row;
  }

  const setText = (id, text) => {
    document.getElementById(id).textContent = text;
  };

  setText("stat-models", overall.length);
  setText("stat-query-types", new Set(byType.map((row) => row.query_type)).size);
  setText("stat-top-model", top ? top.model_name : "-");
  setText("stat-top-score", top ? formatNumber(top.weighted_total_content_score) : "-");
  setText("stat-generated", formatPacificTimestamp(payload.metadata?.generated_at));
}
|
| 1108 |
+
|
| 1109 |
+
/**
 * Build the summary cards shown under "Current board highlights".
 *
 * @param {object} payload - Leaderboard artifact with optional `overall`
 *   and `by_query_type` row arrays.
 * @returns {Array<{title: string, body: string}>} Four findings, or a single
 *   "No findings available" entry when either row array is empty.
 */
function computeFindings(payload) {
  const overall = payload.overall || [];
  const byType = payload.by_query_type || [];
  if (!overall.length || !byType.length) {
    return [{
      title: "No findings available",
      body: "Load a leaderboard artifact to generate summary findings."
    }];
  }

  // Missing or non-numeric metrics count as 0 when picking a leader.
  const metricOf = (row, key) => Number(row[key]) || 0;

  // Row with the largest `key`; the earliest row wins ties. Returns
  // undefined for an empty list. A single pass replaces copy-and-sort.
  const leaderBy = (rows, key) => {
    let leader;
    for (const row of rows) {
      if (leader === undefined || metricOf(row, key) > metricOf(leader, key)) leader = row;
    }
    return leader;
  };

  // Computed rather than read from overall[0], so the text stays correct
  // even if the artifact rows are not pre-sorted by weighted score.
  const top = leaderBy(overall, "weighted_total_content_score");
  const bestFreshness = leaderBy(overall, "freshness");
  const bestOverlap = leaderBy(
    overall.filter((row) => row.percentage_ge_sources_in_se_sources !== null && row.percentage_ge_sources_in_se_sources !== undefined),
    "percentage_ge_sources_in_se_sources"
  );
  const qualityLeaders = {
    transparency: leaderBy(overall, "transparency"),
    authority: leaderBy(overall, "authority"),
    accountability: leaderBy(overall, "accountability"),
  };

  // Average weighted score per query type, ordered hardest (lowest) first.
  const groupedByType = new Map();
  for (const row of byType) {
    if (!groupedByType.has(row.query_type)) groupedByType.set(row.query_type, []);
    groupedByType.get(row.query_type).push(row);
  }
  const typeAverages = Array.from(groupedByType.entries()).map(([queryType, rows]) => {
    const avg = rows.reduce((sum, row) => sum + (row.weighted_total_content_score || 0), 0) / rows.length;
    return { queryType, avg };
  }).sort((a, b) => a.avg - b.avg);

  const hardest = typeAverages[0];
  const easiest = typeAverages[typeAverages.length - 1];

  let overlapBody;
  if (!bestOverlap) {
    overlapBody = "The current artifact includes quality metrics beyond simple overlap with search-engine results, which is one of the main design points of SourceBench.";
  } else if (bestOverlap === top) {
    // Avoid the self-contradictory "X leads overlap, but the best score belongs to X".
    overlapBody = `${top.model_name} has both the highest visible search overlap (${formatNumber(bestOverlap.percentage_ge_sources_in_se_sources)}% In SE) and the best overall weighted score in the current artifact. The two are measured independently: the paper emphasizes that leaderboard quality should not be reduced to overlap with search results alone.`;
  } else {
    overlapBody = `${bestOverlap.model_name} has the highest visible search overlap at ${formatNumber(bestOverlap.percentage_ge_sources_in_se_sources)}% In SE, but the best overall weighted score still belongs to ${top.model_name}. This mirrors the paper's emphasis that leaderboard quality should not be reduced to overlap with search results alone.`;
  }

  return [
    {
      title: "Overall source quality remains meaningfully separated across systems",
      body: `${top.model_name} is the current overall leader with a weighted score of ${formatNumber(top.weighted_total_content_score)}. The spread across the current board suggests that citation quality is not saturated: systems still differ substantially once source relevance, accuracy, transparency, and authority are scored directly.`
    },
    {
      // The hardest slice is named from the data so the title cannot
      // contradict the body when the artifact changes.
      title: `Question type matters, and ${hardest.queryType} is currently the hardest slice`,
      body: `Across the current artifact, ${hardest.queryType} has the lowest average weighted score (${formatNumber(hardest.avg)}), while ${easiest.queryType} is the easiest (${formatNumber(easiest.avg)}). This matches the broader SourceBench framing that harder query types expose source-selection weaknesses even when answers may still look fluent.`
    },
    {
      title: "High search overlap is not the same thing as high source quality",
      body: overlapBody
    },
    {
      title: "Dimension scores reveal different strengths behind similar overall rankings",
      body: `${bestFreshness.model_name} currently leads freshness at ${formatNumber(bestFreshness.freshness)}, while ${qualityLeaders.transparency.model_name}, ${qualityLeaders.authority.model_name}, and ${qualityLeaders.accountability.model_name} lead key trust-related dimensions such as transparency, authority, and accountability. These per-dimension columns make it easier to see why two systems with similar overall scores can still have very different citation profiles.`
    }
  ];
}
|
| 1165 |
+
|
| 1166 |
+
/**
 * Replace the contents of the findings list with one card per finding
 * returned by `computeFindings(payload)`.
 *
 * Each card is a `div.finding` holding a `<strong>` title and a `<span>`
 * body. Titles and bodies embed model names and query types from the
 * artifact, so they are assigned through `textContent` and can never be
 * interpreted as markup.
 *
 * @param {object} payload - Leaderboard artifact passed to `computeFindings`.
 */
function renderFindings(payload) {
  const findings = computeFindings(payload);
  findingsList.innerHTML = "";
  for (const finding of findings) {
    const card = document.createElement("div");
    card.className = "finding";

    const title = document.createElement("strong");
    title.textContent = finding.title;

    const body = document.createElement("span");
    body.textContent = finding.body;

    card.appendChild(title);
    card.appendChild(body);
    findingsList.appendChild(card);
  }
}
|
| 1176 |
+
|
| 1177 |
+
/**
 * Render the DeepSeek tool-study section: three summary items and a small
 * table of every overall row whose `model_name` starts with "deepseek".
 *
 * Rows are ordered by weighted score, highest first. Backend and mode
 * averages are taken over rows whose name contains "gensee", "tavily",
 * "reasoning" or "chat-". When no DeepSeek rows exist, a placeholder
 * message is shown and the table is cleared.
 *
 * @param {object} payload - Leaderboard artifact with an optional `overall` array.
 */
function renderDeepSeekStudy(payload) {
  // Model names and cell values come from the artifact, so escape them
  // before they are interpolated into innerHTML.
  const htmlEntities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => htmlEntities[ch]);

  const rows = (payload.overall || [])
    .filter((row) => typeof row.model_name === "string" && row.model_name.startsWith("deepseek"));

  if (!rows.length) {
    deepseekFindings.innerHTML = '<div class="study-item"><strong>No DeepSeek study rows found in the current artifact.</strong></div>';
    deepseekTableHead.innerHTML = "";
    deepseekTableBody.innerHTML = "";
    return;
  }

  // Missing or non-numeric scores sort last; the comparator never returns
  // NaN, which would make the resulting order unspecified.
  const scoreOf = (row) => {
    const score = Number(row.weighted_total_content_score);
    return Number.isNaN(score) ? -Infinity : score;
  };
  const sorted = [...rows].sort((a, b) => {
    const scoreA = scoreOf(a);
    const scoreB = scoreOf(b);
    if (scoreA === scoreB) return 0;
    return scoreB > scoreA ? 1 : -1;
  });

  const best = sorted[0];
  const avg = (items, key) => items.length ? items.reduce((sum, item) => sum + (item[key] || 0), 0) / items.length : null;
  const averageFor = (nameFragment) => avg(
    sorted.filter((row) => row.model_name.includes(nameFragment)),
    "weighted_total_content_score"
  );
  const genseeAvg = averageFor("gensee");
  const tavilyAvg = averageFor("tavily");
  const reasoningAvg = averageFor("reasoning");
  const chatAvg = averageFor("chat-");

  const shown = (value) => escapeHtml(formatNumber(value));

  deepseekFindings.innerHTML = `
    <div class="study-item"><strong>Best DeepSeek variant in the current artifact: ${escapeHtml(best.model_name)}</strong> with a weighted score of ${shown(best.weighted_total_content_score)} and % In SE of ${shown(best.percentage_ge_sources_in_se_sources)}.</div>
    <div class="study-item"><strong>Backend choice changes citation quality materially.</strong> The Gensee-backed variants average ${shown(genseeAvg)} weighted score, while the Tavily-backed variants average ${shown(tavilyAvg)}.</div>
    <div class="study-item"><strong>Reasoning mode does not dominate by itself.</strong> In this artifact, reasoning variants average ${shown(reasoningAvg)} weighted score versus ${shown(chatAvg)} for chat variants, suggesting that retrieval setup and source selection quality still matter directly.</div>
  `;

  const cols = ["model_name", "weighted_total_content_score", "unweighted_mean_score", "percentage_ge_sources_in_se_sources"];
  deepseekTableHead.innerHTML = `<tr>${cols.map((key) => `<th>${labels[key] || key}</th>`).join("")}</tr>`;
  deepseekTableBody.innerHTML = sorted.map((row) => {
    const cells = cols.map((key) => {
      const rendered = typeof row[key] === "number" ? formatNumber(row[key]) : (row[key] ?? "-");
      return `<td>${escapeHtml(rendered)}</td>`;
    });
    return `<tr>${cells.join("")}</tr>`;
  }).join("");
}
|
| 1214 |
+
|
| 1215 |
+
/**
 * Collect the rows for the active view, filtered by the model search box
 * and sorted by the selected metric.
 *
 * Sorting is descending on the numeric value of the metric. Values that do
 * not convert to a number rank last, and ties are broken by `model_name`
 * via `localeCompare`. The payload arrays are not modified.
 *
 * @returns {object[]} A new array of rows; empty when no payload is loaded.
 */
function getRows() {
  if (!state.payload) return [];

  const sourceRows = state.currentView === "overall"
    ? state.payload.overall
    : state.payload.by_query_type;

  const needle = searchInput.value.trim().toLowerCase();
  const nameOf = (row) => String(row.model_name || "");

  // filter() always yields a fresh array, so the sort below never touches the payload.
  const visibleRows = (sourceRows || []).filter(
    (row) => !needle || nameOf(row).toLowerCase().includes(needle)
  );

  const metric = sortKey.value;
  const rankOf = (row) => {
    const numeric = Number(row[metric]);
    return Number.isNaN(numeric) ? -Infinity : numeric;
  };

  return visibleRows.sort((left, right) => {
    const leftRank = rankOf(left);
    const rightRank = rankOf(right);
    return rightRank !== leftRank
      ? rightRank - leftRank
      : nameOf(left).localeCompare(nameOf(right));
  });
}
|
| 1237 |
+
|
| 1238 |
+
/**
 * Redraw the main ranking table from the current state and controls.
 *
 * Columns are `baseColumns` (without "query_type" in the overall view)
 * followed by `dimensionColumns` when `state.showDimensions` is set.
 * Numeric cells go through `formatNumber`; null/undefined cells show "-".
 * Also updates the table title and the row counter.
 */
function renderTable() {
  // Cell values come from the artifact, so escape them before they are
  // interpolated into innerHTML.
  const htmlEntities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => htmlEntities[ch]);

  const rows = getRows();
  const cols = [
    ...(state.currentView === "overall" ? baseColumns.filter((key) => key !== "query_type") : baseColumns),
    ...(state.showDimensions ? dimensionColumns : []),
  ];

  tableTitle.textContent = state.currentView === "overall" ? "Overall ranking" : "Ranking by query type";
  // Singular form for exactly one row ("1 row", not "1 rows").
  tableCount.textContent = rows.length === 1 ? "1 row" : `${rows.length} rows`;
  tableHead.innerHTML = `<tr>${cols.map((key) => `<th>${labels[key] || key}</th>`).join("")}</tr>`;
  tableBody.innerHTML = rows.map((row) => {
    const cells = cols.map((key) => {
      const val = row[key];
      const rendered = typeof val === "number" ? formatNumber(val) : (val ?? "-");
      return `<td>${escapeHtml(rendered)}</td>`;
    });
    return `<tr>${cells.join("")}</tr>`;
  }).join("");
}
|
| 1258 |
+
|
| 1259 |
+
/**
 * Store a freshly loaded leaderboard payload and refresh every view that
 * depends on it: summary stats, findings, DeepSeek study and the ranking
 * table. Finishes by reporting success in the status line.
 *
 * @param {object} payload - Leaderboard artifact to display.
 */
function applyPayload(payload) {
  state.payload = payload;

  // These renderers read the payload directly; the table reads it from `state`.
  const payloadRenderers = [updateTopStats, renderFindings, renderDeepSeekStudy];
  for (const render of payloadRenderers) {
    render(payload);
  }
  renderTable();

  setLoadStatus("Leaderboard data loaded.");
}
|
| 1267 |
+
|
| 1268 |
+
/**
 * Load leaderboard data and render it.
 *
 * Uses `window.SOURCEBENCH_PAYLOAD` when it is set (presumably assigned by
 * `leaderboard_data.js`, which is not visible here); otherwise fetches
 * `./leaderboard_data.json` with caching disabled. Any failure, including
 * one thrown while rendering, is logged and reported in the status line.
 *
 * @returns {Promise<void>} Resolves once loading has succeeded or failed;
 *   it does not reject.
 */
async function autoLoad() {
  try {
    // Kept inside the try block so that a rendering error on the embedded
    // payload is surfaced to the user instead of leaving "Loading..." on screen.
    if (window.SOURCEBENCH_PAYLOAD) {
      applyPayload(window.SOURCEBENCH_PAYLOAD);
      return;
    }

    const response = await fetch("./leaderboard_data.json", { cache: "no-store" });
    if (!response.ok) {
      // Report the real status: a non-OK response is not always "not found".
      throw new Error(`Could not fetch leaderboard_data.json next to index.html (HTTP ${response.status})`);
    }
    const payload = await response.json();
    applyPayload(payload);
  } catch (error) {
    console.error(error);
    setLoadStatus("Could not load leaderboard data.", true);
  }
}
|
| 1283 |
+
|
| 1284 |
+
// Tab buttons: switch the view, mirror it into the <select>, and mark only
// the clicked tab as active.
document.querySelectorAll(".tab").forEach((tab) => {
  tab.addEventListener("click", () => {
    state.currentView = tab.dataset.view;
    viewSelect.value = state.currentView;
    for (const node of document.querySelectorAll(".tab")) {
      node.classList.toggle("active", node === tab);
    }
    renderTable();
  });
});

// View <select>: switch the view and activate the tab with the matching data-view.
viewSelect.addEventListener("change", ({ target }) => {
  state.currentView = target.value;
  for (const node of document.querySelectorAll(".tab")) {
    node.classList.toggle("active", node.dataset.view === state.currentView);
  }
  renderTable();
});

// Sort key and search box are read from the DOM by getRows, so a redraw is enough.
sortKey.addEventListener("change", renderTable);
searchInput.addEventListener("input", renderTable);

// Dimension toggle: remember the checkbox state, then redraw.
showDimensionsInput.addEventListener("change", ({ target }) => {
  state.showDimensions = target.checked;
  renderTable();
});

// Start the initial data load.
autoLoad();
|
| 1306 |
+
</script>
|
| 1307 |
+
</body>
|
| 1308 |
+
</html>
|
leaderboard_data.js
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
leaderboard_data.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
wuklab_logo.png
ADDED
|