Upload folder using huggingface_hub
Browse files
- .gitattributes +3 -0
- README.md +13 -5
- assets/failure_analysis.png +3 -0
- assets/games_overview_updated.png +3 -0
- assets/tier_overview.png +3 -0
- css/leaderboard.css +175 -0
- css/main.css +328 -0
- css/viewer.css +177 -0
- index.html +295 -18
- js/app.js +31 -0
- js/leaderboard-data.js +50 -0
- js/leaderboard.js +281 -0
- js/viewer.js +263 -0
- manifest.json +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
assets/failure_analysis.png filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
assets/games_overview_updated.png filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
assets/tier_overview.png filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
|
@@ -1,10 +1,18 @@
|
|
| 1 |
---
|
| 2 |
-
title: LudoBench
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: static
|
| 7 |
pinned: false
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: "LudoBench: Board Game Reasoning Benchmark"
|
| 3 |
+
emoji: "\U0001F3B2"
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
sdk: static
|
| 7 |
pinned: false
|
| 8 |
+
license: mit
|
| 9 |
---
|
| 10 |
|
| 11 |
+
# LudoBench
|
| 12 |
+
|
| 13 |
+
A multimodal board-game reasoning benchmark evaluating LLM/VLM reasoning across 5 strategy games and 3 difficulty tiers.
|
| 14 |
+
|
| 15 |
+
- 638 annotated QA pairs
|
| 16 |
+
- 5 games: Kingdomino, Res Arcana, Pax Renaissance, Carcassonne, Catan
|
| 17 |
+
- 3 tiers: Environment Perception, Rules Integration, Short-Horizon Optimization
|
| 18 |
+
- 9 models benchmarked across 3 modalities (None, Text, Image)
|
assets/failure_analysis.png
ADDED
|
Git LFS Details
|
assets/games_overview_updated.png
ADDED
|
Git LFS Details
|
assets/tier_overview.png
ADDED
|
Git LFS Details
|
css/leaderboard.css
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* ---- Filter controls ---- */
|
| 2 |
+
#leaderboard-filters {
|
| 3 |
+
display: flex;
|
| 4 |
+
flex-wrap: wrap;
|
| 5 |
+
gap: 16px;
|
| 6 |
+
margin-bottom: 20px;
|
| 7 |
+
}
|
| 8 |
+
|
| 9 |
+
.filter-group {
|
| 10 |
+
display: flex;
|
| 11 |
+
align-items: center;
|
| 12 |
+
gap: 6px;
|
| 13 |
+
}
|
| 14 |
+
|
| 15 |
+
.filter-label {
|
| 16 |
+
font-size: 0.85rem;
|
| 17 |
+
color: #666;
|
| 18 |
+
font-weight: 600;
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
.filter-buttons {
|
| 22 |
+
display: flex;
|
| 23 |
+
gap: 4px;
|
| 24 |
+
}
|
| 25 |
+
|
| 26 |
+
.filter-btn {
|
| 27 |
+
background: #f0f2f5;
|
| 28 |
+
border: 1px solid #ccc;
|
| 29 |
+
color: #555;
|
| 30 |
+
font-size: 0.8rem;
|
| 31 |
+
padding: 5px 12px;
|
| 32 |
+
border-radius: 14px;
|
| 33 |
+
cursor: pointer;
|
| 34 |
+
transition: background 0.12s, color 0.12s;
|
| 35 |
+
width: auto;
|
| 36 |
+
height: auto;
|
| 37 |
+
display: inline-flex;
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
.filter-btn:hover { background: #e4e6ea; color: #333; }
|
| 41 |
+
.filter-btn.active { background: #2563b1; color: #fff; border-color: #2563b1; }
|
| 42 |
+
|
| 43 |
+
/* ---- Table wrapper ---- */
|
| 44 |
+
#leaderboard-table-wrap {
|
| 45 |
+
overflow-x: auto;
|
| 46 |
+
border: 1px solid #ddd;
|
| 47 |
+
border-radius: 8px;
|
| 48 |
+
background: #fff;
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
/* ---- Table ---- */
|
| 52 |
+
#leaderboard-table {
|
| 53 |
+
width: 100%;
|
| 54 |
+
border-collapse: collapse;
|
| 55 |
+
font-size: 0.85rem;
|
| 56 |
+
white-space: nowrap;
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
#leaderboard-table th,
|
| 60 |
+
#leaderboard-table td {
|
| 61 |
+
padding: 8px 12px;
|
| 62 |
+
text-align: center;
|
| 63 |
+
border-bottom: 1px solid #eee;
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
+
/* Game group header row */
|
| 67 |
+
#leaderboard-table thead tr.game-header th {
|
| 68 |
+
background: #f5f6f8;
|
| 69 |
+
color: #333;
|
| 70 |
+
font-size: 0.82rem;
|
| 71 |
+
font-weight: 700;
|
| 72 |
+
border-bottom: 2px solid #ddd;
|
| 73 |
+
border-left: 1px solid #ddd;
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
#leaderboard-table thead tr.game-header th:first-child {
|
| 77 |
+
border-left: none;
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
/* Sub-header row (modalities) */
|
| 81 |
+
#leaderboard-table thead tr.mod-header th {
|
| 82 |
+
background: #fafbfc;
|
| 83 |
+
color: #666;
|
| 84 |
+
font-size: 0.78rem;
|
| 85 |
+
font-weight: 500;
|
| 86 |
+
cursor: pointer;
|
| 87 |
+
user-select: none;
|
| 88 |
+
border-left: 1px solid #eee;
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
#leaderboard-table thead tr.mod-header th:hover {
|
| 92 |
+
color: #222;
|
| 93 |
+
background: #f0f1f3;
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
.sort-arrow {
|
| 97 |
+
font-size: 0.7rem;
|
| 98 |
+
margin-left: 3px;
|
| 99 |
+
color: #bbb;
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
+
.sort-arrow.active { color: #2563b1; }
|
| 103 |
+
|
| 104 |
+
/* Model name column */
|
| 105 |
+
#leaderboard-table thead tr.mod-header th:first-child {
|
| 106 |
+
text-align: left;
|
| 107 |
+
cursor: default;
|
| 108 |
+
border-left: none;
|
| 109 |
+
}
|
| 110 |
+
|
| 111 |
+
#leaderboard-table td.model-cell {
|
| 112 |
+
text-align: left;
|
| 113 |
+
font-weight: 600;
|
| 114 |
+
color: #1a1a1a;
|
| 115 |
+
background: #fafbfc;
|
| 116 |
+
position: sticky;
|
| 117 |
+
left: 0;
|
| 118 |
+
z-index: 1;
|
| 119 |
+
border-right: 1px solid #ddd;
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
/* Tier label within model cell */
|
| 123 |
+
.tier-tag {
|
| 124 |
+
font-size: 0.72rem;
|
| 125 |
+
color: #999;
|
| 126 |
+
font-weight: 400;
|
| 127 |
+
margin-left: 6px;
|
| 128 |
+
}
|
| 129 |
+
|
| 130 |
+
/* Heatmap cell */
|
| 131 |
+
#leaderboard-table td.score-cell {
|
| 132 |
+
font-variant-numeric: tabular-nums;
|
| 133 |
+
font-weight: 500;
|
| 134 |
+
border-left: 1px solid #f0f0f0;
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
+
#leaderboard-table td.score-cell.null-cell {
|
| 138 |
+
color: #ccc;
|
| 139 |
+
background: #fafafa;
|
| 140 |
+
}
|
| 141 |
+
|
| 142 |
+
/* Average column */
|
| 143 |
+
#leaderboard-table th.avg-col,
|
| 144 |
+
#leaderboard-table td.avg-col {
|
| 145 |
+
border-right: 2px solid #ccc;
|
| 146 |
+
font-weight: 700;
|
| 147 |
+
}
|
| 148 |
+
|
| 149 |
+
/* Tier group separator row */
|
| 150 |
+
#leaderboard-table tbody tr.tier-separator td {
|
| 151 |
+
background: #e8edf3;
|
| 152 |
+
color: #2563b1;
|
| 153 |
+
font-weight: 700;
|
| 154 |
+
font-size: 0.82rem;
|
| 155 |
+
text-align: left;
|
| 156 |
+
padding: 6px 12px;
|
| 157 |
+
border-bottom: 2px solid #c0cfe0;
|
| 158 |
+
border-top: 1px solid #c0cfe0;
|
| 159 |
+
letter-spacing: 0.02em;
|
| 160 |
+
}
|
| 161 |
+
|
| 162 |
+
#leaderboard-table tbody tr.tier-separator:first-child td {
|
| 163 |
+
border-top: none;
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
+
/* Row hover */
|
| 167 |
+
#leaderboard-table tbody tr:hover:not(.tier-separator) td {
|
| 168 |
+
filter: brightness(0.96);
|
| 169 |
+
}
|
| 170 |
+
|
| 171 |
+
/* ---- Responsive ---- */
|
| 172 |
+
@media (max-width: 700px) {
|
| 173 |
+
#leaderboard-filters { gap: 10px; }
|
| 174 |
+
.filter-buttons { flex-wrap: wrap; }
|
| 175 |
+
}
|
css/main.css
ADDED
|
@@ -0,0 +1,328 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Global reset */
|
| 2 |
+
* { box-sizing: border-box; }
|
| 3 |
+
html { font-size: 16px; }
|
| 4 |
+
|
| 5 |
+
body {
|
| 6 |
+
margin: 0;
|
| 7 |
+
padding: 0;
|
| 8 |
+
background: #f8f9fa;
|
| 9 |
+
color: #1a1a1a;
|
| 10 |
+
font-family: system-ui, -apple-system, BlinkMacSystemFont, sans-serif;
|
| 11 |
+
}
|
| 12 |
+
|
| 13 |
+
/* ---- Navigation ---- */
|
| 14 |
+
#topNav {
|
| 15 |
+
position: sticky;
|
| 16 |
+
top: 0;
|
| 17 |
+
z-index: 100;
|
| 18 |
+
display: flex;
|
| 19 |
+
align-items: center;
|
| 20 |
+
justify-content: center;
|
| 21 |
+
gap: 8px;
|
| 22 |
+
padding: 0 24px;
|
| 23 |
+
height: 54px;
|
| 24 |
+
background: #fff;
|
| 25 |
+
border-bottom: 1px solid #ddd;
|
| 26 |
+
box-shadow: 0 1px 3px rgba(0,0,0,0.06);
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
.nav-brand {
|
| 30 |
+
font-size: 1.25rem;
|
| 31 |
+
font-weight: 700;
|
| 32 |
+
color: #1a1a1a;
|
| 33 |
+
margin-right: 12px;
|
| 34 |
+
white-space: nowrap;
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
.nav-tabs {
|
| 38 |
+
display: flex;
|
| 39 |
+
gap: 4px;
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
.tab-btn {
|
| 43 |
+
background: transparent;
|
| 44 |
+
border: none;
|
| 45 |
+
color: #666;
|
| 46 |
+
font-size: 0.9rem;
|
| 47 |
+
font-weight: 500;
|
| 48 |
+
padding: 8px 16px;
|
| 49 |
+
border-radius: 6px;
|
| 50 |
+
cursor: pointer;
|
| 51 |
+
transition: background 0.15s, color 0.15s;
|
| 52 |
+
width: auto;
|
| 53 |
+
height: auto;
|
| 54 |
+
display: inline-flex;
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
.tab-btn:hover { background: #f0f0f0; color: #333; }
|
| 58 |
+
.tab-btn.active { background: #e8e8e8; color: #111; }
|
| 59 |
+
|
| 60 |
+
.nav-links {
|
| 61 |
+
display: flex;
|
| 62 |
+
gap: 8px;
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
.nav-badge {
|
| 66 |
+
display: inline-flex;
|
| 67 |
+
align-items: center;
|
| 68 |
+
gap: 5px;
|
| 69 |
+
color: #444;
|
| 70 |
+
text-decoration: none;
|
| 71 |
+
font-family: inherit;
|
| 72 |
+
font-size: 0.82rem;
|
| 73 |
+
font-weight: 500;
|
| 74 |
+
padding: 5px 12px;
|
| 75 |
+
border-radius: 6px;
|
| 76 |
+
border: 1px solid #ddd;
|
| 77 |
+
background: #f8f9fa;
|
| 78 |
+
cursor: pointer;
|
| 79 |
+
transition: background 0.15s, border-color 0.15s;
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
.nav-badge:hover {
|
| 83 |
+
background: #eef0f3;
|
| 84 |
+
border-color: #bbb;
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
.nav-icon {
|
| 88 |
+
height: 16px;
|
| 89 |
+
width: auto;
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
/* ---- Tab panels ---- */
|
| 93 |
+
main { padding: 24px 32px; }
|
| 94 |
+
|
| 95 |
+
.tab-panel { display: none; }
|
| 96 |
+
.tab-panel.active { display: block; }
|
| 97 |
+
|
| 98 |
+
.tab-subtitle {
|
| 99 |
+
color: #666;
|
| 100 |
+
margin-top: 0;
|
| 101 |
+
margin-bottom: 20px;
|
| 102 |
+
font-size: 0.95rem;
|
| 103 |
+
}
|
| 104 |
+
|
| 105 |
+
/* ---- About / Overview page ---- */
|
| 106 |
+
.about-content {
|
| 107 |
+
max-width: 900px;
|
| 108 |
+
margin: 0 auto;
|
| 109 |
+
}
|
| 110 |
+
|
| 111 |
+
.about-content h2 {
|
| 112 |
+
text-align: center;
|
| 113 |
+
margin-top: 0;
|
| 114 |
+
font-size: 1.5rem;
|
| 115 |
+
color: #1a1a1a;
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
.venue-tag {
|
| 119 |
+
font-size: 0.75em;
|
| 120 |
+
font-weight: 600;
|
| 121 |
+
color: #2563b1;
|
| 122 |
+
white-space: nowrap;
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
.about-badges {
|
| 126 |
+
display: flex;
|
| 127 |
+
justify-content: center;
|
| 128 |
+
gap: 12px;
|
| 129 |
+
margin-bottom: 28px;
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
.about-section {
|
| 133 |
+
margin-bottom: 32px;
|
| 134 |
+
}
|
| 135 |
+
|
| 136 |
+
.about-section h3 {
|
| 137 |
+
text-align: center;
|
| 138 |
+
font-size: 1.15rem;
|
| 139 |
+
margin-bottom: 8px;
|
| 140 |
+
color: #333;
|
| 141 |
+
}
|
| 142 |
+
|
| 143 |
+
.about-section p,
|
| 144 |
+
.about-section li {
|
| 145 |
+
line-height: 1.7;
|
| 146 |
+
color: #444;
|
| 147 |
+
}
|
| 148 |
+
|
| 149 |
+
.about-section ul {
|
| 150 |
+
padding-left: 20px;
|
| 151 |
+
}
|
| 152 |
+
|
| 153 |
+
.about-section a {
|
| 154 |
+
color: #2563b1;
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
.tier-cards {
|
| 158 |
+
display: grid;
|
| 159 |
+
grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
|
| 160 |
+
gap: 12px;
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
.tier-card {
|
| 164 |
+
background: #fff;
|
| 165 |
+
border: 1px solid #ddd;
|
| 166 |
+
border-radius: 8px;
|
| 167 |
+
padding: 16px;
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
.tier-card h4 {
|
| 171 |
+
margin: 0 0 6px;
|
| 172 |
+
font-size: 0.95rem;
|
| 173 |
+
color: #1a1a1a;
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
.tier-card p {
|
| 177 |
+
margin: 0;
|
| 178 |
+
font-size: 0.9rem;
|
| 179 |
+
color: #555;
|
| 180 |
+
}
|
| 181 |
+
|
| 182 |
+
.game-grid {
|
| 183 |
+
display: grid;
|
| 184 |
+
grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
|
| 185 |
+
gap: 10px;
|
| 186 |
+
}
|
| 187 |
+
|
| 188 |
+
.game-card {
|
| 189 |
+
background: #fff;
|
| 190 |
+
border: 1px solid #ddd;
|
| 191 |
+
border-radius: 8px;
|
| 192 |
+
padding: 14px;
|
| 193 |
+
}
|
| 194 |
+
|
| 195 |
+
.game-card strong { color: #1a1a1a; }
|
| 196 |
+
|
| 197 |
+
.game-card p {
|
| 198 |
+
margin: 4px 0 0;
|
| 199 |
+
font-size: 0.85rem;
|
| 200 |
+
color: #555;
|
| 201 |
+
}
|
| 202 |
+
|
| 203 |
+
.overview-img {
|
| 204 |
+
display: block;
|
| 205 |
+
width: 100%;
|
| 206 |
+
max-width: 900px;
|
| 207 |
+
height: auto;
|
| 208 |
+
border-radius: 8px;
|
| 209 |
+
border: 1px solid #ddd;
|
| 210 |
+
margin: 12px auto;
|
| 211 |
+
}
|
| 212 |
+
|
| 213 |
+
.overview-table {
|
| 214 |
+
width: 100%;
|
| 215 |
+
border-collapse: collapse;
|
| 216 |
+
font-size: 0.9rem;
|
| 217 |
+
margin: 12px auto;
|
| 218 |
+
}
|
| 219 |
+
|
| 220 |
+
.overview-table th,
|
| 221 |
+
.overview-table td {
|
| 222 |
+
padding: 8px 12px;
|
| 223 |
+
border: 1px solid #ddd;
|
| 224 |
+
text-align: left;
|
| 225 |
+
}
|
| 226 |
+
|
| 227 |
+
.overview-table th {
|
| 228 |
+
background: #f0f2f5;
|
| 229 |
+
font-weight: 600;
|
| 230 |
+
color: #333;
|
| 231 |
+
}
|
| 232 |
+
|
| 233 |
+
.overview-table td {
|
| 234 |
+
color: #444;
|
| 235 |
+
}
|
| 236 |
+
|
| 237 |
+
.citation-block {
|
| 238 |
+
background: #f0f2f5;
|
| 239 |
+
padding: 16px;
|
| 240 |
+
border-radius: 8px;
|
| 241 |
+
border: 1px solid #ddd;
|
| 242 |
+
font-size: 0.85rem;
|
| 243 |
+
color: #333;
|
| 244 |
+
overflow-x: auto;
|
| 245 |
+
white-space: pre;
|
| 246 |
+
}
|
| 247 |
+
|
| 248 |
+
/* ---- BibTeX button & modal ---- */
|
| 249 |
+
.bibtex-modal {
|
| 250 |
+
display: none;
|
| 251 |
+
position: fixed;
|
| 252 |
+
top: 0; left: 0; right: 0; bottom: 0;
|
| 253 |
+
background: rgba(0,0,0,0.4);
|
| 254 |
+
z-index: 200;
|
| 255 |
+
justify-content: center;
|
| 256 |
+
align-items: center;
|
| 257 |
+
}
|
| 258 |
+
|
| 259 |
+
.bibtex-modal.open {
|
| 260 |
+
display: flex;
|
| 261 |
+
}
|
| 262 |
+
|
| 263 |
+
.bibtex-modal-content {
|
| 264 |
+
background: #fff;
|
| 265 |
+
border-radius: 10px;
|
| 266 |
+
box-shadow: 0 8px 32px rgba(0,0,0,0.18);
|
| 267 |
+
padding: 24px;
|
| 268 |
+
max-width: 640px;
|
| 269 |
+
width: 90%;
|
| 270 |
+
}
|
| 271 |
+
|
| 272 |
+
.bibtex-modal-header {
|
| 273 |
+
display: flex;
|
| 274 |
+
justify-content: space-between;
|
| 275 |
+
align-items: center;
|
| 276 |
+
margin-bottom: 12px;
|
| 277 |
+
font-weight: 600;
|
| 278 |
+
font-size: 1rem;
|
| 279 |
+
color: #1a1a1a;
|
| 280 |
+
}
|
| 281 |
+
|
| 282 |
+
.bibtex-close {
|
| 283 |
+
background: none;
|
| 284 |
+
border: none;
|
| 285 |
+
font-size: 1.5rem;
|
| 286 |
+
cursor: pointer;
|
| 287 |
+
color: #666;
|
| 288 |
+
padding: 0 4px;
|
| 289 |
+
line-height: 1;
|
| 290 |
+
}
|
| 291 |
+
|
| 292 |
+
.bibtex-close:hover { color: #111; }
|
| 293 |
+
|
| 294 |
+
.bibtex-code {
|
| 295 |
+
background: #f0f2f5;
|
| 296 |
+
padding: 16px;
|
| 297 |
+
border-radius: 8px;
|
| 298 |
+
border: 1px solid #ddd;
|
| 299 |
+
font-size: 0.82rem;
|
| 300 |
+
color: #333;
|
| 301 |
+
overflow-x: auto;
|
| 302 |
+
white-space: pre;
|
| 303 |
+
margin: 0 0 12px;
|
| 304 |
+
}
|
| 305 |
+
|
| 306 |
+
.bibtex-copy-btn {
|
| 307 |
+
display: block;
|
| 308 |
+
margin: 0 auto;
|
| 309 |
+
padding: 8px 20px;
|
| 310 |
+
background: #2563b1;
|
| 311 |
+
color: #fff;
|
| 312 |
+
border: none;
|
| 313 |
+
border-radius: 6px;
|
| 314 |
+
font-size: 0.88rem;
|
| 315 |
+
font-weight: 500;
|
| 316 |
+
cursor: pointer;
|
| 317 |
+
transition: background 0.15s;
|
| 318 |
+
}
|
| 319 |
+
|
| 320 |
+
.bibtex-copy-btn:hover { background: #1d4f91; }
|
| 321 |
+
|
| 322 |
+
/* ---- Responsive ---- */
|
| 323 |
+
@media (max-width: 700px) {
|
| 324 |
+
#topNav { flex-wrap: wrap; height: auto; padding: 10px 16px; }
|
| 325 |
+
.nav-tabs { width: 100%; justify-content: center; }
|
| 326 |
+
.nav-links { width: 100%; justify-content: center; }
|
| 327 |
+
main { padding: 16px; }
|
| 328 |
+
}
|
css/viewer.css
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Dataset browser styles — light theme */
|
| 2 |
+
|
| 3 |
+
/* Top controls */
|
| 4 |
+
#tab-browser #controls {
|
| 5 |
+
margin-bottom: 20px;
|
| 6 |
+
display: flex;
|
| 7 |
+
flex-wrap: wrap;
|
| 8 |
+
gap: 10px;
|
| 9 |
+
align-items: center;
|
| 10 |
+
}
|
| 11 |
+
|
| 12 |
+
#tab-browser label {
|
| 13 |
+
font-size: 1rem;
|
| 14 |
+
}
|
| 15 |
+
|
| 16 |
+
#tab-browser select,
|
| 17 |
+
#tab-browser input {
|
| 18 |
+
font-size: 1rem;
|
| 19 |
+
background: #fff;
|
| 20 |
+
color: #1a1a1a;
|
| 21 |
+
border-radius: 4px;
|
| 22 |
+
border: 1px solid #ccc;
|
| 23 |
+
padding: 8px 12px;
|
| 24 |
+
}
|
| 25 |
+
|
| 26 |
+
#navButtons {
|
| 27 |
+
display: flex;
|
| 28 |
+
gap: 10px;
|
| 29 |
+
align-items: center;
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
#tab-browser button {
|
| 33 |
+
font-size: 1rem;
|
| 34 |
+
width: 100px;
|
| 35 |
+
height: 42px;
|
| 36 |
+
display: flex;
|
| 37 |
+
align-items: center;
|
| 38 |
+
justify-content: center;
|
| 39 |
+
gap: 6px;
|
| 40 |
+
background: #e8e8e8;
|
| 41 |
+
color: #333;
|
| 42 |
+
border: 1px solid #ccc;
|
| 43 |
+
border-radius: 4px;
|
| 44 |
+
padding: 8px 12px;
|
| 45 |
+
cursor: pointer;
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
#tab-browser button:hover { background: #ddd; }
|
| 49 |
+
|
| 50 |
+
/* Layout (2 columns) */
|
| 51 |
+
#layout {
|
| 52 |
+
display: grid;
|
| 53 |
+
grid-template-columns: minmax(0, 1.4fr) minmax(0, 1fr);
|
| 54 |
+
gap: 15px;
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
/* Question section */
|
| 58 |
+
#questionCard {
|
| 59 |
+
font-size: 1.2rem;
|
| 60 |
+
background: #fff;
|
| 61 |
+
padding: 16px;
|
| 62 |
+
border-radius: 8px;
|
| 63 |
+
border: 1px solid #ddd;
|
| 64 |
+
margin-bottom: 12px;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
.id-tag {
|
| 68 |
+
font-size: 0.8em;
|
| 69 |
+
color: #888;
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
.question-label {
|
| 73 |
+
font-weight: bold;
|
| 74 |
+
color: #c0392b;
|
| 75 |
+
font-size: 1.2rem;
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
.question-text {
|
| 79 |
+
font-size: 1.2rem;
|
| 80 |
+
line-height: 1.8;
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
/* Answer block */
|
| 84 |
+
#answerBlock {
|
| 85 |
+
background: #fff;
|
| 86 |
+
padding: 16px;
|
| 87 |
+
border-radius: 8px;
|
| 88 |
+
border: 1px solid #ddd;
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
.answer-info { margin-top: 8px; }
|
| 92 |
+
.answer-info.correct { color: #27ae60; }
|
| 93 |
+
.answer-info.wrong { color: #c0392b; }
|
| 94 |
+
|
| 95 |
+
details { margin-top: 10px; }
|
| 96 |
+
|
| 97 |
+
/* Right side image container */
|
| 98 |
+
#imageContainer {
|
| 99 |
+
background: #fff;
|
| 100 |
+
padding: 16px;
|
| 101 |
+
border-radius: 8px;
|
| 102 |
+
border: 1px solid #ddd;
|
| 103 |
+
max-height: calc(100vh - 200px);
|
| 104 |
+
overflow-y: auto;
|
| 105 |
+
overflow-x: hidden;
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
#imageContainer.hidden { display: none; }
|
| 109 |
+
|
| 110 |
+
/* Multi-image layout */
|
| 111 |
+
#multiImages {
|
| 112 |
+
display: flex;
|
| 113 |
+
flex-direction: column;
|
| 114 |
+
gap: 18px;
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
.multi-img-block {
|
| 118 |
+
background: #f8f9fa;
|
| 119 |
+
padding: 12px;
|
| 120 |
+
border-radius: 8px;
|
| 121 |
+
border: 1px solid #ddd;
|
| 122 |
+
display: flex;
|
| 123 |
+
flex-direction: column;
|
| 124 |
+
align-items: center;
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
.multi-img-block img {
|
| 128 |
+
width: 900px;
|
| 129 |
+
max-width: 100%;
|
| 130 |
+
height: auto;
|
| 131 |
+
object-fit: contain;
|
| 132 |
+
border-radius: 10px;
|
| 133 |
+
border: 1px solid #ccc;
|
| 134 |
+
display: block;
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
+
.multi-img-caption {
|
| 138 |
+
margin-top: 6px;
|
| 139 |
+
font-size: 12px;
|
| 140 |
+
color: #888;
|
| 141 |
+
}
|
| 142 |
+
|
| 143 |
+
/* Spinner */
|
| 144 |
+
.spinner {
|
| 145 |
+
margin: 10px auto;
|
| 146 |
+
width: 38px;
|
| 147 |
+
height: 38px;
|
| 148 |
+
border: 4px solid #ddd;
|
| 149 |
+
border-top-color: #2563b1;
|
| 150 |
+
border-radius: 50%;
|
| 151 |
+
animation: spin 0.8s linear infinite;
|
| 152 |
+
}
|
| 153 |
+
|
| 154 |
+
@keyframes spin { to { transform: rotate(360deg); } }
|
| 155 |
+
|
| 156 |
+
/* Preformatted JSON */
|
| 157 |
+
#tab-browser pre {
|
| 158 |
+
background: #f0f2f5;
|
| 159 |
+
padding: 15px;
|
| 160 |
+
border-radius: 8px;
|
| 161 |
+
overflow-x: auto;
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
.full-img-link {
|
| 165 |
+
margin-top: 4px;
|
| 166 |
+
font-size: 12px;
|
| 167 |
+
color: #2563b1;
|
| 168 |
+
text-decoration: none;
|
| 169 |
+
}
|
| 170 |
+
.full-img-link:hover { text-decoration: underline; }
|
| 171 |
+
|
| 172 |
+
/* Responsive */
|
| 173 |
+
@media (max-width: 700px) {
|
| 174 |
+
#layout {
|
| 175 |
+
grid-template-columns: 1fr;
|
| 176 |
+
}
|
| 177 |
+
}
|
index.html
CHANGED
|
@@ -1,19 +1,296 @@
|
|
| 1 |
-
<!
|
| 2 |
-
<html>
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
</html>
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="utf-8" />
|
| 5 |
+
<title>LudoBench: Board Game Reasoning Benchmark</title>
|
| 6 |
+
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
| 7 |
+
<link rel="stylesheet" href="css/main.css" />
|
| 8 |
+
<link rel="stylesheet" href="css/leaderboard.css" />
|
| 9 |
+
<link rel="stylesheet" href="css/viewer.css" />
|
| 10 |
+
</head>
|
| 11 |
+
<body>
|
| 12 |
+
|
| 13 |
+
<nav id="topNav">
|
| 14 |
+
<div class="nav-brand">LudoBench</div>
|
| 15 |
+
<div class="nav-tabs">
|
| 16 |
+
<button class="tab-btn active" data-tab="about">LudoBench Overview</button>
|
| 17 |
+
<button class="tab-btn" data-tab="leaderboard">Leaderboard</button>
|
| 18 |
+
<button class="tab-btn" data-tab="browser">Dataset Browser</button>
|
| 19 |
+
</div>
|
| 20 |
+
<div class="nav-links">
|
| 21 |
+
<a href="https://openreview.net/forum?id=TOgQ00DEek" target="_blank" class="nav-badge">
|
| 22 |
+
<img src="https://upload.wikimedia.org/wikipedia/commons/b/bc/ArXiv_logo_2022.svg" alt="arXiv" class="nav-icon" />
|
| 23 |
+
Paper
|
| 24 |
+
</a>
|
| 25 |
+
<a href="https://huggingface.co/datasets/launch/LudoBench" target="_blank" class="nav-badge">
|
| 26 |
+
<img src="https://huggingface.co/front/assets/huggingface_logo.svg" alt="HF" class="nav-icon" />
|
| 27 |
+
Dataset
|
| 28 |
+
</a>
|
| 29 |
+
<a href="https://github.com/jpeper/LudoBench" target="_blank" class="nav-badge">
|
| 30 |
+
<svg class="nav-icon" viewBox="0 0 16 16" fill="currentColor"><path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0016 8c0-4.42-3.58-8-8-8z"/></svg>
|
| 31 |
+
GitHub
|
| 32 |
+
</a>
|
| 33 |
+
<button class="nav-badge bibtex-btn" onclick="toggleBibtex()">
|
| 34 |
+
<svg class="nav-icon" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M14 2H6a2 2 0 00-2 2v16a2 2 0 002 2h12a2 2 0 002-2V8z"/><polyline points="14 2 14 8 20 8"/><line x1="16" y1="13" x2="8" y2="13"/><line x1="16" y1="17" x2="8" y2="17"/><polyline points="10 9 9 9 8 9"/></svg>
|
| 35 |
+
BibTeX
|
| 36 |
+
</button>
|
| 37 |
+
</div>
|
| 38 |
+
</nav>
|
| 39 |
+
|
| 40 |
+
<!-- BibTeX Modal -->
|
| 41 |
+
<div id="bibtexModal" class="bibtex-modal" onclick="if(event.target===this)toggleBibtex()">
|
| 42 |
+
<div class="bibtex-modal-content">
|
| 43 |
+
<div class="bibtex-modal-header">
|
| 44 |
+
<span>BibTeX Citation</span>
|
| 45 |
+
<button class="bibtex-close" onclick="toggleBibtex()">×</button>
|
| 46 |
+
</div>
|
| 47 |
+
<pre id="bibtexCode" class="bibtex-code">@inproceedings{peper2026ludobench,
|
| 48 |
+
title={{LLMs} as Rules Oracles: Exploring Real-World Multimodal Reasoning in Tabletop Strategy Game Environments},
|
| 49 |
+
author={Peper, Joseph J. and Gandra, Sai Krishna and Zhang, Yunxiang and Chennareddy, Vaibhav and Jha, Shloki and Payani, Ali and Wang, Lu},
|
| 50 |
+
booktitle={Proceedings of the Fourteenth International Conference on Learning Representations (ICLR)},
|
| 51 |
+
year={2026},
|
| 52 |
+
address={Rio de Janeiro, Brazil}
|
| 53 |
+
}</pre>
|
| 54 |
+
<button class="bibtex-copy-btn" onclick="copyBibtex()">Copy to Clipboard</button>
|
| 55 |
+
</div>
|
| 56 |
+
</div>
|
| 57 |
+
|
| 58 |
+
<main>
|
| 59 |
+
|
| 60 |
+
<!-- ==================== LEADERBOARD TAB ==================== -->
|
| 61 |
+
<section id="tab-leaderboard" class="tab-panel">
|
| 62 |
+
<h2>Leaderboard</h2>
|
| 63 |
+
<p class="tab-subtitle">
|
| 64 |
+
Accuracy of multimodal models across <strong>5 board games</strong>,
|
| 65 |
+
<strong>3 reasoning tiers</strong>, and <strong>3 rulebook modalities</strong>.
|
| 66 |
+
Click any column header to sort.
|
| 67 |
+
</p>
|
| 68 |
+
|
| 69 |
+
<div id="leaderboard-filters">
|
| 70 |
+
<div class="filter-group">
|
| 71 |
+
<span class="filter-label">Tier:</span>
|
| 72 |
+
<div class="filter-buttons" data-filter="tier">
|
| 73 |
+
<button class="filter-btn active" data-value="all">All</button>
|
| 74 |
+
<button class="filter-btn" data-value="T1">T1</button>
|
| 75 |
+
<button class="filter-btn" data-value="T2">T2</button>
|
| 76 |
+
<button class="filter-btn" data-value="T3">T3</button>
|
| 77 |
+
</div>
|
| 78 |
+
</div>
|
| 79 |
+
<div class="filter-group">
|
| 80 |
+
<span class="filter-label">Game:</span>
|
| 81 |
+
<div class="filter-buttons" data-filter="game">
|
| 82 |
+
<button class="filter-btn active" data-value="all">All</button>
|
| 83 |
+
<button class="filter-btn" data-value="KingD">Kingdomino</button>
|
| 84 |
+
<button class="filter-btn" data-value="Res Arcana">Res Arcana</button>
|
| 85 |
+
<button class="filter-btn" data-value="Pax Ren.">Pax Ren.</button>
|
| 86 |
+
<button class="filter-btn" data-value="Carca.">Carcassonne</button>
|
| 87 |
+
<button class="filter-btn" data-value="Catan">Catan</button>
|
| 88 |
+
</div>
|
| 89 |
+
</div>
|
| 90 |
+
<div class="filter-group">
|
| 91 |
+
<span class="filter-label">Rules Modality:</span>
|
| 92 |
+
<div class="filter-buttons" data-filter="modality">
|
| 93 |
+
<button class="filter-btn active" data-value="all">All</button>
|
| 94 |
+
<button class="filter-btn" data-value="None">None</button>
|
| 95 |
+
<button class="filter-btn" data-value="Text">Text</button>
|
| 96 |
+
<button class="filter-btn" data-value="Image">Image</button>
|
| 97 |
+
</div>
|
| 98 |
+
</div>
|
| 99 |
+
</div>
|
| 100 |
+
|
| 101 |
+
<div id="leaderboard-table-wrap">
|
| 102 |
+
<table id="leaderboard-table">
|
| 103 |
+
<thead id="leaderboard-thead"></thead>
|
| 104 |
+
<tbody id="leaderboard-tbody"></tbody>
|
| 105 |
+
</table>
|
| 106 |
+
</div>
|
| 107 |
+
</section>
|
| 108 |
+
|
| 109 |
+
<!-- ==================== DATASET BROWSER TAB ==================== -->
|
| 110 |
+
<section id="tab-browser" class="tab-panel">
|
| 111 |
+
<h2>Dataset Browser</h2>
|
| 112 |
+
<p class="tab-subtitle">Browse 638 annotated QA examples across 5 games and 3 difficulty tiers.</p>
|
| 113 |
+
|
| 114 |
+
<div id="controls">
|
| 115 |
+
<label>
|
| 116 |
+
Folder:
|
| 117 |
+
<select id="folderSelect"></select>
|
| 118 |
+
</label>
|
| 119 |
+
<label>
|
| 120 |
+
Example:
|
| 121 |
+
<select id="fileSelect"></select>
|
| 122 |
+
</label>
|
| 123 |
+
<div id="navButtons">
|
| 124 |
+
<button id="prevBtn"><span class="icon">«</span> Prev</button>
|
| 125 |
+
<button id="nextBtn">Next <span class="icon">»</span></button>
|
| 126 |
+
</div>
|
| 127 |
+
</div>
|
| 128 |
+
|
| 129 |
+
<div id="layout">
|
| 130 |
+
<div>
|
| 131 |
+
<div id="questionCard">Loading…</div>
|
| 132 |
+
<div id="answerBlock">
|
| 133 |
+
<label for="answerInput">Your answer:</label>
|
| 134 |
+
<input id="answerInput" type="text" />
|
| 135 |
+
<button id="checkButton">Check</button>
|
| 136 |
+
<div id="answerInfo" class="answer-info"></div>
|
| 137 |
+
<details>
|
| 138 |
+
<summary>Show solution</summary>
|
| 139 |
+
<div id="solutionText"></div>
|
| 140 |
+
</details>
|
| 141 |
+
</div>
|
| 142 |
+
</div>
|
| 143 |
+
<div>
|
| 144 |
+
<div id="imageContainer" class="image-wrapper hidden">
|
| 145 |
+
<h3>Game state</h3>
|
| 146 |
+
<div id="multiImages"></div>
|
| 147 |
+
</div>
|
| 148 |
+
</div>
|
| 149 |
+
</div>
|
| 150 |
+
</section>
|
| 151 |
+
|
| 152 |
+
<!-- ==================== ABOUT TAB ==================== -->
|
| 153 |
+
<section id="tab-about" class="tab-panel active">
|
| 154 |
+
<div class="about-content">
|
| 155 |
+
<h2>LLMs as Rules Oracles: Exploring Real-World Multimodal Reasoning in Tabletop Strategy Game Environments <span class="venue-tag">[ICLR 2026]</span></h2>
|
| 156 |
+
|
| 157 |
+
<div class="about-badges">
|
| 158 |
+
<a href="https://openreview.net/forum?id=TOgQ00DEek" target="_blank" class="nav-badge">
|
| 159 |
+
<img src="https://upload.wikimedia.org/wikipedia/commons/b/bc/ArXiv_logo_2022.svg" alt="arXiv" class="nav-icon" />
|
| 160 |
+
Paper
|
| 161 |
+
</a>
|
| 162 |
+
<a href="https://huggingface.co/datasets/launch/LudoBench" target="_blank" class="nav-badge">
|
| 163 |
+
<img src="https://huggingface.co/front/assets/huggingface_logo.svg" alt="HF" class="nav-icon" />
|
| 164 |
+
Dataset
|
| 165 |
+
</a>
|
| 166 |
+
<a href="https://github.com/jpeper/LudoBench" target="_blank" class="nav-badge">
|
| 167 |
+
<svg class="nav-icon" viewBox="0 0 16 16" fill="currentColor"><path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0016 8c0-4.42-3.58-8-8-8z"/></svg>
|
| 168 |
+
GitHub
|
| 169 |
+
</a>
|
| 170 |
+
<button class="nav-badge bibtex-btn" onclick="toggleBibtex()">
|
| 171 |
+
<svg class="nav-icon" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M14 2H6a2 2 0 00-2 2v16a2 2 0 002 2h12a2 2 0 002-2V8z"/><polyline points="14 2 14 8 20 8"/><line x1="16" y1="13" x2="8" y2="13"/><line x1="16" y1="17" x2="8" y2="17"/><polyline points="10 9 9 9 8 9"/></svg>
|
| 172 |
+
BibTeX
|
| 173 |
+
</button>
|
| 174 |
+
</div>
|
| 175 |
+
|
| 176 |
+
<div class="about-section">
|
| 177 |
+
<h3>Abstract</h3>
|
| 178 |
+
<p>
|
| 179 |
+
We introduce <strong>LudoBench</strong>, a multimodal reasoning benchmark that evaluates whether vision-enabled large language models (LMs) can acquire, integrate, and reason over heterogeneous game knowledge in <strong>mainstream analog tabletop games</strong>. Unlike prior works that emphasize deep strategic mastery, LudoBench targets an initial reasoning challenge uninitiated gamers face: <strong>correctly comprehending a new tabletop strategy game for the first time</strong>. We examine whether, given a visual depiction of a tabletop scene and a corresponding ruleset, a model can correctly answer grounded questions about the pictured scenario.
|
| 180 |
+
</p>
|
| 181 |
+
<p>Concretely, LudoBench tests three cumulative situated game-comprehension capabilities:</p>
|
| 182 |
+
<ul>
|
| 183 |
+
<li><strong>Tier 1: Environment Perception</strong> — recognizing objects, counting components, and identifying basic game state features</li>
|
| 184 |
+
<li><strong>Tier 2: Heterogeneous Rules Integration</strong> — applying multimodal rulebook knowledge to answer grounded questions</li>
|
| 185 |
+
<li><strong>Tier 3: Short-Horizon Optimization</strong> — planning optimal moves requiring strategic reasoning over game mechanics</li>
|
| 186 |
+
</ul>
|
| 187 |
+
<p>These progressively stress-test the foundational reasoning required for real-world game comprehension.</p>
|
| 188 |
+
<p>
|
| 189 |
+
Evaluating frontier LMs on five diverse strategy games, we find that even the strongest models achieve only <strong>~76% accuracy</strong> on simple T1 environment perception tasks and fall below <strong>13%</strong> on situated T3 multi-step comprehension puzzles that hobbyist gamers can routinely solve. Our extensive failure analysis and knowledge-ablation experiments reveal that models largely <strong>fail to comprehend rich cross-modal reference knowledge</strong> and are subsequently unable to apply this knowledge to messy and unfamiliar situated environments. Our findings highlight the many steps remaining for current methods to succeed on complex multimodal reasoning in the real world.
|
| 190 |
+
</p>
|
| 191 |
+
</div>
|
| 192 |
+
|
| 193 |
+
<div class="about-section">
|
| 194 |
+
<h3>Games Overview</h3>
|
| 195 |
+
<p>
|
| 196 |
+
The dataset consists of five tabletop strategy games that vary widely in complexity, components, and rule structure. The details of each game—along with representative sample game states—are shown below.
|
| 197 |
+
</p>
|
| 198 |
+
<img src="assets/games_overview_updated.png" alt="Games Overview" class="overview-img" />
|
| 199 |
+
<table class="overview-table">
|
| 200 |
+
<thead>
|
| 201 |
+
<tr>
|
| 202 |
+
<th>Game</th>
|
| 203 |
+
<th>Rulebook</th>
|
| 204 |
+
<th>Diff.</th>
|
| 205 |
+
<th>Unique Game Properties</th>
|
| 206 |
+
<th># Rules</th>
|
| 207 |
+
<th># Figs.</th>
|
| 208 |
+
</tr>
|
| 209 |
+
</thead>
|
| 210 |
+
<tbody>
|
| 211 |
+
<tr><td><em>Kingdomino</em></td><td>4 pg.</td><td>1.2</td><td>tile-laying, spatial scoring, individual player areas</td><td>35</td><td>6</td></tr>
|
| 212 |
+
<tr><td><em>Carcassonne</em></td><td>8 pg.</td><td>1.9</td><td>shared tile-laying, dynamic board topology, position-coded roles</td><td>39</td><td>30</td></tr>
|
| 213 |
+
<tr><td><em>Catan</em></td><td>16 pg.</td><td>2.3</td><td>network building, connectivity constraints, action chaining</td><td>44</td><td>19</td></tr>
|
| 214 |
+
<tr><td><em>Res Arcana</em></td><td>12 pg.</td><td>2.6</td><td>card-based interactions, heavy symbol usage, card orientation, action sequencing</td><td>112</td><td>31</td></tr>
|
| 215 |
+
<tr><td><em>Pax Ren. (2e)</em></td><td>44 pg.</td><td>4.6</td><td>shared map, private cards/tableau, large number of components, intricate ruleset</td><td>247</td><td>58</td></tr>
|
| 216 |
+
</tbody>
|
| 217 |
+
</table>
|
| 218 |
+
</div>
|
| 219 |
+
|
| 220 |
+
<div class="about-section">
|
| 221 |
+
<h3>Tiers Overview</h3>
|
| 222 |
+
<p>
|
| 223 |
+
The benchmark evaluates models across three tiered reasoning levels that progressively increase in difficulty, from basic visual perception to rule integration and short-horizon planning. An example of how questions differ for each tier in Kingdomino is shown below:
|
| 224 |
+
</p>
|
| 225 |
+
<img src="assets/tier_overview.png" alt="Tier-wise Q&A Example" class="overview-img" />
|
| 226 |
+
</div>
|
| 227 |
+
|
| 228 |
+
<div class="about-section">
|
| 229 |
+
<h3>Knowledge Ablation: Rules Modalities</h3>
|
| 230 |
+
<p>
|
| 231 |
+
A central question in LudoBench is whether models can acquire and apply game rules from different knowledge sources.
|
| 232 |
+
To investigate this, every question is evaluated under three <strong>rules modality</strong> conditions that vary what reference knowledge is provided alongside the game-state image and question:
|
| 233 |
+
</p>
|
| 234 |
+
<div class="tier-cards">
|
| 235 |
+
<div class="tier-card">
|
| 236 |
+
<h4>None (Parametric)</h4>
|
| 237 |
+
<p>No rulebook is provided. The model must rely entirely on <strong>parametric knowledge</strong> — whatever it has internalized about the game from pretraining. This baseline reveals how much a model already "knows" about a game's rules.</p>
|
| 238 |
+
</div>
|
| 239 |
+
<div class="tier-card">
|
| 240 |
+
<h4>Text Rules</h4>
|
| 241 |
+
<p>The game's rulebook is provided as <strong>extracted text</strong> in the prompt context. This tests whether explicit textual rule descriptions improve situated reasoning, and whether models can ground text-based rules against a visual game state.</p>
|
| 242 |
+
</div>
|
| 243 |
+
<div class="tier-card">
|
| 244 |
+
<h4>Image Rules</h4>
|
| 245 |
+
<p>The rulebook is provided as <strong>images of the original pages</strong>, including diagrams, icons, and annotated examples. This tests the model's ability to extract and apply rules from rich, cross-modal visual documents — the format real players actually encounter.</p>
|
| 246 |
+
</div>
|
| 247 |
+
</div>
|
| 248 |
+
<p>
|
| 249 |
+
Across all three conditions, models consistently struggle to comprehend cross-modal reference knowledge. Notably, providing rulebook content — whether as text or images — does not uniformly improve performance, revealing fundamental gaps in how models integrate heterogeneous knowledge with situated visual environments.
|
| 250 |
+
</p>
|
| 251 |
+
</div>
|
| 252 |
+
|
| 253 |
+
<div class="about-section">
|
| 254 |
+
<h3>Failure Analysis</h3>
|
| 255 |
+
<p>
|
| 256 |
+
We analyze where models go wrong by collecting common failure cases across multiple models and organizing them for visualization on Kingdomino. The table below summarizes the relevant rulebook rules, supporting annotations, and the observed model errors for each failure pattern.
|
| 257 |
+
</p>
|
| 258 |
+
<img src="assets/failure_analysis.png" alt="Failure Analysis" class="overview-img" />
|
| 259 |
+
</div>
|
| 260 |
+
|
| 261 |
+
<div class="about-section">
|
| 262 |
+
<h3>Citation</h3>
|
| 263 |
+
<pre class="citation-block">@inproceedings{peper2026ludobench,
|
| 264 |
+
title={{LLMs} as Rules Oracles: Exploring Real-World Multimodal Reasoning in Tabletop Strategy Game Environments},
|
| 265 |
+
author={Peper, Joseph J. and Gandra, Sai Krishna and Zhang, Yunxiang and Chennareddy, Vaibhav and Jha, Shloki and Payani, Ali and Wang, Lu},
|
| 266 |
+
booktitle={Proceedings of the Fourteenth International Conference on Learning Representations (ICLR)},
|
| 267 |
+
year={2026},
|
| 268 |
+
address={Rio de Janeiro, Brazil}
|
| 269 |
+
}</pre>
|
| 270 |
+
</div>
|
| 271 |
+
|
| 272 |
+
</div>
|
| 273 |
+
</section>
|
| 274 |
+
|
| 275 |
+
</main>
|
| 276 |
+
|
| 277 |
+
<script src="js/leaderboard-data.js"></script>
|
| 278 |
+
<script src="js/leaderboard.js"></script>
|
| 279 |
+
<script src="js/viewer.js"></script>
|
| 280 |
+
<script src="js/app.js"></script>
|
| 281 |
+
<script>
|
| 282 |
+
// Flip the "open" class on the #bibtexModal element. Invoked from the inline
// onclick handlers of the BibTeX badge button and the modal's close button.
function toggleBibtex() {
  document.getElementById("bibtexModal").classList.toggle("open");
}
|
| 286 |
+
// Copy the BibTeX entry shown in #bibtexCode to the clipboard and report the
// outcome on the ".bibtex-copy-btn" button for two seconds.
//
// The async Clipboard API is only exposed in secure contexts and the write can
// be rejected (for example when permission is denied), so both situations are
// handled explicitly and surfaced as "Copy failed" instead of throwing or
// leaving an unhandled promise rejection.
function copyBibtex() {
  const source = document.getElementById("bibtexCode");
  if (!source) return;

  const text = source.textContent;
  const btn = document.querySelector(".bibtex-copy-btn");

  // Show a temporary status label, then restore the default caption.
  const showStatus = (label) => {
    if (!btn) return;
    btn.textContent = label;
    setTimeout(() => { btn.textContent = "Copy to Clipboard"; }, 2000);
  };

  const clipboard = navigator.clipboard;
  if (!clipboard || typeof clipboard.writeText !== "function") {
    showStatus("Copy failed");
    return;
  }

  clipboard.writeText(text).then(
    () => showStatus("Copied!"),
    () => showStatus("Copy failed")
  );
}
|
| 294 |
+
</script>
|
| 295 |
+
</body>
|
| 296 |
</html>
|
js/app.js
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Tab navigation and initialization controller.
|
| 2 |
+
|
| 3 |
+
// Tab navigation: clicking a ".tab-btn" activates the panel whose id is
// "tab-" + the button's data-tab value and then calls that tab's init
// function, if one is defined. The leaderboard is also initialized on load.
document.addEventListener("DOMContentLoaded", () => {
  const tabs = document.querySelectorAll(".tab-btn");
  const panels = document.querySelectorAll(".tab-panel");

  tabs.forEach((btn) => {
    btn.addEventListener("click", () => {
      const target = btn.dataset.tab;
      const panel = document.getElementById("tab-" + target);

      // Resolve the panel before touching any classes: a button without a
      // matching panel must not clear every tab and then throw, which would
      // leave the page with no visible panel.
      if (!panel) return;

      tabs.forEach((t) => t.classList.remove("active"));
      panels.forEach((p) => p.classList.remove("active"));

      btn.classList.add("active");
      panel.classList.add("active");

      // These run on every activation of the tab, not only the first one.
      // NOTE(review): presumably initViewer/initLeaderboard guard against
      // repeated initialization themselves - confirm in their definitions.
      if (target === "browser" && typeof initViewer === "function") {
        initViewer();
      }
      if (target === "leaderboard" && typeof initLeaderboard === "function") {
        initLeaderboard();
      }
    });
  });

  // Initialize leaderboard on page load
  if (typeof initLeaderboard === "function") {
    initLeaderboard();
  }
});
|
js/leaderboard-data.js
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Auto-generated from figure3_master_heatmap.csv
|
| 2 |
+
// Structure: array of { model, tier, scores: { "Game|Modality": value } }; each value is a fraction between 0 and 1, or null where no score is listed.
|
| 3 |
+
|
| 4 |
+
// Short game keys; these are the "Game" half of every "Game|Modality" score key.
const GAMES = [
  "KingD",
  "Res Arcana",
  "Pax Ren.",
  "Carca.",
  "Catan",
];

// Short game key -> full game name.
const GAME_LABELS = {
  "KingD": "Kingdomino",
  "Res Arcana": "Res Arcana",
  "Pax Ren.": "Pax Renaissance",
  "Carca.": "Carcassonne",
  "Catan": "Catan",
};

// Rules-modality names; these are the "Modality" half of every score key.
const MODALITIES = [
  "None",
  "Text",
  "Image",
];

// Tier identifiers as they appear in each entry's `tier` field.
const TIERS = [
  "T1",
  "T2",
  "T3",
];

// Tier identifier -> descriptive tier title.
const TIER_LABELS = {
  "T1": "Tier 1: Environment Perception",
  "T2": "Tier 2: Rules Integration",
  "T3": "Tier 3: Short-Horizon Optimization",
};
|
| 19 |
+
|
| 20 |
+
const BENCHMARK_DATA = [
|
| 21 |
+
{ model: "GPT-4o", tier: "T1", scores: { "KingD|None": 0.381, "KingD|Text": 0.571, "KingD|Image": 0.429, "Res Arcana|None": 0.650, "Res Arcana|Text": 0.650, "Res Arcana|Image": 0.575, "Pax Ren.|None": 0.375, "Pax Ren.|Text": 0.475, "Pax Ren.|Image": 0.600, "Carca.|None": 0.425, "Carca.|Text": 0.350, "Carca.|Image": 0.400, "Catan|None": 0.450, "Catan|Text": 0.450, "Catan|Image": 0.450 } },
|
| 22 |
+
{ model: "o1", tier: "T1", scores: { "KingD|None": 0.524, "KingD|Text": 0.429, "KingD|Image": 0.429, "Res Arcana|None": 0.675, "Res Arcana|Text": 0.525, "Res Arcana|Image": 0.600, "Pax Ren.|None": 0.450, "Pax Ren.|Text": 0.550, "Pax Ren.|Image": 0.475, "Carca.|None": 0.450, "Carca.|Text": 0.450, "Carca.|Image": 0.375, "Catan|None": 0.475, "Catan|Text": 0.550, "Catan|Image": 0.550 } },
|
| 23 |
+
{ model: "GPT-4.1", tier: "T1", scores: { "KingD|None": 0.619, "KingD|Text": 0.524, "KingD|Image": 0.619, "Res Arcana|None": 0.775, "Res Arcana|Text": 0.725, "Res Arcana|Image": 0.750, "Pax Ren.|None": 0.525, "Pax Ren.|Text": 0.575, "Pax Ren.|Image": 0.600, "Carca.|None": 0.575, "Carca.|Text": 0.400, "Carca.|Image": 0.475, "Catan|None": 0.575, "Catan|Text": 0.450, "Catan|Image": 0.650 } },
|
| 24 |
+
{ model: "o3", tier: "T1", scores: { "KingD|None": 0.750, "KingD|Text": 0.675, "KingD|Image": 0.650, "Res Arcana|None": 0.775, "Res Arcana|Text": 0.775, "Res Arcana|Image": 0.700, "Pax Ren.|None": 0.475, "Pax Ren.|Text": 0.675, "Pax Ren.|Image": 0.650, "Carca.|None": 0.450, "Carca.|Text": 0.500, "Carca.|Image": 0.575, "Catan|None": 0.600, "Catan|Text": 0.575, "Catan|Image": 0.550 } },
|
| 25 |
+
{ model: "GPT-5.1", tier: "T1", scores: { "KingD|None": 0.750, "KingD|Text": 0.650, "KingD|Image": 0.675, "Res Arcana|None": 0.775, "Res Arcana|Text": 0.800, "Res Arcana|Image": 0.800, "Pax Ren.|None": 0.600, "Pax Ren.|Text": 0.600, "Pax Ren.|Image": 0.650, "Carca.|None": 0.725, "Carca.|Text": 0.525, "Carca.|Image": 0.550, "Catan|None": 0.600, "Catan|Text": 0.575, "Catan|Image": 0.575 } },
|
| 26 |
+
{ model: "Gemini 2.5 Flash", tier: "T1", scores: { "KingD|None": 0.524, "KingD|Text": 0.476, "KingD|Image": 0.381, "Res Arcana|None": 0.675, "Res Arcana|Text": 0.775, "Res Arcana|Image": 0.500, "Pax Ren.|None": 0.550, "Pax Ren.|Text": 0.400, "Pax Ren.|Image": 0.375, "Carca.|None": 0.450, "Carca.|Text": 0.400, "Carca.|Image": 0.350, "Catan|None": 0.625, "Catan|Text": 0.500, "Catan|Image": 0.325 } },
|
| 27 |
+
{ model: "Gemini 2.5 Pro", tier: "T1", scores: { "KingD|None": 0.524, "KingD|Text": 0.524, "KingD|Image": 0.333, "Res Arcana|None": 0.650, "Res Arcana|Text": 0.700, "Res Arcana|Image": 0.525, "Pax Ren.|None": 0.650, "Pax Ren.|Text": 0.725, "Pax Ren.|Image": 0.375, "Carca.|None": 0.425, "Carca.|Text": 0.350, "Carca.|Image": 0.450, "Catan|None": 0.575, "Catan|Text": 0.650, "Catan|Image": 0.425 } },
|
| 28 |
+
{ model: "Gemini 3 Pro", tier: "T1", scores: { "KingD|None": 0.825, "KingD|Text": 0.800, "KingD|Image": 0.850, "Res Arcana|None": 0.875, "Res Arcana|Text": 0.850, "Res Arcana|Image": 0.775, "Pax Ren.|None": 0.775, "Pax Ren.|Text": 0.750, "Pax Ren.|Image": 0.800, "Carca.|None": 0.825, "Carca.|Text": 0.750, "Carca.|Image": 0.650, "Catan|None": 0.525, "Catan|Text": 0.625, "Catan|Image": 0.700 } },
|
| 29 |
+
{ model: "Claude 4.5 Sonnet", tier: "T1", scores: { "KingD|None": 0.571, "KingD|Text": 0.619, "KingD|Image": 0.524, "Res Arcana|None": 0.650, "Res Arcana|Text": 0.800, "Res Arcana|Image": 0.800, "Pax Ren.|None": 0.600, "Pax Ren.|Text": 0.650, "Pax Ren.|Image": null, "Carca.|None": 0.375, "Carca.|Text": 0.325, "Carca.|Image": 0.300, "Catan|None": 0.600, "Catan|Text": 0.625, "Catan|Image": null } },
|
| 30 |
+
|
| 31 |
+
{ model: "GPT-4o", tier: "T2", scores: { "KingD|None": 0.300, "KingD|Text": 0.433, "KingD|Image": 0.300, "Res Arcana|None": 0.225, "Res Arcana|Text": 0.400, "Res Arcana|Image": 0.350, "Pax Ren.|None": 0.125, "Pax Ren.|Text": 0.525, "Pax Ren.|Image": 0.475, "Carca.|None": 0.125, "Carca.|Text": 0.150, "Carca.|Image": 0.225, "Catan|None": 0.225, "Catan|Text": 0.200, "Catan|Image": 0.200 } },
|
| 32 |
+
{ model: "o1", tier: "T2", scores: { "KingD|None": 0.200, "KingD|Text": 0.400, "KingD|Image": 0.267, "Res Arcana|None": 0.350, "Res Arcana|Text": 0.350, "Res Arcana|Image": 0.350, "Pax Ren.|None": 0.250, "Pax Ren.|Text": 0.400, "Pax Ren.|Image": 0.450, "Carca.|None": 0.225, "Carca.|Text": 0.250, "Carca.|Image": 0.300, "Catan|None": 0.150, "Catan|Text": 0.250, "Catan|Image": 0.225 } },
|
| 33 |
+
{ model: "GPT-4.1", tier: "T2", scores: { "KingD|None": 0.333, "KingD|Text": 0.433, "KingD|Image": 0.300, "Res Arcana|None": 0.400, "Res Arcana|Text": 0.500, "Res Arcana|Image": 0.475, "Pax Ren.|None": 0.275, "Pax Ren.|Text": 0.400, "Pax Ren.|Image": 0.375, "Carca.|None": 0.175, "Carca.|Text": 0.200, "Carca.|Image": 0.150, "Catan|None": 0.325, "Catan|Text": 0.225, "Catan|Image": 0.325 } },
|
| 34 |
+
{ model: "o3", tier: "T2", scores: { "KingD|None": 0.350, "KingD|Text": 0.400, "KingD|Image": 0.375, "Res Arcana|None": 0.325, "Res Arcana|Text": 0.625, "Res Arcana|Image": 0.475, "Pax Ren.|None": 0.300, "Pax Ren.|Text": 0.550, "Pax Ren.|Image": 0.575, "Carca.|None": 0.275, "Carca.|Text": 0.275, "Carca.|Image": 0.275, "Catan|None": 0.375, "Catan|Text": 0.275, "Catan|Image": 0.275 } },
|
| 35 |
+
{ model: "GPT-5.1", tier: "T2", scores: { "KingD|None": 0.300, "KingD|Text": 0.325, "KingD|Image": 0.275, "Res Arcana|None": 0.325, "Res Arcana|Text": 0.525, "Res Arcana|Image": 0.525, "Pax Ren.|None": 0.200, "Pax Ren.|Text": 0.600, "Pax Ren.|Image": 0.467, "Carca.|None": 0.250, "Carca.|Text": 0.250, "Carca.|Image": 0.325, "Catan|None": 0.275, "Catan|Text": 0.275, "Catan|Image": 0.300 } },
|
| 36 |
+
{ model: "Gemini 2.5 Flash", tier: "T2", scores: { "KingD|None": 0.167, "KingD|Text": 0.300, "KingD|Image": 0.267, "Res Arcana|None": 0.225, "Res Arcana|Text": 0.300, "Res Arcana|Image": 0.300, "Pax Ren.|None": 0.250, "Pax Ren.|Text": 0.300, "Pax Ren.|Image": 0.375, "Carca.|None": 0.250, "Carca.|Text": 0.300, "Carca.|Image": 0.200, "Catan|None": 0.250, "Catan|Text": 0.275, "Catan|Image": 0.125 } },
|
| 37 |
+
{ model: "Gemini 2.5 Pro", tier: "T2", scores: { "KingD|None": 0.367, "KingD|Text": 0.367, "KingD|Image": 0.267, "Res Arcana|None": 0.375, "Res Arcana|Text": 0.475, "Res Arcana|Image": 0.375, "Pax Ren.|None": 0.100, "Pax Ren.|Text": 0.400, "Pax Ren.|Image": 0.250, "Carca.|None": 0.225, "Carca.|Text": 0.275, "Carca.|Image": 0.150, "Catan|None": 0.275, "Catan|Text": 0.350, "Catan|Image": 0.175 } },
|
| 38 |
+
{ model: "Gemini 3 Pro", tier: "T2", scores: { "KingD|None": 0.725, "KingD|Text": 0.675, "KingD|Image": 0.750, "Res Arcana|None": 0.550, "Res Arcana|Text": 0.625, "Res Arcana|Image": 0.650, "Pax Ren.|None": 0.325, "Pax Ren.|Text": 0.475, "Pax Ren.|Image": 0.425, "Carca.|None": 0.425, "Carca.|Text": 0.425, "Carca.|Image": 0.325, "Catan|None": 0.500, "Catan|Text": 0.325, "Catan|Image": 0.500 } },
|
| 39 |
+
{ model: "Claude 4.5 Sonnet", tier: "T2", scores: { "KingD|None": 0.300, "KingD|Text": 0.333, "KingD|Image": 0.233, "Res Arcana|None": 0.433, "Res Arcana|Text": 0.633, "Res Arcana|Image": 0.400, "Pax Ren.|None": 0.300, "Pax Ren.|Text": 0.533, "Pax Ren.|Image": null, "Carca.|None": 0.275, "Carca.|Text": 0.275, "Carca.|Image": 0.375, "Catan|None": 0.225, "Catan|Text": 0.275, "Catan|Image": null } },
|
| 40 |
+
|
| 41 |
+
{ model: "GPT-4o", tier: "T3", scores: { "KingD|None": 0.000, "KingD|Text": 0.000, "KingD|Image": 0.000, "Res Arcana|None": 0.020, "Res Arcana|Text": 0.060, "Res Arcana|Image": 0.000, "Pax Ren.|None": 0.053, "Pax Ren.|Text": 0.053, "Pax Ren.|Image": 0.018, "Carca.|None": 0.040, "Carca.|Text": 0.000, "Carca.|Image": 0.040, "Catan|None": 0.065, "Catan|Text": 0.000, "Catan|Image": 0.032 } },
|
| 42 |
+
{ model: "o1", tier: "T3", scores: { "KingD|None": 0.000, "KingD|Text": 0.020, "KingD|Image": 0.000, "Res Arcana|None": 0.040, "Res Arcana|Text": 0.060, "Res Arcana|Image": 0.040, "Pax Ren.|None": 0.105, "Pax Ren.|Text": 0.018, "Pax Ren.|Image": 0.053, "Carca.|None": 0.080, "Carca.|Text": 0.040, "Carca.|Image": 0.040, "Catan|None": 0.032, "Catan|Text": 0.065, "Catan|Image": 0.032 } },
|
| 43 |
+
{ model: "GPT-4.1", tier: "T3", scores: { "KingD|None": 0.040, "KingD|Text": 0.080, "KingD|Image": 0.100, "Res Arcana|None": 0.020, "Res Arcana|Text": 0.020, "Res Arcana|Image": 0.020, "Pax Ren.|None": 0.123, "Pax Ren.|Text": 0.123, "Pax Ren.|Image": 0.088, "Carca.|None": 0.020, "Carca.|Text": 0.000, "Carca.|Image": 0.060, "Catan|None": 0.194, "Catan|Text": 0.065, "Catan|Image": 0.032 } },
|
| 44 |
+
{ model: "o3", tier: "T3", scores: { "KingD|None": 0.060, "KingD|Text": 0.120, "KingD|Image": 0.040, "Res Arcana|None": 0.000, "Res Arcana|Text": 0.060, "Res Arcana|Image": 0.060, "Pax Ren.|None": 0.070, "Pax Ren.|Text": 0.175, "Pax Ren.|Image": 0.211, "Carca.|None": 0.120, "Carca.|Text": 0.060, "Carca.|Image": 0.100, "Catan|None": 0.194, "Catan|Text": 0.226, "Catan|Image": 0.258 } },
|
| 45 |
+
{ model: "GPT-5.1", tier: "T3", scores: { "KingD|None": 0.160, "KingD|Text": 0.060, "KingD|Image": 0.060, "Res Arcana|None": 0.080, "Res Arcana|Text": 0.080, "Res Arcana|Image": 0.080, "Pax Ren.|None": 0.123, "Pax Ren.|Text": 0.088, "Pax Ren.|Image": 0.211, "Carca.|None": 0.020, "Carca.|Text": 0.100, "Carca.|Image": 0.100, "Catan|None": 0.129, "Catan|Text": 0.161, "Catan|Image": 0.161 } },
|
| 46 |
+
{ model: "Gemini 2.5 Flash", tier: "T3", scores: { "KingD|None": 0.000, "KingD|Text": 0.020, "KingD|Image": 0.000, "Res Arcana|None": 0.000, "Res Arcana|Text": 0.080, "Res Arcana|Image": 0.080, "Pax Ren.|None": 0.018, "Pax Ren.|Text": 0.053, "Pax Ren.|Image": 0.000, "Carca.|None": 0.020, "Carca.|Text": 0.060, "Carca.|Image": 0.040, "Catan|None": 0.032, "Catan|Text": 0.000, "Catan|Image": 0.065 } },
|
| 47 |
+
{ model: "Gemini 2.5 Pro", tier: "T3", scores: { "KingD|None": 0.040, "KingD|Text": 0.020, "KingD|Image": 0.000, "Res Arcana|None": 0.120, "Res Arcana|Text": 0.300, "Res Arcana|Image": 0.080, "Pax Ren.|None": 0.123, "Pax Ren.|Text": 0.053, "Pax Ren.|Image": 0.088, "Carca.|None": 0.060, "Carca.|Text": 0.060, "Carca.|Image": 0.040, "Catan|None": 0.129, "Catan|Text": 0.097, "Catan|Image": 0.161 } },
|
| 48 |
+
{ model: "Gemini 3 Pro", tier: "T3", scores: { "KingD|None": 0.160, "KingD|Text": 0.160, "KingD|Image": 0.140, "Res Arcana|None": 0.200, "Res Arcana|Text": 0.260, "Res Arcana|Image": 0.180, "Pax Ren.|None": 0.053, "Pax Ren.|Text": 0.105, "Pax Ren.|Image": 0.193, "Carca.|None": 0.060, "Carca.|Text": 0.020, "Carca.|Image": 0.040, "Catan|None": 0.129, "Catan|Text": 0.161, "Catan|Image": 0.032 } },
|
| 49 |
+
{ model: "Claude 4.5 Sonnet", tier: "T3", scores: { "KingD|None": 0.020, "KingD|Text": 0.040, "KingD|Image": 0.040, "Res Arcana|None": 0.040, "Res Arcana|Text": 0.060, "Res Arcana|Image": 0.080, "Pax Ren.|None": 0.053, "Pax Ren.|Text": 0.088, "Pax Ren.|Image": null, "Carca.|None": 0.040, "Carca.|Text": 0.020, "Carca.|Image": 0.060, "Catan|None": 0.000, "Catan|Text": 0.032, "Catan|Image": null } }
|
| 50 |
+
];
|
js/leaderboard.js
ADDED
|
@@ -0,0 +1,281 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Leaderboard: interactive heatmap table with sorting and filtering.
|
| 2 |
+
// Depends on BENCHMARK_DATA, GAMES, MODALITIES, TIERS from leaderboard-data.js.
|
| 3 |
+
|
| 4 |
+
(function () {
|
| 5 |
+
"use strict";
|
| 6 |
+
|
| 7 |
+
// ---- State ----
|
| 8 |
+
// Current selection of each filter group; "all" leaves that dimension unrestricted.
let activeFilters = {
  tier: "all",
  game: "all",
  modality: "all",
};

// Key of the column rows are sorted by: a "Game|Modality" score key, or "avg".
let sortCol = "avg";

// Sort direction; false means descending, which is the default.
let sortAsc = false;
|
| 11 |
+
|
| 12 |
+
// ---- Color scale (light theme, bolder) ----
|
| 13 |
+
/**
 * Map a score to a heatmap background color on a three-stop scale:
 * (220,130,120) at `min`, (240,210,130) at the midpoint and (130,195,100)
 * at `max`, interpolating linearly between neighbouring stops.
 *
 * Values outside [min, max] are clamped to the nearest end of the scale so
 * the result is always a valid color. When `max - min` is not positive the
 * midpoint color is used.
 *
 * @param {?number} val - Score to colorize.
 * @param {number} min - Value mapped to the low end of the scale.
 * @param {number} max - Value mapped to the high end of the scale.
 * @returns {?string} CSS `rgb(r,g,b)` string, or null when `val` is null,
 *   undefined, or does not yield a number.
 */
function heatColor(val, min, max) {
  if (val === null || val === undefined) return null;

  const range = max - min;
  const position = range > 0 ? (val - min) / range : 0.5;
  // NaN would otherwise fall through to "rgb(NaN,NaN,NaN)".
  if (Number.isNaN(position)) return null;

  // Clamp so out-of-range inputs cannot produce channels outside 0-255.
  const t = Math.min(1, Math.max(0, position));

  const LOW = [220, 130, 120];
  const MID = [240, 210, 130];
  const HIGH = [130, 195, 100];

  // Pick the segment of the scale and the local interpolation factor in it.
  const [from, to, s] = t < 0.5
    ? [LOW, MID, t / 0.5]
    : [MID, HIGH, (t - 0.5) / 0.5];

  const channels = from.map((start, i) => Math.round(start + (to[i] - start) * s));
  return `rgb(${channels.join(",")})`;
}
|
| 32 |
+
|
| 33 |
+
// Returns the fixed dark text color "#1a1a1a". The function takes no
// arguments, so the result does not vary with any background color.
function textColorForBg() {
  const darkText = "#1a1a1a";
  return darkText;
}
|
| 36 |
+
|
| 37 |
+
// ---- Compute visible columns ----
|
| 38 |
+
/**
 * List the score columns selected by the current game and modality filters.
 * A filter set to "all" expands to every entry of GAMES / MODALITIES;
 * otherwise only the selected value is used. Columns are ordered game-major,
 * with modalities nested inside each game.
 *
 * @returns {Array<{game: string, modality: string, key: string}>} One
 *   descriptor per column; `key` is game + "|" + modality.
 */
function getVisibleColumns() {
  const { game, modality } = activeFilters;
  const gameList = game === "all" ? GAMES : [game];
  const modalityList = modality === "all" ? MODALITIES : [modality];

  return gameList.reduce(
    (columns, g) =>
      columns.concat(
        modalityList.map((m) => ({ game: g, modality: m, key: g + "|" + m }))
      ),
    []
  );
}
|
| 49 |
+
|
| 50 |
+
// ---- Compute visible rows ----
|
| 51 |
+
/**
 * Select the BENCHMARK_DATA entries whose tier passes the current tier filter.
 * With the filter set to "all", entries are kept when their tier is listed in
 * TIERS; otherwise only entries of the selected tier are kept.
 *
 * @returns {Array<Object>} Matching entries, in BENCHMARK_DATA order.
 */
function getVisibleRows() {
  const { tier } = activeFilters;
  const allowedTiers = tier === "all" ? TIERS : [tier];
  const isVisible = (entry) => allowedTiers.includes(entry.tier);
  return BENCHMARK_DATA.filter(isVisible);
}
|
| 55 |
+
|
| 56 |
+
// ---- Compute row average ----
|
| 57 |
+
/**
 * Average a row's scores over the given columns.
 *
 * Only finite numbers contribute. Missing entries (null / undefined) are
 * skipped, and so are NaN or non-number values, which would otherwise turn
 * the whole average into NaN or into a concatenated string.
 *
 * @param {{scores: Object<string, ?number>}} row - Entry whose `scores` map
 *   is read by column key.
 * @param {Array<{key: string}>} cols - Columns to average over.
 * @returns {?number} Mean of the usable scores, or null when there are none.
 */
function rowAvg(row, cols) {
  let sum = 0;
  let count = 0;
  for (const { key } of cols) {
    const v = row.scores[key];
    if (typeof v === "number" && Number.isFinite(v)) {
      sum += v;
      count += 1;
    }
  }
  return count > 0 ? sum / count : null;
}
|
| 65 |
+
|
| 66 |
+
// ---- Build and render table ----
|
| 67 |
+
/**
 * Rebuilds the leaderboard table (header and body) from the current filter
 * and sort state.
 *
 * Reads activeFilters, sortCol and sortAsc, and replaces the contents of the
 * #leaderboard-thead and #leaderboard-tbody elements.
 */
function renderLeaderboard() {
  const cols = getVisibleColumns();
  const visibleRows = getVisibleRows();

  // Averages are computed once and shared by sorting, heat scaling and cells.
  const avgMap = new Map();
  for (const r of visibleRows) avgMap.set(r, rowAvg(r, cols));

  const rows = sortLeaderboardRows(visibleRows, avgMap);
  const range = computeHeatRange(rows, cols, avgMap);

  renderLeaderboardHead(cols);
  renderLeaderboardBody(rows, cols, avgMap, range);
}

/**
 * Returns a sorted copy of `rows`: grouped by tier (TIERS order) first, then
 * by the active sort column within each tier.
 *
 * Rows with no value in the sort column always go last inside their tier,
 * whichever direction is active, so a missing score is never ranked as the
 * "lowest" result and the comparator never produces NaN.
 */
function sortLeaderboardRows(rows, avgMap) {
  const sortValue = (row) =>
    sortCol === "avg" ? avgMap.get(row) : row.scores[sortCol];

  return rows.slice().sort((a, b) => {
    const tierA = TIERS.indexOf(a.tier);
    const tierB = TIERS.indexOf(b.tier);
    if (tierA !== tierB) return tierA - tierB;

    if (!sortCol) return 0;

    const va = sortValue(a);
    const vb = sortValue(b);
    const aMissing = va === null || va === undefined;
    const bMissing = vb === null || vb === undefined;
    if (aMissing || bMissing) {
      if (aMissing && bMissing) return 0;
      return aMissing ? 1 : -1;
    }
    return sortAsc ? va - vb : vb - va;
  });
}

/**
 * Finds the smallest and largest value that will be displayed (scores of the
 * visible columns plus row averages). Falls back to 0..1 when nothing is shown.
 */
function computeHeatRange(rows, cols, avgMap) {
  const allVals = [];
  for (const r of rows) {
    for (const c of cols) {
      const v = r.scores[c.key];
      if (v !== null && v !== undefined) allVals.push(v);
    }
    const avg = avgMap.get(r);
    if (avg !== null) allVals.push(avg);
  }
  return {
    minVal: allVals.length > 0 ? Math.min(...allVals) : 0,
    maxVal: allVals.length > 0 ? Math.max(...allVals) : 1,
  };
}

/** Builds the clickable "Avg" header cell, spanning `rowSpan` header rows. */
function buildAvgHeaderCell(rowSpan) {
  const avgTh = document.createElement("th");
  avgTh.className = "avg-col";
  if (rowSpan > 1) avgTh.rowSpan = rowSpan;
  avgTh.style.cursor = "pointer";
  avgTh.onclick = () => { toggleSort("avg"); };
  avgTh.innerHTML = 'Avg ' + sortIndicator("avg");
  return avgTh;
}

/**
 * Rebuilds the table header. With the modality filter on "all" there are two
 * rows (game names spanning their modalities, then one cell per modality);
 * otherwise a single row labelled "game / modality".
 */
function renderLeaderboardHead(cols) {
  const thead = document.getElementById("leaderboard-thead");
  thead.innerHTML = "";

  const showGameRow = activeFilters.modality === "all";

  const modRow = document.createElement("tr");
  modRow.className = "mod-header";

  if (showGameRow) {
    const gameRow = document.createElement("tr");
    gameRow.className = "game-header";

    // Empty corner above the model column; it and Avg span both header rows.
    const corner = document.createElement("th");
    corner.textContent = "";
    corner.rowSpan = 2;
    gameRow.appendChild(corner);
    gameRow.appendChild(buildAvgHeaderCell(2));

    const visibleGames = activeFilters.game === "all" ? GAMES : [activeFilters.game];
    for (const g of visibleGames) {
      const th = document.createElement("th");
      th.textContent = GAME_LABELS[g] || g;
      th.colSpan = MODALITIES.length;
      gameRow.appendChild(th);
    }

    thead.appendChild(gameRow);
  } else {
    const corner = document.createElement("th");
    corner.textContent = "Model";
    modRow.appendChild(corner);
    modRow.appendChild(buildAvgHeaderCell(1));
  }

  for (const c of cols) {
    const th = document.createElement("th");
    const label = showGameRow ? c.modality : (GAME_LABELS[c.game] || c.game) + " / " + c.modality;
    th.innerHTML = label + " " + sortIndicator(c.key);
    // Same affordance as the Avg header: these cells sort on click too.
    th.style.cursor = "pointer";
    th.onclick = () => { toggleSort(c.key); };
    modRow.appendChild(th);
  }

  thead.appendChild(modRow);
}

/**
 * Builds one numeric cell. A missing value renders as an em dash with the
 * "null-cell" class; otherwise the value is shown multiplied by 100 with one
 * decimal and, when heatColor yields a colour, tinted accordingly.
 */
function buildScoreCell(value, range, className) {
  const td = document.createElement("td");
  td.className = className;
  if (value === null || value === undefined) {
    td.textContent = "\u2014";
    td.classList.add("null-cell");
    return td;
  }
  td.textContent = (value * 100).toFixed(1);
  const bg = heatColor(value, range.minVal, range.maxVal);
  if (bg) {
    td.style.backgroundColor = bg;
    td.style.color = textColorForBg();
  }
  return td;
}

/**
 * Rebuilds the table body: one row per model, preceded by a full-width tier
 * separator whenever the tier changes (only while the tier filter is "all").
 */
function renderLeaderboardBody(rows, cols, avgMap, range) {
  const tbody = document.getElementById("leaderboard-tbody");
  tbody.innerHTML = "";

  const showTierGroups = activeFilters.tier === "all";
  const totalCols = 2 + cols.length; // model + avg + score columns
  let lastTier = null;

  for (const r of rows) {
    if (showTierGroups && r.tier !== lastTier) {
      const sepTr = document.createElement("tr");
      sepTr.className = "tier-separator";
      const sepTd = document.createElement("td");
      sepTd.colSpan = totalCols;
      sepTd.textContent = TIER_LABELS[r.tier] || r.tier;
      sepTr.appendChild(sepTd);
      tbody.appendChild(sepTr);
      lastTier = r.tier;
    }

    const tr = document.createElement("tr");

    const modelTd = document.createElement("td");
    modelTd.className = "model-cell";
    modelTd.textContent = r.model;
    tr.appendChild(modelTd);

    // Avg sits directly after the model name, before the per-column scores.
    tr.appendChild(buildScoreCell(avgMap.get(r), range, "score-cell avg-col"));
    for (const c of cols) {
      tr.appendChild(buildScoreCell(r.scores[c.key], range, "score-cell"));
    }

    tbody.appendChild(tr);
  }
}
|
| 242 |
+
|
| 243 |
+
// ---- Sort helpers ----
|
| 244 |
+
/**
 * Produces the sort-arrow markup for a column header.
 *
 * @param {string} key - Column key ("avg" or a score column key).
 * @returns {string} HTML for a neutral up/down arrow when the column is not
 *   the active sort column, otherwise an "active" arrow pointing up
 *   (ascending) or down (descending).
 */
function sortIndicator(key) {
  const isActiveColumn = sortCol === key;
  if (isActiveColumn) {
    const glyph = sortAsc ? "\u2191" : "\u2193";
    return `<span class="sort-arrow active">${glyph}</span>`;
  }
  return '<span class="sort-arrow">\u2195</span>';
}
|
| 249 |
+
|
| 250 |
+
/**
 * Handles a click on a sortable header and re-renders the table.
 *
 * Clicking the column that is already sorted flips the direction; clicking a
 * different column selects it and starts in descending order.
 *
 * @param {string} key - Column key that was clicked.
 */
function toggleSort(key) {
  const sameColumn = sortCol === key;
  // Scores default to descending, so a newly chosen column starts at false.
  sortAsc = sameColumn ? !sortAsc : false;
  sortCol = key;
  renderLeaderboard();
}
|
| 259 |
+
|
| 260 |
+
// ---- Filter button wiring ----
|
| 261 |
+
/**
 * Wires up the filter buttons inside #leaderboard-filters.
 *
 * Each ".filter-buttons" group names the filter it controls through its
 * data-filter attribute; each ".filter-btn" inside carries the value to apply
 * in data-value. Clicking a button marks it as the only active one in its
 * group, stores the value in activeFilters, clears the sort column and
 * re-renders the leaderboard.
 */
function initFilters() {
  const groups = document.querySelectorAll("#leaderboard-filters .filter-buttons");

  for (const group of groups) {
    const filterType = group.dataset.filter;

    const activate = (chosen) => {
      for (const other of group.querySelectorAll(".filter-btn")) {
        other.classList.remove("active");
      }
      chosen.classList.add("active");
      activeFilters[filterType] = chosen.dataset.value;
      sortCol = null; // reset sort on filter change
      renderLeaderboard();
    };

    for (const btn of group.querySelectorAll(".filter-btn")) {
      btn.addEventListener("click", () => {
        activate(btn);
      });
    }
  }
}
|
| 275 |
+
|
| 276 |
+
// ---- Public init ----
|
| 277 |
+
// Entry point exposed on window: attaches the filter-button listeners, then
// draws the table once with the initial filter and sort state.
window.initLeaderboard = function () {
  initFilters();
  renderLeaderboard();
};
|
| 281 |
+
})();
|
js/viewer.js
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Dataset browser (adapted from anon-submission-2026/docs/viewer.js)
|
| 2 |
+
// Wrapped in lazy-init so it only loads when the Dataset Browser tab is opened.
|
| 3 |
+
|
| 4 |
+
// Set to true by initViewer(); while it is true, further initViewer() calls return immediately.
let _viewerInitialized = false;
|
| 5 |
+
|
| 6 |
+
// Base URL for images hosted in the HF dataset repo
|
| 7 |
+
const IMAGE_BASE_URL = "https://huggingface.co/datasets/launch/LudoBench/resolve/main/images";
|
| 8 |
+
|
| 9 |
+
// -----------------------
|
| 10 |
+
// Manifest loader
|
| 11 |
+
// -----------------------
|
| 12 |
+
/**
 * Fetches manifest.json (relative to the page) and returns its file list.
 *
 * @returns {Promise<Array<Object>>} The manifest's `files` array.
 * @throws {Error} When the HTTP response is not OK, or when the manifest has
 *   no `files` array. The latter would otherwise only surface later as an
 *   opaque "not iterable" error in the callers.
 */
async function loadManifest() {
  const res = await fetch("manifest.json");
  if (!res.ok) throw new Error("Failed to load manifest: " + res.status);

  const manifest = await res.json();
  const files = manifest === null || manifest === undefined ? undefined : manifest.files;
  if (!Array.isArray(files)) {
    throw new Error('Failed to load manifest: missing "files" array');
  }
  return files;
}
|
| 18 |
+
|
| 19 |
+
// -----------------------
|
| 20 |
+
// Folder handling
|
| 21 |
+
// -----------------------
|
| 22 |
+
/**
 * Collects the distinct `folder` values of the manifest entries.
 *
 * @param {Iterable<{folder: string}>} files - Manifest entries.
 * @returns {string[]} Unique folder names in default (lexicographic) sort order.
 */
function buildFolderIndex(files) {
  const folderNames = Array.from(files, (entry) => entry.folder);
  const unique = new Set(folderNames);
  return Array.from(unique).sort();
}
|
| 27 |
+
|
| 28 |
+
/**
 * Replaces the options of the #folderSelect element with one option per
 * folder name (used as both label and value).
 *
 * @param {Iterable<string>} folders - Folder names, in display order.
 */
function populateFolderSelect(folders) {
  const select = document.getElementById("folderSelect");
  select.innerHTML = "";

  const options = Array.from(folders, (name) => {
    const option = document.createElement("option");
    option.value = name;
    option.textContent = name;
    return option;
  });

  options.forEach((option) => {
    select.appendChild(option);
  });
}
|
| 38 |
+
|
| 39 |
+
/**
 * Selects the manifest entries that belong to one folder.
 *
 * @param {Array<{folder: string}>} files - Manifest entries.
 * @param {string} folder - Folder name to match exactly.
 * @returns {Array<Object>} New array with the matching entries, order kept.
 */
function filterFiles(files, folder) {
  const inFolder = ({ folder: entryFolder }) => entryFolder === folder;
  return files.filter(inFolder);
}
|
| 42 |
+
|
| 43 |
+
/**
 * Replaces the options of the #fileSelect element with one option per file:
 * the option value is the entry's `json_path`, the label its `name`.
 *
 * @param {Iterable<{json_path: string, name: string}>} files - Entries to list.
 */
function populateFileSelect(files) {
  const select = document.getElementById("fileSelect");
  select.innerHTML = "";

  const options = Array.from(files, (entry) => {
    const option = document.createElement("option");
    option.textContent = entry.name;
    option.value = entry.json_path;
    return option;
  });

  options.forEach((option) => {
    select.appendChild(option);
  });
}
|
| 53 |
+
|
| 54 |
+
// -----------------------
|
| 55 |
+
// Load one JSON annotation
|
| 56 |
+
// -----------------------
|
| 57 |
+
/**
 * Fetches one annotation file and parses it as JSON.
 *
 * @param {string} path - URL or relative path of the JSON file.
 * @returns {Promise<*>} The parsed JSON payload.
 * @throws {Error} "Error loading <path>" when the response is not OK.
 */
async function loadJson(path) {
  const response = await fetch(path);
  if (response.ok) {
    const payload = await response.json();
    return payload;
  }
  throw new Error("Error loading " + path);
}
|
| 62 |
+
|
| 63 |
+
// -----------------------
|
| 64 |
+
// Rendering question + answer
|
| 65 |
+
// -----------------------
|
| 66 |
+
/**
 * Splits a raw answer string into the set of accepted answers.
 *
 * Alternatives are separated by a comma, a slash, or the standalone word
 * "or" (case-insensitive); each one is trimmed and lower-cased, and empty
 * pieces are dropped.
 *
 * Only null and undefined count as "no answer". Other falsy values are
 * stringified, so a numeric answer of 0 yields {"0"} instead of an empty set.
 *
 * @param {*} raw - Raw answer value from the annotation.
 * @returns {Set<string>} Normalized accepted answers.
 */
function normalizedAnswers(raw) {
  const text = raw === null || raw === undefined ? "" : String(raw);
  return new Set(
    text
      .split(/,|\/|\bor\b/i)
      .map(s => s.trim().toLowerCase())
      .filter(Boolean)
  );
}
|
| 74 |
+
|
| 75 |
+
/**
 * Tells whether a string converts to a number via Number().
 *
 * @param {string} s - Candidate text.
 * @returns {boolean} false for the empty string or when Number(s) is NaN,
 *   true otherwise.
 */
function isNumber(s) {
  if (s === "") return false;
  const parsed = Number(s);
  return !Number.isNaN(parsed);
}
|
| 78 |
+
|
| 79 |
+
/**
 * Shows the question of one annotation in #questionCard and resets the
 * answer feedback in #answerInfo.
 *
 * The card is built from DOM nodes with textContent rather than an HTML
 * string, so characters such as "<" or "&" in the dataset text are displayed
 * literally and can never be interpreted as markup.
 *
 * @param {{Game: *, ID: *, Question: *}} data - Parsed annotation.
 */
function renderQuestion(data) {
  const card = document.getElementById("questionCard");
  card.textContent = ""; // remove the previously shown question

  const heading = document.createElement("h2");
  heading.appendChild(document.createTextNode(`${data.Game} `));
  const idTag = document.createElement("span");
  idTag.className = "id-tag";
  idTag.textContent = `(ID ${data.ID})`;
  heading.appendChild(idTag);
  card.appendChild(heading);

  const label = document.createElement("p");
  label.className = "question-label";
  label.textContent = "Question:";
  card.appendChild(label);

  const text = document.createElement("p");
  text.className = "question-text";
  text.textContent = `${data.Question}`;
  card.appendChild(text);

  const info = document.getElementById("answerInfo");
  info.textContent = "";
  info.className = "answer-info";
}
|
| 91 |
+
|
| 92 |
+
/**
 * Shows the expected answer in #solutionText and wires the #checkButton so
 * that a click grades the text typed into #answerInput.
 *
 * A guess is accepted when, after trimming and lower-casing, it equals one of
 * the accepted answers, or when both are numeric and differ by less than 1e-9.
 * The verdict is written to #answerInfo.
 *
 * Only a null/undefined Answer is treated as absent, so a numeric answer of 0
 * is kept. The expected answer is inserted as text, never as HTML.
 *
 * @param {{Answer: *}} data - Parsed annotation.
 */
function attachAnswerLogic(data) {
  const answer = data.Answer;
  const raw = (answer === null || answer === undefined ? "" : String(answer)).trim();
  const accepted = normalizedAnswers(raw);

  const input = document.getElementById("answerInput");
  const button = document.getElementById("checkButton");
  const info = document.getElementById("answerInfo");

  const sol = document.getElementById("solutionText");
  sol.textContent = "";
  const label = document.createElement("strong");
  label.textContent = "Expected:";
  sol.appendChild(label);
  // An em dash stands in for a missing answer.
  sol.appendChild(document.createTextNode(" " + (raw || "\u2014")));

  button.onclick = () => {
    const user = input.value.trim().toLowerCase();
    let ok = accepted.has(user);

    // Numeric fallback lets "2.5" match an expected "2.50".
    if (!ok && isNumber(user)) {
      const guess = Number(user);
      ok = Array.from(accepted).some(
        a => isNumber(a) && Math.abs(Number(a) - guess) < 1e-9
      );
    }

    info.textContent = ok ? "Correct!" : "Not quite. Try again.";
    info.className = "answer-info " + (ok ? "correct" : "wrong");
  };
}
|
| 118 |
+
|
| 119 |
+
// -----------------------
|
| 120 |
+
// Render images (loaded from IMAGE_BASE_URL)
|
| 121 |
+
// -----------------------
|
| 122 |
+
/**
 * Renders the game-state image(s) of one annotation into #multiImages and
 * shows or hides the surrounding #imageContainer.
 *
 * `data.game_state_url` may be a single URL or an array of URLs. Only the
 * last path segment of each URL is used; the image is loaded from
 * `${IMAGE_BASE_URL}/<game folder>/<file>`, where the game folder is the
 * lower-cased game name with whitespace runs replaced by "_".
 *
 * The container stays hidden when there is nothing to show, including the
 * case of an empty URL array.
 *
 * @param {{Game: string, game_state_url: (string|string[]|undefined)}} data
 *   Parsed annotation.
 */
function renderImage(data) {
  const container = document.getElementById("imageContainer");
  const multi = document.getElementById("multiImages");

  // Always start from an empty, hidden gallery.
  multi.innerHTML = "";
  container.classList.add("hidden");

  let urls = data.game_state_url;
  if (!urls) return;
  if (!Array.isArray(urls)) urls = [urls];
  if (urls.length === 0) return;

  const folder = data.Game.toLowerCase().replace(/\s+/g, "_");

  // Anchor that opens `href` in a new tab without exposing window.opener.
  const newTabLink = (href) => {
    const anchor = document.createElement("a");
    anchor.href = href;
    anchor.target = "_blank";
    anchor.rel = "noopener noreferrer";
    return anchor;
  };

  urls.forEach(url => {
    const file = url.split("/").pop();
    const localPath = `${IMAGE_BASE_URL}/${folder}/${file}`;

    const block = document.createElement("div");
    block.className = "multi-img-block";

    // The spinner is shown until the image either loads or fails.
    const spinner = document.createElement("div");
    spinner.className = "spinner";
    block.appendChild(spinner);

    const img = document.createElement("img");
    img.style.display = "none";
    img.onload = () => {
      spinner.style.display = "none";
      img.style.display = "block";
    };
    img.onerror = () => {
      spinner.style.display = "none";
      const err = document.createElement("div");
      err.textContent = "Failed to load " + localPath;
      err.style.color = "#d44";
      block.appendChild(err);
    };
    img.src = localPath;

    const link = newTabLink(localPath);
    link.appendChild(img);
    block.appendChild(link);

    const caption = document.createElement("div");
    caption.className = "multi-img-caption";
    caption.textContent = file;
    block.appendChild(caption);

    const full = newTabLink(localPath);
    full.className = "full-img-link";
    full.textContent = "View full image";
    block.appendChild(full);

    multi.appendChild(block);
  });

  container.classList.remove("hidden");
}
|
| 188 |
+
|
| 189 |
+
// -----------------------
|
| 190 |
+
// Navigation (prev / next)
|
| 191 |
+
// -----------------------
|
| 192 |
+
// Manifest file entries; assigned by initViewer() once the manifest has loaded.
let GLOBAL_FILES = [];
|
| 193 |
+
// Value of the folder <select> at the last refresh (set inside initViewer()).
let GLOBAL_CURRENT_FOLDER = "";
|
| 194 |
+
// Loader function published by initViewer() so goRelative() can display a file; null until then.
let loadAndRenderRef = null;
|
| 195 |
+
|
| 196 |
+
/**
 * Moves the #fileSelect selection by `offset` entries and loads that file.
 *
 * Stepping before the first entry jumps to the last one, and stepping past
 * the last entry jumps to the first. Nothing happens when the select has no
 * options or its current value matches none of them.
 *
 * @param {number} offset - Relative step, e.g. -1 (previous) or 1 (next).
 */
function goRelative(offset) {
  const select = document.getElementById("fileSelect");
  const paths = Array.from(select.options, (option) => option.value);
  if (paths.length === 0) return;

  const here = paths.indexOf(select.value);
  if (here === -1) return;

  let target = here + offset;
  if (target < 0) {
    target = paths.length - 1;
  } else if (target >= paths.length) {
    target = 0;
  }

  const nextPath = paths[target];
  select.value = nextPath;
  if (loadAndRenderRef) loadAndRenderRef(nextPath);
}
|
| 213 |
+
|
| 214 |
+
// -----------------------
|
| 215 |
+
// Lazy initialization
|
| 216 |
+
// -----------------------
|
| 217 |
+
/**
 * Lazily sets up the dataset browser: wires the prev/next buttons, loads the
 * manifest, fills the folder and file selects and shows the first file.
 *
 * Calls made after a successful (or still running) initialization return
 * immediately. If initialization fails, the error is shown in #questionCard
 * and the guard is cleared so that a later call can retry.
 *
 * @returns {Promise<void>} Resolves when setup finished or failed; errors are
 *   reported in the page and on the console, not thrown.
 */
async function initViewer() {
  if (_viewerInitialized) return;
  _viewerInitialized = true;

  document.getElementById("prevBtn").onclick = () => goRelative(-1);
  document.getElementById("nextBtn").onclick = () => goRelative(1);

  const questionCard = document.getElementById("questionCard");

  // Reports an error as plain text so its message is never parsed as HTML.
  const showError = (prefix, err) => {
    console.error(err);
    questionCard.textContent = "";
    const msg = document.createElement("p");
    msg.style.color = "#f55";
    msg.textContent = prefix + err;
    questionCard.appendChild(msg);
  };

  try {
    const files = await loadManifest();
    GLOBAL_FILES = files;

    const folders = buildFolderIndex(files);
    populateFolderSelect(folders);

    const folderSel = document.getElementById("folderSelect");
    const fileSel = document.getElementById("fileSelect");

    // Incremented per request: only the most recent one may touch the page,
    // so a slow response cannot overwrite a newer selection.
    let latestRequest = 0;

    const loadAndRender = async (path) => {
      latestRequest += 1;
      const requestId = latestRequest;
      try {
        const data = await loadJson(path);
        if (requestId !== latestRequest) return;
        renderQuestion(data);
        attachAnswerLogic(data);
        renderImage(data);
      } catch (err) {
        // Handled here because callers (event handlers) do not await the result.
        if (requestId === latestRequest) showError("Load error: ", err);
      }
    };

    loadAndRenderRef = loadAndRender;

    const refresh = () => {
      GLOBAL_CURRENT_FOLDER = folderSel.value;
      const filtered = filterFiles(files, GLOBAL_CURRENT_FOLDER);
      populateFileSelect(filtered);
      if (filtered.length > 0)
        loadAndRender(filtered[0].json_path);
    };

    folderSel.onchange = refresh;
    fileSel.onchange = () => loadAndRender(fileSel.value);

    // Prefer the default folder, but fall back to the first available one;
    // assigning a value that matches no option would leave nothing selected.
    const preferredFolder = "kingdomino_tier1";
    if (folders.length > 0) {
      folderSel.value = folders.includes(preferredFolder) ? preferredFolder : folders[0];
    }
    refresh();

  } catch (err) {
    _viewerInitialized = false; // allow a retry on the next call
    showError("Init error: ", err);
  }
}
|
manifest.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|