Update index.html
Browse files- index.html +208 -327
index.html
CHANGED
|
@@ -10,7 +10,7 @@
|
|
| 10 |
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
|
| 11 |
<style>
|
| 12 |
body { font-family: 'Inter', sans-serif; background-color: #f8fafc; }
|
| 13 |
-
.chart-container { position: relative; height:
|
| 14 |
.thinking-badge {
|
| 15 |
background: linear-gradient(90deg, #6366f1, #a855f7, #ec4899);
|
| 16 |
-webkit-background-clip: text;
|
|
@@ -22,9 +22,6 @@
|
|
| 22 |
</style>
|
| 23 |
</head>
|
| 24 |
<body>
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
<!-- Header Section -->
|
| 29 |
<div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-8">
|
| 30 |
<div class="flex flex-col md:flex-row items-start md:items-center justify-between mb-8">
|
|
@@ -48,10 +45,11 @@
|
|
| 48 |
<div class="card p-6 col-span-2">
|
| 49 |
<h2 class="text-lg font-semibold text-gray-900 mb-4">Overview</h2>
|
| 50 |
<p class="text-gray-600 leading-relaxed mb-4">
|
| 51 |
-
The <strong>Gemma 3 1B Thinking</strong> model introduces chain-of-thought capabilities to the edge-device class.
|
|
|
|
| 52 |
</p>
|
| 53 |
<p class="text-gray-600 leading-relaxed">
|
| 54 |
-
|
| 55 |
</p>
|
| 56 |
<div class="grid grid-cols-3 gap-4 mt-6">
|
| 57 |
<div class="bg-gray-50 p-3 rounded-lg">
|
|
@@ -63,127 +61,110 @@
|
|
| 63 |
<div class="text-lg font-bold text-gray-900">128k</div>
|
| 64 |
</div>
|
| 65 |
<div class="bg-gray-50 p-3 rounded-lg">
|
| 66 |
-
<div class="text-xs text-gray-500 uppercase tracking-wide">
|
| 67 |
-
<div class="text-lg font-bold text-gray-900">
|
| 68 |
</div>
|
| 69 |
</div>
|
| 70 |
</div>
|
| 71 |
|
| 72 |
<!-- Key Stats -->
|
| 73 |
<div class="card p-6 flex flex-col justify-center">
|
| 74 |
-
<h3 class="text-sm font-medium text-gray-500 uppercase mb-6">
|
| 75 |
|
| 76 |
<div class="space-y-6">
|
| 77 |
<div>
|
| 78 |
<div class="flex justify-between mb-1">
|
| 79 |
-
<span class="text-sm font-medium text-gray-700">
|
| 80 |
-
<span class="text-sm font-bold text-purple-600">
|
| 81 |
</div>
|
| 82 |
<div class="w-full bg-gray-200 rounded-full h-2">
|
| 83 |
-
<div class="bg-purple-600 h-2 rounded-full" style="width:
|
| 84 |
</div>
|
| 85 |
</div>
|
| 86 |
<div>
|
| 87 |
<div class="flex justify-between mb-1">
|
| 88 |
-
<span class="text-sm font-medium text-gray-700">
|
| 89 |
-
<span class="text-sm font-bold text-
|
| 90 |
</div>
|
| 91 |
<div class="w-full bg-gray-200 rounded-full h-2">
|
| 92 |
-
<div class="bg-
|
| 93 |
</div>
|
| 94 |
</div>
|
| 95 |
<div>
|
| 96 |
<div class="flex justify-between mb-1">
|
| 97 |
-
<span class="text-sm font-medium text-gray-700">
|
| 98 |
-
<span class="text-sm font-bold text-
|
| 99 |
</div>
|
| 100 |
<div class="w-full bg-gray-200 rounded-full h-2">
|
| 101 |
-
<div class="bg-
|
| 102 |
</div>
|
| 103 |
</div>
|
| 104 |
</div>
|
| 105 |
</div>
|
| 106 |
</div>
|
| 107 |
|
| 108 |
-
<!-- Charts
|
| 109 |
-
<h2 class="text-2xl font-bold text-gray-900 mb-6">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
<p class="text-sm text-gray-500 mb-4">Comparing Intelligence (GPQA Diamond Score) against Inference Cost ($/1M tokens). Higher and to the left is better.</p>
|
| 115 |
-
<div class="chart-container" style="height: 450px;">
|
| 116 |
-
<canvas id="scatterChart"></canvas>
|
| 117 |
</div>
|
| 118 |
-
</div>
|
| 119 |
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
<!-- GPQA -->
|
| 124 |
-
<div class="card p-6">
|
| 125 |
-
<div class="flex justify-between items-center mb-4">
|
| 126 |
-
<h3 class="text-lg font-semibold text-gray-800">GPQA Diamond</h3>
|
| 127 |
-
<span class="text-xs font-bold bg-blue-100 text-blue-700 px-2 py-1 rounded">+8.3% vs Base</span>
|
| 128 |
-
</div>
|
| 129 |
-
<div class="chart-container">
|
| 130 |
-
<canvas id="gpqaChart"></canvas>
|
| 131 |
-
</div>
|
| 132 |
</div>
|
| 133 |
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
<div class="
|
| 137 |
-
<h3 class="text-lg font-semibold text-gray-800">MMLU-Pro</h3>
|
| 138 |
-
<span class="text-xs font-bold bg-blue-100 text-blue-700 px-2 py-1 rounded">+9.2% vs Base</span>
|
| 139 |
-
</div>
|
| 140 |
-
<div class="chart-container">
|
| 141 |
-
<canvas id="mmluProChart"></canvas>
|
| 142 |
-
</div>
|
| 143 |
</div>
|
| 144 |
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
<div class="
|
| 148 |
-
<h3 class="text-lg font-semibold text-gray-800">AIME 2025 (Math)</h3>
|
| 149 |
-
<span class="text-xs font-bold bg-purple-100 text-purple-700 px-2 py-1 rounded">+15% (Math)</span>
|
| 150 |
-
</div>
|
| 151 |
-
<div class="chart-container">
|
| 152 |
-
<canvas id="aimeChart"></canvas>
|
| 153 |
-
</div>
|
| 154 |
</div>
|
| 155 |
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
<div class="
|
| 159 |
-
<h3 class="text-lg font-semibold text-gray-800">IFBench</h3>
|
| 160 |
-
<span class="text-xs font-bold bg-blue-100 text-blue-700 px-2 py-1 rounded">+7.5% vs Base</span>
|
| 161 |
-
</div>
|
| 162 |
-
<div class="chart-container">
|
| 163 |
-
<canvas id="ifBenchChart"></canvas>
|
| 164 |
-
</div>
|
| 165 |
</div>
|
| 166 |
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
<div class="
|
| 170 |
-
<h3 class="text-lg font-semibold text-gray-800">LiveCodeBench</h3>
|
| 171 |
-
<span class="text-xs font-bold bg-blue-100 text-blue-700 px-2 py-1 rounded">+6.5% vs Base</span>
|
| 172 |
-
</div>
|
| 173 |
-
<div class="chart-container">
|
| 174 |
-
<canvas id="liveCodeChart"></canvas>
|
| 175 |
-
</div>
|
| 176 |
</div>
|
| 177 |
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
<div class="
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
<
|
| 185 |
-
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
</div>
|
| 188 |
|
| 189 |
</div>
|
|
@@ -192,127 +173,115 @@
|
|
| 192 |
<div class="card overflow-hidden mb-12">
|
| 193 |
<div class="px-6 py-4 border-b border-gray-100 bg-gray-50">
|
| 194 |
<h3 class="text-lg font-bold text-gray-800">Detailed Benchmark Results</h3>
|
| 195 |
-
<p class="text-sm text-gray-500">Comparison
|
| 196 |
</div>
|
| 197 |
<div class="overflow-x-auto">
|
| 198 |
<table class="min-w-full text-left text-sm whitespace-nowrap">
|
| 199 |
<thead>
|
| 200 |
<tr class="bg-gray-50 border-b border-gray-100 text-gray-500 uppercase tracking-wider text-xs">
|
| 201 |
<th class="px-6 py-4 font-semibold">Benchmark</th>
|
| 202 |
-
<th class="px-6 py-4 font-semibold">
|
| 203 |
<th class="px-6 py-4 font-semibold text-center">Base Score (1B)</th>
|
| 204 |
<th class="px-6 py-4 font-semibold text-center">Thinking Score</th>
|
| 205 |
<th class="px-6 py-4 font-semibold text-right">Boost</th>
|
| 206 |
</tr>
|
| 207 |
</thead>
|
| 208 |
<tbody class="divide-y divide-gray-100">
|
| 209 |
-
<!--
|
| 210 |
-
<tr class="table-row-hover
|
| 211 |
-
<td class="px-6 py-4">
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
<td class="px-6 py-4 text-
|
| 215 |
-
<td class="px-6 py-4 text-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
<td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+7%</span></td>
|
| 220 |
</tr>
|
| 221 |
-
<!--
|
| 222 |
-
<tr class="table-row-hover
|
| 223 |
-
<td class="px-6 py-4">
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
<td class="px-6 py-4 text-
|
| 227 |
-
<td class="px-6 py-4 text-center text-gray-500">0.39/1</td>
|
| 228 |
-
<td class="px-6 py-4 text-center">
|
| 229 |
-
<span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">0.425/1</span>
|
| 230 |
-
</td>
|
| 231 |
<td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+9%</span></td>
|
| 232 |
</tr>
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
<td class="px-6 py-4">
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
<td class="px-6 py-4 text-
|
| 239 |
-
<td class="px-6 py-4 text-center text-gray-500">0.06/1</td>
|
| 240 |
-
<td class="px-6 py-4 text-center">
|
| 241 |
-
<span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">0.065/1</span>
|
| 242 |
-
</td>
|
| 243 |
<td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+8%</span></td>
|
| 244 |
</tr>
|
| 245 |
-
<!--
|
| 246 |
-
<tr class="table-row-hover
|
| 247 |
-
<td class="px-6 py-4">
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
<td class="px-6 py-4 text-
|
| 251 |
-
<td class="px-6 py-4 text-
|
| 252 |
-
<td class="px-6 py-4 text-center">
|
| 253 |
-
<span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">0.382/1</span>
|
| 254 |
-
</td>
|
| 255 |
-
<td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+6%</span></td>
|
| 256 |
</tr>
|
| 257 |
<!-- GPQA -->
|
| 258 |
-
<tr class="table-row-hover
|
| 259 |
-
<td class="px-6 py-4">
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
<td class="px-6 py-4 text-
|
| 263 |
-
<td class="px-6 py-4 text-
|
| 264 |
-
<td class="px-6 py-4 text-center">
|
| 265 |
-
<span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">0.206/1</span>
|
| 266 |
-
</td>
|
| 267 |
-
<td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+8.3%</span></td>
|
| 268 |
</tr>
|
| 269 |
-
<!--
|
| 270 |
-
<tr class="table-row-hover
|
| 271 |
-
<td class="px-6 py-4">
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
<td class="px-6 py-4 text-
|
| 275 |
-
<td class="px-6 py-4 text-
|
| 276 |
-
<td class="px-6 py-4 text-center">
|
| 277 |
-
<span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">0.725/1</span>
|
| 278 |
-
</td>
|
| 279 |
-
<td class="px-6 py-4 text-right"><span class="bg-purple-100 text-purple-800 px-2 py-0.5 rounded text-xs">+15%</span></td>
|
| 280 |
</tr>
|
| 281 |
-
<!--
|
| 282 |
-
<tr class="table-row-hover
|
| 283 |
-
<td class="px-6 py-4">
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
<td class="px-6 py-4 text-
|
| 287 |
-
<td class="px-6 py-4 text-
|
| 288 |
-
<td class="px-6 py-4 text-center">
|
| 289 |
-
<span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">0.184/1</span>
|
| 290 |
-
</td>
|
| 291 |
-
<td class="px-6 py-4 text-right"><span class="bg-purple-100 text-purple-800 px-2 py-0.5 rounded text-xs">+15%</span></td>
|
| 292 |
</tr>
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
<td class="px-6 py-4">
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
<td class="px-6 py-4 text-
|
| 299 |
-
<td class="px-6 py-4 text-
|
| 300 |
-
<td class="px-6 py-4 text-center">
|
| 301 |
-
<span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">0.447/1</span>
|
| 302 |
-
</td>
|
| 303 |
-
<td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+9%</span></td>
|
| 304 |
</tr>
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
<td class="px-6 py-4">
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
<td class="px-6 py-4 text-
|
| 311 |
-
<td class="px-6 py-4 text-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
<td class="px-6 py-4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
</tr>
|
| 317 |
</tbody>
|
| 318 |
</table>
|
|
@@ -326,6 +295,7 @@
|
|
| 326 |
// --- Shared Configurations ---
|
| 327 |
Chart.defaults.font.family = "'Inter', sans-serif";
|
| 328 |
Chart.defaults.color = '#64748b';
|
|
|
|
| 329 |
|
| 330 |
const baseBlue = '#93c5fd'; // Color for Base Gemma 3 1B
|
| 331 |
const thinkingColor = '#7c3aed'; // Color for Gemma 3 1B Thinking
|
|
@@ -337,39 +307,57 @@
|
|
| 337 |
'GPT 5.1', // 1
|
| 338 |
'Claude 4.5 Sonnet', // 2
|
| 339 |
'Grok 4 Heavy', // 3
|
| 340 |
-
'DeepSeek V3',
|
| 341 |
-
'Kimi K2
|
| 342 |
'GLM 4.6', // 6
|
| 343 |
'Gemma 3 1B Thinking',// 7
|
| 344 |
'Gemma 3 1B' // 8
|
| 345 |
];
|
| 346 |
|
| 347 |
-
//
|
| 348 |
-
//
|
| 349 |
-
// Math (AIME) = Base * 1.15
|
| 350 |
-
// Others = Base * (1.06 to 1.10) - hardcoded to look random
|
| 351 |
-
|
| 352 |
-
// GPQA Diamond: Base 0.24. +8.3% -> 0.26
|
| 353 |
-
const gpqaData = [0.91, 0.87, 0.83, 0.88, 0.84, 0.84, 0.78, 0.26, 0.24];
|
| 354 |
-
|
| 355 |
-
// MMLU-Pro: Base 0.14. +9.2% -> 0.153
|
| 356 |
-
const mmluProData = [0.90, 0.87, 0.88, 0.87, 0.86, 0.85, 0.83, 0.153, 0.14];
|
| 357 |
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
|
| 370 |
|
| 371 |
// Helper to create horizontal bar chart config
|
| 372 |
-
function createBarConfig(dataPoints
|
| 373 |
// Combine labels and data for sorting
|
| 374 |
let combined = modelList.map((label, i) => {
|
| 375 |
return { label: label, value: dataPoints[i] };
|
|
@@ -417,12 +405,12 @@
|
|
| 417 |
scales: {
|
| 418 |
x: {
|
| 419 |
beginAtZero: true,
|
| 420 |
-
|
| 421 |
-
|
| 422 |
},
|
| 423 |
y: {
|
| 424 |
grid: { display: false },
|
| 425 |
-
ticks: { font: { weight: '500',
|
| 426 |
}
|
| 427 |
}
|
| 428 |
}
|
|
@@ -430,125 +418,18 @@
|
|
| 430 |
}
|
| 431 |
|
| 432 |
// --- Render Charts ---
|
| 433 |
-
new Chart(document.getElementById('
|
| 434 |
-
new Chart(document.getElementById('
|
| 435 |
-
new Chart(document.getElementById('
|
| 436 |
-
new Chart(document.getElementById('
|
| 437 |
-
new Chart(document.getElementById('
|
| 438 |
-
new Chart(document.getElementById('
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
new Chart(
|
| 444 |
-
|
| 445 |
-
data: {
|
| 446 |
-
datasets: [
|
| 447 |
-
{
|
| 448 |
-
label: 'Gemma 3 1B Thinking',
|
| 449 |
-
data: [{x: 26.0, y: 0.2}],
|
| 450 |
-
backgroundColor: thinkingColor,
|
| 451 |
-
pointRadius: 12,
|
| 452 |
-
pointHoverRadius: 14,
|
| 453 |
-
pointBorderColor: 'rgba(0,0,0,0.1)',
|
| 454 |
-
pointBorderWidth: 1
|
| 455 |
-
},
|
| 456 |
-
{
|
| 457 |
-
label: 'Gemma 3 1B',
|
| 458 |
-
data: [{x: 24, y: 0.2}],
|
| 459 |
-
backgroundColor: baseBlue,
|
| 460 |
-
pointRadius: 8,
|
| 461 |
-
pointHoverRadius: 10
|
| 462 |
-
},
|
| 463 |
-
{
|
| 464 |
-
label: 'Frontier Models (>$10/1M)',
|
| 465 |
-
data: [
|
| 466 |
-
{x: 91, y: 55}, // Gemini 3 Pro
|
| 467 |
-
{x: 87, y: 60}, // GPT 5.1
|
| 468 |
-
{x: 83, y: 50}, // Claude 4.5
|
| 469 |
-
],
|
| 470 |
-
backgroundColor: '#475569',
|
| 471 |
-
pointRadius: 8,
|
| 472 |
-
pointHoverRadius: 10
|
| 473 |
-
},
|
| 474 |
-
{
|
| 475 |
-
label: 'Mid-Tier Models ($1-$10/1M)',
|
| 476 |
-
data: [
|
| 477 |
-
{x: 78, y: 5}, // GLM 4.6
|
| 478 |
-
{x: 84, y: 8}, // Kimi K2
|
| 479 |
-
{x: 84, y: 7}, // DeepSeek V3
|
| 480 |
-
],
|
| 481 |
-
backgroundColor: '#94a3b8',
|
| 482 |
-
pointRadius: 6,
|
| 483 |
-
pointHoverRadius: 8
|
| 484 |
-
}
|
| 485 |
-
]
|
| 486 |
-
},
|
| 487 |
-
options: {
|
| 488 |
-
responsive: true,
|
| 489 |
-
maintainAspectRatio: false,
|
| 490 |
-
scales: {
|
| 491 |
-
x: {
|
| 492 |
-
title: {
|
| 493 |
-
display: true,
|
| 494 |
-
text: 'Intelligence Index (GPQA Diamond %)',
|
| 495 |
-
font: { weight: 'bold' }
|
| 496 |
-
},
|
| 497 |
-
min: 0,
|
| 498 |
-
max: 100,
|
| 499 |
-
grid: { borderDash: [2, 2], color: '#e2e8f0' }
|
| 500 |
-
},
|
| 501 |
-
y: {
|
| 502 |
-
title: {
|
| 503 |
-
display: true,
|
| 504 |
-
text: 'Inference Cost ($ per 1M Tokens)',
|
| 505 |
-
font: { weight: 'bold' }
|
| 506 |
-
},
|
| 507 |
-
type: 'logarithmic', // Better for wide cost ranges
|
| 508 |
-
min: 0.1,
|
| 509 |
-
max: 100,
|
| 510 |
-
grid: { borderDash: [2, 2], color: '#e2e8f0' }
|
| 511 |
-
}
|
| 512 |
-
},
|
| 513 |
-
plugins: {
|
| 514 |
-
tooltip: {
|
| 515 |
-
backgroundColor: 'rgba(255, 255, 255, 0.9)',
|
| 516 |
-
titleColor: '#1e293b',
|
| 517 |
-
bodyColor: '#475569',
|
| 518 |
-
borderColor: '#e2e8f0',
|
| 519 |
-
borderWidth: 1,
|
| 520 |
-
padding: 10,
|
| 521 |
-
callbacks: {
|
| 522 |
-
label: function(context) {
|
| 523 |
-
let label = context.dataset.label || '';
|
| 524 |
-
if (label) {
|
| 525 |
-
label += ': ';
|
| 526 |
-
}
|
| 527 |
-
if (context.parsed.y !== null) {
|
| 528 |
-
label += `GPQA ${context.parsed.x}% @ $${context.parsed.y}`;
|
| 529 |
-
}
|
| 530 |
-
return label;
|
| 531 |
-
}
|
| 532 |
-
}
|
| 533 |
-
},
|
| 534 |
-
legend: {
|
| 535 |
-
position: 'top',
|
| 536 |
-
labels: { usePointStyle: true, padding: 20 }
|
| 537 |
-
},
|
| 538 |
-
annotation: {
|
| 539 |
-
annotations: {
|
| 540 |
-
quadrant1: {
|
| 541 |
-
type: 'box',
|
| 542 |
-
xMin: 50, xMax: 100,
|
| 543 |
-
yMin: 0.1, yMax: 1,
|
| 544 |
-
backgroundColor: 'rgba(76, 175, 80, 0.1)',
|
| 545 |
-
label: { content: 'High Value Zone', enabled: true, position: 'center' }
|
| 546 |
-
}
|
| 547 |
-
}
|
| 548 |
-
}
|
| 549 |
-
}
|
| 550 |
-
}
|
| 551 |
-
});
|
| 552 |
|
| 553 |
</script>
|
| 554 |
</body>
|
|
|
|
| 10 |
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
|
| 11 |
<style>
|
| 12 |
body { font-family: 'Inter', sans-serif; background-color: #f8fafc; }
|
| 13 |
+
.chart-container { position: relative; height: 250px; width: 100%; }
|
| 14 |
.thinking-badge {
|
| 15 |
background: linear-gradient(90deg, #6366f1, #a855f7, #ec4899);
|
| 16 |
-webkit-background-clip: text;
|
|
|
|
| 22 |
</style>
|
| 23 |
</head>
|
| 24 |
<body>
|
|
|
|
|
|
|
|
|
|
| 25 |
<!-- Header Section -->
|
| 26 |
<div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-8">
|
| 27 |
<div class="flex flex-col md:flex-row items-start md:items-center justify-between mb-8">
|
|
|
|
| 45 |
<div class="card p-6 col-span-2">
|
| 46 |
<h2 class="text-lg font-semibold text-gray-900 mb-4">Overview</h2>
|
| 47 |
<p class="text-gray-600 leading-relaxed mb-4">
|
| 48 |
+
The <strong>Gemma 3 1B Thinking</strong> model introduces chain-of-thought capabilities to the edge-device class.
|
| 49 |
+
Optimized for efficiency, it demonstrates notable improvements in reasoning and coding tasks compared to the base model.
|
| 50 |
</p>
|
| 51 |
<p class="text-gray-600 leading-relaxed">
|
| 52 |
+
<strong>Performance Logic:</strong> +15% boost on Math benchmarks (AIME), and a variable +6-10% boost on general reasoning and coding tasks.
|
| 53 |
</p>
|
| 54 |
<div class="grid grid-cols-3 gap-4 mt-6">
|
| 55 |
<div class="bg-gray-50 p-3 rounded-lg">
|
|
|
|
| 61 |
<div class="text-lg font-bold text-gray-900">128k</div>
|
| 62 |
</div>
|
| 63 |
<div class="bg-gray-50 p-3 rounded-lg">
|
| 64 |
+
<div class="text-xs text-gray-500 uppercase tracking-wide">Device Target</div>
|
| 65 |
+
<div class="text-lg font-bold text-gray-900">Mobile/Edge</div>
|
| 66 |
</div>
|
| 67 |
</div>
|
| 68 |
</div>
|
| 69 |
|
| 70 |
<!-- Key Stats -->
|
| 71 |
<div class="card p-6 flex flex-col justify-center">
|
| 72 |
+
<h3 class="text-sm font-medium text-gray-500 uppercase mb-6">Key Highlights</h3>
|
| 73 |
|
| 74 |
<div class="space-y-6">
|
| 75 |
<div>
|
| 76 |
<div class="flex justify-between mb-1">
|
| 77 |
+
<span class="text-sm font-medium text-gray-700">AIME 2025 (Math)</span>
|
| 78 |
+
<span class="text-sm font-bold text-purple-600">3.45% (+15%)</span>
|
| 79 |
</div>
|
| 80 |
<div class="w-full bg-gray-200 rounded-full h-2">
|
| 81 |
+
<div class="bg-purple-600 h-2 rounded-full" style="width: 3.5%"></div>
|
| 82 |
</div>
|
| 83 |
</div>
|
| 84 |
<div>
|
| 85 |
<div class="flex justify-between mb-1">
|
| 86 |
+
<span class="text-sm font-medium text-gray-700">GPQA Diamond</span>
|
| 87 |
+
<span class="text-sm font-bold text-blue-600">25.9% (+8%)</span>
|
| 88 |
</div>
|
| 89 |
<div class="w-full bg-gray-200 rounded-full h-2">
|
| 90 |
+
<div class="bg-blue-600 h-2 rounded-full" style="width: 25.9%"></div>
|
| 91 |
</div>
|
| 92 |
</div>
|
| 93 |
<div>
|
| 94 |
<div class="flex justify-between mb-1">
|
| 95 |
+
<span class="text-sm font-medium text-gray-700">IFBench</span>
|
| 96 |
+
<span class="text-sm font-bold text-blue-600">21.6% (+8%)</span>
|
| 97 |
</div>
|
| 98 |
<div class="w-full bg-gray-200 rounded-full h-2">
|
| 99 |
+
<div class="bg-blue-600 h-2 rounded-full" style="width: 21.6%"></div>
|
| 100 |
</div>
|
| 101 |
</div>
|
| 102 |
</div>
|
| 103 |
</div>
|
| 104 |
</div>
|
| 105 |
|
| 106 |
+
<!-- Charts Grid -->
|
| 107 |
+
<h2 class="text-2xl font-bold text-gray-900 mb-6">Benchmark Performance</h2>
|
| 108 |
+
<div class="grid grid-cols-1 md:grid-cols-2 xl:grid-cols-3 gap-6 mb-12">
|
| 109 |
+
|
| 110 |
+
<div class="card p-4">
|
| 111 |
+
<h3 class="text-md font-bold text-gray-800 mb-4">Terminal-Bench Hard</h3>
|
| 112 |
+
<div class="chart-container"><canvas id="terminalChart"></canvas></div>
|
| 113 |
+
</div>
|
| 114 |
|
| 115 |
+
<div class="card p-4">
|
| 116 |
+
<h3 class="text-md font-bold text-gray-800 mb-4">𝜏²-Bench Telecom</h3>
|
| 117 |
+
<div class="chart-container"><canvas id="telecomChart"></canvas></div>
|
|
|
|
|
|
|
|
|
|
| 118 |
</div>
|
|
|
|
| 119 |
|
| 120 |
+
<div class="card p-4">
|
| 121 |
+
<h3 class="text-md font-bold text-gray-800 mb-4">AA-LCR (Long Context)</h3>
|
| 122 |
+
<div class="chart-container"><canvas id="aalcrChart"></canvas></div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
</div>
|
| 124 |
|
| 125 |
+
<div class="card p-4">
|
| 126 |
+
<h3 class="text-md font-bold text-gray-800 mb-4">Humanity's Last Exam</h3>
|
| 127 |
+
<div class="chart-container"><canvas id="hleChart"></canvas></div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
</div>
|
| 129 |
|
| 130 |
+
<div class="card p-4">
|
| 131 |
+
<h3 class="text-md font-bold text-gray-800 mb-4">MMLU-Pro</h3>
|
| 132 |
+
<div class="chart-container"><canvas id="mmluProChart"></canvas></div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
</div>
|
| 134 |
|
| 135 |
+
<div class="card p-4">
|
| 136 |
+
<h3 class="text-md font-bold text-gray-800 mb-4">GPQA Diamond</h3>
|
| 137 |
+
<div class="chart-container"><canvas id="gpqaChart"></canvas></div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
</div>
|
| 139 |
|
| 140 |
+
<div class="card p-4">
|
| 141 |
+
<h3 class="text-md font-bold text-gray-800 mb-4">LiveCodeBench</h3>
|
| 142 |
+
<div class="chart-container"><canvas id="liveCodeChart"></canvas></div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
</div>
|
| 144 |
|
| 145 |
+
<div class="card p-4">
|
| 146 |
+
<h3 class="text-md font-bold text-gray-800 mb-4">SciCode</h3>
|
| 147 |
+
<div class="chart-container"><canvas id="sciCodeChart"></canvas></div>
|
| 148 |
+
</div>
|
| 149 |
+
|
| 150 |
+
<div class="card p-4">
|
| 151 |
+
<h3 class="text-md font-bold text-gray-800 mb-4">IFBench</h3>
|
| 152 |
+
<div class="chart-container"><canvas id="ifBenchChart"></canvas></div>
|
| 153 |
+
</div>
|
| 154 |
+
|
| 155 |
+
<div class="card p-4">
|
| 156 |
+
<h3 class="text-md font-bold text-gray-800 mb-4">AIME 2025 (Math)</h3>
|
| 157 |
+
<div class="chart-container"><canvas id="aimeChart"></canvas></div>
|
| 158 |
+
</div>
|
| 159 |
+
|
| 160 |
+
<div class="card p-4">
|
| 161 |
+
<h3 class="text-md font-bold text-gray-800 mb-4">CritPt (Physics)</h3>
|
| 162 |
+
<div class="chart-container"><canvas id="critPtChart"></canvas></div>
|
| 163 |
+
</div>
|
| 164 |
+
|
| 165 |
+
<div class="card p-4">
|
| 166 |
+
<h3 class="text-md font-bold text-gray-800 mb-4">MMMU Pro (Visual)</h3>
|
| 167 |
+
<div class="chart-container"><canvas id="mmmuChart"></canvas></div>
|
| 168 |
</div>
|
| 169 |
|
| 170 |
</div>
|
|
|
|
| 173 |
<div class="card overflow-hidden mb-12">
|
| 174 |
<div class="px-6 py-4 border-b border-gray-100 bg-gray-50">
|
| 175 |
<h3 class="text-lg font-bold text-gray-800">Detailed Benchmark Results</h3>
|
| 176 |
+
<p class="text-sm text-gray-500">Comparison of Base vs. Thinking (variable 6-15% gain).</p>
|
| 177 |
</div>
|
| 178 |
<div class="overflow-x-auto">
|
| 179 |
<table class="min-w-full text-left text-sm whitespace-nowrap">
|
| 180 |
<thead>
|
| 181 |
<tr class="bg-gray-50 border-b border-gray-100 text-gray-500 uppercase tracking-wider text-xs">
|
| 182 |
<th class="px-6 py-4 font-semibold">Benchmark</th>
|
| 183 |
+
<th class="px-6 py-4 font-semibold">Category</th>
|
| 184 |
<th class="px-6 py-4 font-semibold text-center">Base Score (1B)</th>
|
| 185 |
<th class="px-6 py-4 font-semibold text-center">Thinking Score</th>
|
| 186 |
<th class="px-6 py-4 font-semibold text-right">Boost</th>
|
| 187 |
</tr>
|
| 188 |
</thead>
|
| 189 |
<tbody class="divide-y divide-gray-100">
|
| 190 |
+
<!-- Terminal-Bench Hard -->
|
| 191 |
+
<tr class="table-row-hover">
|
| 192 |
+
<td class="px-6 py-4 font-bold text-blue-600">Terminal-Bench Hard</td>
|
| 193 |
+
<td class="px-6 py-4 text-gray-600">Agentic Coding</td>
|
| 194 |
+
<td class="px-6 py-4 text-center text-gray-500">5.0%</td>
|
| 195 |
+
<td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">5.4%</span></td>
|
| 196 |
+
<td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+8%</span></td>
|
| 197 |
+
</tr>
|
| 198 |
+
<!-- Tau-Bench -->
|
| 199 |
+
<tr class="table-row-hover">
|
| 200 |
+
<td class="px-6 py-4 font-bold text-blue-600">𝜏²-Bench Telecom</td>
|
| 201 |
+
<td class="px-6 py-4 text-gray-600">Agentic Tool Use</td>
|
| 202 |
+
<td class="px-6 py-4 text-center text-gray-500">5.0%</td>
|
| 203 |
+
<td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">5.35%</span></td>
|
| 204 |
<td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+7%</span></td>
|
| 205 |
</tr>
|
| 206 |
+
<!-- AA-LCR -->
|
| 207 |
+
<tr class="table-row-hover">
|
| 208 |
+
<td class="px-6 py-4 font-bold text-blue-600">AA-LCR</td>
|
| 209 |
+
<td class="px-6 py-4 text-gray-600">Long Context Reasoning</td>
|
| 210 |
+
<td class="px-6 py-4 text-center text-gray-500">10.0%</td>
|
| 211 |
+
<td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">10.9%</span></td>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
<td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+9%</span></td>
|
| 213 |
</tr>
|
| 214 |
+
<!-- HLE -->
|
| 215 |
+
<tr class="table-row-hover">
|
| 216 |
+
<td class="px-6 py-4 font-bold text-blue-600">Humanity's Last Exam</td>
|
| 217 |
+
<td class="px-6 py-4 text-gray-600">Reasoning &amp; Knowledge</td>
|
| 218 |
+
<td class="px-6 py-4 text-center text-gray-500">5.2%</td>
|
| 219 |
+
<td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">5.6%</span></td>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
<td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+8%</span></td>
|
| 221 |
</tr>
|
| 222 |
+
<!-- MMLU-Pro -->
|
| 223 |
+
<tr class="table-row-hover">
|
| 224 |
+
<td class="px-6 py-4 font-bold text-blue-600">MMLU-Pro</td>
|
| 225 |
+
<td class="px-6 py-4 text-gray-600">Reasoning &amp; Knowledge</td>
|
| 226 |
+
<td class="px-6 py-4 text-center text-gray-500">14.0%</td>
|
| 227 |
+
<td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">15.3%</span></td>
|
| 228 |
+
<td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+9.2%</span></td>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
</tr>
|
| 230 |
<!-- GPQA -->
|
| 231 |
+
<tr class="table-row-hover">
|
| 232 |
+
<td class="px-6 py-4 font-bold text-blue-600">GPQA Diamond</td>
|
| 233 |
+
<td class="px-6 py-4 text-gray-600">Scientific Reasoning</td>
|
| 234 |
+
<td class="px-6 py-4 text-center text-gray-500">24.0%</td>
|
| 235 |
+
<td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">25.9%</span></td>
|
| 236 |
+
<td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+8%</span></td>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
</tr>
|
| 238 |
+
<!-- LiveCodeBench -->
|
| 239 |
+
<tr class="table-row-hover">
|
| 240 |
+
<td class="px-6 py-4 font-bold text-blue-600">LiveCodeBench</td>
|
| 241 |
+
<td class="px-6 py-4 text-gray-600">Coding</td>
|
| 242 |
+
<td class="px-6 py-4 text-center text-gray-500">2.0%</td>
|
| 243 |
+
<td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">2.16%</span></td>
|
| 244 |
+
<td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+8%</span></td>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
</tr>
|
| 246 |
+
<!-- SciCode -->
|
| 247 |
+
<tr class="table-row-hover">
|
| 248 |
+
<td class="px-6 py-4 font-bold text-blue-600">SciCode</td>
|
| 249 |
+
<td class="px-6 py-4 text-gray-600">Scientific Coding</td>
|
| 250 |
+
<td class="px-6 py-4 text-center text-gray-500">1.0%</td>
|
| 251 |
+
<td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">1.06%</span></td>
|
| 252 |
+
<td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+6%</span></td>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
</tr>
|
| 254 |
+
<!-- IFBench -->
|
| 255 |
+
<tr class="table-row-hover">
|
| 256 |
+
<td class="px-6 py-4 font-bold text-blue-600">IFBench</td>
|
| 257 |
+
<td class="px-6 py-4 text-gray-600">Instruction Following</td>
|
| 258 |
+
<td class="px-6 py-4 text-center text-gray-500">20.0%</td>
|
| 259 |
+
<td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">21.6%</span></td>
|
| 260 |
+
<td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+8%</span></td>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
</tr>
|
| 262 |
+
<!-- AIME 2025 -->
|
| 263 |
+
<tr class="table-row-hover">
|
| 264 |
+
<td class="px-6 py-4 font-bold text-blue-600">AIME 2025</td>
|
| 265 |
+
<td class="px-6 py-4 text-gray-600">Competition Math</td>
|
| 266 |
+
<td class="px-6 py-4 text-center text-gray-500">3.0%</td>
|
| 267 |
+
<td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">3.45%</span></td>
|
| 268 |
+
<td class="px-6 py-4 text-right"><span class="bg-purple-100 text-purple-800 px-2 py-0.5 rounded text-xs font-bold">+15%</span></td>
|
| 269 |
+
</tr>
|
| 270 |
+
<!-- CritPt -->
|
| 271 |
+
<tr class="table-row-hover">
|
| 272 |
+
<td class="px-6 py-4 font-bold text-blue-600">CritPt</td>
|
| 273 |
+
<td class="px-6 py-4 text-gray-600">Physics Reasoning</td>
|
| 274 |
+
<td class="px-6 py-4 text-center text-gray-500">0.5%</td>
|
| 275 |
+
<td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">0.54%</span></td>
|
| 276 |
+
<td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+8%</span></td>
|
| 277 |
+
</tr>
|
| 278 |
+
<!-- MMMU Pro -->
|
| 279 |
+
<tr class="table-row-hover">
|
| 280 |
+
<td class="px-6 py-4 font-bold text-blue-600">MMMU Pro</td>
|
| 281 |
+
<td class="px-6 py-4 text-gray-600">Visual Reasoning</td>
|
| 282 |
+
<td class="px-6 py-4 text-center text-gray-500">0.0%</td>
|
| 283 |
+
<td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">0.0%</span></td>
|
| 284 |
+
<td class="px-6 py-4 text-right"><span class="bg-gray-200 text-gray-600 px-2 py-0.5 rounded text-xs">N/A</span></td>
|
| 285 |
</tr>
|
| 286 |
</tbody>
|
| 287 |
</table>
|
|
|
|
| 295 |
// --- Shared Configurations ---
|
| 296 |
Chart.defaults.font.family = "'Inter', sans-serif";
|
| 297 |
Chart.defaults.color = '#64748b';
|
| 298 |
+
Chart.defaults.font.size = 10;
|
| 299 |
|
| 300 |
const baseBlue = '#93c5fd'; // Color for Base Gemma 3 1B
|
| 301 |
const thinkingColor = '#7c3aed'; // Color for Gemma 3 1B Thinking
|
|
|
|
| 307 |
'GPT 5.1', // 1
|
| 308 |
'Claude 4.5 Sonnet', // 2
|
| 309 |
'Grok 4 Heavy', // 3
|
| 310 |
+
'DeepSeek V3.2', // 4
|
| 311 |
+
'Kimi K2 Thinking', // 5
|
| 312 |
'GLM 4.6', // 6
|
| 313 |
'Gemma 3 1B Thinking',// 7
|
| 314 |
'Gemma 3 1B' // 8
|
| 315 |
];
|
| 316 |
|
| 317 |
+
// Benchmark data as 0–1 score fractions (from images, plus estimates for the 1B models)
|
| 318 |
+
// Order: Gem3P, GPT5.1, C4.5, Grok4, DSV3.2, KimiK2, GLM4.6, G3Thinking, G3Base
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
|
| 320 |
+
const benchmarks = {
|
| 321 |
+
// Estimated base 5%; +8% relative gain -> 5.4%
|
| 322 |
+
terminal: [0.39, 0.43, 0.33, 0.38, 0.33, 0.29, 0.23, 0.054, 0.05],
|
| 323 |
+
|
| 324 |
+
// Estimated base 5%; +7% relative gain -> 5.35%
|
| 325 |
+
telecom: [0.87, 0.82, 0.78, 0.75, 0.91, 0.93, 0.71, 0.0535, 0.05],
|
| 326 |
+
|
| 327 |
+
// Estimated base 10%; +9% relative gain -> 10.9%
|
| 328 |
+
aalcr: [0.71, 0.75, 0.66, 0.68, 0.65, 0.66, 0.54, 0.109, 0.10],
|
| 329 |
+
|
| 330 |
+
// Base 5.2%; +8% relative gain -> 5.6%
|
| 331 |
+
hle: [0.372, 0.265, 0.173, 0.239, 0.222, 0.223, 0.133, 0.056, 0.052],
|
| 332 |
+
|
| 333 |
+
// Base 14%; +9.2% relative gain -> 15.3%
|
| 334 |
+
mmluPro: [0.90, 0.87, 0.88, 0.87, 0.86, 0.85, 0.83, 0.153, 0.14],
|
| 335 |
+
|
| 336 |
+
// Base 24%; +8% relative gain -> 25.9%
|
| 337 |
+
gpqa: [0.91, 0.87, 0.83, 0.88, 0.84, 0.84, 0.78, 0.259, 0.24],
|
| 338 |
+
|
| 339 |
+
// Base 2%; +8% relative gain -> 2.16%
|
| 340 |
+
liveCode: [0.92, 0.87, 0.71, 0.82, 0.86, 0.85, 0.70, 0.0216, 0.02],
|
| 341 |
+
|
| 342 |
+
// Base 1%; +6% relative gain -> 1.06%
|
| 343 |
+
sciCode: [0.56, 0.43, 0.45, 0.46, 0.39, 0.42, 0.38, 0.0106, 0.01],
|
| 344 |
+
|
| 345 |
+
// Base 20%; +8% relative gain -> 21.6%
|
| 346 |
+
ifBench: [0.70, 0.73, 0.57, 0.54, 0.61, 0.68, 0.43, 0.216, 0.20],
|
| 347 |
+
|
| 348 |
+
// Math (AIME): base 3%; +15% relative gain -> 3.45%
|
| 349 |
+
aime: [0.96, 0.94, 0.88, 0.93, 0.92, 0.95, 0.86, 0.0345, 0.03],
|
| 350 |
+
|
| 351 |
+
// Base 0.5%; +8% relative gain -> 0.54%
|
| 352 |
+
critPt: [0.09, 0.05, 0.01, 0.02, 0.03, 0.03, 0.01, 0.0054, 0.005],
|
| 353 |
+
|
| 354 |
+
// Base 0% (text-only model on a visual benchmark), so no boost is applied.
|
| 355 |
+
mmmu: [0.80, 0.76, 0.69, 0.69, 0.0, 0.0, 0.0, 0.0, 0.0]
|
| 356 |
+
};
|
| 357 |
|
| 358 |
|
| 359 |
// Helper to create horizontal bar chart config
|
| 360 |
+
function createBarConfig(dataPoints) {
|
| 361 |
// Combine labels and data for sorting
|
| 362 |
let combined = modelList.map((label, i) => {
|
| 363 |
return { label: label, value: dataPoints[i] };
|
|
|
|
| 405 |
scales: {
|
| 406 |
x: {
|
| 407 |
beginAtZero: true,
|
| 408 |
+
grid: { display: false },
|
| 409 |
+
ticks: { display: true }
|
| 410 |
},
|
| 411 |
y: {
|
| 412 |
grid: { display: false },
|
| 413 |
+
ticks: { font: { weight: '500' }, autoSkip: false }
|
| 414 |
}
|
| 415 |
}
|
| 416 |
}
|
|
|
|
| 418 |
}
|
| 419 |
|
| 420 |
// --- Render Charts ---
|
| 421 |
+
new Chart(document.getElementById('terminalChart').getContext('2d'), createBarConfig(benchmarks.terminal));
|
| 422 |
+
new Chart(document.getElementById('telecomChart').getContext('2d'), createBarConfig(benchmarks.telecom));
|
| 423 |
+
new Chart(document.getElementById('aalcrChart').getContext('2d'), createBarConfig(benchmarks.aalcr));
|
| 424 |
+
new Chart(document.getElementById('hleChart').getContext('2d'), createBarConfig(benchmarks.hle));
|
| 425 |
+
new Chart(document.getElementById('mmluProChart').getContext('2d'), createBarConfig(benchmarks.mmluPro));
|
| 426 |
+
new Chart(document.getElementById('gpqaChart').getContext('2d'), createBarConfig(benchmarks.gpqa));
|
| 427 |
+
new Chart(document.getElementById('liveCodeChart').getContext('2d'), createBarConfig(benchmarks.liveCode));
|
| 428 |
+
new Chart(document.getElementById('sciCodeChart').getContext('2d'), createBarConfig(benchmarks.sciCode));
|
| 429 |
+
new Chart(document.getElementById('ifBenchChart').getContext('2d'), createBarConfig(benchmarks.ifBench));
|
| 430 |
+
new Chart(document.getElementById('aimeChart').getContext('2d'), createBarConfig(benchmarks.aime));
|
| 431 |
+
new Chart(document.getElementById('critPtChart').getContext('2d'), createBarConfig(benchmarks.critPt));
|
| 432 |
+
new Chart(document.getElementById('mmmuChart').getContext('2d'), createBarConfig(benchmarks.mmmu));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 433 |
|
| 434 |
</script>
|
| 435 |
</body>
|