thejagstudio's picture
Update index.html
4a01ab7 verified
raw
history blame
24.8 kB
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Gemma 3 1B Thinking - Model Scorecard</title>
  <!-- Tailwind via CDN: supplies the utility classes used throughout the markup. -->
  <script src="https://cdn.tailwindcss.com"></script>
  <!-- Chart.js is loaded synchronously here because the inline script at the end of
       <body> uses the global `Chart` immediately. The annotation plugin follows the core library. -->
  <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/chartjs-plugin-annotation@2.2.1/dist/chartjs-plugin-annotation.min.js"></script>
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
  <style>
    /* Page-wide typography and background.
       NOTE(review): no stylesheet or font link in this head loads 'Inter', so the
       generic sans-serif fallback applies unless the font is installed locally;
       confirm whether a web-font link was intended. */
    body { font-family: 'Inter', sans-serif; background-color: #f8fafc; }

    /* Fixed-height wrapper for each chart canvas; the charts are configured with
       maintainAspectRatio disabled, so they fill this box. */
    .chart-container { position: relative; height: 250px; width: 100%; }

    /* Gradient-filled text: the background is clipped to the glyph shapes and the
       text fill is made transparent so the gradient shows through. */
    .thinking-badge {
      background: linear-gradient(90deg, #6366f1, #a855f7, #ec4899);
      -webkit-background-clip: text;
      background-clip: text; /* standard property alongside the prefixed one */
      -webkit-text-fill-color: transparent;
      font-weight: 800;
    }

    /* White rounded panel with a soft drop shadow, used for every content card. */
    .card { background: white; border-radius: 12px; box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1); }

    /* Subtle highlight for table body rows on hover. */
    .table-row-hover:hover { background-color: #f8fafc; }
  </style>
</head>
<body>
<!-- Header Section -->
<div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-8">
<!-- Page header: monogram, model title with vendor/mode badges, and the model identifier line.
     Uses <header> so assistive technology exposes it as the page banner landmark. -->
<header class="flex flex-col md:flex-row items-start md:items-center justify-between mb-8">
  <div class="flex items-center">
    <!-- Decorative monogram; hidden from assistive technology because the <h1> already names the model. -->
    <div class="h-16 w-16 bg-gradient-to-br from-indigo-500 to-purple-600 rounded-xl flex items-center justify-center text-white text-2xl font-bold shadow-lg mr-6" aria-hidden="true">
      G3
    </div>
    <div>
      <div class="flex items-center gap-2">
        <h1 class="text-3xl font-bold text-gray-900">Gemma 3 1B <span class="thinking-badge">Thinking</span></h1>
        <span class="bg-blue-100 text-blue-800 text-xs font-semibold px-2.5 py-0.5 rounded border border-blue-200">Google</span>
        <span class="bg-purple-100 text-purple-800 text-xs font-semibold px-2.5 py-0.5 rounded border border-purple-200">Thinking</span>
      </div>
      <p class="text-gray-500 mt-1">gemma-3-1b-thinking-preview • 1.2B Parameters • 128k Context</p>
    </div>
  </div>
</header>
<!-- Summary row: a wide overview card next to a narrower highlights card.
     One column on small screens, three columns from the `lg` breakpoint up. -->
<div class="grid grid-cols-1 lg:grid-cols-3 gap-8 mb-8">
  <!-- Overview Card.
       The two-column span is scoped to `lg:` on purpose: an unprefixed span of 2 inside
       the single-column mobile grid creates an implicit extra column track, which
       leaves the two cards with different widths on small screens. -->
  <div class="card p-6 lg:col-span-2">
    <h2 class="text-lg font-semibold text-gray-900 mb-4">Overview</h2>
    <p class="text-gray-600 leading-relaxed mb-4">
      The <strong>Gemma 3 1B Thinking</strong> model introduces chain-of-thought capabilities to the edge-device class.
      Optimized for efficiency, it demonstrates notable improvements in reasoning and coding tasks compared to the base model.
    </p>
    <p class="text-gray-600 leading-relaxed">
      <strong>Performance Logic:</strong> +15% boost on Math benchmarks (AIME), and a variable +6-10% boost on general reasoning and coding tasks.
    </p>
    <!-- Quick facts: three equal tiles. -->
    <div class="grid grid-cols-3 gap-4 mt-6">
      <div class="bg-gray-50 p-3 rounded-lg">
        <div class="text-xs text-gray-500 uppercase tracking-wide">Parameters</div>
        <div class="text-lg font-bold text-gray-900">1.2B</div>
      </div>
      <div class="bg-gray-50 p-3 rounded-lg">
        <div class="text-xs text-gray-500 uppercase tracking-wide">Context Window</div>
        <div class="text-lg font-bold text-gray-900">128k</div>
      </div>
      <div class="bg-gray-50 p-3 rounded-lg">
        <div class="text-xs text-gray-500 uppercase tracking-wide">Device Target</div>
        <div class="text-lg font-bold text-gray-900">Mobile/Edge</div>
      </div>
    </div>
  </div>
  <!-- Key Stats: each entry shows the score as text plus a bar whose width equals that score.
       The bars repeat the adjacent text, so they are hidden from assistive technology. -->
  <div class="card p-6 flex flex-col justify-center">
    <h3 class="text-sm font-medium text-gray-500 uppercase mb-6">Key Highlights</h3>
    <div class="space-y-6">
      <div>
        <div class="flex justify-between mb-1">
          <span class="text-sm font-medium text-gray-700">AIME 2025 (Math)</span>
          <span class="text-sm font-bold text-purple-600">3.45% (+15%)</span>
        </div>
        <div class="w-full bg-gray-200 rounded-full h-2" aria-hidden="true">
          <!-- Width matches the 3.45% score shown above. -->
          <div class="bg-purple-600 h-2 rounded-full" style="width: 3.45%"></div>
        </div>
      </div>
      <div>
        <div class="flex justify-between mb-1">
          <span class="text-sm font-medium text-gray-700">GPQA Diamond</span>
          <span class="text-sm font-bold text-blue-600">25.9% (+8%)</span>
        </div>
        <div class="w-full bg-gray-200 rounded-full h-2" aria-hidden="true">
          <div class="bg-blue-600 h-2 rounded-full" style="width: 25.9%"></div>
        </div>
      </div>
      <div>
        <div class="flex justify-between mb-1">
          <span class="text-sm font-medium text-gray-700">IFBench</span>
          <span class="text-sm font-bold text-blue-600">21.6% (+8%)</span>
        </div>
        <div class="w-full bg-gray-200 rounded-full h-2" aria-hidden="true">
          <div class="bg-blue-600 h-2 rounded-full" style="width: 21.6%"></div>
        </div>
      </div>
    </div>
  </div>
</div>
<!-- Charts Grid: one card per benchmark. Each canvas id is looked up by the inline
     script at the end of the page, so the ids must stay as they are.
     Every canvas carries role="img" and an aria-label because a bare <canvas>
     exposes no accessible name. Card titles use `text-base`; the previous
     `text-md` is not a Tailwind utility and generated no CSS. -->
<h2 class="text-2xl font-bold text-gray-900 mb-6">Benchmark Performance</h2>
<div class="grid grid-cols-1 md:grid-cols-2 xl:grid-cols-3 gap-6 mb-12">
  <div class="card p-4">
    <h3 class="text-base font-bold text-gray-800 mb-4">Terminal-Bench Hard</h3>
    <div class="chart-container"><canvas id="terminalChart" role="img" aria-label="Horizontal bar chart of Terminal-Bench Hard scores by model"></canvas></div>
  </div>
  <div class="card p-4">
    <h3 class="text-base font-bold text-gray-800 mb-4">𝜏²-Bench Telecom</h3>
    <div class="chart-container"><canvas id="telecomChart" role="img" aria-label="Horizontal bar chart of 𝜏²-Bench Telecom scores by model"></canvas></div>
  </div>
  <div class="card p-4">
    <h3 class="text-base font-bold text-gray-800 mb-4">AA-LCR (Long Context)</h3>
    <div class="chart-container"><canvas id="aalcrChart" role="img" aria-label="Horizontal bar chart of AA-LCR long-context scores by model"></canvas></div>
  </div>
  <div class="card p-4">
    <h3 class="text-base font-bold text-gray-800 mb-4">Humanity's Last Exam</h3>
    <div class="chart-container"><canvas id="hleChart" role="img" aria-label="Horizontal bar chart of Humanity's Last Exam scores by model"></canvas></div>
  </div>
  <div class="card p-4">
    <h3 class="text-base font-bold text-gray-800 mb-4">MMLU-Pro</h3>
    <div class="chart-container"><canvas id="mmluProChart" role="img" aria-label="Horizontal bar chart of MMLU-Pro scores by model"></canvas></div>
  </div>
  <div class="card p-4">
    <h3 class="text-base font-bold text-gray-800 mb-4">GPQA Diamond</h3>
    <div class="chart-container"><canvas id="gpqaChart" role="img" aria-label="Horizontal bar chart of GPQA Diamond scores by model"></canvas></div>
  </div>
  <div class="card p-4">
    <h3 class="text-base font-bold text-gray-800 mb-4">LiveCodeBench</h3>
    <div class="chart-container"><canvas id="liveCodeChart" role="img" aria-label="Horizontal bar chart of LiveCodeBench scores by model"></canvas></div>
  </div>
  <div class="card p-4">
    <h3 class="text-base font-bold text-gray-800 mb-4">SciCode</h3>
    <div class="chart-container"><canvas id="sciCodeChart" role="img" aria-label="Horizontal bar chart of SciCode scores by model"></canvas></div>
  </div>
  <div class="card p-4">
    <h3 class="text-base font-bold text-gray-800 mb-4">IFBench</h3>
    <div class="chart-container"><canvas id="ifBenchChart" role="img" aria-label="Horizontal bar chart of IFBench scores by model"></canvas></div>
  </div>
  <div class="card p-4">
    <h3 class="text-base font-bold text-gray-800 mb-4">AIME 2025 (Math)</h3>
    <div class="chart-container"><canvas id="aimeChart" role="img" aria-label="Horizontal bar chart of AIME 2025 math scores by model"></canvas></div>
  </div>
  <div class="card p-4">
    <h3 class="text-base font-bold text-gray-800 mb-4">CritPt (Physics)</h3>
    <div class="chart-container"><canvas id="critPtChart" role="img" aria-label="Horizontal bar chart of CritPt physics scores by model"></canvas></div>
  </div>
  <div class="card p-4">
    <h3 class="text-base font-bold text-gray-800 mb-4">MMMU Pro (Visual)</h3>
    <div class="chart-container"><canvas id="mmmuChart" role="img" aria-label="Horizontal bar chart of MMMU Pro visual scores by model"></canvas></div>
  </div>
</div>
<!-- Detailed Benchmarks Table: base vs. thinking score for each benchmark.
     The table is named by the card heading via aria-labelledby, column headers
     declare scope="col", and literal ampersands in cell text are written as &amp;. -->
<div class="card overflow-hidden mb-12">
  <div class="px-6 py-4 border-b border-gray-100 bg-gray-50">
    <h3 id="detailed-benchmarks-title" class="text-lg font-bold text-gray-800">Detailed Benchmark Results</h3>
    <p class="text-sm text-gray-500">Comparison of Base vs. Thinking (variable 6-15% gain).</p>
  </div>
  <!-- Horizontal scroll wrapper: cells never wrap (whitespace-nowrap), so narrow screens scroll instead. -->
  <div class="overflow-x-auto">
    <table class="min-w-full text-left text-sm whitespace-nowrap" aria-labelledby="detailed-benchmarks-title">
      <thead>
        <tr class="bg-gray-50 border-b border-gray-100 text-gray-500 uppercase tracking-wider text-xs">
          <th scope="col" class="px-6 py-4 font-semibold">Benchmark</th>
          <th scope="col" class="px-6 py-4 font-semibold">Category</th>
          <th scope="col" class="px-6 py-4 font-semibold text-center">Base Score (1B)</th>
          <th scope="col" class="px-6 py-4 font-semibold text-center">Thinking Score</th>
          <th scope="col" class="px-6 py-4 font-semibold text-right">Boost</th>
        </tr>
      </thead>
      <tbody class="divide-y divide-gray-100">
        <!-- Terminal-Bench Hard -->
        <tr class="table-row-hover">
          <td class="px-6 py-4 font-bold text-blue-600">Terminal-Bench Hard</td>
          <td class="px-6 py-4 text-gray-600">Agentic Coding</td>
          <td class="px-6 py-4 text-center text-gray-500">5.0%</td>
          <td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">5.4%</span></td>
          <td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+8%</span></td>
        </tr>
        <!-- Tau-Bench -->
        <tr class="table-row-hover">
          <td class="px-6 py-4 font-bold text-blue-600">𝜏²-Bench Telecom</td>
          <td class="px-6 py-4 text-gray-600">Agentic Tool Use</td>
          <td class="px-6 py-4 text-center text-gray-500">5.0%</td>
          <td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">5.35%</span></td>
          <td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+7%</span></td>
        </tr>
        <!-- AA-LCR -->
        <tr class="table-row-hover">
          <td class="px-6 py-4 font-bold text-blue-600">AA-LCR</td>
          <td class="px-6 py-4 text-gray-600">Long Context Reasoning</td>
          <td class="px-6 py-4 text-center text-gray-500">10.0%</td>
          <td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">10.9%</span></td>
          <td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+9%</span></td>
        </tr>
        <!-- HLE -->
        <tr class="table-row-hover">
          <td class="px-6 py-4 font-bold text-blue-600">Humanity's Last Exam</td>
          <td class="px-6 py-4 text-gray-600">Reasoning &amp; Knowledge</td>
          <td class="px-6 py-4 text-center text-gray-500">5.2%</td>
          <td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">5.6%</span></td>
          <td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+8%</span></td>
        </tr>
        <!-- MMLU-Pro -->
        <tr class="table-row-hover">
          <td class="px-6 py-4 font-bold text-blue-600">MMLU-Pro</td>
          <td class="px-6 py-4 text-gray-600">Reasoning &amp; Knowledge</td>
          <td class="px-6 py-4 text-center text-gray-500">14.0%</td>
          <td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">15.3%</span></td>
          <td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+9.2%</span></td>
        </tr>
        <!-- GPQA -->
        <tr class="table-row-hover">
          <td class="px-6 py-4 font-bold text-blue-600">GPQA Diamond</td>
          <td class="px-6 py-4 text-gray-600">Scientific Reasoning</td>
          <td class="px-6 py-4 text-center text-gray-500">24.0%</td>
          <td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">25.9%</span></td>
          <td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+8%</span></td>
        </tr>
        <!-- LiveCodeBench -->
        <tr class="table-row-hover">
          <td class="px-6 py-4 font-bold text-blue-600">LiveCodeBench</td>
          <td class="px-6 py-4 text-gray-600">Coding</td>
          <td class="px-6 py-4 text-center text-gray-500">2.0%</td>
          <td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">2.16%</span></td>
          <td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+8%</span></td>
        </tr>
        <!-- SciCode -->
        <tr class="table-row-hover">
          <td class="px-6 py-4 font-bold text-blue-600">SciCode</td>
          <td class="px-6 py-4 text-gray-600">Scientific Coding</td>
          <td class="px-6 py-4 text-center text-gray-500">1.0%</td>
          <td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">1.06%</span></td>
          <td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+6%</span></td>
        </tr>
        <!-- IFBench -->
        <tr class="table-row-hover">
          <td class="px-6 py-4 font-bold text-blue-600">IFBench</td>
          <td class="px-6 py-4 text-gray-600">Instruction Following</td>
          <td class="px-6 py-4 text-center text-gray-500">20.0%</td>
          <td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">21.6%</span></td>
          <td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+8%</span></td>
        </tr>
        <!-- AIME 2025: the largest boost, so its badge uses the purple highlight style. -->
        <tr class="table-row-hover">
          <td class="px-6 py-4 font-bold text-blue-600">AIME 2025</td>
          <td class="px-6 py-4 text-gray-600">Competition Math</td>
          <td class="px-6 py-4 text-center text-gray-500">3.0%</td>
          <td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">3.45%</span></td>
          <td class="px-6 py-4 text-right"><span class="bg-purple-100 text-purple-800 px-2 py-0.5 rounded text-xs font-bold">+15%</span></td>
        </tr>
        <!-- CritPt -->
        <tr class="table-row-hover">
          <td class="px-6 py-4 font-bold text-blue-600">CritPt</td>
          <td class="px-6 py-4 text-gray-600">Physics Reasoning</td>
          <td class="px-6 py-4 text-center text-gray-500">0.5%</td>
          <td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">0.54%</span></td>
          <td class="px-6 py-4 text-right"><span class="bg-blue-100 text-blue-800 px-2 py-0.5 rounded text-xs">+8%</span></td>
        </tr>
        <!-- MMMU Pro: both scores are 0.0%, so the boost is shown as N/A in a neutral badge. -->
        <tr class="table-row-hover">
          <td class="px-6 py-4 font-bold text-blue-600">MMMU Pro</td>
          <td class="px-6 py-4 text-gray-600">Visual Reasoning</td>
          <td class="px-6 py-4 text-center text-gray-500">0.0%</td>
          <td class="px-6 py-4 text-center"><span class="bg-purple-100 text-purple-700 px-2 py-1 rounded font-bold">0.0%</span></td>
          <td class="px-6 py-4 text-right"><span class="bg-gray-200 text-gray-600 px-2 py-0.5 rounded text-xs">N/A</span></td>
        </tr>
      </tbody>
    </table>
  </div>
</div>
</div>
<script>
// --- Shared Configurations ---
// Chart.js global defaults: font family, font size and text color inherited by
// every chart created afterwards.
Object.assign(Chart.defaults.font, {
  family: "'Inter', sans-serif",
  size: 10
});
Chart.defaults.color = '#64748b';

// Bar colors, selected per model when a chart configuration is built.
const baseBlue = '#93c5fd',        // Base Gemma 3 1B
  thinkingColor = '#7c3aed',       // Gemma 3 1B Thinking
  competitorColor = '#94a3b8';     // Every other (competitor) model

// Model names in canonical order. A name's position here is the position of that
// model's score in each array of `benchmarks` below.
const modelList = [
  'Gemini 3 Pro',         // index 0
  'GPT 5.1',              // index 1
  'Claude 4.5 Sonnet',    // index 2
  'Grok 4 Heavy',         // index 3
  'DeepSeek V3.2',        // index 4
  'Kimi K2 Thinking',     // index 5
  'GLM 4.6',              // index 6
  'Gemma 3 1B Thinking',  // index 7
  'Gemma 3 1B'            // index 8
];

// Benchmark scores as fractions, one array per benchmark.
// Per the original author's note: taken from images, plus estimates for the 1B models.
// Column order: Gem3P, GPT5.1, C4.5, Grok4, DSV3.2, KimiK2, GLM4.6, G3Thinking, G3Base
// The note above each row records the base score and relative boost used to
// derive the Thinking value.
const benchmarks = {
  // Estimated base 5%; +8% -> 5.4%
  terminal: [0.39, 0.43, 0.33, 0.38, 0.33, 0.29, 0.23, 0.054, 0.05],

  // Estimated base 5%; +7% -> 5.35%
  telecom: [0.87, 0.82, 0.78, 0.75, 0.91, 0.93, 0.71, 0.0535, 0.05],

  // Estimated base 10%; +9% -> 10.9%
  aalcr: [0.71, 0.75, 0.66, 0.68, 0.65, 0.66, 0.54, 0.109, 0.10],

  // Base 5.2%; +8% -> 5.6%
  hle: [0.372, 0.265, 0.173, 0.239, 0.222, 0.223, 0.133, 0.056, 0.052],

  // Base 14%; +9.2% -> 15.3%
  mmluPro: [0.90, 0.87, 0.88, 0.87, 0.86, 0.85, 0.83, 0.153, 0.14],

  // Base 24%; +8% -> 25.9%
  gpqa: [0.91, 0.87, 0.83, 0.88, 0.84, 0.84, 0.78, 0.259, 0.24],

  // Base 2%; +8% -> 2.16%
  liveCode: [0.92, 0.87, 0.71, 0.82, 0.86, 0.85, 0.70, 0.0216, 0.02],

  // Base 1%; +6% -> 1.06%
  sciCode: [0.56, 0.43, 0.45, 0.46, 0.39, 0.42, 0.38, 0.0106, 0.01],

  // Base 20%; +8% -> 21.6%
  ifBench: [0.70, 0.73, 0.57, 0.54, 0.61, 0.68, 0.43, 0.216, 0.20],

  // MATH: base 3%; +15% -> 3.45%
  aime: [0.96, 0.94, 0.88, 0.93, 0.92, 0.95, 0.86, 0.0345, 0.03],

  // Base 0.5%; +8% -> 0.54%
  critPt: [0.09, 0.05, 0.01, 0.02, 0.03, 0.03, 0.01, 0.0054, 0.005],

  // Base 0 (text only); no boost.
  mmmu: [0.80, 0.76, 0.69, 0.69, 0.0, 0.0, 0.0, 0.0, 0.0]
};
/**
 * Build the Chart.js configuration for one benchmark's horizontal bar chart.
 *
 * Each name in the global `modelList` is paired with the score at the same
 * index in `dataPoints`. The pairs are ordered from highest to lowest score,
 * and each bar is colored by model: `thinkingColor` for 'Gemma 3 1B Thinking',
 * `baseBlue` for 'Gemma 3 1B', and `competitorColor` for everything else.
 *
 * @param {number[]} dataPoints Scores in `modelList` order.
 * @returns {object} A config object suitable for `new Chart(ctx, config)`.
 */
function createBarConfig(dataPoints) {
  // Resolve the bar color for a given model label.
  const colorFor = (modelName) => {
    switch (modelName) {
      case 'Gemma 3 1B Thinking':
        return thinkingColor;
      case 'Gemma 3 1B':
        return baseBlue;
      default:
        return competitorColor;
    }
  };

  // Zip labels with scores, then rank them best-first.
  const ranked = modelList
    .map((modelName, position) => ({ label: modelName, value: dataPoints[position] }))
    .sort((first, second) => second.value - first.value);

  // Split the ranked pairs back into the parallel arrays Chart.js expects.
  const rankedLabels = [];
  const rankedScores = [];
  const barColors = [];
  for (const entry of ranked) {
    rankedLabels.push(entry.label);
    rankedScores.push(entry.value);
    barColors.push(colorFor(entry.label));
  }

  const dataset = {
    label: 'Score (0-1)',
    data: rankedScores,
    backgroundColor: barColors,
    borderRadius: 4,
    barPercentage: 0.7,
  };

  const options = {
    indexAxis: 'y', // categories on the y axis, so bars run horizontally
    responsive: true,
    maintainAspectRatio: false, // let the surrounding container set the height
    plugins: {
      legend: { display: false },
      tooltip: {
        callbacks: {
          // Show the bar's value with three decimal places.
          label: (tooltipItem) => tooltipItem.parsed.x.toFixed(3)
        }
      }
    },
    scales: {
      x: {
        beginAtZero: true,
        grid: { display: false },
        ticks: { display: true }
      },
      y: {
        grid: { display: false },
        // autoSkip off so every model label is drawn.
        ticks: { font: { weight: '500' }, autoSkip: false }
      }
    }
  };

  return {
    type: 'bar',
    data: { labels: rankedLabels, datasets: [dataset] },
    options: options
  };
}
// --- Render Charts ---
// Create one chart per entry in `benchmarks`, in declaration order. Each canvas
// id is the benchmark key followed by "Chart" (e.g. `terminal` -> `terminalChart`),
// so the loop derives the id instead of repeating one constructor call per chart.
Object.keys(benchmarks).forEach(function (benchmarkKey) {
  const canvas = document.getElementById(benchmarkKey + 'Chart');
  if (!canvas) {
    // A missing canvas is reported and skipped so it cannot stop the remaining
    // charts from rendering.
    console.warn('No canvas found for benchmark "' + benchmarkKey + '"; skipping chart.');
    return;
  }
  new Chart(canvas.getContext('2d'), createBarConfig(benchmarks[benchmarkKey]));
});
</script>
</body>
</html>