<!--
Tiny-ML-Leaderboard / index.html
MihaiPopa-1's picture
Add PotentSulfurLM, as a followup to CinnabarLM!
d11bc14 verified
raw
history blame
16 kB
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Tiny-ML Leaderboard</title>
<!-- Chart.js 4.x from jsDelivr. `defer` keeps the download from blocking HTML parsing; deferred
     scripts still run before DOMContentLoaded, which is where the inline script first touches `Chart`. -->
<script src="https://cdn.jsdelivr.net/npm/chart.js@4" defer></script>
<style>
  /* Design tokens. The four organisation colours are repeated as literals in the
     colorMap / bgMap objects of the inline script, so keep both places in sync. */
  :root {
    --bg: #0d1117;
    --card: #161b22;
    --border: #30363d;
    --text: #c9d1d9;
    --accent: #58a6ff;
    --green: #3fb950;
    --orange: #d29922;
    --compactai: #3fb950;
    --supralabs: #58a6ff;
    --axiomiclabs: #c2b6ff;
    --mihaipopa: #93c6aa;
    /* Secondary text colour. Used for every de-emphasised text on the page. */
    --muted: #8b949e;
  }

  /* Reset and page frame */
  * { margin: 0; padding: 0; box-sizing: border-box; }
  body { font-family: -apple-system,BlinkMacSystemFont,'Segoe UI',Helvetica,Arial,sans-serif; background: var(--bg); color: var(--text); padding: 2rem; }
  .container { max-width: 1200px; margin: 0 auto; }
  h1 { font-size: 1.8rem; margin-bottom: .25rem; }
  .subtitle { color: var(--muted); margin-bottom: 2rem; font-size: .95rem; }
  .note { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; margin-bottom: 2rem; font-size: .9rem; line-height: 1.5; }
  .note a { color: var(--accent); }
  .section-title { font-size: 1.2rem; margin: 2rem 0 1rem; padding-bottom: .5rem; border-bottom: 1px solid var(--border); }

  /* Charts: two columns, with `.full` cards spanning the whole row */
  .chart-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 1.5rem; margin-bottom: 2rem; }
  .chart-card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
  .chart-card.full { grid-column: 1 / -1; }
  .chart-card h3 { font-size: .95rem; margin-bottom: .75rem; color: var(--text); }
  .chart-card canvas { max-height: 360px; }
  .legend { display: flex; gap: 1.5rem; margin-bottom: .75rem; font-size: .8rem; }
  .legend-item { display: flex; align-items: center; gap: .4rem; }
  .legend-dot { width: 10px; height: 10px; border-radius: 50%; }

  /* Results table */
  table { width: 100%; border-collapse: collapse; font-size: .85rem; }
  th { background: var(--card); padding: .75rem .5rem; text-align: center; border: 1px solid var(--border); font-weight: 600; white-space: nowrap; }
  td { padding: .6rem .5rem; text-align: center; border: 1px solid var(--border); }
  td:first-child, th:first-child { text-align: left; padding-left: 1rem; }
  tr:hover td { background: rgba(88,166,255,.05); }

  /* Per-organisation tag and row accent. The class suffix is the `org` key of each
     model record; the script builds `org-<key>` and `row-<key>` class names from it. */
  .org-tag { display: inline-block; padding: .15rem .5rem; border-radius: 4px; font-size: .75rem; font-weight: 600; }
  .org-compactai { background: #3fb95033; color: var(--compactai); }
  .org-supralabs { background: #1f6feb33; color: var(--supralabs); }
  .org-axiomiclabs { background: #8957e533; color: var(--axiomiclabs); }
  .org-mihaipopa { background: #93c6aa33; color: var(--mihaipopa); }
  .row-compactai { border-left: 3px solid var(--compactai); }
  .row-supralabs { border-left: 3px solid var(--supralabs); }
  .row-axiomiclabs { border-left: 3px solid var(--axiomiclabs); }
  .row-mihaipopa { border-left: 3px solid var(--mihaipopa); }

  /* Cell states: best score in a column, and missing score */
  .best { color: var(--green); font-weight: 700; }
  /* Was #484f58, which is only about 2.3:1 against --bg; --muted is about 6:1 and
     clears the WCAG AA 4.5:1 minimum. The italic still marks the cell as "no value". */
  .na { color: var(--muted); font-style: italic; }
  .links a { color: var(--accent); text-decoration: none; }
  .links a:hover { text-decoration: underline; }

  /* Footer and methodology. The footer carries the data-provenance disclaimer, so it
     uses --muted (previously #484f58) to stay legible at its small font size. */
  footer { margin-top: 3rem; padding-top: 1.5rem; border-top: 1px solid var(--border); font-size: .8rem; color: var(--muted); text-align: center; }
  footer a { color: var(--accent); }
  .methodology { margin-top: 2rem; }
  .methodology h3 { margin-bottom: .5rem; }
  .methodology p { font-size: .85rem; line-height: 1.6; color: var(--muted); }

  /* Narrow screens: single-column charts and a denser table */
  @media (max-width: 768px) {
    body { padding: 1rem; }
    .chart-grid { grid-template-columns: 1fr; }
    table { font-size: .75rem; }
    th, td { padding: .4rem .3rem; }
  }
</style>
</head>
<body>
<div class="container">
<h1>Tiny-ML Leaderboard</h1>
<p class="subtitle">Sub-100M parameter language models, same eval harness, transparent methodology.</p>
<!-- Intro box. The submission link opens in a new tab, so it carries rel="noopener";
     the former trailing <br> was spacing-only markup and has been dropped. -->
<div class="note">
<strong>Why this exists.</strong> The community deserves a single place to compare tiny LMs fairly.
We include every model with verifiable benchmarks — ours, our competitors', yours.
<a href="https://huggingface.co/spaces/CompactAI-O/Tiny-ML-Leaderboard/discussions" target="_blank" rel="noopener">Submit a model via PR.</a>
</div>
<h2 class="section-title">Detailed Results</h2>
<table>
<thead>
<tr>
<!-- scope="col" ties every data cell to its column header for assistive technology. -->
<th scope="col">Model</th>
<th scope="col">Org</th>
<th scope="col">Params</th>
<th scope="col">WikiText-2 ↓</th>
<th scope="col">BLiMP ↑</th>
<th scope="col">ARC-Easy ↑</th>
<th scope="col">Training Tokens</th>
<th scope="col">Links</th>
</tr>
</thead>
<tbody id="leaderboard-body">
<!-- Rows are generated at runtime by renderTable() in the inline script. -->
</tbody>
</table>
<h2 class="section-title">Benchmark Overview</h2>
<div class="legend">
<span class="legend-item"><span class="legend-dot" style="background:var(--compactai)"></span> CompactAI</span>
<span class="legend-item"><span class="legend-dot" style="background:var(--supralabs)"></span> SupraLabs</span>
<span class="legend-item"><span class="legend-dot" style="background:var(--axiomiclabs)"></span> Axiomic Labs</span>
<span class="legend-item"><span class="legend-dot" style="background:var(--mihaipopa)"></span> Mihai Popa</span>
</div>
<div class="chart-grid">
<div class="chart-card">
<h3>BLiMP ↑ (higher is better)</h3>
<canvas id="blimpChart"></canvas>
</div>
<div class="chart-card">
<h3>ARC-Easy ↑ (higher is better)</h3>
<canvas id="arcChart"></canvas>
</div>
<div class="chart-card full">
<h3>WikiText-2 ↓ (lower is better)</h3>
<canvas id="wikiChart"></canvas>
</div>
</div>
<div class="methodology" style="margin-top:1.5rem">
<h3>Add your model</h3>
<p>
Open a PR on this Space with your model's benchmark results and reproduction steps.
We require: params, training data provenance, eval harness used, and scores for at least 2 of the 3 benchmarks.
</p>
</div>
<footer>
<p>Tiny-ML Leaderboard by <a href="https://huggingface.co/CompactAI-O">CompactAI</a>.
Not affiliated with SupraLabs or LH-Tech-AI.
All benchmark data is self-reported by model authors unless otherwise noted.</p>
</footer>
</div>
<script>
// Leaderboard data: one record per model. renderTable() turns each record into a
// table row and buildChart() plots the records that have a numeric value for a metric.
//
// Record fields:
//   name   - display name (table row and chart label).
//   org    - organisation key; must exist in orgNameMap / colorMap / bgMap and have
//            matching `.org-<key>` / `.row-<key>` CSS rules.
//   params - parameter count as a display string, rendered verbatim.
//   blimp  - BLiMP score (rendered with a trailing '%'), or null when there is no score yet.
//   arc    - ARC-Easy score (rendered with a trailing '%'), or null when there is no score yet.
//   wiki   - WikiText-2 value (lower is better), or null when there is no score yet.
//   tokens - training-token count as a display string, rendered verbatim ('—' where no count is given).
//   links  - { card, base? }: `card` is always rendered as a link, `base` only when present.
//
// Array order is not the display order: the table is sorted by getScore() and each
// chart is sorted by its own metric.
const models = [
{
name: 'Supra-50M-Instruct',
org: 'supralabs',
params: '51.8M',
blimp: 76.3,
arc: 52.2,
wiki: null,
tokens: '20B',
links: { card: 'https://huggingface.co/SupraLabs/Supra-50M-Instruct', base: 'https://huggingface.co/SupraLabs/Supra-50M-Base' }
},
{
name: 'GPT-S-5M',
org: 'axiomiclabs',
params: '5.16M',
blimp: 72.27,
arc: 35.69,
wiki: 2.56,
tokens: '25B',
links: { card: 'https://huggingface.co/AxiomicLabs/GPT-S-5M' }
},
{
name: 'Glint-1.3 (merged)',
org: 'compactai',
params: '982K',
blimp: 68.7,
arc: 32.5,
wiki: 3.08,
tokens: '100B',
links: { card: 'https://huggingface.co/CompactAI-O/Glint-1.3' }
},
{
name: 'Supra-Mini-v5',
org: 'supralabs',
params: '7.87M',
blimp: 63.5,
arc: 34.4,
wiki: 2.66,
tokens: '—',
links: { card: 'https://huggingface.co/SupraLabs/Supra-Mini-v5-8M' }
},
{
name: 'CinnabarLM 4M',
org: 'mihaipopa',
params: '4.23M',
blimp: 62.87,
arc: 27.36,
wiki: 2.226,
tokens: '~80M',
links: { card: 'https://huggingface.co/MihaiPopa-1/CinnabarLM-4M-Base' }
},
{
name: 'Glint-1',
org: 'compactai',
params: '1M',
blimp: 61.2,
arc: 32.0,
wiki: 4.07,
tokens: '100B',
links: { card: 'https://huggingface.co/CompactAI-O/Glint-1' }
},
{
name: 'Supra-Mini-v4',
org: 'supralabs',
params: '2.62M',
blimp: 60.7,
arc: 31.5,
wiki: 3.17,
tokens: '—',
links: { card: 'https://huggingface.co/SupraLabs/Supra-Mini-v4-2M' }
},
{
name: 'Glint-0.4',
org: 'compactai',
params: '1M',
blimp: 58.5,
arc: 31.0,
wiki: 5.24,
tokens: '10B',
links: { card: 'https://huggingface.co/CompactAI-O/Glint-0.4' }
},
{
name: 'Supra-Mini-v3',
org: 'supralabs',
params: '468K',
blimp: 55.3,
arc: 27.3,
wiki: 4.49,
tokens: '—',
links: { card: 'https://huggingface.co/SupraLabs/Supra-Mini-v3-0.5M' }
},
{
name: 'Supra-Mini-v2',
org: 'supralabs',
params: '168K',
blimp: 53.5,
arc: 26.8,
wiki: 7.79,
tokens: '—',
links: { card: 'https://huggingface.co/SupraLabs/Supra-Mini-v2-0.1M' }
},
{
name: 'Glint-0.2',
org: 'compactai',
params: '1M',
blimp: 49.8,
arc: 27.0,
wiki: null,
tokens: '~100M',
links: { card: 'https://huggingface.co/CompactAI-O/Glint-0.2' }
},
{
name: 'Glint-0.3',
org: 'compactai',
params: '1M',
blimp: 47.3,
arc: 25.5,
wiki: null,
tokens: '~100M',
links: { card: 'https://huggingface.co/CompactAI-O/Glint-0.3' }
},
{
name: 'Glint-0.1',
org: 'compactai',
params: '1M',
blimp: 46.7,
arc: 21.0,
wiki: null,
tokens: '~100M',
links: { card: 'https://huggingface.co/CompactAI-O/Glint-0.1' }
},
{
name: 'Shard-1',
org: 'compactai',
params: '54.5M',
blimp: null,
arc: null,
wiki: null,
tokens: '~20B',
links: { card: 'https://huggingface.co/CompactAI-O/Shard-1' }
},
{
name: 'CinnabarLM 1.5M',
org: 'mihaipopa',
params: '1.71M',
blimp: 60.51,
arc: 26.68,
wiki: 2.35,
tokens: '~50M',
links: { card: 'https://huggingface.co/MihaiPopa-1/CinnabarLM-1.5M-Base' }
},
{
name: 'CinnabarLM 1.4M',
org: 'mihaipopa',
params: '1.51M',
blimp: 60.7,
arc: 24.58,
wiki: 2.316,
tokens: '~30M',
links: { card: 'https://huggingface.co/MihaiPopa-1/CinnabarLM-1.4M-Base' }
},
{
name: 'PotentSulfurLM 500K',
org: 'mihaipopa',
params: '587K',
blimp: 59.01,
arc: 27.06,
wiki: 2.613,
tokens: '~200M',
links: { card: 'https://huggingface.co/MihaiPopa-1/PotentSulfurLM-500K-Base' }
},
{
name: 'StorySupra-10M',
org: 'supralabs',
params: '12.6M',
blimp: null,
arc: null,
wiki: null,
tokens: '—',
links: { card: 'https://huggingface.co/SupraLabs/StorySupra-10M' }
},
{
name: 'DistillSupra-0.2M',
org: 'supralabs',
params: '289K',
blimp: null,
arc: null,
wiki: null,
tokens: '—',
links: { card: 'https://huggingface.co/SupraLabs/DistillSupra-0.2M' }
},
{
name: 'MicroSupra-1k',
org: 'supralabs',
params: '1K',
blimp: null,
arc: null,
wiki: null,
tokens: '—',
links: { card: 'https://huggingface.co/SupraLabs/MicroSupra-1k' }
},
{
name: 'TrueMath',
org: 'compactai',
params: '1-layer',
blimp: null,
arc: null,
wiki: null,
tokens: 'synthetic',
links: { card: 'https://huggingface.co/CompactAI-O/TrueMath' }
}
];
// Human-readable organisation name for each `org` key used in `models`;
// renderTable() shows it inside the Org column tag.
const orgNameMap = {
compactai: 'CompactAI',
supralabs: 'SupraLabs',
axiomiclabs: 'Axiomic Labs',
mihaipopa: 'Mihai Popa'
};
// Solid colour for each `org` key; buildChart() uses it as the bar border colour.
// The values repeat the --compactai / --supralabs / --axiomiclabs / --mihaipopa
// CSS custom properties declared in :root.
const colorMap = {
compactai: '#3fb950',
supralabs: '#58a6ff',
axiomiclabs: '#c2b6ff',
mihaipopa: '#93c6aa'
};
// Translucent (alpha 0.7) bar fill for each `org` key, used by buildChart().
// Every entry is the same RGB triple as the org's colorMap hex value so the bars
// match the legend dots and table tags:
//   #3fb950 -> 63,185,80    #58a6ff -> 88,166,255
//   #c2b6ff -> 194,182,255  #93c6aa -> 147,198,170
const bgMap = {
  compactai: 'rgba(63,185,80,0.7)',
  supralabs: 'rgba(88,166,255,0.7)',
  axiomiclabs: 'rgba(194,182,255,0.7)',
  // Previously rgba(99,232,173,0.7), which is not #93c6aa and rendered a different green.
  mihaipopa: 'rgba(147,198,170,0.7)'
};
/**
 * Ranking key used to order the leaderboard table (higher sorts first).
 *
 * A missing (null/undefined) or zero benchmark counts as "no score".
 *
 * @param {{blimp: ?number, arc: ?number}} m - model record.
 * @returns {number} the mean of BLiMP and ARC-Easy when both are present, the
 *   single available score when only one is, and -1 when neither is.
 */
function getScore(m) {
  const blimpScore = m.blimp ? m.blimp : 0;
  const arcScore = m.arc ? m.arc : 0;
  const hasBlimp = blimpScore !== 0;
  const hasArc = arcScore !== 0;

  if (hasBlimp && hasArc) return (blimpScore + arcScore) / 2;
  if (hasBlimp) return blimpScore;
  if (hasArc) return arcScore;
  return -1;
}
// Gradient endpoints as [r, g, b]: the worst score in a column is drawn in white,
// the best in the leaderboard green (#3fb950).
const worstGreen = [255, 255, 255];
const bestGreen = [63, 185, 80];

/**
 * Build the inline `color` declaration for one score cell.
 *
 * The value is placed on the [min, max] range (optionally on a log scale), flipped
 * when lower is better, clamped to [0, 1] and raised to the power 2.5 so that only
 * scores close to the best turn noticeably green.
 *
 * @param {?number} value - the score to colour.
 * @param {number} min - smallest score in the column.
 * @param {number} max - largest score in the column.
 * @param {boolean} lowerIsBetter - true when `min` is the best score.
 * @param {boolean} [useLog=false] - compare logarithms instead of raw values.
 * @returns {string} `color: rgb(r, g, b)`, or '' when no colour can be computed
 *   (missing value, min === max, or a position that is not a number).
 */
function getColor(value, min, max, lowerIsBetter, useLog = false) {
  if (value === null || isNaN(value) || min === max) return '';

  let v = value, mn = min, mx = max;
  if (useLog) { v = Math.log(v); mn = Math.log(mn); mx = Math.log(mx); }

  let percent = (v - mn) / (mx - mn);
  // NaN shows up for an empty column (min = Infinity, max = -Infinity) and for
  // non-positive inputs on the log scale; emitting "rgb(NaN, ...)" would be invalid CSS.
  // Infinite positions are fine: the clamp below pins them to an end of the gradient.
  if (Number.isNaN(percent)) return '';
  if (lowerIsBetter) percent = 1 - percent;

  const adjusted = Math.pow(Math.max(0, Math.min(1, percent)), 2.5);
  const channels = worstGreen.map(
    (from, i) => Math.round(from + (bestGreen[i] - from) * adjusted)
  );
  return `color: rgb(${channels.join(', ')})`;
}
/**
 * Fill <tbody id="leaderboard-body"> with one row per entry of `models`.
 *
 * Rows are ordered by getScore(), highest first. Each of the three metric cells is
 * tinted with getColor() relative to the column's min/max, the best value in a
 * column gets the `best` class, and a metric without a numeric value is shown as
 * 'TBD' (null) or 'not reported' (anything else) with the `na` class.
 *
 * Does nothing when the tbody element is not in the document.
 */
function renderTable() {
  const tbody = document.getElementById('leaderboard-body');
  if (!tbody) return;

  // A metric is "reported" only when it is an actual number. This is the same test
  // buildChart() applies, so a legitimate score of 0 appears in both table and charts
  // instead of being displayed as 'not reported' here.
  const isReported = v => typeof v === 'number' && !isNaN(v);

  // Column definitions, in the same order as the table header
  // (WikiText-2, BLiMP, ARC-Easy), each extended with the column's value range.
  const columns = [
    { key: 'wiki', suffix: '', lowerIsBetter: true, useLog: true },
    { key: 'blimp', suffix: '%', lowerIsBetter: false, useLog: false },
    { key: 'arc', suffix: '%', lowerIsBetter: false, useLog: false }
  ].map(col => {
    const vals = models.map(m => m[col.key]).filter(isReported);
    return { ...col, min: Math.min(...vals), max: Math.max(...vals) };
  });

  // Markup for a single metric cell of model `m`.
  const metricCell = (m, col) => {
    const value = m[col.key];
    if (!isReported(value)) {
      return `<td class="na">${value === null ? 'TBD' : 'not reported'}</td>`;
    }
    const isBest = value === (col.lowerIsBetter ? col.min : col.max);
    const style = getColor(value, col.min, col.max, col.lowerIsBetter, col.useLog);
    return `<td style="${style}"><span class="${isBest ? 'best' : ''}">${value}${col.suffix}</span></td>`;
  };

  // Copy before sorting so the shared `models` array keeps its order.
  const sortedModels = [...models].sort((a, b) => getScore(b) - getScore(a));

  tbody.innerHTML = sortedModels.map(m => `
<tr class="row-${m.org}">
<td><strong>${m.name}</strong></td>
<td><span class="org-tag org-${m.org}">${orgNameMap[m.org]}</span></td>
<td>${m.params}</td>
${columns.map(col => metricCell(m, col)).join('\n')}
<td>${m.tokens}</td>
<td class="links">
<a href="${m.links.card}">card</a>
${m.links.base ? `<a href="${m.links.base}">base</a>` : ''}
</td>
</tr>
`).join('');
}
/**
 * Draw a horizontal bar chart of one metric on the given canvas.
 *
 * Only models whose value for `metric` is a number are plotted. Bars are coloured
 * per organisation via bgMap (fill) and colorMap (border).
 *
 * @param {string} canvasId - id of the target <canvas> element.
 * @param {string} metric - key of the model field to plot ('blimp', 'arc' or 'wiki').
 *   'wiki' is drawn on a logarithmic axis and formatted without a '%' sign.
 * @param {string} label - human-readable metric name; used as the dataset label and
 *   as the accessible name of the canvas.
 * @param {boolean} reverse - true sorts ascending (smallest value on top),
 *   false sorts descending.
 */
function buildChart(canvasId, metric, label, reverse) {
  const canvas = document.getElementById(canvasId);
  // Skip quietly-but-visibly when the CDN script did not load or the canvas is gone,
  // so one missing chart cannot abort the rest of page initialisation.
  if (!canvas || typeof Chart === 'undefined') {
    console.warn(`Tiny-ML Leaderboard: cannot draw chart "${canvasId}" (Chart.js or canvas missing).`);
    return;
  }

  // A bare <canvas> is invisible to assistive technology; expose it as a labelled image.
  canvas.setAttribute('role', 'img');
  canvas.setAttribute('aria-label', `Bar chart: ${label} per model`);

  const fmt = metric === 'wiki' ? v => v.toFixed(2) : v => v.toFixed(1) + '%';
  const sorted = models
    .filter(d => d[metric] !== null && typeof d[metric] === 'number')
    .sort((a, b) => reverse ? a[metric] - b[metric] : b[metric] - a[metric]);

  new Chart(canvas, {
    type: 'bar',
    data: {
      labels: sorted.map(d => d.name),
      datasets: [{
        // Not displayed (legend is off, tooltip text is custom); kept as dataset metadata.
        label: label,
        data: sorted.map(d => d[metric]),
        backgroundColor: sorted.map(d => bgMap[d.org]),
        borderColor: sorted.map(d => colorMap[d.org]),
        borderWidth: 1,
        borderRadius: 3,
      }]
    },
    options: {
      indexAxis: 'y', // horizontal bars: values run along x, model names along y
      responsive: true,
      maintainAspectRatio: true,
      plugins: {
        legend: { display: false },
        tooltip: {
          callbacks: {
            label: ctx => fmt(ctx.parsed.x)
          }
        }
      },
      scales: {
        x: {
          type: metric === 'wiki' ? 'logarithmic' : 'linear',
          beginAtZero: metric !== 'wiki',
          grid: { color: 'rgba(255,255,255,0.06)' },
          ticks: { color: '#8b949e', callback: v => metric === 'wiki' ? v.toFixed(1) : v + '%' }
        },
        y: {
          grid: { display: false },
          ticks: {
            color: '#c9d1d9',
            font: { size: 11 },
            autoSkip: false, // show every model name
            // Regular function on purpose: `this` must be the scale so the label
            // can be looked up. Names longer than 18 characters are truncated.
            callback: function(val) {
              const name = this.getLabelForValue(val);
              return name.length > 18 ? name.slice(0, 17) + '…' : name;
            }
          }
        }
      }
    }
  });
}
// Page start-up: once the DOM is parsed, fill the results table and then draw the
// three charts in order (BLiMP, ARC-Easy, WikiText-2).
window.addEventListener('DOMContentLoaded', function initLeaderboard() {
  renderTable();

  // [canvas id, metric key, label, sort ascending?]
  const chartSpecs = [
    ['blimpChart', 'blimp', 'BLiMP Accuracy', false],
    ['arcChart', 'arc', 'ARC-Easy Accuracy', false],
    ['wikiChart', 'wiki', 'WikiText-2 Perplexity', true]
  ];
  for (const spec of chartSpecs) {
    buildChart(...spec);
  }
});
</script>
</body>
</html>