Tiny-ML-Leaderboard / index.html
LH-Tech-AI's picture
Update index.html
0a1878e verified
raw
history blame
16.4 kB
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Tiny-ML Leaderboard</title>
<script src="https://cdn.jsdelivr.net/npm/chart.js@4"></script>
<style>
/* Design tokens: dark palette shared by the page chrome, the results table
   and the organisation tags. The two organisation colours are repeated as
   literals in the page script (colorMap / bgMap), so change them together. */
:root {
  --bg: #0d1117;
  --card: #161b22;
  --border: #30363d;
  --text: #c9d1d9;
  /* Secondary text. Roughly 6:1 contrast against --bg, so it stays readable
     for placeholders and the footer disclaimer. */
  --muted: #8b949e;
  --accent: #58a6ff;
  --green: #3fb950;
  --orange: #d29922;
  --compactai: #3fb950;
  --supralabs: #58a6ff;
}

/* Reset and page frame */
* { margin: 0; padding: 0; box-sizing: border-box; }
body { font-family: -apple-system,BlinkMacSystemFont,'Segoe UI',Helvetica,Arial,sans-serif; background: var(--bg); color: var(--text); padding: 2rem; }
.container { max-width: 1200px; margin: 0 auto; }
h1 { font-size: 1.8rem; margin-bottom: .25rem; }
.subtitle { color: var(--muted); margin-bottom: 2rem; font-size: .95rem; }
.note { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; margin-bottom: 2rem; font-size: .9rem; line-height: 1.5; }
.note a { color: var(--accent); }
.section-title { font-size: 1.2rem; margin: 2rem 0 1rem; padding-bottom: .5rem; border-bottom: 1px solid var(--border); }

/* Charts: two columns, with .full spanning the whole row */
.chart-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 1.5rem; margin-bottom: 2rem; }
.chart-card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
.chart-card.full { grid-column: 1 / -1; }
.chart-card h3 { font-size: .95rem; margin-bottom: .75rem; color: var(--text); }
.chart-card canvas { max-height: 360px; }
.legend { display: flex; gap: 1.5rem; margin-bottom: .75rem; font-size: .8rem; }
.legend-item { display: flex; align-items: center; gap: .4rem; }
.legend-dot { width: 10px; height: 10px; border-radius: 50%; }

/* Results table */
table { width: 100%; border-collapse: collapse; font-size: .85rem; }
th { background: var(--card); padding: .75rem .5rem; text-align: center; border: 1px solid var(--border); font-weight: 600; white-space: nowrap; }
td { padding: .6rem .5rem; text-align: center; border: 1px solid var(--border); }
td:first-child, th:first-child { text-align: left; padding-left: 1rem; }
tr:hover td { background: rgba(88,166,255,.05); }

/* Organisation tags and the matching row accent */
.org-tag { display: inline-block; padding: .15rem .5rem; border-radius: 4px; font-size: .75rem; font-weight: 600; }
.org-compactai { background: #3fb95033; color: var(--compactai); }
.org-supralabs { background: #1f6feb33; color: var(--supralabs); }
.row-compactai { border-left: 3px solid var(--compactai); }
.row-supralabs { border-left: 3px solid var(--supralabs); }

/* Cell states */
.best { color: var(--green); font-weight: 700; }
/* Missing values stay de-emphasised through italics and the muted colour;
   the previous #484f58 was only about 2.3:1 against the page background. */
.na { color: var(--muted); font-style: italic; }
.links a { color: var(--accent); text-decoration: none; }
.links a:hover { text-decoration: underline; }

/* Footer carries the data-provenance disclaimer, so it uses the readable
   muted colour rather than the former low-contrast grey. */
footer { margin-top: 3rem; padding-top: 1.5rem; border-top: 1px solid var(--border); font-size: .8rem; color: var(--muted); text-align: center; }
footer a { color: var(--accent); }
.methodology { margin-top: 2rem; }
.methodology h3 { margin-bottom: .5rem; }
.methodology p { font-size: .85rem; line-height: 1.6; color: var(--muted); }

/* Narrow screens: single-column charts and a denser table */
@media (max-width: 768px) {
  body { padding: 1rem; }
  .chart-grid { grid-template-columns: 1fr; }
  table { font-size: .75rem; }
  th, td { padding: .4rem .3rem; }
}
</style>
</head>
<body>
<div class="container">
<h1>Tiny-ML Leaderboard</h1>
<p class="subtitle">Sub-100M parameter language models, same eval harness, transparent methodology.</p>
<!-- Mission statement and the entry point for third-party submissions. -->
<div class="note">
  <strong>Why this exists.</strong> The community deserves a single place to compare tiny LMs fairly.
  We include every model with verifiable benchmarks — ours, our competitors', yours.
  <!-- The link opens a new tab; rel="noopener noreferrer" keeps that tab from reaching window.opener. -->
  <a href="https://huggingface.co/spaces/CompactAI-O/Tiny-ML-Leaderboard/discussions" target="_blank" rel="noopener noreferrer">Submit a model via PR.</a>
</div>
<h2 class="section-title" id="detailed-results">Detailed Results</h2>
<!--
  One row per model.
  - row-compactai / row-supralabs add the organisation-coloured left border.
  - .best marks the top value in a metric column; .na marks a missing value
    ("not reported", "TBD" or an em dash).
  - The page script colours metric cells by position (columns 4, 5 and 6),
    so keep the column order in sync with it.
-->
<table aria-labelledby="detailed-results">
  <thead>
    <tr>
      <th scope="col">Model</th>
      <th scope="col">Org</th>
      <th scope="col">Params</th>
      <th scope="col">WikiText-2 ↓</th>
      <th scope="col">BLiMP ↑</th>
      <th scope="col">ARC-Easy ↑</th>
      <th scope="col">Training Tokens</th>
      <th scope="col">Links</th>
    </tr>
  </thead>
  <tbody>
    <tr class="row-supralabs">
      <td><strong>Supra-50M-Instruct</strong></td>
      <td><span class="org-tag org-supralabs">SupraLabs</span></td>
      <td>51.8M</td>
      <td class="na">not reported</td>
      <td><span class="best">76.3%</span></td>
      <td><span class="best">52.2%</span></td>
      <td>20B</td>
      <td class="links"><a href="https://huggingface.co/SupraLabs/Supra-50M-Instruct">card</a> <a href="https://huggingface.co/SupraLabs/Supra-50M-Base">base</a></td>
    </tr>
    <tr class="row-compactai">
      <td><strong>Glint-1.3 (merged)</strong></td>
      <td><span class="org-tag org-compactai">CompactAI</span></td>
      <td>982K</td>
      <td>3.08</td>
      <td>68.7%</td>
      <td>32.5%</td>
      <td>100B</td>
      <td class="links"><a href="https://huggingface.co/CompactAI-O/Glint-1.3">card</a></td>
    </tr>
    <tr class="row-supralabs">
      <td><strong>Supra-Mini-v5</strong></td>
      <td><span class="org-tag org-supralabs">SupraLabs</span></td>
      <td>7.87M</td>
      <td><span class="best">2.66</span></td>
      <td>63.5%</td>
      <td>34.4%</td>
      <td class="na">—</td>
      <td class="links"><a href="https://huggingface.co/SupraLabs/Supra-Mini-v5-8M">card</a></td>
    </tr>
    <tr class="row-compactai">
      <td><strong>Glint-1</strong></td>
      <td><span class="org-tag org-compactai">CompactAI</span></td>
      <td>1M</td>
      <td>4.07</td>
      <td>61.2%</td>
      <td>32.0%</td>
      <td>100B</td>
      <td class="links"><a href="https://huggingface.co/CompactAI-O/Glint-1">card</a></td>
    </tr>
    <tr class="row-supralabs">
      <td><strong>Supra-Mini-v4</strong></td>
      <td><span class="org-tag org-supralabs">SupraLabs</span></td>
      <td>2.62M</td>
      <td>3.17</td>
      <td>60.7%</td>
      <td>31.5%</td>
      <td class="na">—</td>
      <td class="links"><a href="https://huggingface.co/SupraLabs/Supra-Mini-v4-2M">card</a></td>
    </tr>
    <tr class="row-compactai">
      <td><strong>Glint-0.4</strong></td>
      <td><span class="org-tag org-compactai">CompactAI</span></td>
      <td>1M</td>
      <td>5.24</td>
      <td>58.5%</td>
      <td>31.0%</td>
      <td>10B</td>
      <td class="links"><a href="https://huggingface.co/CompactAI-O/Glint-0.4">card</a></td>
    </tr>
    <tr class="row-supralabs">
      <td><strong>Supra-Mini-v3</strong></td>
      <td><span class="org-tag org-supralabs">SupraLabs</span></td>
      <td>468K</td>
      <td>4.49</td>
      <td>55.3%</td>
      <td>27.3%</td>
      <td class="na">—</td>
      <td class="links"><a href="https://huggingface.co/SupraLabs/Supra-Mini-v3-0.5M">card</a></td>
    </tr>
    <tr class="row-supralabs">
      <td><strong>Supra-Mini-v2</strong></td>
      <td><span class="org-tag org-supralabs">SupraLabs</span></td>
      <td>168K</td>
      <td>7.79</td>
      <td>53.5%</td>
      <td>26.8%</td>
      <td class="na">—</td>
      <td class="links"><a href="https://huggingface.co/SupraLabs/Supra-Mini-v2-0.1M">card</a></td>
    </tr>
    <tr class="row-compactai">
      <td><strong>Glint-0.2</strong></td>
      <td><span class="org-tag org-compactai">CompactAI</span></td>
      <td>1M</td>
      <td class="na">TBD</td>
      <td>49.8%</td>
      <td>27.0%</td>
      <td>~100M</td>
      <td class="links"><a href="https://huggingface.co/CompactAI-O/Glint-0.2">card</a></td>
    </tr>
    <tr class="row-compactai">
      <td><strong>Glint-0.3</strong></td>
      <td><span class="org-tag org-compactai">CompactAI</span></td>
      <td>1M</td>
      <td class="na">TBD</td>
      <td>47.3%</td>
      <td>25.5%</td>
      <td>~100M</td>
      <td class="links"><a href="https://huggingface.co/CompactAI-O/Glint-0.3">card</a></td>
    </tr>
    <tr class="row-compactai">
      <td><strong>Glint-0.1</strong></td>
      <td><span class="org-tag org-compactai">CompactAI</span></td>
      <td>1M</td>
      <td class="na">TBD</td>
      <td>46.7%</td>
      <td>21.0%</td>
      <td>~100M</td>
      <td class="links"><a href="https://huggingface.co/CompactAI-O/Glint-0.1">card</a></td>
    </tr>
    <tr class="row-compactai">
      <td><strong>Shard-1</strong></td>
      <td><span class="org-tag org-compactai">CompactAI</span></td>
      <td>54.5M</td>
      <td class="na">TBD</td>
      <td class="na">TBD</td>
      <td class="na">TBD</td>
      <td>~20B</td>
      <td class="links"><a href="https://huggingface.co/CompactAI-O/Shard-1">card</a></td>
    </tr>
    <tr class="row-supralabs">
      <td><strong>StorySupra-10M</strong></td>
      <td><span class="org-tag org-supralabs">SupraLabs</span></td>
      <td>12.6M</td>
      <td class="na">not reported</td>
      <td class="na">not reported</td>
      <td class="na">not reported</td>
      <td class="na">—</td>
      <td class="links"><a href="https://huggingface.co/SupraLabs/StorySupra-10M">card</a></td>
    </tr>
    <tr class="row-supralabs">
      <td><strong>DistillSupra-0.2M</strong></td>
      <td><span class="org-tag org-supralabs">SupraLabs</span></td>
      <td>289K</td>
      <td class="na">not reported</td>
      <td class="na">not reported</td>
      <td class="na">not reported</td>
      <td class="na">—</td>
      <td class="links"><a href="https://huggingface.co/SupraLabs/DistillSupra-0.2M">card</a></td>
    </tr>
    <tr class="row-supralabs">
      <td><strong>MicroSupra-1k</strong></td>
      <td><span class="org-tag org-supralabs">SupraLabs</span></td>
      <td>1K</td>
      <td class="na">not reported</td>
      <td class="na">not reported</td>
      <td class="na">not reported</td>
      <td class="na">—</td>
      <td class="links"><a href="https://huggingface.co/SupraLabs/MicroSupra-1k">card</a></td>
    </tr>
    <tr class="row-compactai">
      <td><strong>TrueMath</strong></td>
      <td><span class="org-tag org-compactai">CompactAI</span></td>
      <td>1-layer</td>
      <td class="na">—</td>
      <td class="na">—</td>
      <td class="na">—</td>
      <td>synthetic</td>
      <td class="links"><a href="https://huggingface.co/CompactAI-O/TrueMath">card</a></td>
    </tr>
  </tbody>
</table>
<h2 class="section-title">Benchmark Overview</h2>
<!-- Colour key for the bars; the charts' built-in legends are switched off in the page script. -->
<div class="legend">
  <span class="legend-item"><span class="legend-dot" style="background:var(--compactai)"></span> CompactAI</span>
  <span class="legend-item"><span class="legend-dot" style="background:var(--supralabs)"></span> SupraLabs</span>
</div>
<!--
  Each canvas is filled in by buildChart() using its id. A canvas is opaque to
  assistive technology, so every chart gets role="img", an accessible name and
  fallback text that points to the table holding the same figures.
-->
<div class="chart-grid">
  <div class="chart-card">
    <h3>BLiMP ↑ (higher is better)</h3>
    <canvas id="blimpChart" role="img" aria-label="Bar chart of BLiMP accuracy per model; the same figures are listed in the Detailed Results table.">Chart unavailable. See the BLiMP column of the Detailed Results table.</canvas>
  </div>
  <div class="chart-card">
    <h3>ARC-Easy ↑ (higher is better)</h3>
    <canvas id="arcChart" role="img" aria-label="Bar chart of ARC-Easy accuracy per model; the same figures are listed in the Detailed Results table.">Chart unavailable. See the ARC-Easy column of the Detailed Results table.</canvas>
  </div>
  <div class="chart-card full">
    <h3>WikiText-2 ↓ (lower is better)</h3>
    <canvas id="wikiChart" role="img" aria-label="Bar chart of WikiText-2 scores per model; the same figures are listed in the Detailed Results table.">Chart unavailable. See the WikiText-2 column of the Detailed Results table.</canvas>
  </div>
</div>
<!-- Submission requirements. The inline margin-top (1.5rem) takes precedence
     over the 2rem that the .methodology rule in the stylesheet sets. -->
<div class="methodology" style="margin-top:1.5rem">
<h3>Add your model</h3>
<p>
Open a PR on this Space with your model's benchmark results and reproduction steps.
We require: params, training data provenance, eval harness used, and scores for at least 2 of the 3 benchmarks.
</p>
</div>
<!-- Attribution, affiliation notice and data-provenance disclaimer. -->
<footer>
<p>Tiny-ML Leaderboard by <a href="https://huggingface.co/CompactAI-O">CompactAI</a>.
Not affiliated with SupraLabs or LH-Tech-AI.
All benchmark data is self-reported by model authors unless otherwise noted.</p>
</footer>
</div>
<script>
// Chart input, one compact row per model:
//   [name, org, BLiMP %, ARC-Easy %, WikiText-2 score]
// A null score means the model has no figure for that metric; buildChart
// leaves such models out of the corresponding chart.
const models = [
  ['Supra-50M-Instruct', 'supralabs', 76.3, 52.2, null],
  ['Glint-1.3 (merged)', 'compactai', 68.7, 32.5, 3.08],
  ['Supra-Mini-v5', 'supralabs', 63.5, 34.4, 2.66],
  ['Glint-1', 'compactai', 61.2, 32.0, 4.07],
  ['Supra-Mini-v4', 'supralabs', 60.7, 31.5, 3.17],
  ['Glint-0.4', 'compactai', 58.5, 31.0, 5.24],
  ['Supra-Mini-v3', 'supralabs', 55.3, 27.3, 4.49],
  ['Supra-Mini-v2', 'supralabs', 53.5, 26.8, 7.79],
  ['Glint-0.2', 'compactai', 49.8, 27.0, null],
  ['Glint-0.3', 'compactai', 47.3, 25.5, null],
  ['Glint-0.1', 'compactai', 46.7, 21.0, null],
].map(([name, org, blimp, arc, wiki]) => ({ name, org, blimp, arc, wiki }));

// Bar outline colour per organisation key.
const colorMap = {
  compactai: '#3fb950',
  supralabs: '#58a6ff',
};

// Bar fill per organisation key: the outline colour at 70% opacity.
const bgMap = {
  compactai: 'rgba(63,185,80,0.7)',
  supralabs: 'rgba(88,166,255,0.7)',
};
/**
 * Render a horizontal bar chart that ranks the models on one metric.
 *
 * Models without a numeric value for the metric are left out. Bars are
 * coloured per organisation via `bgMap` / `colorMap`.
 *
 * @param {string} canvasId - id of the <canvas> element to draw into.
 * @param {string} metric - key of the `models` entries to plot
 *   ('blimp', 'arc' or 'wiki'); 'wiki' is shown as a plain number, anything
 *   else as a percentage.
 * @param {string} label - human-readable metric name, used as the dataset
 *   label and as the tooltip prefix.
 * @param {boolean} reverse - true sorts ascending (lowest value on top),
 *   false sorts descending.
 */
function buildChart(canvasId, metric, label, reverse) {
  const canvas = document.getElementById(canvasId);
  // Bail out instead of throwing, so a blocked CDN or a renamed canvas does
  // not abort the rest of the page script.
  if (!canvas || typeof Chart === 'undefined') {
    console.warn(`buildChart: cannot render "${canvasId}" (canvas or Chart.js missing)`);
    return;
  }

  const isWiki = metric === 'wiki';
  const fmt = isWiki ? v => v.toFixed(2) : v => v.toFixed(1) + '%';

  // filter() returns a new array, so the in-place sort leaves `models` untouched.
  const sorted = models
    .filter(d => Number.isFinite(d[metric]))
    .sort((a, b) => reverse ? a[metric] - b[metric] : b[metric] - a[metric]);

  new Chart(canvas, {
    type: 'bar',
    data: {
      labels: sorted.map(d => d.name),
      datasets: [{
        label,
        data: sorted.map(d => d[metric]),
        backgroundColor: sorted.map(d => bgMap[d.org]),
        borderColor: sorted.map(d => colorMap[d.org]),
        borderWidth: 1,
        borderRadius: 3,
      }]
    },
    options: {
      indexAxis: 'y',
      responsive: true,
      maintainAspectRatio: true,
      plugins: {
        legend: { display: false },
        tooltip: {
          callbacks: {
            // With indexAxis 'y' the bar length is the parsed x value.
            label: ctx => `${label}: ${fmt(ctx.parsed.x)}`
          }
        }
      },
      scales: {
        x: {
          beginAtZero: true,
          grid: { color: 'rgba(255,255,255,0.06)' },
          ticks: { color: '#8b949e', callback: v => isWiki ? v.toFixed(1) : v + '%' }
        },
        y: {
          grid: { display: false },
          ticks: {
            color: '#c9d1d9',
            font: { size: 11 },
            autoSkip: false,
            // On a category scale the callback receives the tick value (an
            // index), not the label text. It must be a regular function so
            // `this` is the scale and getLabelForValue() can resolve the name.
            callback: function (value) {
              const name = String(this.getLabelForValue(value));
              return name.length > 18 ? name.slice(0, 17) + '…' : name;
            }
          }
        }
      }
    }
  });
}
/**
 * Tint the metric cells of the results table from white (worst value in the
 * column) to bright green (best value in the column).
 *
 * Each column's range is taken from the numeric cells actually present, so
 * adding or editing rows needs no constant to be updated here. Cells whose
 * text is not a number ("not reported", "TBD", a dash, empty) are left alone.
 * The colour is set on the <td>; a value wrapped in a .best span keeps that
 * class's own colour.
 */
function applyTableGradient() {
  const WORST_RGB = [255, 255, 255]; // #ffffff, white, fully readable
  const BEST_RGB = [63, 185, 80];    // #3fb950, bright green
  // Exponent above 1 keeps mid and low values close to white so that only the
  // top performers turn vividly green.
  const CURVE = 2.5;

  // Map a value within [min, max] to an rgb() string on the white-to-green ramp.
  function getColor(value, min, max, lowerIsBetter) {
    if (value === null || isNaN(value)) return '';
    let percent;
    if (max === min) {
      // A column with a single distinct value: treat it as the best rather
      // than dividing by zero.
      percent = 1;
    } else {
      percent = (value - min) / (max - min);
      // Where lower is better, flip the scale so the lowest value is greenest.
      if (lowerIsBetter) percent = 1 - percent;
    }
    const clamped = Math.max(0, Math.min(1, percent));
    const adjusted = Math.pow(clamped, CURVE);
    const [r, g, b] = WORST_RGB.map((worst, i) =>
      Math.round(worst + (BEST_RGB[i] - worst) * adjusted));
    return `rgb(${r}, ${g}, ${b})`;
  }

  // Colour every numeric cell in the given 1-based table column.
  function colorColumn(columnIndex, lowerIsBetter) {
    const scored = [];
    document.querySelectorAll(`td:nth-child(${columnIndex})`).forEach(cell => {
      // parseFloat yields NaN for every placeholder text, so no sentinel
      // list is needed.
      const value = parseFloat(cell.textContent.replace('%', '').trim());
      if (!isNaN(value)) scored.push({ cell, value });
    });
    if (scored.length === 0) return;

    const values = scored.map(entry => entry.value);
    const min = Math.min(...values);
    const max = Math.max(...values);
    scored.forEach(({ cell, value }) => {
      cell.style.color = getColor(value, min, max, lowerIsBetter);
    });
  }

  colorColumn(5, false); // BLiMP, higher is better
  colorColumn(6, false); // ARC-Easy, higher is better
  colorColumn(4, true);  // WikiText-2, lower is better
}
// Tint the table once the DOM has been parsed, then draw the three charts
// in the order they appear on the page.
window.addEventListener('DOMContentLoaded', applyTableGradient);
[
  ['blimpChart', 'blimp', 'BLiMP Accuracy', false],
  ['arcChart', 'arc', 'ARC-Easy Accuracy', false],
  ['wikiChart', 'wiki', 'WikiText-2 Perplexity', true],
].forEach(chartArgs => buildChart(...chartArgs));
</script>
</body>
</html>