Spaces:

CompactAI-O
/

Tiny-ML-Leaderboard

Running

App Files Files Community

Tiny-ML-Leaderboard / index.html

MihaiPopa-1

Add PotentSulfurLM, as a followup to CinnabarLM!

d11bc14 verified about 16 hours ago

raw

history blame

16 kB

	<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="UTF-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0">
	<title>Tiny-ML Leaderboard</title>
	<!-- Chart.js v4 draws the three benchmark bar charts. `defer` lets the HTML keep
	     parsing while the library downloads; deferred scripts still execute before
	     DOMContentLoaded, which is the first moment the inline script uses `Chart`. -->
	<script src="https://cdn.jsdelivr.net/npm/chart.js@4" defer></script>
	<style>
	/* ---- Design tokens: page palette plus one colour per organisation ---- */
	:root {
	  --bg: #0d1117;
	  --card: #161b22;
	  --border: #30363d;
	  --text: #c9d1d9;
	  /* Secondary text. Roughly 6:1 contrast on --bg, so it clears the WCAG AA 4.5:1 minimum. */
	  --muted: #8b949e;
	  --accent: #58a6ff;
	  --green: #3fb950;
	  --orange: #d29922;
	  --compactai: #3fb950;
	  --supralabs: #58a6ff;
	  --axiomiclabs: #c2b6ff;
	  --mihaipopa: #93c6aa;
	}

	/* ---- Base layout ---- */
	* { margin: 0; padding: 0; box-sizing: border-box; }
	body { font-family: -apple-system,BlinkMacSystemFont,'Segoe UI',Helvetica,Arial,sans-serif; background: var(--bg); color: var(--text); padding: 2rem; }
	.container { max-width: 1200px; margin: 0 auto; }
	h1 { font-size: 1.8rem; margin-bottom: .25rem; }
	.subtitle { color: var(--muted); margin-bottom: 2rem; font-size: .95rem; }
	.note { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; margin-bottom: 2rem; font-size: .9rem; line-height: 1.5; }
	.note a { color: var(--accent); }
	.section-title { font-size: 1.2rem; margin: 2rem 0 1rem; padding-bottom: .5rem; border-bottom: 1px solid var(--border); }

	/* ---- Charts: two columns, with `.full` cards spanning both ---- */
	.chart-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 1.5rem; margin-bottom: 2rem; }
	.chart-card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1rem; }
	.chart-card.full { grid-column: 1 / -1; }
	.chart-card h3 { font-size: .95rem; margin-bottom: .75rem; color: var(--text); }
	.chart-card canvas { max-height: 360px; }
	/* flex-wrap lets the legend break onto a second row on narrow screens
	   instead of squeezing the organisation names. */
	.legend { display: flex; flex-wrap: wrap; gap: .4rem 1.5rem; margin-bottom: .75rem; font-size: .8rem; }
	.legend-item { display: flex; align-items: center; gap: .4rem; }
	.legend-dot { width: 10px; height: 10px; border-radius: 50%; }

	/* ---- Results table ---- */
	table { width: 100%; border-collapse: collapse; font-size: .85rem; }
	th { background: var(--card); padding: .75rem .5rem; text-align: center; border: 1px solid var(--border); font-weight: 600; white-space: nowrap; }
	td { padding: .6rem .5rem; text-align: center; border: 1px solid var(--border); }
	td:first-child, th:first-child { text-align: left; padding-left: 1rem; }
	tr:hover td { background: rgba(88,166,255,.05); }

	/* Organisation badge (`org-<key>`) and row accent (`row-<key>`); the key is the
	   model's `org` value in the inline script. */
	.org-tag { display: inline-block; padding: .15rem .5rem; border-radius: 4px; font-size: .75rem; font-weight: 600; }
	.org-compactai { background: #3fb95033; color: var(--compactai); }
	.org-supralabs { background: #1f6feb33; color: var(--supralabs); }
	.org-axiomiclabs { background: #8957e533; color: var(--axiomiclabs); }
	.org-mihaipopa { background: #93c6aa33; color: var(--mihaipopa); }

	.row-compactai { border-left: 3px solid var(--compactai); }
	.row-supralabs { border-left: 3px solid var(--supralabs); }
	.row-axiomiclabs { border-left: 3px solid var(--axiomiclabs); }
	.row-mihaipopa { border-left: 3px solid var(--mihaipopa); }

	/* `.best` marks the top score in a column; `.na` marks a missing score.
	   `.na` stays de-emphasised through italics and the muted colour while
	   remaining readable. */
	.best { color: var(--green); font-weight: 700; }
	.na { color: var(--muted); font-style: italic; }
	.links a { color: var(--accent); text-decoration: none; }
	.links a:hover { text-decoration: underline; }

	/* ---- Footer and submission instructions ---- */
	footer { margin-top: 3rem; padding-top: 1.5rem; border-top: 1px solid var(--border); font-size: .8rem; color: var(--muted); text-align: center; }
	footer a { color: var(--accent); }
	.methodology { margin-top: 2rem; }
	.methodology h3 { margin-bottom: .5rem; }
	.methodology p { font-size: .85rem; line-height: 1.6; color: var(--muted); }

	/* ---- Small screens: single-column charts, denser table ---- */
	@media (max-width: 768px) {
	  body { padding: 1rem; }
	  .chart-grid { grid-template-columns: 1fr; }
	  table { font-size: .75rem; }
	  th, td { padding: .4rem .3rem; }
	}
	</style>
	</head>
	<body>
	<div class="container">

	<h1>Tiny-ML Leaderboard</h1>
	<p class="subtitle">Sub-100M parameter language models, same eval harness, transparent methodology.</p>

	<!-- Mission statement plus the entry point for community submissions.
	     The link opens in a new tab, so rel="noopener noreferrer" keeps the opened
	     page from reaching back into this one through window.opener. -->
	<div class="note">
	<strong>Why this exists.</strong> The community deserves a single place to compare tiny LMs fairly.
	We include every model with verifiable benchmarks — ours, our competitors', yours.
	<a href="https://huggingface.co/spaces/CompactAI-O/Tiny-ML-Leaderboard/discussions" target="_blank" rel="noopener noreferrer">Submit a model via PR.</a>
	</div>

	<h2 class="section-title" id="results-heading">Detailed Results</h2>

	<!-- Leaderboard table. Only the header row is static; the body rows are written
	     by renderTable() in the inline script. The table takes its accessible name
	     from the heading above, and scope="col" ties each header to its column for
	     screen readers. -->
	<table aria-labelledby="results-heading">
	<thead>
	<tr>
	<th scope="col">Model</th>
	<th scope="col">Org</th>
	<th scope="col">Params</th>
	<th scope="col">WikiText-2 ↓</th>
	<th scope="col">BLiMP ↑</th>
	<th scope="col">ARC-Easy ↑</th>
	<th scope="col">Training Tokens</th>
	<th scope="col">Links</th>
	</tr>
	</thead>
	<tbody id="leaderboard-body">
	<!-- Dynamic content -->
	</tbody>
	</table>

	<h2 class="section-title">Benchmark Overview</h2>

	<div class="legend">
	<span class="legend-item"><span class="legend-dot" style="background:var(--compactai)"></span> CompactAI</span>
	<span class="legend-item"><span class="legend-dot" style="background:var(--supralabs)"></span> SupraLabs</span>
	<span class="legend-item"><span class="legend-dot" style="background:var(--axiomiclabs)"></span> Axiomic Labs</span>
	<span class="legend-item"><span class="legend-dot" style="background:var(--mihaipopa)"></span> Mihai Popa</span>
	</div>

	<div class="chart-grid">
	<div class="chart-card">
	<h3>BLiMP ↑ (higher is better)</h3>
	<canvas id="blimpChart"></canvas>
	</div>
	<div class="chart-card">
	<h3>ARC-Easy ↑ (higher is better)</h3>
	<canvas id="arcChart"></canvas>
	</div>
	<div class="chart-card full">
	<h3>WikiText-2 ↓ (lower is better)</h3>
	<canvas id="wikiChart"></canvas>
	</div>
	</div>



	<div class="methodology" style="margin-top:1.5rem">
	<h3>Add your model</h3>
	<p>
	Open a PR on this Space with your model's benchmark results and reproduction steps.
	We require: params, training data provenance, eval harness used, and scores for at least 2 of the 3 benchmarks.
	</p>
	</div>

	<footer>
	<p>Tiny-ML Leaderboard by <a href="https://huggingface.co/CompactAI-O">CompactAI</a>.
	Not affiliated with SupraLabs or LH-Tech-AI.
	All benchmark data is self-reported by model authors unless otherwise noted.</p>
	</footer>

	</div>
	<script>
	// Leaderboard entries, one object per model:
	//   name             - display name, shown in the table and as the chart bar label
	//   org              - key into orgNameMap / colorMap / bgMap; also used to build the
	//                      `row-<org>` and `org-<org>` CSS class names
	//   params, tokens   - display strings, printed as-is in the table
	//   blimp, arc, wiki - numeric scores, or null when there is no score yet
	//                      (the table then shows "TBD" and the charts leave the model out)
	//   links            - card: model page URL; base: optional second URL, rendered as
	//                      a "base" link when present
	// Array order does not affect display: renderTable() and buildChart() each sort their own copy.
	const models = [
	{
	name: 'Supra-50M-Instruct',
	org: 'supralabs',
	params: '51.8M',
	blimp: 76.3,
	arc: 52.2,
	wiki: null,
	tokens: '20B',
	links: { card: 'https://huggingface.co/SupraLabs/Supra-50M-Instruct', base: 'https://huggingface.co/SupraLabs/Supra-50M-Base' }
	},
	{
	name: 'GPT-S-5M',
	org: 'axiomiclabs',
	params: '5.16M',
	blimp: 72.27,
	arc: 35.69,
	wiki: 2.56,
	tokens: '25B',
	links: { card: 'https://huggingface.co/AxiomicLabs/GPT-S-5M' }
	},
	{
	name: 'Glint-1.3 (merged)',
	org: 'compactai',
	params: '982K',
	blimp: 68.7,
	arc: 32.5,
	wiki: 3.08,
	tokens: '100B',
	links: { card: 'https://huggingface.co/CompactAI-O/Glint-1.3' }
	},
	{
	name: 'Supra-Mini-v5',
	org: 'supralabs',
	params: '7.87M',
	blimp: 63.5,
	arc: 34.4,
	wiki: 2.66,
	tokens: '—',
	links: { card: 'https://huggingface.co/SupraLabs/Supra-Mini-v5-8M' }
	},
	{
	name: 'CinnabarLM 4M',
	org: 'mihaipopa',
	params: '4.23M',
	blimp: 62.87,
	arc: 27.36,
	wiki: 2.226,
	tokens: '~80M',
	links: { card: 'https://huggingface.co/MihaiPopa-1/CinnabarLM-4M-Base' }
	},
	{
	name: 'Glint-1',
	org: 'compactai',
	params: '1M',
	blimp: 61.2,
	arc: 32.0,
	wiki: 4.07,
	tokens: '100B',
	links: { card: 'https://huggingface.co/CompactAI-O/Glint-1' }
	},
	{
	name: 'Supra-Mini-v4',
	org: 'supralabs',
	params: '2.62M',
	blimp: 60.7,
	arc: 31.5,
	wiki: 3.17,
	tokens: '—',
	links: { card: 'https://huggingface.co/SupraLabs/Supra-Mini-v4-2M' }
	},
	{
	name: 'Glint-0.4',
	org: 'compactai',
	params: '1M',
	blimp: 58.5,
	arc: 31.0,
	wiki: 5.24,
	tokens: '10B',
	links: { card: 'https://huggingface.co/CompactAI-O/Glint-0.4' }
	},
	{
	name: 'Supra-Mini-v3',
	org: 'supralabs',
	params: '468K',
	blimp: 55.3,
	arc: 27.3,
	wiki: 4.49,
	tokens: '—',
	links: { card: 'https://huggingface.co/SupraLabs/Supra-Mini-v3-0.5M' }
	},
	{
	name: 'Supra-Mini-v2',
	org: 'supralabs',
	params: '168K',
	blimp: 53.5,
	arc: 26.8,
	wiki: 7.79,
	tokens: '—',
	links: { card: 'https://huggingface.co/SupraLabs/Supra-Mini-v2-0.1M' }
	},
	{
	name: 'Glint-0.2',
	org: 'compactai',
	params: '1M',
	blimp: 49.8,
	arc: 27.0,
	wiki: null,
	tokens: '~100M',
	links: { card: 'https://huggingface.co/CompactAI-O/Glint-0.2' }
	},
	{
	name: 'Glint-0.3',
	org: 'compactai',
	params: '1M',
	blimp: 47.3,
	arc: 25.5,
	wiki: null,
	tokens: '~100M',
	links: { card: 'https://huggingface.co/CompactAI-O/Glint-0.3' }
	},
	{
	name: 'Glint-0.1',
	org: 'compactai',
	params: '1M',
	blimp: 46.7,
	arc: 21.0,
	wiki: null,
	tokens: '~100M',
	links: { card: 'https://huggingface.co/CompactAI-O/Glint-0.1' }
	},
	{
	name: 'Shard-1',
	org: 'compactai',
	params: '54.5M',
	blimp: null,
	arc: null,
	wiki: null,
	tokens: '~20B',
	links: { card: 'https://huggingface.co/CompactAI-O/Shard-1' }
	},
	{
	name: 'CinnabarLM 1.5M',
	org: 'mihaipopa',
	params: '1.71M',
	blimp: 60.51,
	arc: 26.68,
	wiki: 2.35,
	tokens: '~50M',
	links: { card: 'https://huggingface.co/MihaiPopa-1/CinnabarLM-1.5M-Base' }
	},
	{
	name: 'CinnabarLM 1.4M',
	org: 'mihaipopa',
	params: '1.51M',
	blimp: 60.7,
	arc: 24.58,
	wiki: 2.316,
	tokens: '~30M',
	links: { card: 'https://huggingface.co/MihaiPopa-1/CinnabarLM-1.4M-Base' }
	},
	{
	name: 'PotentSulfurLM 500K',
	org: 'mihaipopa',
	params: '587K',
	blimp: 59.01,
	arc: 27.06,
	wiki: 2.613,
	tokens: '~200M',
	links: { card: 'https://huggingface.co/MihaiPopa-1/PotentSulfurLM-500K-Base' }
	},
	{
	name: 'StorySupra-10M',
	org: 'supralabs',
	params: '12.6M',
	blimp: null,
	arc: null,
	wiki: null,
	tokens: '—',
	links: { card: 'https://huggingface.co/SupraLabs/StorySupra-10M' }
	},
	{
	name: 'DistillSupra-0.2M',
	org: 'supralabs',
	params: '289K',
	blimp: null,
	arc: null,
	wiki: null,
	tokens: '—',
	links: { card: 'https://huggingface.co/SupraLabs/DistillSupra-0.2M' }
	},
	{
	name: 'MicroSupra-1k',
	org: 'supralabs',
	params: '1K',
	blimp: null,
	arc: null,
	wiki: null,
	tokens: '—',
	links: { card: 'https://huggingface.co/SupraLabs/MicroSupra-1k' }
	},
	{
	name: 'TrueMath',
	org: 'compactai',
	params: '1-layer',
	blimp: null,
	arc: null,
	wiki: null,
	tokens: 'synthetic',
	links: { card: 'https://huggingface.co/CompactAI-O/TrueMath' }
	}
	];

	// Human-readable organisation name for each `org` key used in `models`;
	// renderTable() prints it inside the org badge.
	const orgNameMap = {
	compactai: 'CompactAI',
	supralabs: 'SupraLabs',
	axiomiclabs: 'Axiomic Labs',
	mihaipopa: 'Mihai Popa'
	};

	// Solid colour per `org` key; buildChart() uses it as the bar border colour.
	// The values mirror the --compactai / --supralabs / --axiomiclabs / --mihaipopa
	// CSS custom properties, so keep both in sync when adding an organisation.
	const colorMap = {
	compactai: '#3fb950',
	supralabs: '#58a6ff',
	axiomiclabs: '#c2b6ff',
	mihaipopa: '#93c6aa'
	};

	// Bar fill colour per `org` key, used by buildChart(): the organisation's solid
	// colour (see the hex value beside each entry) at 70% opacity. Each rgba() triple
	// must decode to the same colour as the hex, otherwise the bars stop matching the
	// legend dots and table badges.
	const bgMap = {
	  compactai: 'rgba(63,185,80,0.7)',     // #3fb950
	  supralabs: 'rgba(88,166,255,0.7)',    // #58a6ff
	  axiomiclabs: 'rgba(194,182,255,0.7)', // #c2b6ff
	  mihaipopa: 'rgba(147,198,170,0.7)'    // #93c6aa
	};

	/**
	 * Ranking score used to order the leaderboard rows.
	 *
	 * @param {{blimp: ?number, arc: ?number}} m  Model entry.
	 * @returns {number} The mean of BLiMP and ARC-Easy when both are present, the
	 *   single available score when only one is, and -1 when neither is (so
	 *   unscored models sort below every scored one).
	 */
	function getScore(m) {
	  // A missing score (null/undefined) is folded to 0, which doubles as the
	  // "absent" marker in the checks below.
	  const b = m.blimp || 0;
	  const a = m.arc || 0;
	  if (b === 0 && a === 0) return -1;
	  if (b === 0) return a;
	  if (a === 0) return b;
	  return (b + a) / 2;
	}

	// RGB endpoints of the score tint: the worst value renders white, the best green.
	const worstGreen = [255, 255, 255];
	const bestGreen = [63, 185, 80];

	/**
	 * Inline CSS `color` declaration that tints a score between worstGreen and
	 * bestGreen according to where it sits in [min, max].
	 *
	 * @param {?number} value          Score to tint.
	 * @param {number}  min            Smallest score in the column.
	 * @param {number}  max            Largest score in the column.
	 * @param {boolean} lowerIsBetter  True when the smallest score is the best one.
	 * @param {boolean} [useLog=false] Interpolate on log(value) instead of value.
	 * @returns {string} e.g. "color: rgb(63, 185, 80)", or '' when no tint applies:
	 *   missing/NaN value, a zero-width range, or (with useLog) a non-positive input
	 *   whose logarithm is undefined.
	 */
	function getColor(value, min, max, lowerIsBetter, useLog = false) {
	  if (value === null || isNaN(value) || min === max) return '';
	  let v = value, mn = min, mx = max;
	  if (useLog) {
	    // log(0) is -Infinity and log(<0) is NaN; either would leak into the rgb() output.
	    if (v <= 0 || mn <= 0 || mx <= 0) return '';
	    v = Math.log(v); mn = Math.log(mn); mx = Math.log(mx);
	  }
	  let percent = (v - mn) / (mx - mn);
	  if (!Number.isFinite(percent)) return '';
	  if (lowerIsBetter) percent = 1 - percent;
	  // Clamp to [0, 1], then raise to 2.5 so only scores near the best get a strong green.
	  const adjusted = Math.pow(Math.max(0, Math.min(1, percent)), 2.5);
	  const [r, g, b] = worstGreen.map((from, i) => Math.round(from + (bestGreen[i] - from) * adjusted));
	  return `color: rgb(${r}, ${g}, ${b})`;
	}

	/**
	 * Renders the leaderboard rows into <tbody id="leaderboard-body">.
	 *
	 * Rows are ordered by getScore(), highest first. Each metric cell is tinted by
	 * getColor() relative to that metric's min/max across all models, and the best
	 * value per metric is wrapped in a `.best` span. A cell without a score gets
	 * the `.na` class and reads "TBD" when the value is null, "not reported" for
	 * any other non-number.
	 *
	 * A score counts as present when it is a real number, so a legitimate 0 is
	 * shown as a score rather than as missing; this mirrors the typeof check
	 * buildChart() uses when choosing which models to plot.
	 */
	function renderTable() {
	  const tbody = document.getElementById('leaderboard-body');
	  const sortedModels = [...models].sort((a, b) => getScore(b) - getScore(a));

	  const hasScore = v => typeof v === 'number' && !isNaN(v);

	  // Min/max of one metric over every model that has a score for it.
	  const rangeOf = key => {
	    const vals = models.map(m => m[key]).filter(hasScore);
	    return { min: Math.min(...vals), max: Math.max(...vals) };
	  };
	  const blimpRange = rangeOf('blimp');
	  const arcRange = rangeOf('arc');
	  const wikiRange = rangeOf('wiki');

	  // Builds one metric <td>; `suffix` is appended to the number ('%' or '').
	  const scoreCell = (value, range, suffix, lowerIsBetter, useLog) => {
	    if (!hasScore(value)) {
	      return `<td class="na">${value === null ? 'TBD' : 'not reported'}</td>`;
	    }
	    const isBest = value === (lowerIsBetter ? range.min : range.max);
	    const tint = getColor(value, range.min, range.max, lowerIsBetter, useLog);
	    return `<td style="${tint}"><span class="${isBest ? 'best' : ''}">${value}${suffix}</span></td>`;
	  };

	  // Cell order must match the static <thead>: WikiText-2, BLiMP, ARC-Easy.
	  tbody.innerHTML = sortedModels.map(m => `
	    <tr class="row-${m.org}">
	      <td><strong>${m.name}</strong></td>
	      <td><span class="org-tag org-${m.org}">${orgNameMap[m.org]}</span></td>
	      <td>${m.params}</td>
	      ${scoreCell(m.wiki, wikiRange, '', true, true)}
	      ${scoreCell(m.blimp, blimpRange, '%', false, false)}
	      ${scoreCell(m.arc, arcRange, '%', false, false)}
	      <td>${m.tokens}</td>
	      <td class="links">
	        <a href="${m.links.card}">card</a>
	        ${m.links.base ? `<a href="${m.links.base}">base</a>` : ''}
	      </td>
	    </tr>
	  `).join('');
	}

	/**
	 * Draws a horizontal bar chart of one metric into the given canvas.
	 *
	 * Only models whose metric is a number are plotted. Bars are coloured per
	 * organisation (bgMap fill, colorMap border). The WikiText-2 chart uses a
	 * logarithmic value axis; the other metrics use a linear axis starting at zero.
	 *
	 * @param {string}  canvasId  id of the target <canvas>.
	 * @param {string}  metric    Key on the model objects: 'blimp', 'arc' or 'wiki'.
	 * @param {string}  label     Human-readable metric name. Used as the dataset
	 *   label and, because canvas pixels are invisible to assistive technology, as
	 *   the canvas's accessible name unless the markup already supplies one.
	 * @param {boolean} reverse   true sorts ascending (lowest value first), false
	 *   sorts descending.
	 */
	function buildChart(canvasId, metric, label, reverse) {
	  const canvas = document.getElementById(canvasId);
	  if (!canvas || typeof Chart === 'undefined') {
	    // Missing canvas or the CDN script failed to load: skip this chart, but say why.
	    console.warn(`buildChart: cannot draw "${canvasId}" (canvas or Chart.js missing)`);
	    return;
	  }
	  if (label) {
	    if (!canvas.hasAttribute('role')) canvas.setAttribute('role', 'img');
	    if (!canvas.hasAttribute('aria-label')) canvas.setAttribute('aria-label', `${label} bar chart`);
	  }

	  const isWiki = metric === 'wiki';
	  const fmt = isWiki ? v => v.toFixed(2) : v => v.toFixed(1) + '%';
	  const sorted = models
	    .filter(d => d[metric] !== null && typeof d[metric] === 'number')
	    .sort((a, b) => reverse ? a[metric] - b[metric] : b[metric] - a[metric]);

	  new Chart(canvas, {
	    type: 'bar',
	    data: {
	      labels: sorted.map(d => d.name),
	      datasets: [{
	        label,
	        data: sorted.map(d => d[metric]),
	        backgroundColor: sorted.map(d => bgMap[d.org]),
	        borderColor: sorted.map(d => colorMap[d.org]),
	        borderWidth: 1,
	        borderRadius: 3,
	      }]
	    },
	    options: {
	      indexAxis: 'y', // horizontal bars: model names on y, values on x
	      responsive: true,
	      maintainAspectRatio: true,
	      plugins: {
	        legend: { display: false },
	        tooltip: {
	          callbacks: {
	            label: ctx => fmt(ctx.parsed.x)
	          }
	        }
	      },
	      scales: {
	        x: {
	          type: isWiki ? 'logarithmic' : 'linear',
	          beginAtZero: !isWiki,
	          grid: { color: 'rgba(255,255,255,0.06)' },
	          ticks: { color: '#8b949e', callback: v => isWiki ? v.toFixed(1) : v + '%' }
	        },
	        y: {
	          grid: { display: false },
	          ticks: {
	            color: '#c9d1d9',
	            font: { size: 11 },
	            autoSkip: false, // show every model name
	            // Regular function, not an arrow: `this` must be the scale.
	            callback: function(val) {
	              const name = this.getLabelForValue(val);
	              // Shorten long names to 17 characters plus an ellipsis.
	              return name.length > 18 ? name.slice(0, 17) + '…' : name;
	            }
	          }
	        }
	      }
	    }
	  });
	}

	// Page bootstrap: once the DOM is parsed, fill the results table, then draw
	// one chart per metric in the order BLiMP, ARC-Easy, WikiText-2.
	window.addEventListener('DOMContentLoaded', function () {
	  renderTable();

	  // [canvas id, metric key, label, sort ascending?]
	  const chartSpecs = [
	    ['blimpChart', 'blimp', 'BLiMP Accuracy', false],
	    ['arcChart', 'arc', 'ARC-Easy Accuracy', false],
	    ['wikiChart', 'wiki', 'WikiText-2 Perplexity', true],
	  ];
	  for (const spec of chartSpecs) {
	    buildChart(...spec);
	  }
	});
	</script>
	</body>
	</html>