Spaces:

abhijitramesh
/

webgpu-bench

Running

App Files Files Community

webgpu-bench / js /tables.js

GitHub Actions

sync from abhijitramesh/webgpu-bench@ef7e64472d

ed5d4b6 16 days ago

history blame contribute delete

21.6 kB

	import { formatTokS, formatMs, categorizeError, groupBy, quantSortKey, avgBy } from './utils.js';
	import { expandCpuRows } from './data.js';

	// Rows most recently handed to renderResultsTable; handleSort re-renders
	// these when the sort order changes.
	let lastResults = [];

	// Current sort column and direction. A null key means "keep incoming order".
	let sortState = {
	  key: null,
	  dir: 'asc',
	};

	// Column keys whose values are compared numerically by sortResults and
	// which default to descending order the first time they are selected.
	const NUM_KEYS = new Set([
	  'sizeMB',
	  'decode_tok_s',
	  'prefill_tok_s',
	  'decode_tok_s_d0',
	  'decode_tok_s_dN',
	  'prefill_tok_s_d0',
	  'prefill_tok_s_dN',
	  'cpu_baseline_decode_tok_s',
	  'cpu_baseline_prefill_tok_s',
	  'n_eval',
	  't_eval_ms',
	  'n_p_eval',
	  't_p_eval_ms',
	  'wallTimeMs',
	  'consistency_rate',
	]);

	/**
	 * Return a sorted copy of `results`, ordered by the column `key`.
	 *
	 * Comparison rules:
	 *  - keys listed in NUM_KEYS compare numerically (values coerced with Number());
	 *  - 'webgpuAvailable' puts truthy values first in ascending order;
	 *  - 'submittedBy' is an object and is compared by its `name`;
	 *  - every other key compares as strings via localeCompare.
	 *
	 * Rows without a usable value (null/undefined, a submitter without a name,
	 * or a numeric column holding something that is not a number) always sort
	 * last, whatever the direction.
	 *
	 * @param {Array<object>} results rows to sort; the input array is not mutated
	 * @param {string} key property name to sort by
	 * @param {string} dir 'desc' for descending; anything else sorts ascending
	 * @returns {Array<object>} a new, sorted array
	 */
	function sortResults(results, key, dir) {
	  const isNum = NUM_KEYS.has(key);

	  // Collapse a row to the value that is actually compared, or null when the
	  // row has nothing comparable for this column.
	  const sortValue = (row) => {
	    let value = row[key];
	    if (key === 'submittedBy') value = value?.name || null;
	    if (value == null) return null;
	    if (!isNum) return value;
	    // A NaN here would make the comparator return NaN and leave the order
	    // inconsistent, so treat unparseable numbers like missing values.
	    const num = Number(value);
	    return Number.isNaN(num) ? null : num;
	  };

	  return [...results].sort((a, b) => {
	    const va = sortValue(a);
	    const vb = sortValue(b);

	    // Missing values are placed last before the direction is applied, so
	    // they stay at the bottom in both ascending and descending order.
	    if (va == null && vb == null) return 0;
	    if (va == null) return 1;
	    if (vb == null) return -1;

	    let cmp;
	    if (isNum) {
	      cmp = va - vb;
	    } else if (key === 'webgpuAvailable') {
	      cmp = va === vb ? 0 : (va ? -1 : 1);
	    } else {
	      cmp = String(va).localeCompare(String(vb));
	    }
	    return dir === 'desc' ? -cmp : cmp;
	  });
	}

	/**
	 * Update the sort state for a header activation and re-render the table.
	 *
	 * Activating the already-sorted column flips its direction; activating a
	 * different column selects it with that column's default direction.
	 *
	 * @param {string} key column key taken from the header's data-key attribute
	 */
	function handleSort(key) {
	  const isNewColumn = sortState.key !== key;

	  if (isNewColumn) {
	    sortState.key = key;
	    // Default to descending for performance metrics
	    sortState.dir = NUM_KEYS.has(key) ? 'desc' : 'asc';
	  } else if (sortState.dir === 'asc') {
	    sortState.dir = 'desc';
	  } else {
	    sortState.dir = 'asc';
	  }

	  renderResultsTable(lastResults);
	}

	/**
	 * Render the sortable results table into the `#results-table` element.
	 *
	 * Stores `results` in `lastResults` so handleSort can re-render the same
	 * rows, rebuilds the whole table through innerHTML, then attaches click and
	 * Enter/Space handlers to every header cell. Returns early when the
	 * container is missing and shows an empty-state message when `results` is
	 * empty.
	 *
	 * Every value taken from a result row is escaped before it is interpolated
	 * into markup, because the table is assembled as an HTML string.
	 *
	 * @param {Array<object>} results rows to display
	 */
	export function renderResultsTable(results) {
	  lastResults = results;
	  const container = document.getElementById('results-table');
	  if (!container) return;

	  if (results.length === 0) {
	    container.innerHTML = `
	<div class="empty-state">
	<p>No results match the current filters.</p>
	<p class="empty-state-sub">Try resetting filters above, or <a href="run.html">run the benchmark</a> on your own machine to contribute data.</p>
	</div>`;
	    return;
	  }

	  // Values placed inside a double-quoted attribute must never contain a raw
	  // double quote. This is a no-op when escapeHtml already encodes quotes.
	  const escapeAttr = (value) => escapeHtml(value).replace(/"/g, '&quot;');
	  const MUTED_DASH = '<span class="text-muted">\u2014</span>';

	  const sorted = sortState.key ? sortResults(results, sortState.key, sortState.dir) : results;

	  // Resolve the depth-loaded column label from the data: when every visible
	  // row shares one N (the typical leaderboard case), show that concrete
	  // value (e.g., "@ d2048"). When rows mix depths (someone experimenting
	  // with d=4096 vs d=2048), fall back to the abstract "@ dN" with a tooltip
	  // listing the values present so the user knows the column is mixed.
	  const depthNValues = [...new Set(results.map(r => r.n_depth_dN).filter(v => v != null))]
	    .sort((a, b) => a - b);
	  const dnLabel = depthNValues.length === 1 ? `d${depthNValues[0]}` : 'dN';
	  const dnHeaderTitle = depthNValues.length > 1
	    ? `Mixed depths in view: ${depthNValues.map(v => `d${v}`).join(', ')}`
	    : '';

	  /* priority: 1 = always show; 2 = hide below 640px; 3 = hide below 900px */
	  const cols = [
	    { key: 'machineSlug', label: 'Machine', priority: 1 },
	    { key: 'model', label: 'Model', priority: 1 },
	    { key: 'variant', label: 'Quant', priority: 1 },
	    { key: 'sizeMB', label: 'Size (MB)', priority: 3 },
	    { key: 'browser', label: 'Browser', priority: 2 },
	    { key: 'submittedBy', label: 'Submitter', priority: 2 },
	    { key: 'status', label: 'Status', priority: 1 },
	    { key: 'buildType', label: 'Build', priority: 3 },
	    { key: 'webgpuAvailable', label: 'WebGPU', priority: 3 },
	    // tg / pp split into cold-cache (d=0) and depth-loaded (d=N) columns
	    // so Run Study's depth-pair shows as side-by-side numbers instead of
	    // overwriting one with the other. Pre-study and plain-Run records
	    // populate only the side they actually measured; the other reads `—`.
	    { key: 'decode_tok_s_d0', label: 'tg @ d0', priority: 1 },
	    { key: 'decode_tok_s_dN', label: `tg @ ${dnLabel}`, priority: 1, headerTitle: dnHeaderTitle },
	    { key: 'prefill_tok_s_d0', label: 'pp @ d0', priority: 3 },
	    { key: 'prefill_tok_s_dN', label: `pp @ ${dnLabel}`, priority: 3, headerTitle: dnHeaderTitle },
	    { key: 'cpu_baseline_decode_tok_s', label: 'CPU tg tok/s', priority: 2 },
	    { key: 'cpu_baseline_prefill_tok_s', label: 'CPU pp tok/s', priority: 3 },
	    { key: 'n_eval', label: 'n_eval', priority: 3 },
	    { key: 't_eval_ms', label: 't_eval (ms)', priority: 3 },
	    { key: 'n_p_eval', label: 'n_p_eval', priority: 3 },
	    { key: 't_p_eval_ms', label: 't_p_eval (ms)', priority: 3 },
	    { key: 'wallTimeMs', label: 'Wall (s)', priority: 3 },
	    { key: 'consistency_rate', label: 'CPU Match', priority: 2 },
	    { key: 'llamaCppCommit', label: 'llama.cpp', priority: 3 },
	    { key: 'error', label: 'Error', priority: 2 },
	  ];

	  // Throughput columns rendered llama-bench style as "avg ± stddev", with
	  // the test label as a tooltip. Each entry names the row fields holding the
	  // stddev and test name for that column; the CPU baseline columns have
	  // neither and always show the bare average.
	  const TOK_S_DETAIL = {
	    decode_tok_s: { stddev: 'decode_stddev_ts', testName: 'tg_test_name' },
	    prefill_tok_s: { stddev: 'prefill_stddev_ts', testName: 'pp_test_name' },
	    decode_tok_s_d0: { stddev: 'decode_stddev_ts_d0', testName: 'tg_test_name_d0' },
	    decode_tok_s_dN: { stddev: 'decode_stddev_ts_dN', testName: 'tg_test_name_dN' },
	    prefill_tok_s_d0: { stddev: 'prefill_stddev_ts_d0', testName: 'pp_test_name_d0' },
	    prefill_tok_s_dN: { stddev: 'prefill_stddev_ts_dN', testName: 'pp_test_name_dN' },
	    cpu_baseline_decode_tok_s: {},
	    cpu_baseline_prefill_tok_s: {},
	  };

	  // The first two columns are pinned; lower-priority columns get a class
	  // that lets CSS hide them on narrow screens.
	  const pinClass = (i) => (i === 0 ? 'col-pin col-pin-1' : (i === 1 ? 'col-pin col-pin-2' : ''));
	  const prioClass = (col) => (col.priority >= 3 ? 'col-p3' : (col.priority === 2 ? 'col-p2' : ''));

	  // Build the inner HTML of one body cell.
	  const cellHtml = (r, key) => {
	    const detail = TOK_S_DETAIL[key];
	    if (detail) {
	      const stddev = detail.stddev ? r[detail.stddev] : null;
	      const testName = detail.testName ? r[detail.testName] : null;
	      const avg = r[key];
	      // Older records without stddev fall back to the bare average.
	      const cell = avg != null && stddev != null
	        ? `${formatTokS(avg)} \u00b1 ${formatTokS(stddev)}`
	        : formatTokS(avg);
	      const titleAttr = testName ? ` title="${escapeAttr(testName)}"` : '';
	      return `<span class="mono"${titleAttr}>${cell}</span>`;
	    }

	    switch (key) {
	      case 'status':
	        return r.status === 'done'
	          ? '<span class="badge badge--pass">PASS</span>'
	          : '<span class="badge badge--fail">FAIL</span>';
	      case 'webgpuAvailable':
	        return r.webgpuAvailable
	          ? '<span class="badge badge--yes">Yes</span>'
	          : '<span class="badge badge--no">No</span>';
	      case 't_eval_ms':
	      case 't_p_eval_ms':
	        return `<span class="mono">${formatMs(r[key])}</span>`;
	      case 'wallTimeMs':
	        return `<span class="mono">${r.wallTimeMs != null ? (r.wallTimeMs / 1000).toFixed(1) : '\u2014'}</span>`;
	      case 'consistency_rate': {
	        if (r.consistency_rate == null) return MUTED_DASH;
	        const pct = (r.consistency_rate * 100).toFixed(1);
	        const tone = r.consistency_rate >= 0.95 ? 'text-success' : (r.consistency_rate >= 0.90 ? '' : 'text-error');
	        // `null >= 0` is true in JavaScript, so rule out null explicitly
	        // before treating the value as a divergence position.
	        const at = r.consistency_first_disagree;
	        const diverge = at != null && at >= 0 ? ` (diverge@${escapeHtml(String(at))})` : '';
	        return `<span class="mono ${tone}">${pct}%${diverge}</span>`;
	      }
	      case 'submittedBy':
	        return renderSubmitterCell(r.submittedBy);
	      case 'machineSlug': {
	        // Show the user-chosen machine name above the slug when it differs.
	        const name = r.userMachineName && r.userMachineName !== r.machineSlug
	          ? r.userMachineName : null;
	        return name
	          ? `${escapeHtml(name)}<div class="machine-cell-slug text-muted mono">${escapeHtml(r.machineSlug)}</div>`
	          : escapeHtml(r.machineSlug);
	      }
	      case 'llamaCppCommit': {
	        if (!r.llamaCppCommit) return MUTED_DASH;
	        // Prefer the human-readable git describe when present (e.g.
	        // "b8708-12-gd12cc3d1c"); fall back to a short commit hash.
	        const label = r.llamaCppDescribe || String(r.llamaCppCommit).slice(0, 10);
	        return `<a class="mono" href="https://github.com/ggml-org/llama.cpp/commit/${escapeAttr(r.llamaCppCommit)}" target="_blank" rel="noopener">${escapeHtml(label)}</a>`;
	      }
	      case 'error': {
	        if (!r.error) return MUTED_DASH;
	        const cat = categorizeError(r.error);
	        // Truncate long messages in the cell; the full text is in the tooltip.
	        const short = r.error.length > 60 ? r.error.slice(0, 60) + '\u2026' : r.error;
	        return `<span class="error-cell" title="${escapeAttr(r.error)}"><span class="error-cat">${escapeHtml(cat)}</span>${escapeHtml(short)}</span>`;
	      }
	      case 'sizeMB':
	      case 'n_eval':
	      case 'n_p_eval':
	        return `<span class="mono">${r[key] != null ? escapeHtml(String(r[key])) : '\u2014'}</span>`;
	      default:
	        return escapeHtml(String(r[key] ?? '\u2014'));
	    }
	  };

	  let html = '<table class="results-table"><thead><tr>';
	  cols.forEach((col, i) => {
	    const isActive = sortState.key === col.key;
	    const ariaSort = isActive ? (sortState.dir === 'asc' ? 'ascending' : 'descending') : 'none';
	    const arrowChar = isActive ? (sortState.dir === 'asc' ? '\u2191' : '\u2193') : '\u2195';
	    const cls = ['sortable', isActive ? 'sorted' : '', pinClass(i), prioClass(col)].filter(Boolean).join(' ');
	    const titleAttr = col.headerTitle ? ` title="${escapeAttr(col.headerTitle)}"` : '';
	    // The depth label is derived from row data, so the label is escaped too.
	    html += `<th data-key="${col.key}" class="${cls}" aria-sort="${ariaSort}" scope="col" tabindex="0"${titleAttr}><span class="th-label">${escapeHtml(col.label)}</span><span class="th-sort-indicator" aria-hidden="true">${arrowChar}</span></th>`;
	  });
	  html += '</tr></thead><tbody>';

	  for (const r of sorted) {
	    const rowClass = r.status === 'done' ? 'row-pass' : 'row-fail';
	    html += `<tr class="${rowClass}">`;
	    cols.forEach((col, i) => {
	      const parts = [pinClass(i), prioClass(col)].filter(Boolean);
	      const cls = parts.length ? ` class="${parts.join(' ')}"` : '';
	      html += `<td${cls}>${cellHtml(r, col.key)}</td>`;
	    });
	    html += '</tr>';
	  }

	  html += '</tbody></table>';
	  container.innerHTML = html;

	  // Wire sort click + keyboard handlers
	  container.querySelectorAll('th[data-key]').forEach(th => {
	    th.addEventListener('click', () => handleSort(th.dataset.key));
	    th.addEventListener('keydown', (e) => {
	      if (e.key === 'Enter' || e.key === ' ') {
	        e.preventDefault();
	        handleSort(th.dataset.key);
	      }
	    });
	  });
	}

	/**
	 * Render a per-category summary of failed runs into `#error-table`.
	 *
	 * Only rows whose status is not 'done' and that carry an error are counted.
	 * They are grouped by categorizeError(error) and listed by descending count
	 * together with the distinct variants and browsers affected. Returns early
	 * when the container is missing and shows an empty-state message when there
	 * is nothing to report.
	 *
	 * @param {Array<object>} results rows to summarise
	 */
	export function renderErrorTable(results) {
	  const container = document.getElementById('error-table');
	  if (!container) return;

	  const errors = results.filter(r => r.status !== 'done' && r.error);
	  if (errors.length === 0) {
	    container.innerHTML = `
	<div class="empty-state">
	<p>No errors in the current filter.</p>
	<p class="empty-state-sub">Either every benchmark passed, or no results are in scope — try widening the filter.</p>
	</div>`;
	    return;
	  }

	  const grouped = groupBy(errors, r => categorizeError(r.error));
	  const byCountDesc = Object.entries(grouped).sort((a, b) => b[1].length - a[1].length);

	  // Distinct values of one field, comma-separated and escaped: variant and
	  // browser come from result rows and end up in innerHTML.
	  const distinct = (items, field) => escapeHtml([...new Set(items.map(i => i[field]))].join(', '));

	  let html = '<div class="table-card"><table class="data-table"><thead><tr><th>Category</th><th>Count</th><th>Variants</th><th>Browsers</th></tr></thead><tbody>';
	  for (const [cat, items] of byCountDesc) {
	    html += `<tr><td><span class="error-cat">${escapeHtml(cat)}</span></td><td><span class="mono">${items.length}</span></td><td>${distinct(items, 'variant')}</td><td>${distinct(items, 'browser')}</td></tr>`;
	  }
	  html += '</tbody></table></div>';
	  container.innerHTML = html;
	}

	/**
	 * Render one card per machine, plus a trailing "Add your machine" card,
	 * into `#machine-info`.
	 *
	 * Each card is titled with the user-chosen machine name, falling back to
	 * the CPU string, and lists platform, arch, RAM, result counts and, when
	 * present, a link to the llama.cpp commit. Returns early when the container
	 * is missing. Machine fields are escaped before being placed in the markup;
	 * missing text fields render as an em dash.
	 *
	 * @param {Array<object>} machines machine summaries to display
	 */
	export function renderMachineInfo(machines) {
	  const container = document.getElementById('machine-info');
	  if (!container) return;

	  const addYourMachineCard = `
	<a class="machine-card machine-card-add" href="run.html">
	<div class="machine-card-header">
	<svg class="machine-card-icon" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><polyline points="4 17 10 11 4 5"/><line x1="12" y1="19" x2="20" y2="19"/></svg>
	<h3>Add your machine</h3>
	</div>
	<p class="machine-card-add-blurb">Run benchmarks directly in your browser. Results post to the leaderboard.</p>
	<code class="machine-card-add-cmd">npm run bench:quick</code>
	<span class="machine-card-add-cta">
	Open Run page
	<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><line x1="5" y1="12" x2="19" y2="12"/><polyline points="12 5 19 12 12 19"/></svg>
	</span>
	</a>`;

	  if (machines.length === 0) {
	    container.innerHTML = `<div class="machine-grid">${addYourMachineCard}</div>`;
	    return;
	  }

	  // Text-content escaping with an em dash for missing fields.
	  const text = (value) => escapeHtml(String(value ?? '\u2014'));
	  // Values inside a double-quoted attribute must not contain a raw double
	  // quote. This is a no-op when escapeHtml already encodes quotes.
	  const escapeAttr = (value) => escapeHtml(value).replace(/"/g, '&quot;');

	  let html = '<div class="machine-grid">';
	  for (const m of machines) {
	    const failCount = m.resultCount - m.passCount;
	    const title = m.userMachineName || m.cpus;
	    // Only repeat the CPU string when the title is a custom machine name.
	    const showHardwareRow = m.userMachineName && m.userMachineName !== m.cpus;
	    const hardwareRow = showHardwareRow
	      ? `<div class="spec-row"><span class="spec-label">Hardware</span><span class="spec-value">${escapeHtml(m.cpus)}</span></div>`
	      : '';
	    // Prefer the git describe string as link text; fall back to a short hash.
	    const commitRow = m.llamaCppCommit
	      ? `<div class="spec-row"><span class="spec-label">llama.cpp</span><span class="spec-value"><a href="https://github.com/ggml-org/llama.cpp/commit/${escapeAttr(m.llamaCppCommit)}" target="_blank" rel="noopener">${escapeHtml(m.llamaCppDescribe || String(m.llamaCppCommit).slice(0, 10))}</a></span></div>`
	      : '';
	    html += `
	<div class="machine-card">
	<div class="machine-card-header">
	<svg class="machine-card-icon" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="2" y="2" width="20" height="8" rx="2" ry="2"/><rect x="2" y="14" width="20" height="8" rx="2" ry="2"/><line x1="6" y1="6" x2="6.01" y2="6"/><line x1="6" y1="18" x2="6.01" y2="18"/></svg>
	<h3>${escapeHtml(title)}</h3>
	</div>
	<div class="machine-card-specs">
	${hardwareRow}
	<div class="spec-row"><span class="spec-label">Platform</span><span class="spec-value">${text(m.platform)}</span></div>
	<div class="spec-row"><span class="spec-label">Arch</span><span class="spec-value">${text(m.arch)}</span></div>
	<div class="spec-row"><span class="spec-label">RAM</span><span class="spec-value">${text(m.totalMemoryGB)} GB</span></div>
	<div class="spec-row"><span class="spec-label">Results</span><span class="spec-value">${text(m.resultCount)}</span></div>
	<div class="spec-row"><span class="spec-label">Passed</span><span class="spec-value text-success">${text(m.passCount)}</span></div>
	<div class="spec-row"><span class="spec-label">Failed</span><span class="spec-value text-error">${failCount}</span></div>
	${commitRow}
	</div>
	</div>`;
	  }
	  html += addYourMachineCard;
	  html += '</div>';
	  container.innerHTML = html;
	}

	/**
	 * Escape a value so it can be interpolated into HTML markup, both as
	 * element text and inside a quoted attribute value.
	 *
	 * Encodes `&`, `<`, `>`, `"`, `'` and the non-breaking space. The quote
	 * characters matter because callers in this file place the result inside
	 * attributes such as title="…", href="…" and src="…". Works on plain
	 * strings, so it does not need a `document`.
	 *
	 * @param {*} str value to escape; null and undefined yield an empty string,
	 *   anything else is converted with String()
	 * @returns {string} the escaped text
	 */
	function escapeHtml(str) {
	  if (str == null) return '';
	  return String(str)
	    .replace(/&/g, '&amp;') // must run first so later entities are not re-escaped
	    .replace(/</g, '&lt;')
	    .replace(/>/g, '&gt;')
	    .replace(/"/g, '&quot;')
	    .replace(/'/g, '&#39;')
	    .replace(/\u00a0/g, '&nbsp;');
	}

	/**
	 * Render a single submitter's avatar + @username link for the Results
	 * table column. Falls back to an em-dash if attribution is unknown.
	 *
	 * The name and avatar URL are placed inside double-quoted attributes
	 * (href, title, src), so any double quote in them is encoded as well.
	 *
	 * @param {?{name?: string, avatarUrl?: string}} sb submitter record, if any
	 * @returns {string} HTML for the cell content
	 */
	function renderSubmitterCell(sb) {
	  if (!sb?.name) return '<span class="text-muted">\u2014</span>';

	  // Values inside a double-quoted attribute must not contain a raw double
	  // quote. This is a no-op when escapeHtml already encodes quotes.
	  const escapeAttr = (value) => escapeHtml(value).replace(/"/g, '&quot;');

	  const safeName = escapeAttr(sb.name);
	  const avatar = sb.avatarUrl
	    ? `<img class="submitter-avatar" src="${escapeAttr(sb.avatarUrl)}" alt="" width="18" height="18" loading="lazy">`
	    : '<span class="submitter-avatar submitter-avatar--placeholder" aria-hidden="true"></span>';
	  return `<a class="submitter-link" href="https://huggingface.co/${safeName}" target="_blank" rel="noopener" title="View @${safeName} on Hugging Face">${avatar}<span class="submitter-name">@${safeName}</span></a>`;
	}

	/**
	 * Render the CPU-vs-GPU comparison table into `#cpu-gpu-table`.
	 *
	 * Only rows with status 'done' take part. For every model+variant pair that
	 * has both CPU and GPU rows, the table shows the averaged CPU throughput
	 * and, per GPU browser, the averaged GPU throughput with its speedup over
	 * CPU. Returns early when the container is missing and shows an empty-state
	 * message when either side, or every matching pair, is absent.
	 *
	 * @param {Array<object>} results rows to compare
	 */
	export function renderCpuGpuTable(results) {
	  const container = document.getElementById('cpu-gpu-table');
	  if (!container) return;

	  // CPU is pinned to d=0 by the runner, so the comparison must read GPU's
	  // d=0 number for an apples-to-apples ratio. Plain-Run records that only
	  // measured d=N have null `_d0` and silently drop out of the comparison
	  // — that's the right call: without a cold-cache GPU sample the speedup
	  // ratio would be measuring different workloads.
	  const METRICS = [
	    { cpuField: 'decode_tok_s', gpuField: 'decode_tok_s_d0', label: 'Decode tok/s @ d0' },
	    { cpuField: 'prefill_tok_s', gpuField: 'prefill_tok_s_d0', label: 'Prefill tok/s @ d0' },
	  ];

	  const passed = results.filter(r => r.status === 'done');
	  // CPU side aggregates standalone CPU runs (nGpuLayers === 0) plus
	  // synthetic rows derived from the cpu_baseline_* fields on browser-flow
	  // GPU records. See expandCpuRows() in data.js.
	  const cpuResults = expandCpuRows(passed);
	  const gpuResults = passed.filter(r => r.nGpuLayers !== 0);

	  if (cpuResults.length === 0 || gpuResults.length === 0) {
	    container.innerHTML = '<div class="empty-state"><p>Select "All Backends" to see CPU vs GPU comparison.</p></div>';
	    return;
	  }

	  const gpuBrowsers = [...new Set(gpuResults.map(r => r.browser))].sort();

	  const pairKey = r => `${r.model}::${r.variant}`;
	  const cpuByModelVariant = groupBy(cpuResults, pairKey);
	  const gpuByModelVariant = groupBy(gpuResults, pairKey);

	  // Keep only the model+variant pairs measured on both sides.
	  const keys = Object.keys(cpuByModelVariant).filter(k => gpuByModelVariant[k]);

	  if (keys.length === 0) {
	    container.innerHTML = '<div class="empty-state"><p>No matching model+variant pairs between CPU and GPU results.</p></div>';
	    return;
	  }

	  // Order by model name, then by quantisation within a model.
	  keys.sort((a, b) => {
	    const [aModel, aVar] = a.split('::');
	    const [bModel, bVar] = b.split('::');
	    if (aModel !== bModel) return aModel.localeCompare(bModel);
	    return quantSortKey(aVar) - quantSortKey(bVar);
	  });

	  // Colour class for a speedup ratio; a GPU slower than CPU is flagged.
	  const speedupClass = (speedup) => {
	    if (speedup == null) return '';
	    if (speedup >= 3) return 'text-success';
	    if (speedup >= 1.5) return '';
	    return speedup >= 1 ? 'text-muted' : 'text-error';
	  };

	  // Two-row grouped header: row1 = group labels (CPU, Chromium, …), row2 = metric sub-labels
	  // CPU gets colspan = METRICS.length, each GPU browser gets colspan = METRICS.length * 2 (value + speedup per metric)
	  const gpuColspan = METRICS.length * 2;
	  let html = '<div class="table-card"><div class="results-wrapper"><table class="results-table"><thead>';

	  // Row 1: group headers
	  html += '<tr>';
	  html += '<th rowspan="2" class="th-group-border">Model</th><th rowspan="2" class="th-group-border">Quant</th>';
	  html += `<th colspan="${METRICS.length}" class="th-group th-group-border">CPU</th>`;
	  for (const b of gpuBrowsers) {
	    // String() keeps a row without a browser from throwing on charAt.
	    const name = String(b);
	    html += `<th colspan="${gpuColspan}" class="th-group th-group-border">${escapeHtml(name.charAt(0).toUpperCase() + name.slice(1))}</th>`;
	  }
	  html += '</tr>';

	  // Row 2: metric sub-headers
	  html += '<tr>';
	  for (const m of METRICS) {
	    html += `<th class="th-sub">${m.label}</th>`;
	  }
	  for (let i = 0; i < gpuBrowsers.length; i += 1) {
	    for (const m of METRICS) {
	      html += `<th class="th-sub">${m.label}</th><th class="th-sub">Speedup</th>`;
	    }
	  }
	  html += '</tr></thead><tbody>';

	  for (const key of keys) {
	    const [model, variant] = key.split('::');
	    const cpuItems = cpuByModelVariant[key] || [];
	    const gpuByBrowser = groupBy(gpuByModelVariant[key] || [], 'browser');

	    // CPU averages do not depend on the browser, so compute them once per
	    // row and reuse them for every speedup ratio below.
	    const cpuAverages = METRICS.map(m => avgBy(cpuItems, m.cpuField));

	    html += '<tr>';
	    html += `<td>${escapeHtml(model)}</td>`;
	    html += `<td><span class="mono">${escapeHtml(variant)}</span></td>`;

	    // CPU columns
	    for (const cpuVal of cpuAverages) {
	      html += `<td><span class="mono">${formatTokS(cpuVal)}</span></td>`;
	    }

	    // GPU columns per browser
	    for (const b of gpuBrowsers) {
	      const gpuItems = gpuByBrowser[b] || [];
	      METRICS.forEach((m, idx) => {
	        const cpuVal = cpuAverages[idx];
	        const gpuVal = avgBy(gpuItems, m.gpuField);
	        // A missing or zero average on either side leaves no meaningful ratio.
	        const speedup = cpuVal && gpuVal ? gpuVal / cpuVal : null;
	        html += `<td><span class="mono">${formatTokS(gpuVal)}</span></td>`;
	        html += `<td><span class="mono ${speedupClass(speedup)}">${speedup != null ? `${speedup.toFixed(2)}\u00d7` : '\u2014'}</span></td>`;
	      });
	    }

	    html += '</tr>';
	  }

	  html += '</tbody></table></div></div>';
	  container.innerHTML = html;
	}