/*
 * Leaderboard rendering helpers. Every exported function looks up its
 * container element by id, returns without doing anything when the element
 * is missing, and otherwise assigns a generated string to innerHTML.
 *
 * NOTE(review): in this copy of the file the HTML markup inside the string
 * and template literals appears to have been stripped: several appends are
 * empty, some single-quoted literals span multiple lines, and table
 * header/cell text sits after the closing literal. All code below is left
 * untouched; restore the literals from the canonical file before shipping.
 *
 * Module state
 *   lastResults  Array most recently passed to renderResultsTable();
 *                handleSort() re-renders from it.
 *   sortState    { key, dir } of the active sort. While key is null the rows
 *                are rendered in the order they were given.
 *   NUM_KEYS     Column keys that sortResults() compares numerically; a
 *                first click on one of these columns sorts descending.
 *
 * sortResults(results, key, dir)
 *   Returns a sorted copy; the input array is not mutated. Rows whose value
 *   is null/undefined are placed last for both 'asc' and 'desc', because the
 *   null checks return before dir is applied (the inline comment calls this
 *   "lowest"; it is really "always last"). 'submittedBy' is compared by its
 *   .name, 'webgpuAvailable' puts truthy values first in ascending order,
 *   and every other non-numeric key uses String(...).localeCompare().
 *
 * handleSort(key)
 *   Flips asc/desc when key is already the active column; otherwise selects
 *   it (desc for NUM_KEYS members, asc for the rest). Always re-renders
 *   lastResults afterwards.
 *
 * renderResultsTable(results)            container id: results-table
 *   Stores results in lastResults. An empty array renders the empty-state
 *   message. Otherwise one row is produced per result by walking the cols
 *   list, and click plus Enter/Space handlers on th[data-key] are wired to
 *   handleSort(). The depth-loaded column label is d<N> when every row that
 *   has n_depth_dN shares one value, else the generic dN with a tooltip
 *   listing the depths present.
 *   NOTE(review): r.consistency_first_disagree >= 0 is also true for null
 *   (null >= 0), which would render "diverge@null" - confirm the field is
 *   never null, or guard it.
 *   NOTE(review): the error cell interpolates cat without escapeHtml();
 *   confirm categorizeError() only returns fixed, markup-free labels.
 *
 * renderErrorTable(results)              container id: error-table
 *   Takes rows with status !== 'done' and a truthy error, groups them by
 *   categorizeError(error), orders categories by descending count, and lists
 *   the distinct variants and browsers seen in each category.
 *   NOTE(review): cat, variants and browsers are interpolated without
 *   escapeHtml(), unlike comparable values in the results table.
 *
 * renderMachineInfo(machines)            container id: machine-info
 *   Emits one card per machine followed by the "Add your machine" card (only
 *   that card when the list is empty). Failed count is resultCount minus
 *   passCount. The Hardware row is shown only when userMachineName is set
 *   and differs from cpus.
 *   NOTE(review): m.platform, m.arch and m.totalMemoryGB are interpolated
 *   without escapeHtml().
 *
 * escapeHtml(str)
 *   Escapes by assigning str to textContent of a detached div and reading
 *   innerHTML back.
 *   NOTE(review): that serialization does not escape quote characters, yet
 *   the result is placed inside title="..." attributes in
 *   renderResultsTable - escape quotes too, or verify they cannot occur.
 *
 * renderSubmitterCell(sb)
 *   Returns an em dash when sb?.name is falsy; otherwise an optional avatar
 *   fragment (only when sb.avatarUrl is set) followed by "@" + escaped name.
 *
 * renderCpuGpuTable(results)             container id: cpu-gpu-table
 *   Uses only rows with status === 'done'. CPU rows come from
 *   expandCpuRows(); GPU rows are those with nGpuLayers !== 0. Both sides
 *   are grouped by "model::variant", only keys present on both sides are
 *   kept, and keys are sorted by model name then quantSortKey(variant).
 *   Speedup is avg GPU / avg CPU and is null when either average is falsy
 *   (missing or 0).
 */
import { formatTokS, formatMs, categorizeError, groupBy, quantSortKey, avgBy } from './utils.js'; import { expandCpuRows } from './data.js'; let lastResults = []; let sortState = { key: null, dir: 'asc' }; const NUM_KEYS = new Set([ 'sizeMB', 'decode_tok_s', 'prefill_tok_s', 'decode_tok_s_d0', 'decode_tok_s_dN', 'prefill_tok_s_d0', 'prefill_tok_s_dN', 'cpu_baseline_decode_tok_s', 'cpu_baseline_prefill_tok_s', 'n_eval', 't_eval_ms', 'n_p_eval', 't_p_eval_ms', 'wallTimeMs', 'consistency_rate', ]); function sortResults(results, key, dir) { const isNum = NUM_KEYS.has(key); return [...results].sort((a, b) => { let va = a[key], vb = b[key]; // Submitter is an object — collapse to its name for comparison and let // the null-handling below treat unattributed rows as the lowest. if (key === 'submittedBy') { va = va?.name || null; vb = vb?.name || null; } if (va == null && vb == null) return 0; if (va == null) return 1; if (vb == null) return -1; let cmp; if (isNum) { cmp = Number(va) - Number(vb); } else if (key === 'webgpuAvailable') { cmp = (va === vb) ? 0 : va ? -1 : 1; } else { cmp = String(va).localeCompare(String(vb)); } return dir === 'desc' ? -cmp : cmp; }); } function handleSort(key) { if (sortState.key === key) { sortState.dir = sortState.dir === 'asc' ? 'desc' : 'asc'; } else { sortState.key = key; // Default to descending for performance metrics sortState.dir = NUM_KEYS.has(key) ? 'desc' : 'asc'; } renderResultsTable(lastResults); } export function renderResultsTable(results) { lastResults = results; const container = document.getElementById('results-table'); if (!container) return; if (results.length === 0) { container.innerHTML = `

No results match the current filters.

Try resetting filters above, or run the benchmark on your own machine to contribute data.

`; return; } const sorted = sortState.key ? sortResults(results, sortState.key, sortState.dir) : results; // Resolve the depth-loaded column label from the data: when every visible // row shares one N (the typical leaderboard case), show that concrete // value (e.g., "@ d2048"). When rows mix depths (someone experimenting // with d=4096 vs d=2048), fall back to the abstract "@ dN" with a tooltip // listing the values present so the user knows the column is mixed. const depthNValues = [...new Set(results.map(r => r.n_depth_dN).filter(v => v != null))] .sort((a, b) => a - b); const dnLabel = depthNValues.length === 1 ? `d${depthNValues[0]}` : 'dN'; const dnHeaderTitle = depthNValues.length > 1 ? `Mixed depths in view: ${depthNValues.map(v => `d${v}`).join(', ')}` : ''; /* priority: 1 = always show; 2 = hide below 640px; 3 = hide below 900px */ const cols = [ { key: 'machineSlug', label: 'Machine', priority: 1 }, { key: 'model', label: 'Model', priority: 1 }, { key: 'variant', label: 'Quant', priority: 1 }, { key: 'sizeMB', label: 'Size (MB)', priority: 3 }, { key: 'browser', label: 'Browser', priority: 2 }, { key: 'submittedBy', label: 'Submitter', priority: 2 }, { key: 'status', label: 'Status', priority: 1 }, { key: 'buildType', label: 'Build', priority: 3 }, { key: 'webgpuAvailable', label: 'WebGPU', priority: 3 }, // tg / pp split into cold-cache (d=0) and depth-loaded (d=N) columns // so Run Study's depth-pair shows as side-by-side numbers instead of // overwriting one with the other. Pre-study and plain-Run records // populate only the side they actually measured; the other reads `—`. 
{ key: 'decode_tok_s_d0', label: 'tg @ d0', priority: 1 }, { key: 'decode_tok_s_dN', label: `tg @ ${dnLabel}`, priority: 1, headerTitle: dnHeaderTitle }, { key: 'prefill_tok_s_d0', label: 'pp @ d0', priority: 3 }, { key: 'prefill_tok_s_dN', label: `pp @ ${dnLabel}`, priority: 3, headerTitle: dnHeaderTitle }, { key: 'cpu_baseline_decode_tok_s', label: 'CPU tg tok/s', priority: 2 }, { key: 'cpu_baseline_prefill_tok_s', label: 'CPU pp tok/s', priority: 3 }, { key: 'n_eval', label: 'n_eval', priority: 3 }, { key: 't_eval_ms', label: 't_eval (ms)', priority: 3 }, { key: 'n_p_eval', label: 'n_p_eval', priority: 3 }, { key: 't_p_eval_ms', label: 't_p_eval (ms)', priority: 3 }, { key: 'wallTimeMs', label: 'Wall (s)', priority: 3 }, { key: 'consistency_rate', label: 'CPU Match', priority: 2 }, { key: 'llamaCppCommit', label: 'llama.cpp', priority: 3 }, { key: 'error', label: 'Error', priority: 2 }, ]; let html = ''; cols.forEach((col, i) => { const isActive = sortState.key === col.key; const ariaSort = isActive ? (sortState.dir === 'asc' ? 'ascending' : 'descending') : 'none'; const arrowChar = isActive ? (sortState.dir === 'asc' ? '\u2191' : '\u2193') : '\u2195'; const pin = i === 0 ? ' col-pin col-pin-1' : (i === 1 ? ' col-pin col-pin-2' : ''); const prio = col.priority >= 3 ? ' col-p3' : (col.priority === 2 ? ' col-p2' : ''); const cls = `sortable${isActive ? ' sorted' : ''}${pin}${prio}`; const titleAttr = col.headerTitle ? ` title="${escapeHtml(col.headerTitle)}"` : ''; html += ``; }); html += ''; for (const r of sorted) { const rowClass = r.status === 'done' ? 'row-pass' : 'row-fail'; html += ``; cols.forEach((col, i) => { const pin = i === 0 ? 'col-pin col-pin-1' : (i === 1 ? 'col-pin col-pin-2' : ''); const prio = col.priority >= 3 ? 'col-p3' : (col.priority === 2 ? 'col-p2' : ''); const parts = [pin, prio].filter(Boolean); const cls = parts.length ? ` class="${parts.join(' ')}"` : ''; html += ``; switch (col.key) { case 'status': html += r.status === 'done' ? 
'PASS' : 'FAIL'; break; case 'webgpuAvailable': html += r.webgpuAvailable ? 'Yes' : 'No'; break; case 'decode_tok_s': case 'prefill_tok_s': case 'decode_tok_s_d0': case 'decode_tok_s_dN': case 'prefill_tok_s_d0': case 'prefill_tok_s_dN': case 'cpu_baseline_decode_tok_s': case 'cpu_baseline_prefill_tok_s': { // llama-bench style "avg \u00b1 stddev" with the pp{N} / tg{N} test // label as a tooltip when the new schema is present. Older records // without stddev fall back to the bare avg from formatTokS. // Depth-suffixed keys read from the matching `_d0` / `_dN` // stddev + test_name fields produced by mergeDepthPairs. let stddev = null; let testName = null; switch (col.key) { case 'decode_tok_s': stddev = r.decode_stddev_ts; testName = r.tg_test_name; break; case 'prefill_tok_s': stddev = r.prefill_stddev_ts; testName = r.pp_test_name; break; case 'decode_tok_s_d0': stddev = r.decode_stddev_ts_d0; testName = r.tg_test_name_d0; break; case 'decode_tok_s_dN': stddev = r.decode_stddev_ts_dN; testName = r.tg_test_name_dN; break; case 'prefill_tok_s_d0': stddev = r.prefill_stddev_ts_d0; testName = r.pp_test_name_d0; break; case 'prefill_tok_s_dN': stddev = r.prefill_stddev_ts_dN; testName = r.pp_test_name_dN; break; } const avg = r[col.key]; let cell; if (avg != null && stddev != null) { cell = `${formatTokS(avg)} \u00b1 ${formatTokS(stddev)}`; } else { cell = formatTokS(avg); } const titleAttr = testName ? ` title="${escapeHtml(testName)}"` : ''; html += `${cell}`; break; } case 't_eval_ms': case 't_p_eval_ms': html += `${formatMs(r[col.key])}`; break; case 'wallTimeMs': html += `${r.wallTimeMs != null ? (r.wallTimeMs / 1000).toFixed(1) : '\u2014'}`; break; case 'consistency_rate': if (r.consistency_rate != null) { const pct = (r.consistency_rate * 100).toFixed(1); const cls = r.consistency_rate >= 0.95 ? 'text-success' : r.consistency_rate >= 0.90 ? '' : 'text-error'; const diverge = r.consistency_first_disagree >= 0 ? 
` (diverge@${r.consistency_first_disagree})` : ''; html += `${pct}%${diverge}`; } else { html += '\u2014'; } break; case 'submittedBy': html += renderSubmitterCell(r.submittedBy); break; case 'machineSlug': { const name = r.userMachineName && r.userMachineName !== r.machineSlug ? r.userMachineName : null; if (name) { html += `${escapeHtml(name)}

${escapeHtml(r.machineSlug)}

`; } else { html += escapeHtml(r.machineSlug); } break; } case 'llamaCppCommit': if (r.llamaCppCommit) { // Prefer the human-readable git describe when present (e.g. // "b8708-12-gd12cc3d1c"); fall back to a short commit hash. const label = r.llamaCppDescribe || r.llamaCppCommit.slice(0, 10); html += `${escapeHtml(label)}`; } else { html += '\u2014'; } break; case 'error': if (r.error) { const cat = categorizeError(r.error); const short = r.error.length > 60 ? r.error.slice(0, 60) + '\u2026' : r.error; html += `${cat}${escapeHtml(short)}`; } else { html += '\u2014'; } break; case 'sizeMB': case 'n_eval': case 'n_p_eval': html += `${r[col.key] != null ? r[col.key] : '\u2014'}`; break; default: html += escapeHtml(String(r[col.key] ?? '\u2014')); } html += ''; }); html += ''; } html += '

${col.label}

'; container.innerHTML = html; // Wire sort click + keyboard handlers container.querySelectorAll('th[data-key]').forEach(th => { th.addEventListener('click', () => handleSort(th.dataset.key)); th.addEventListener('keydown', (e) => { if (e.key === 'Enter' || e.key === ' ') { e.preventDefault(); handleSort(th.dataset.key); } }); }); } export function renderErrorTable(results) { const container = document.getElementById('error-table'); if (!container) return; const errors = results.filter(r => r.status !== 'done' && r.error); if (errors.length === 0) { container.innerHTML = `

No errors in the current filter.

Either every benchmark passed, or no results are in scope — try widening the filter.

`; return; } const grouped = groupBy(errors, r => categorizeError(r.error)); let html = '

'; for (const [cat, items] of Object.entries(grouped).sort((a, b) => b[1].length - a[1].length)) { const variants = [...new Set(items.map(i => i.variant))].join(', '); const browsers = [...new Set(items.map(i => i.browser))].join(', '); html += ``; } html += '

Category	Count	Variants	Browsers
${cat}	${items.length}	${variants}	${browsers}

'; container.innerHTML = html; } export function renderMachineInfo(machines) { const container = document.getElementById('machine-info'); if (!container) return; const addYourMachineCard = `

Add your machine

Run benchmarks directly in your browser. Results post to the leaderboard.

npm run bench:quick Open Run page `; if (machines.length === 0) { container.innerHTML = `

${addYourMachineCard}

`; return; } let html = '

'; for (const m of machines) { const failCount = m.resultCount - m.passCount; const title = m.userMachineName || m.cpus; const showHardwareRow = m.userMachineName && m.userMachineName !== m.cpus; html += `

${escapeHtml(title)}

${showHardwareRow ? `

Hardware${escapeHtml(m.cpus)}

` : ''}

Platform${m.platform}

Arch${m.arch}

RAM${m.totalMemoryGB} GB

Results${m.resultCount}

Passed${m.passCount}

Failed${failCount}

${m.llamaCppCommit ? `

llama.cpp${escapeHtml(m.llamaCppDescribe || m.llamaCppCommit.slice(0, 10))}

` : ''}

`; } html += addYourMachineCard; html += '

'; container.innerHTML = html; } function escapeHtml(str) { const div = document.createElement('div'); div.textContent = str; return div.innerHTML; } /* Render a single submitter's avatar + @username link for the Results table column. Falls back to an em-dash if attribution is unknown. */ function renderSubmitterCell(sb) { if (!sb?.name) return '\u2014'; const avatar = sb.avatarUrl ? `

` : ''; return `${avatar}@${escapeHtml(sb.name)}`; } export function renderCpuGpuTable(results) { const container = document.getElementById('cpu-gpu-table'); if (!container) return; // CPU is pinned to d=0 by the runner, so the comparison must read GPU's // d=0 number for an apples-to-apples ratio. Plain-Run records that only // measured d=N have null `_d0` and silently drop out of the comparison // — that's the right call: without a cold-cache GPU sample the speedup // ratio would be measuring different workloads. const METRICS = [ { cpuField: 'decode_tok_s', gpuField: 'decode_tok_s_d0', label: 'Decode tok/s @ d0' }, { cpuField: 'prefill_tok_s', gpuField: 'prefill_tok_s_d0', label: 'Prefill tok/s @ d0' }, ]; const passed = results.filter(r => r.status === 'done'); // CPU side aggregates standalone CPU runs (nGpuLayers === 0) plus // synthetic rows derived from the cpu_baseline_* fields on browser-flow // GPU records. See expandCpuRows() in data.js. const cpuResults = expandCpuRows(passed); const gpuResults = passed.filter(r => r.nGpuLayers !== 0); if (cpuResults.length === 0 || gpuResults.length === 0) { container.innerHTML = '

Select "All Backends" to see CPU vs GPU comparison.

'; return; } const gpuBrowsers = [...new Set(gpuResults.map(r => r.browser))].sort(); const cpuByModelVariant = groupBy(cpuResults, r => `${r.model}::${r.variant}`); const gpuByModelVariant = groupBy(gpuResults, r => `${r.model}::${r.variant}`); const keys = [...new Set([...Object.keys(cpuByModelVariant), ...Object.keys(gpuByModelVariant)])] .filter(k => cpuByModelVariant[k] && gpuByModelVariant[k]); if (keys.length === 0) { container.innerHTML = '

No matching model+variant pairs between CPU and GPU results.

'; return; } keys.sort((a, b) => { const [aModel, aVar] = a.split('::'); const [bModel, bVar] = b.split('::'); if (aModel !== bModel) return aModel.localeCompare(bModel); return quantSortKey(aVar) - quantSortKey(bVar); }); // Two-row grouped header: row1 = group labels (CPU, Chromium, …), row2 = metric sub-labels // CPU gets colspan = METRICS.length, each GPU browser gets colspan = METRICS.length * 2 (value + speedup per metric) const gpuColspan = METRICS.length * 2; // CPU side reads cpuField; GPU side reads gpuField (_d0 for apples-to- // apples). Both labels match the metric's display label. let html = '

'; // Row 1: group headers html += ''; html += ''; html += ``; for (const b of gpuBrowsers) { html += ``; } html += ''; // Row 2: metric sub-headers html += ''; for (const m of METRICS) { html += ``; } for (const b of gpuBrowsers) { for (const m of METRICS) { html += ``; } } html += ''; for (const key of keys) { const [model, variant] = key.split('::'); const cpuItems = cpuByModelVariant[key] || []; const gpuByBrowser = groupBy(gpuByModelVariant[key] || [], 'browser'); html += ''; html += ``; html += ``; // CPU columns for (const m of METRICS) { const val = avgBy(cpuItems, m.cpuField); html += ``; } // GPU columns per browser for (const b of gpuBrowsers) { const gpuItems = gpuByBrowser[b] || []; for (const m of METRICS) { const cpuVal = avgBy(cpuItems, m.cpuField); const gpuVal = avgBy(gpuItems, m.gpuField); const speedup = cpuVal && gpuVal ? gpuVal / cpuVal : null; const cls = speedup == null ? '' : speedup >= 3 ? 'text-success' : speedup >= 1.5 ? '' : speedup >= 1 ? 'text-muted' : 'text-error'; html += ``; html += ``; } } html += ''; } html += '

Model	Quant	CPU	${escapeHtml(b.charAt(0).toUpperCase() + b.slice(1))}
Model	Quant	${m.label}	${m.label}	Speedup
${escapeHtml(model)}	${escapeHtml(variant)}	${formatTokS(val)}	${formatTokS(gpuVal)}	${speedup != null ? speedup.toFixed(2) + '\u00d7' : '\u2014'}

'; container.innerHTML = html; }