webgpu-bench / js /run /controller.js
GitHub Actions
sync from abhijitramesh/webgpu-bench@e5c33f18bc
e261d14
// Run-tab controller. Mounts into the existing #run-section subtree and
// drives the one-click benchmark UI using the dashboard's design-system
// classes. Detects `surface` (localhost / space / pages) to gate the
// server save checkbox and the HF hub sign-in/submit row.
import { ggufSource, inventoryOpfs, purgeOpfs, OPFS_ROOT_NAME } from './source.js';
import { getDeviceBudgetMB, variantFits, describeDevice, isMobileDevice } from './device.js';
import {
resumeHFSession, beginHFSignIn, signOutHF, submitResultsToDataset,
HF_OAUTH_PENDING_KEY,
} from './hub.js';
import { isHubConfigured, HF_DATASET_REPO, CONSISTENCY_PROMPT } from './config.js';
// localStorage keys: persisted run intent (crash detection / resume) and
// the "Your machine" form values.
const RUN_INTENT_STORAGE_KEY = 'webgpu-bench:runIntent';
const USER_REPORTED_STORAGE_KEY = 'webgpu-bench:userReported';
// NOTE(review): presumably the age threshold beyond which a persisted run
// intent is treated as a stale/crashed session — usage is outside this chunk.
const CRASH_STALE_MS = 10_000;
// Worker inference defaults (n_predict / n_ctx / n_gpu_layers); consumed
// by the run pipeline outside this view.
const DEFAULT_N_PREDICT = 128;
const DEFAULT_N_CTX = 2048;
const DEFAULT_N_GPU_LAYERS = 999;
// Pause between back-to-back variant runs so the GPU process can release
// the previous worker's buffers.
const YIELD_BETWEEN_RUNS_MS = 500;
// iOS Safari needs much longer to actually release Metal/WebGPU buffer
// allocations after worker.terminate() — back-to-back runs at the desktop
// 500 ms cadence trip Jetsam and Safari reloads the tab. 4 s gives the
// GPU process room to drain. Android Chromium is more forgiving but
// shares the same code path here.
const MOBILE_YIELD_BETWEEN_RUNS_MS = 4_000;
// llama-bench defaults: -p 512 -n 128 -r 5
const DEFAULT_N_PROMPT = 512;
const DEFAULT_N_GEN = 128;
const DEFAULT_N_DEPTH = 2048;
const DEFAULT_ITERATIONS = 5;
// A variant's result must record at least this many successful iterations
// before it is eligible for dataset submission (see submittableResults).
const MIN_ITERATIONS_FOR_SUBMIT = 5;
// Single mutable module-level state bag for the Run tab. Every render /
// wire function below reads and writes this directly.
const state = {
  surface: 'pages', // 'localhost' | 'space' | 'pages' | 'file'
  source: null, // ggufSource() — single OPFS-backed source
  models: null, // parsed models.json
  budget: null, // { budgetMB, memGB, quotaMB, probedMB, isMobile, source }
  device: null, // describeDevice() output
  cacheStatus: {}, // { 'repo/file': { cachedBytes } }
  variants: [], // flat variant rows with metadata (see flattenVariants)
  running: false, // true while a benchmark run is in flight
  aborted: false, // set by the Abort button; checked by the run loop
  results: [], // result records from the current session
  hfSession: null, // { accessToken, expiresAt, userName } when signed in
  // Benchmark parameters, editable via the perf inputs (wirePerfInputs).
  iterations: DEFAULT_ITERATIONS,
  nPrompt: DEFAULT_N_PROMPT,
  nGen: DEFAULT_N_GEN,
  nDepth: DEFAULT_N_DEPTH,
  // True while a Run Study is in flight (or a restored study session).
  // Drives the progress table layout: study mode renders pp/tg as
  // d=0 / d=N column pairs so both passes' numbers stay visible
  // instead of the d=N pass overwriting d=0.
  studyMode: false,
  // User-controlled phase toggles. Both default OFF — a Run (or Run Study)
  // does GPU perf only unless the user explicitly opts in to the CPU
  // baseline. The CPU pass is the slowest step on most devices and most
  // submissions don't need its consistency / comparison output, so making
  // it opt-in keeps the default experience fast.
  runConsistency: false,
  runCpuPerf: false,
  mounted: false, // guards one-time DOM wiring on mount
  // Tracks variants the Run pipeline downloaded this session (as opposed to
  // the standalone Download button or pre-existing cache). Only these are
  // candidates for post-run eviction when the user has opted in.
  sessionDownloads: new Set(),
  // Handle to the currently-running worker, so Abort can terminate it.
  currentWorker: null,
  // Set of fns that abort an in-flight async op (worker terminate, fetch
  // signal abort). Multiple concurrent ops register here — Run study has a
  // worker running variant i AND a prefetch downloading variant i+1, both
  // of which need to be cancellable. Abort handler iterates the whole set.
  abortHandlers: new Set(),
  // Build metadata fetched from `build/<variant>/build-info.json`. Stamped
  // onto every result record so we can compare performance across llama.cpp
  // versions. JSPI and Asyncify variants are built from the same source
  // tree, so a single fetch is enough; both files would be identical.
  buildInfo: null,
  // User-reported machine identity (Machine Name / GPU Name / Browser /
  // OS). Filled by the "Your machine" form on the Run page, persisted to
  // localStorage between visits, and stamped onto every result record so
  // the leaderboard can attribute submissions even when UA / WebGPU
  // adapter info is missing or wrong. machineName/browser/os are required
  // before submission; gpuName is optional.
  userReported: { machineName: '', gpuName: '', browser: '', os: '' },
};
// Fields of state.userReported that must be non-empty before submission
// (gpuName is deliberately absent — it is optional).
const USER_REPORTED_REQUIRED = ['machineName', 'browser', 'os'];
function loadUserReported() {
  // Restore the persisted "Your machine" form values from localStorage.
  // Returns the stored object, or null when storage is empty, disabled,
  // or holds something that is not a JSON object.
  let stored = null;
  try {
    stored = localStorage.getItem(USER_REPORTED_STORAGE_KEY);
  } catch { /* storage disabled */ }
  if (!stored) return null;
  try {
    const value = JSON.parse(stored);
    if (value !== null && typeof value === 'object') return value;
  } catch { /* corrupt storage */ }
  return null;
}
function saveUserReported() {
  // Persist the "Your machine" form to localStorage. Best-effort: quota
  // errors and disabled storage are swallowed on purpose.
  try {
    const serialized = JSON.stringify(state.userReported);
    localStorage.setItem(USER_REPORTED_STORAGE_KEY, serialized);
  } catch {
    /* quota exceeded or storage disabled — drop the write */
  }
}
// Register an abort callback for an in-flight async op (worker terminate,
// fetch signal abort, etc.). Returns an unregister fn the caller MUST
// invoke when the op settles, so stale handlers don't accumulate across
// runs. The Abort button walks state.abortHandlers and calls every fn.
function registerAbort(fn) {
  state.abortHandlers.add(fn);
  const unregister = () => state.abortHandlers.delete(fn);
  return unregister;
}
async function loadBuildInfo() {
  // Fetch build metadata for the bundled llama.cpp. The JSPI build
  // (Chrome path) is tried first; the Asyncify build (Safari/Firefox
  // path) is an identical fallback — both come from the same source
  // tree, so either file carries the same commit/describe.
  const urls = ['./build/jspi/build-info.json', './build/asyncify/build-info.json'];
  for (const url of urls) {
    let info = null;
    try {
      const res = await fetch(url, { cache: 'no-cache' });
      if (res.ok) info = await res.json();
    } catch { /* unreachable or unparsable — try the next variant */ }
    if (info && (info.llamaCppCommit || info.llamaCppDescribe)) return info;
  }
  return null;
}
// ──────────────── surface detection ────────────────
async function detectSurface() {
  // Decide which hosting surface the page is on. Explicit ?mode= overrides
  // win, then hostname heuristics; 'pages' is the read-only catch-all.
  const mode = new URLSearchParams(location.search).get('mode');
  if (mode === 'local') return 'localhost';
  if (mode === 'hosted') return 'space';
  const host = location.hostname;
  if (/\.static\.hf\.space$/.test(host)) return 'space';
  if (host === 'localhost' || host === '127.0.0.1') {
    // Only claim 'localhost' when the Express backend answers the probe;
    // a bare static server on localhost behaves like the read-only preview.
    try {
      const probe = await fetch('/api/models', { method: 'HEAD' });
      if (probe.ok) return 'localhost';
    } catch { /* no backend */ }
  }
  if (location.protocol === 'file:') return 'file';
  // Fallback for any other hosted location (mirror, preview deploy, etc.).
  // Read-only: Submit hidden, no backend save.
  return 'pages';
}
function canSubmit() {
  // Results can be saved from local dev (Express backend) or from the HF
  // Space when a dataset repo is configured; never from read-only surfaces.
  if (state.surface === 'localhost') return true;
  return state.surface === 'space' && isHubConfigured();
}
// ──────────────── data loading ────────────────
async function loadModels() {
  // Resolve models.json across surfaces. The page lives at /site/run.html
  // locally and /run.html on the HF Space (flattened root), so the sibling
  // `./models.json` works in both; `/api/models` exists only when the
  // Express backend is running. First OK response wins; if every candidate
  // fails, the last failure is thrown.
  const urls = state.surface === 'localhost'
    ? ['/api/models', './models.json', '/models.json']
    : ['./models.json', '/models.json'];
  let failure = null;
  for (const url of urls) {
    try {
      const res = await fetch(url);
      if (res.ok) return await res.json();
      failure = new Error(`${url} β†’ ${res.status}`);
    } catch (err) {
      failure = err;
    }
  }
  throw failure || new Error('Could not load models.json');
}
async function loadCacheStatus() {
  // OPFS is the cache backend on every surface — ggufSource writes through
  // the same `opfsHandleForModel` path everywhere. An inventory failure
  // degrades to "nothing cached" rather than breaking the page.
  let inventory = {};
  try {
    inventory = await inventoryOpfs();
  } catch (err) {
    console.warn('OPFS inventory failed:', err.message);
  }
  return inventory;
}
// ──────────────── variant bookkeeping ────────────────
function flattenVariants(models) {
  // Turn the nested models.json shape into one flat row per variant,
  // stamped with family metadata so the UI never has to walk the tree.
  // Missing/non-numeric sizeMB normalizes to 0.
  const rows = [];
  const families = models.models || [];
  for (const family of families) {
    const variants = family.variants || [];
    for (const variant of variants) {
      const sizeMB = typeof variant.sizeMB === 'number' ? variant.sizeMB : 0;
      rows.push({
        modelName: family.name,
        repo: family.repo,
        quant: variant.quant,
        filename: variant.filename,
        sizeMB,
        warnings: computeWarnings(family.name, variant.quant),
      });
    }
  }
  return rows;
}
function getQuickVariantSet() {
  // Quant names making up the "quick" preset. models.json may override the
  // default trio via a non-empty `quickVariants` array.
  const configured = state.models?.quickVariants;
  if (Array.isArray(configured) && configured.length) return new Set(configured);
  return new Set(['Q2_K', 'Q4_K_M', 'Q8_0']);
}
function isQuickVariant(v) {
  // True when the variant's quant belongs to the quick preset.
  const quick = getQuickVariantSet();
  return quick.has(v.quant);
}
function computeWarnings(modelName, quant) {
  // Per-variant compatibility warnings, shown as badges in the model list.
  // SSM_SCAN and Q1_0 are both supported in the bundled llama.cpp
  // (ggml-webgpu.cpp): granite-4 ran cleanly in the apr-30 run and Q1_0 is
  // wired into the fast-path dequant table, so there is nothing to flag
  // today. Kept as a hook for future gaps.
  const warnings = [];
  return warnings;
}
function cacheKey(v) { return `${v.repo}/${v.filename}`; }
function variantFitsDevice(v) {
  // Delegate to device.js's variantFits with both budgets so the predicate
  // can check (a) the model fits in GPU memory plus a small overhead, and
  // (b) the WASM heap can hold the working set. See device.js for the
  // rationale behind the two-budget signature.
  const budgets = {
    gpuBudgetMB: state.budget.gpuBudgetMB,
    heapBudgetMB: state.budget.heapBudgetMB,
  };
  return variantFits(v.sizeMB, budgets);
}
function isCached(v) {
  // A variant counts as cached only when OPFS reports actual bytes on
  // disk for its cache key (a zero-byte placeholder does not count).
  const entry = state.cacheStatus[cacheKey(v)];
  if (!entry) return false;
  return entry.cachedBytes > 0;
}
function groupByFamily(variants) {
  // Bucket flat variant rows by family (modelName). Map iteration order
  // preserves first-seen family order, and each bucket preserves the
  // input's per-family variant order.
  const byFamily = new Map();
  for (const variant of variants) {
    const bucket = byFamily.get(variant.modelName);
    if (bucket) bucket.push(variant);
    else byFamily.set(variant.modelName, [variant]);
  }
  return byFamily;
}
// ──────────────── rendering ────────────────
function $(id) {
  // Tiny getElementById shorthand used throughout the render helpers.
  return document.getElementById(id);
}
/* Pretty browser name + version. Prefers UA Client Hints (clean
   { brand, version } pairs) over UA-string regex parsing. The brand list
   is ordered Chromium-favoured, so the most-specific brand the user
   actually has wins (Edge before Chrome before Chromium). */
function formatBrowser(d) {
  const priority = ['Microsoft Edge', 'Edg', 'Opera', 'Brave', 'Arc', 'Vivaldi',
    'Google Chrome', 'Chromium'];
  const brands = d.uaBrands || [];
  for (const wanted of priority) {
    for (const b of brands) {
      if (b.brand === wanted) return `${b.brand} ${b.version}`;
    }
  }
  if (brands.length > 0) {
    const first = brands[0];
    return `${first.brand} ${first.version}`;
  }
  // Non-Chromium fallback: regex on userAgent. Brand and version are
  // captured separately so the slash never shows in the output.
  const hit = (d.userAgent || '').match(/(Firefox|FxiOS|Edg|CriOS|Chrome|Version)\/([\d.]+)/);
  if (!hit) return 'browser';
  let brand = hit[1];
  if (brand === 'Version') brand = 'Safari';
  else if (brand === 'CriOS') brand = 'Chrome iOS';
  else if (brand === 'FxiOS') brand = 'Firefox iOS';
  return `${brand} ${hit[2]}`;
}
/* Pretty OS + architecture. `navigator.platform` is unreliable on Apple
   Silicon (it returns "MacIntel" for back-compat); prefer UA-CH values
   and fall back to the WebGPU vendor as a strong arm64 signal on Macs. */
function formatPlatform(d) {
  const ua = d.userAgent || '';
  const hint = (d.uaPlatform || d.platform || '').toLowerCase();
  // OS: UA-CH / platform hint first, then UA-string sniffing. iOS is
  // checked after mac so "MacIntel" iPads still report per the hint order.
  let os;
  if (hint.includes('mac') || /Mac/.test(ua)) {
    os = 'macOS';
  } else if (hint.includes('win') || /Win/.test(ua)) {
    os = 'Windows';
  } else if (/iPhone|iPad|iPod/.test(ua) || hint.includes('ios')) {
    os = 'iOS';
  } else if (/Android/.test(ua) || hint.includes('android')) {
    os = 'Android';
  } else if (hint.includes('linux') || /Linux/.test(ua)) {
    os = 'Linux';
  } else {
    os = d.uaPlatform || d.platform || 'unknown';
  }
  // Architecture: UA-CH first, then platform-specific heuristics.
  let arch = '';
  if (d.uaArch === 'arm') arch = 'arm64';
  else if (d.uaArch === 'x86') arch = 'x86_64';
  else if (d.uaArch) arch = d.uaArch;
  else if (os === 'macOS' && d.gpu?.vendor === 'apple') arch = 'arm64';
  else if (os === 'iOS') arch = 'arm64';
  else if (/arm|aarch/i.test(ua)) arch = 'arm64';
  else if (/x86_64|Win64;|x64/i.test(ua)) arch = 'x86_64';
  if (!arch) return os;
  return `${os} Β· ${arch}`;
}
function renderHeader() {
  // Paint the device/budget summary header and gate surface-specific UI
  // (hub sign-in row, server-save row, banners, purge button). Assumes
  // state.device and state.budget are populated.
  const d = state.device;
  const b = state.budget;
  const badge = $('run-mode-badge');
  if (badge) {
    const labels = {
      localhost: 'Local dev',
      space: 'Hosted Β· Hugging Face',
      pages: 'Read-only preview',
      file: 'Local file',
    };
    badge.textContent = labels[state.surface] || state.surface;
    badge.className = `badge run-mode-badge run-mode-${state.surface}`;
  }
  const browserStr = formatBrowser(d);
  const platformStr = formatPlatform(d);
  const gpuStr = d.gpu
    ? [d.gpu.vendor, d.gpu.architecture, d.gpu.device].filter(Boolean).join(' ').trim()
    : '';
  $('device-browser').textContent = browserStr;
  $('device-platform').textContent = platformStr;
  $('device-gpu').textContent = gpuStr || (d.webgpu ? 'WebGPU (no info)' : 'no WebGPU');
  const memStr = b.memGB !== null ? `${b.memGB} GB` : 'β€”';
  $('device-memory').textContent = memStr;
  // budgetMB is the GPU-memory budget (per device.js _computeBudget): with
  // OPFS streaming the model lives in WebGPU buffers, not the WASM heap.
  // The heap budget is surfaced separately in the source line so a curious
  // reader can see both probes' results.
  const budgetGB = (b.budgetMB / 1024).toFixed(1);
  const heapGB = (b.heapBudgetMB / 1024).toFixed(1);
  $('device-budget').textContent = `${budgetGB} GB`;
  $('device-budget-source').textContent = `GPU memory Β· WASM heap: ${heapGB} GB`;
  const webgpuCell = $('device-webgpu');
  if (webgpuCell) {
    webgpuCell.textContent = d.webgpu ? 'yes' : 'no';
    webgpuCell.classList.toggle('text-success', d.webgpu);
    webgpuCell.classList.toggle('text-error', !d.webgpu);
  }
  const llamaCell = $('device-llamacpp');
  if (llamaCell) {
    const bi = state.buildInfo;
    if (bi?.llamaCppCommit) {
      // Link the bundled llama.cpp commit; prefer the human-readable
      // `describe` label, fall back to a short commit hash.
      const label = bi.llamaCppDescribe || bi.llamaCppCommit.slice(0, 10);
      llamaCell.innerHTML = '';
      const a = document.createElement('a');
      a.href = `https://github.com/ggml-org/llama.cpp/commit/${bi.llamaCppCommit}`;
      a.target = '_blank';
      a.rel = 'noopener';
      a.className = 'mono';
      a.textContent = label;
      llamaCell.appendChild(a);
    } else {
      llamaCell.textContent = 'β€”';
    }
  }
  // Surface-dependent UI gating.
  const hubRow = $('hub-row');
  if (hubRow) hubRow.hidden = state.surface !== 'space';
  const saveLocalRow = $('save-local-row');
  if (saveLocalRow) saveLocalRow.hidden = state.surface !== 'localhost';
  const pagesBanner = $('run-pages-banner');
  if (pagesBanner) pagesBanner.hidden = state.surface !== 'pages';
  const mobileBanner = $('run-mobile-banner');
  if (mobileBanner) mobileBanner.hidden = !state.budget?.isMobile;
  const purgeBtn = $('btn-purge');
  // Cache lives in OPFS on every surface now, so the Purge button is
  // always meaningful. Was hidden on localhost back when the disk-cache
  // path lived on the server.
  if (purgeBtn) purgeBtn.hidden = false;
  renderHfSection();
}
function renderHfSection() {
  // Sync the HF hub row (sign-in / submit buttons + user label) with the
  // session, running flag, and result eligibility. Space surface only.
  if (state.surface !== 'space') return;
  const signinBtn = $('btn-signin');
  const submitBtn = $('btn-submit');
  const userEl = $('hf-user');
  if (!signinBtn || !submitBtn || !userEl) return;
  if (!isHubConfigured()) {
    // No dataset repo configured: show a disabled explainer instead of a
    // sign-in button that could never submit anywhere.
    signinBtn.disabled = true;
    signinBtn.textContent = 'HF hub not configured';
    signinBtn.title = 'Set HF_DATASET_REPO in site/js/run/config.js';
    submitBtn.hidden = true;
    userEl.textContent = '';
    return;
  }
  if (state.hfSession) {
    signinBtn.textContent = 'Sign out';
    // Sign-out itself is fine mid-run, but stay consistent with the disabled
    // sign-in state so the row doesn't toggle look mid-run.
    signinBtn.disabled = state.running;
    submitBtn.hidden = false;
    const eligible = submittableResults();
    submitBtn.disabled = state.running || eligible.length === 0;
    submitBtn.title = state.running
      ? 'Wait for the benchmark to finish before submitting'
      : (eligible.length === 0 && state.results.length > 0
        ? `Need at least ${MIN_ITERATIONS_FOR_SUBMIT} successful iterations per variant to submit`
        : '');
    const who = state.hfSession.userName ? `@${state.hfSession.userName}` : 'signed in';
    const hint = eligible.length > 0
      ? ` Β· ${eligible.length}/${state.results.length} variants eligible`
      : '';
    userEl.textContent = `${who} Β· β†’ ${HF_DATASET_REPO}${hint}`;
  } else {
    signinBtn.textContent = 'Sign in with Hugging Face';
    // Sign-in triggers a full-page redirect, which would kill an in-flight
    // worker. Disable the button while the benchmark is running so the user
    // can't accidentally lose their run; results are saved progressively to
    // localStorage and restored on the next mount, so finishing the run and
    // signing in afterwards still lets them submit.
    signinBtn.disabled = state.running;
    signinBtn.title = state.running
      ? 'Wait for the benchmark to finish before signing in'
      : '';
    submitBtn.hidden = true;
    userEl.textContent = '';
  }
}
function renderModels() {
  // Rebuild the family-grouped variant picker from state.variants. Each
  // family renders as a collapsible card with a tri-state select-all
  // checkbox; quick-preset variants that fit the device start checked.
  const panel = $('run-models');
  panel.innerHTML = '';
  const groups = groupByFamily(state.variants);
  for (const [family, variants] of groups) {
    const fitsCount = variants.filter(variantFitsDevice).length;
    const quickFitCount = variants.filter(v => isQuickVariant(v) && variantFitsDevice(v)).length;
    // Card wrapper (not <details>, to avoid nested-interactive with the
    // family-level checkbox). A dedicated toggle button expands/collapses
    // the variant list.
    const familyEl = document.createElement('section');
    familyEl.className = 'run-family card';
    familyEl.dataset.family = family;
    const header = document.createElement('div');
    header.className = 'run-family-summary';
    const toggleBtn = document.createElement('button');
    toggleBtn.type = 'button';
    toggleBtn.className = 'run-family-toggle';
    toggleBtn.setAttribute('aria-expanded', 'false');
    toggleBtn.setAttribute('aria-label', `Expand ${family}`);
    toggleBtn.innerHTML = '<span class="run-family-chevron" aria-hidden="true"></span>';
    // Sanitize the family name into a safe checkbox id for the label's
    // htmlFor association.
    const selectAllId = `run-family-all-${family.replace(/[^a-z0-9]/gi, '-')}`;
    const selectAll = document.createElement('input');
    selectAll.type = 'checkbox';
    selectAll.className = 'run-family-select-all';
    selectAll.dataset.family = family;
    selectAll.id = selectAllId;
    selectAll.setAttribute('aria-label', `Select all variants in ${family}`);
    const nameLabel = document.createElement('label');
    nameLabel.className = 'run-family-name';
    nameLabel.htmlFor = selectAllId;
    nameLabel.textContent = family;
    // Parameter-count chip (e.g. "1B"); hidden when the family name
    // carries no size token.
    const paramChip = document.createElement('span');
    paramChip.className = 'run-family-params';
    const params = parseParamSize(family);
    if (params) paramChip.textContent = params;
    else paramChip.hidden = true;
    const stats = document.createElement('span');
    stats.className = 'run-family-stats';
    stats.textContent = `${variants.length} variants Β· ${fitsCount} fit Β· ${quickFitCount} quick`;
    header.append(toggleBtn, selectAll, nameLabel, paramChip, stats);
    familyEl.appendChild(header);
    const list = document.createElement('div');
    list.className = 'run-variant-list';
    // Collapsed until the toggle button expands it.
    list.hidden = true;
    for (const v of variants) {
      const row = document.createElement('label');
      row.className = 'run-variant-row';
      if (!variantFitsDevice(v)) row.classList.add('is-non-fit');
      row.dataset.key = cacheKey(v);
      const cb = document.createElement('input');
      cb.type = 'checkbox';
      cb.className = 'run-variant-select';
      cb.dataset.key = cacheKey(v);
      // Default selection: quick-preset variants that fit the device.
      cb.checked = isQuickVariant(v) && variantFitsDevice(v);
      const quant = document.createElement('span');
      quant.className = 'run-variant-quant';
      quant.textContent = v.quant;
      const filename = document.createElement('code');
      filename.className = 'run-variant-file';
      filename.textContent = v.filename;
      const size = document.createElement('span');
      size.className = 'run-variant-size';
      size.textContent = v.sizeMB > 0 ? formatSize(v.sizeMB) : '?';
      const badges = document.createElement('span');
      badges.className = 'run-variant-badges';
      updateBadgesForVariant(badges, v);
      row.append(cb, quant, filename, size, badges);
      list.appendChild(row);
    }
    familyEl.appendChild(list);
    panel.appendChild(familyEl);
    updateFamilySelectAllState(family);
  }
}
function updateFamilySelectAllState(family) {
  // Recompute the family header checkbox's checked/indeterminate state
  // from its child variant checkboxes.
  const panel = $('run-models');
  if (!panel) return;
  const familyEl = panel.querySelector(
    `.run-family[data-family="${cssEscape(family)}"]`,
  );
  if (!familyEl) return;
  // Only count fit variants — the parent checkbox is intentionally limited
  // to toggling fits (non-fits would OOM). If we counted non-fits here too,
  // the parent could never reach "all checked" for any mixed family, which
  // wedges its underlying `checked` at false and turns subsequent clicks
  // into no-ops (see SmolLM3-3B: 21 fit / 24 variants).
  const rows = familyEl.querySelectorAll('.run-variant-row:not(.is-non-fit) .run-variant-select');
  const all = rows.length;
  const checked = [...rows].filter(cb => cb.checked).length;
  const selectAll = familyEl.querySelector('.run-family-select-all');
  if (!selectAll) return;
  // Tri-state: fully checked only when every fit variant is checked (and
  // there is at least one); partially checked shows as indeterminate.
  selectAll.checked = checked === all && all > 0;
  selectAll.indeterminate = checked > 0 && checked < all;
}
function updateBadgesForVariant(badgesEl, v) {
  // Rebuild a variant row's badge strip from scratch: a "cached" badge
  // when OPFS holds bytes for it, plus one warn badge per warning.
  badgesEl.innerHTML = '';
  if (isCached(v)) {
    badgesEl.appendChild(makeBadge('cached', 'badge--cached'));
  }
  v.warnings.forEach((w) => badgesEl.appendChild(makeBadge(w, 'badge--warn')));
}
function refreshCacheBadge(v) {
  // Re-render only this variant's badge strip (e.g. after a download
  // finishes) instead of rebuilding the whole model list. No-op when the
  // row isn't in the DOM.
  const selector = `.run-variant-row[data-key="${cssEscape(cacheKey(v))}"]`;
  const row = document.querySelector(selector);
  const badges = row?.querySelector('.run-variant-badges');
  if (badges) updateBadgesForVariant(badges, v);
}
function makeBadge(text, cls) {
  // Build a <span class="badge <cls>"> with the given label text.
  const span = document.createElement('span');
  span.className = `badge ${cls}`;
  span.textContent = text;
  return span;
}
function formatSize(mb) {
  // Human-readable size from a megabyte count: whole MB below 1024,
  // one-decimal GB at or above it.
  if (mb < 1024) return `${mb.toFixed(0)} MB`;
  return `${(mb / 1024).toFixed(1)} GB`;
}
/* Pull a parameter-count hint (e.g. "1B", "270M", "0.6B") from a family
   name. Most family names embed this near the end (Llama-3.2-1B-Instruct,
   gemma-3-270m-it). Returns the LAST `<digits>[Bb|Mm]` token in the name,
   uppercased with internal whitespace removed. Returns null when nothing
   matches — the chip is then hidden. */
function parseParamSize(name) {
  if (!name) return null;
  const tokens = String(name).match(/(\d+\.?\d*)\s*[BbMm](?![A-Za-z])/g);
  if (!tokens || tokens.length === 0) return null;
  const last = tokens.at(-1);
  return last.toUpperCase().replace(/\s+/g, '');
}
function escapeText(s) {
  // Entity-encode &, <, > so arbitrary text is safe to interpolate into
  // HTML strings.
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;' };
  return String(s).replace(/[&<>]/g, (ch) => entities[ch]);
}
function escapeAttr(s) { return escapeText(s).replace(/"/g, '&quot;'); }
function cssEscape(s) {
  // Prefer the native CSS.escape when present; otherwise fall back to
  // backslash-escaping every non-word, non-hyphen character.
  const native = window.CSS?.escape;
  if (native) return CSS.escape(s);
  return String(s).replace(/[^\w-]/g, (ch) => `\\${ch}`);
}
// ──────────────── selection / filters ────────────────
function wireSelectionHandlers() {
  // Delegate checkbox changes and expand/collapse clicks from the models
  // panel root, so rows re-rendered later still work without re-wiring.
  const panel = $('run-models');
  panel.addEventListener('change', (e) => {
    const t = e.target;
    if (t.classList?.contains('run-family-select-all')) {
      // Family-level select-all: mirror its checked state onto children.
      const family = t.dataset.family;
      const rows = panel.querySelectorAll(
        `.run-family[data-family="${cssEscape(family)}"] .run-variant-row`,
      );
      // Only affect fit variants — checking non-fit can cause OOM on the
      // user's device, which is actively dangerous.
      rows.forEach(row => {
        if (row.classList.contains('is-non-fit')) return;
        const cb = row.querySelector('.run-variant-select');
        if (cb) cb.checked = t.checked;
      });
      updateFamilySelectAllState(family);
      updateButtons();
    } else if (t.classList?.contains('run-variant-select')) {
      // Single-variant toggle: resync the parent tri-state checkbox.
      const familyEl = t.closest('.run-family');
      if (familyEl) updateFamilySelectAllState(familyEl.dataset.family);
      updateButtons();
    }
  });
  panel.addEventListener('click', (e) => {
    // Clicks on the select-all checkbox or name label must not toggle
    // expansion — they have their own semantics.
    if (e.target.closest('.run-family-select-all, .run-family-name, .run-variant-list, .run-variant-row')) {
      return;
    }
    const header = e.target.closest?.('.run-family-summary');
    if (!header) return;
    // Anywhere else on the family header toggles the variant list.
    const familyEl = header.closest('.run-family');
    const list = familyEl?.querySelector('.run-variant-list');
    const toggle = familyEl?.querySelector('.run-family-toggle');
    if (!list || !toggle) return;
    const expanded = !list.hidden;
    list.hidden = expanded;
    toggle.setAttribute('aria-expanded', String(!expanded));
    familyEl.classList.toggle('is-open', !expanded);
  });
}
function wireFilters() {
  // Re-run applyFilters whenever any of the three hide-* checkboxes flips.
  // Missing checkboxes (markup variants) are skipped silently.
  const ids = ['hide-ud', 'hide-iq', 'hide-hifp'];
  for (const id of ids) {
    const box = $(id);
    if (!box) continue;
    box.addEventListener('change', applyFilters);
  }
}
function wireFamilySearch() {
  // Wire the family search box; no-op when the input isn't in the markup.
  const input = $('family-search');
  if (!input) return;
  // Live-filter family cards on input. Match against the lowercased family
  // name; auto-expand any family that matches a non-empty query so the user
  // sees the relevant variants without an extra click.
  input.addEventListener('input', () => {
    const q = input.value.trim().toLowerCase();
    document.querySelectorAll('.run-family').forEach(el => {
      const family = (el.dataset.family || '').toLowerCase();
      // Empty query matches everything (shows all families collapsed-state
      // unchanged); otherwise substring match on the family name.
      const match = q === '' || family.includes(q);
      el.hidden = !match;
      // Expand on match-with-query so variants are visible without a click.
      if (q !== '' && match) {
        const list = el.querySelector('.run-variant-list');
        const toggle = el.querySelector('.run-family-toggle');
        if (list && toggle) {
          list.hidden = false;
          toggle.setAttribute('aria-expanded', 'true');
          el.classList.add('is-open');
        }
      }
    });
  });
}
function wireBatchSelect() {
  // Wire the three bulk-selection buttons (Quick / All that fit / None).
  // `apply` sets every variant checkbox from a predicate over its variant
  // row, then resyncs each family's tri-state checkbox and the action bar.
  const apply = (pred) => {
    document.querySelectorAll('.run-variant-select').forEach(cb => {
      // NOTE: `v` may be undefined if a checkbox key no longer resolves;
      // each predicate below guards with `!!v` before using it.
      const v = state.variants.find(x => cacheKey(x) === cb.dataset.key);
      cb.checked = pred(v);
    });
    document.querySelectorAll('.run-family').forEach(el => {
      if (el.dataset.family) updateFamilySelectAllState(el.dataset.family);
    });
    updateButtons();
  };
  $('btn-select-quick')?.addEventListener('click', () => {
    apply(v => !!v && isQuickVariant(v) && variantFitsDevice(v));
  });
  $('btn-select-fit')?.addEventListener('click', () => {
    apply(v => !!v && variantFitsDevice(v));
  });
  $('btn-select-none')?.addEventListener('click', () => {
    apply(() => false);
  });
}
function wirePerfInputs() {
  // Bind the numeric perf inputs and the two phase checkboxes to state.
  // Each numeric field is normalized on change and the normalized value is
  // written back into the input, so the user always sees what will run.
  const bindNumber = (id, read, write, normalize) => {
    const el = $(id);
    if (!el) return;
    el.value = String(read());
    el.addEventListener('change', () => {
      write(normalize(el.value));
      el.value = String(read());
    });
  };
  // Iterations: NaN (and 0, via `||`) falls back to the default BEFORE
  // clamping to [1, 50] — this mirrors the original input semantics.
  bindNumber(
    'iterations-input',
    () => state.iterations,
    (n) => { state.iterations = n; },
    (raw) => Math.max(1, Math.min(50, parseInt(raw, 10) || DEFAULT_ITERATIONS)),
  );
  // Token counts: clamp to [0, max] first, fall back to the default only
  // when the parse produced NaN.
  const clampOr = (max, fallback) => (raw) => {
    const n = Math.max(0, Math.min(max, parseInt(raw, 10)));
    return Number.isFinite(n) ? n : fallback;
  };
  bindNumber(
    'n-prompt-input',
    () => state.nPrompt,
    (n) => { state.nPrompt = n; },
    clampOr(4096, DEFAULT_N_PROMPT),
  );
  bindNumber(
    'n-gen-input',
    () => state.nGen,
    (n) => { state.nGen = n; },
    clampOr(4096, DEFAULT_N_GEN),
  );
  bindNumber(
    'n-depth-input',
    () => state.nDepth,
    (n) => { state.nDepth = n; },
    clampOr(32768, DEFAULT_N_DEPTH),
  );
  // Phase toggles mirror straight into state; both default OFF (GPU perf
  // only) because the CPU pass is the slowest step on most devices.
  const bindFlag = (id, read, write) => {
    const el = $(id);
    if (!el) return;
    el.checked = read();
    el.addEventListener('change', () => write(el.checked));
  };
  bindFlag('run-consistency', () => state.runConsistency, (v) => { state.runConsistency = v; });
  bindFlag('run-cpu-perf', () => state.runCpuPerf, (v) => { state.runCpuPerf = v; });
}
function submittableResults() {
  // Results eligible for dataset submission: finished runs whose metrics
  // record at least MIN_ITERATIONS_FOR_SUBMIT successful iterations.
  const eligible = [];
  for (const r of state.results) {
    if (r.status !== 'done') continue;
    const iters = r.metrics?.iterations || 0;
    if (iters >= MIN_ITERATIONS_FOR_SUBMIT) eligible.push(r);
  }
  return eligible;
}
function applyFilters() {
  // Hide/show variant rows according to the three quant-class filter
  // checkboxes, then refresh each family's stats line and the action bar.
  const hideUd = $('hide-ud')?.checked;
  const hideIq = $('hide-iq')?.checked;
  const hideHifp = $('hide-hifp')?.checked;
  // Count hidden rows per family so the stats line can report the impact.
  const hiddenByFamily = new Map();
  document.querySelectorAll('.run-variant-row').forEach(row => {
    const v = state.variants.find(x => cacheKey(x) === row.dataset.key);
    if (!v) return;
    // Quant-class tests: UD-* (Unsloth dynamic), IQ*/UD-IQ* (i-quants),
    // and BF16/F16 (high-precision floats).
    const isUd = v.quant.startsWith('UD-');
    const isIq = /^IQ/.test(v.quant) || /^UD-IQ/.test(v.quant);
    const isHifp = /^(BF16|F16|bf16|f16)$/.test(v.quant);
    const hide = (hideUd && isUd) || (hideIq && isIq) || (hideHifp && isHifp);
    row.style.display = hide ? 'none' : '';
    if (hide) hiddenByFamily.set(v.modelName, (hiddenByFamily.get(v.modelName) || 0) + 1);
  });
  // Refresh the per-family stats line so users see hidden filter impact.
  document.querySelectorAll('.run-family').forEach(familyEl => {
    const family = familyEl.dataset.family;
    const all = [...familyEl.querySelectorAll('.run-variant-row')];
    const visible = all.filter(r => r.style.display !== 'none').length;
    const fit = all.filter(r => !r.classList.contains('is-non-fit') && r.style.display !== 'none').length;
    const quick = all.filter(r => {
      if (r.style.display === 'none' || r.classList.contains('is-non-fit')) return false;
      const v = state.variants.find(x => cacheKey(x) === r.dataset.key);
      return v && isQuickVariant(v);
    }).length;
    const stats = familyEl.querySelector('.run-family-stats');
    if (!stats) return;
    const hiddenCount = hiddenByFamily.get(family) || 0;
    const base = `${visible} variants Β· ${fit} fit Β· ${quick} quick`;
    stats.textContent = hiddenCount > 0 ? `${base} Β· ${hiddenCount} hidden` : base;
  });
  // A selected-but-now-hidden variant is a footgun; re-count the queue.
  updateButtons();
}
function getCheckedVariants() {
  // Map each checked variant checkbox back to its state.variants row,
  // dropping any checkbox whose key no longer resolves to a variant.
  const boxes = document.querySelectorAll('.run-variant-select:checked');
  const selected = [];
  for (const cb of boxes) {
    const variant = state.variants.find((x) => cacheKey(x) === cb.dataset.key);
    if (variant) selected.push(variant);
  }
  return selected;
}
function updateButtons() {
  // Recompute the enabled/hidden state of the action-bar buttons from the
  // current selection and the running flag, then refresh the budget meter
  // and the HF row.
  const checked = getCheckedVariants();
  const cachedChecked = checked.filter(isCached);
  const dl = $('btn-download'); if (dl) dl.disabled = state.running || checked.length === 0;
  // Run is now allowed even when nothing is cached — the pipeline downloads
  // on demand. (Download button remains for the "pre-cache without running"
  // workflow.)
  const rn = $('btn-run'); if (rn) rn.disabled = state.running || checked.length === 0;
  const study = $('btn-run-study'); if (study) study.disabled = state.running;
  // Abort is only visible (and enabled) while a run is in flight.
  const ab = $('btn-abort'); if (ab) { ab.disabled = !state.running; ab.hidden = !state.running; }
  renderBudgetMeter(checked, cachedChecked);
  // Keep the Sign in / Submit buttons in sync with the running flag — they
  // depend on it so the user can't kick off a redirect mid-run.
  renderHfSection();
}
/* Show selected size as a fill bar against the device's max model size.
   Three states drive the fill color: under (signal green), nearing (amber
   β‰₯ 70%), over (red β‰₯ 100%). When nothing is selected, hide the whole
   widget so the action bar isn't dominated by an empty meter. */
function renderBudgetMeter(checked, cachedChecked) {
  const widget = $('run-budget');
  const fill = $('run-budget-fill');
  const text = $('run-budget-text');
  const meta = $('run-budget-meta');
  if (!widget || !fill || !text || !meta) return;
  if (checked.length === 0) {
    widget.hidden = true;
    return;
  }
  widget.hidden = false;
  const totalMB = checked.reduce((a, v) => a + (v.sizeMB || 0), 0);
  const toDownload = checked.filter(v => !isCached(v));
  const dlMB = toDownload.reduce((a, v) => a + (v.sizeMB || 0), 0);
  const budgetMB = state.budget?.budgetMB || 0;
  // Largest single model is what really matters for the device — total is
  // download size, not peak memory. Show both.
  const largest = checked.reduce((m, v) => Math.max(m, v.sizeMB || 0), 0);
  // Fill percentage is capped at 100 so an over-budget selection doesn't
  // overflow the bar; the 'over' tone carries that signal instead.
  const pct = budgetMB > 0 ? Math.min(100, (largest / budgetMB) * 100) : 0;
  fill.style.width = `${pct}%`;
  let tone = 'ok';
  if (budgetMB > 0 && largest > budgetMB) tone = 'over';
  else if (budgetMB > 0 && largest / budgetMB >= 0.7) tone = 'warn';
  widget.dataset.tone = tone;
  text.innerHTML = `<strong>${checked.length}</strong> selected Β· <span class="run-budget-size">${formatSize(totalMB)}</span> total`;
  const metaParts = [];
  if (largest > 0 && budgetMB > 0) {
    metaParts.push(`largest ${formatSize(largest)} / budget ${formatSize(budgetMB)}`);
  }
  if (cachedChecked.length > 0) metaParts.push(`${cachedChecked.length} cached`);
  if (dlMB > 0) metaParts.push(`~${formatSize(dlMB)} to download`);
  meta.textContent = metaParts.join(' Β· ');
}
// ──────────────── progress table ────────────────
function ensureProgressTable() {
  // Lazily create (or re-create) the progress table and return it; null
  // when the wrapper element is missing from the markup.
  const wrap = $('run-progress-wrapper');
  if (!wrap) return null;
  // Reveal the progress card + its header — they are hidden by default on
  // mount so the user doesn't see an empty "Progress" scaffold, but we must
  // un-hide them as soon as the first row (download or run) appears.
  const card = wrap.closest('.table-card');
  if (card) card.hidden = false;
  const header = card?.previousElementSibling;
  if (header?.classList?.contains('section-header')) header.hidden = false;
  // Layout key — 'study' means pp/tg are split into d=0 and d=N columns,
  // 'plain' means a single column each. If the existing table doesn't
  // match the current state, drop it: state.results + the run loop are the
  // source of truth, the progress table is just a visual scaffold.
  const wantedLayout = state.studyMode ? 'study' : 'plain';
  let table = wrap.querySelector('table');
  if (table && table.dataset.layout !== wantedLayout) {
    table.remove();
    table = null;
  }
  if (!table) {
    table = document.createElement('table');
    table.className = 'results-table run-progress-table';
    table.dataset.layout = wantedLayout;
    const dN = state.nDepth || 0;
    // Column headers: \u00b1 is the ± sign in the hover tooltips.
    const ppHead = state.studyMode
      ? `<th class="num" title="Prompt processing throughput at empty cache (avg \u00b1 stddev t/s)">pp tok/s @ d0</th>
<th class="num" title="Prompt processing throughput at depth ${dN} (avg \u00b1 stddev t/s)">pp tok/s @ d${dN}</th>`
      : `<th class="num" title="Prompt processing throughput (avg \u00b1 stddev t/s)">pp tok/s</th>`;
    const tgHead = state.studyMode
      ? `<th class="num" title="Text generation throughput at empty cache (avg \u00b1 stddev t/s)">tg tok/s @ d0</th>
<th class="num" title="Text generation throughput at depth ${dN} (avg \u00b1 stddev t/s)">tg tok/s @ d${dN}</th>`
      : `<th class="num" title="Text generation throughput (avg \u00b1 stddev t/s)">tg tok/s</th>`;
    table.innerHTML = `
<thead>
<tr>
<th>Model</th>
<th>Variant</th>
<th>Status</th>
${ppHead}
${tgHead}
<th class="num">Wall s</th>
<th>Error</th>
</tr>
</thead>
<tbody></tbody>
`;
    wrap.appendChild(table);
  }
  return table;
}
// Returns a handle for the progress-table row of variant `v`, creating the
// row (and the table) on first use. The handle exposes:
//   setStatus(status, msg, sinceMs) β€” status cell + row tone; optional ticker
//   setProgress(fraction, downloaded, total) β€” download percentage display
//   fillFromRecord(record) β€” final pp/tg/wall/error cells from a run record
// Fix: ensureProgressTable() returns null when #run-progress-wrapper is
// missing from the DOM; previously that fell straight into a TypeError on
// table.querySelector('tbody'). Return an inert no-op handle instead so the
// run/download loops keep working without a visible progress table.
function progressRowFor(v) {
  const key = cacheKey(v);
  const table = ensureProgressTable();
  if (!table) {
    return {
      setStatus() {},
      setProgress() {},
      fillFromRecord() {},
    };
  }
  const tbody = table.querySelector('tbody');
  let tr = tbody.querySelector(`tr[data-key="${cssEscape(key)}"]`);
  if (!tr) {
    tr = document.createElement('tr');
    tr.dataset.key = key;
    tr.className = 'run-row-queued';
    // pp/tg cells gain a depth-suffixed class in study mode so
    // fillFromRecord can route each record to its own column. Plain mode
    // still uses a single .prefill-dn / .decode-dn cell β€” pre-study (or
    // single-pass) records all go there regardless of nDepth.
    const ppCells = state.studyMode
      ? '<td class="num prefill prefill-d0">β€”</td><td class="num prefill prefill-dn">β€”</td>'
      : '<td class="num prefill prefill-dn">β€”</td>';
    const tgCells = state.studyMode
      ? '<td class="num decode decode-d0">β€”</td><td class="num decode decode-dn">β€”</td>'
      : '<td class="num decode decode-dn">β€”</td>';
    tr.innerHTML = `
      <td>${escapeText(v.modelName)}</td>
      <td>${escapeText(v.quant)}</td>
      <td class="status">queued</td>
      ${ppCells}
      ${tgCells}
      <td class="num wall">β€”</td>
      <td class="err"></td>
    `;
    tbody.appendChild(tr);
  }
  // One-second ticker for long-running phases; at most one interval is live
  // per handle, and every setStatus/setProgress/fillFromRecord stops it.
  let tickInterval = null;
  const stopTicker = () => {
    if (tickInterval !== null) { clearInterval(tickInterval); tickInterval = null; }
  };
  return {
    // sinceMs: optional epoch ms. When set, the cell ticks once a second so
    // long-running phases (CPU pp512 warmup, big-model rep calls) show
    // wall-clock progress instead of looking hung. Cleared on next setStatus.
    setStatus(status, msg, sinceMs) {
      stopTicker();
      tr.className = `run-row-${rowClassFor(status)}`;
      const cell = tr.querySelector('.status');
      const render = () => {
        const base = msg ? `${status} β€” ${msg}` : status;
        cell.textContent = sinceMs
          ? `${base} (${Math.floor((Date.now() - sinceMs) / 1000)}s)`
          : base;
      };
      render();
      if (sinceMs) tickInterval = setInterval(render, 1000);
    },
    setProgress(fraction, downloaded, total) {
      stopTicker();
      const pct = (fraction * 100).toFixed(1);
      // total <= 0 means the server didn't report a Content-Length; show a
      // bare "downloading" rather than a bogus percentage.
      const detail = total > 0
        ? `${pct}% (${formatSize(downloaded / (1024 * 1024))} / ${formatSize(total / (1024 * 1024))})`
        : '';
      tr.querySelector('.status').textContent = detail ? `downloading ${detail}` : 'downloading';
    },
    fillFromRecord(record) {
      stopTicker();
      tr.className = `run-row-${record.status === 'done' ? 'ok' : 'error'}`;
      tr.querySelector('.status').textContent = record.status;
      // Format llama-bench style: "avg \u00b1 stddev" with the test name as
      // the cell tooltip so users see the exact pp/tg N that was measured.
      const tests = record.metrics?.tests || [];
      const pp = tests.find(t => t.name?.startsWith('pp'));
      const tg = tests.find(t => t.name?.startsWith('tg'));
      const fmt = (t) => t ? `${t.avg_ts.toFixed(2)} \u00b1 ${t.stddev_ts.toFixed(2)}` : '\u2014';
      // In study mode pick d=0 vs d=N based on the record's nDepth so the
      // first pass doesn't get clobbered by the second. Plain mode only
      // ever has the .prefill-dn / .decode-dn cells.
      const isD0 = state.studyMode && (record.nDepth ?? 0) === 0;
      const ppSel = isD0 ? '.prefill-d0' : '.prefill-dn';
      const tgSel = isD0 ? '.decode-d0' : '.decode-dn';
      const ppCell = tr.querySelector(ppSel);
      const tgCell = tr.querySelector(tgSel);
      if (ppCell) {
        ppCell.textContent = fmt(pp);
        if (pp) ppCell.title = pp.name;
      }
      if (tgCell) {
        tgCell.textContent = fmt(tg);
        if (tg) tgCell.title = tg.name;
      }
      // Wall cell accumulates across depth passes in study mode so the
      // user sees total time per variant. Plain mode is a single-shot
      // assignment as before.
      const wallSec = record.wallTimeMs ? record.wallTimeMs / 1000 : 0;
      const wallEl = tr.querySelector('.wall');
      if (state.studyMode) {
        const prev = parseFloat(wallEl.dataset.totalSec || '0') || 0;
        const total = prev + wallSec;
        wallEl.dataset.totalSec = String(total);
        wallEl.textContent = total > 0 ? total.toFixed(1) : '\u2014';
      } else {
        wallEl.textContent = wallSec > 0 ? wallSec.toFixed(1) : '\u2014';
      }
      tr.querySelector('.err').textContent = describeError(record.error);
    },
  };
}
// Map a row status string to the run-row-* CSS tone suffix.
// Terminal-success states β†’ 'ok', 'error' β†’ 'error', empty/queued β†’
// 'queued', anything else is treated as an in-flight phase β†’ 'running'.
function rowClassFor(status) {
  switch (status) {
    case 'done':
    case 'ok':
    case 'cached':
      return 'ok';
    case 'error':
      return 'error';
    case 'queued':
      return 'queued';
    default:
      return status ? 'running' : 'queued';
  }
}
// ──────────────── logging ────────────────
// Append a timestamped line to the #log-output console, auto-scrolling to
// the bottom. Lines tagged "[wasm:err]" are additionally mirrored into the
// dedicated #wasm-error-output panel so WASM stderr is easy to find.
function logLine(msg) {
  const pre = $('log-output');
  if (!pre) return;
  const stamp = new Date().toISOString().slice(11, 23);
  const line = `[${stamp}] ${msg}\n`;
  const append = (el) => {
    el.textContent += line;
    el.scrollTop = el.scrollHeight;
  };
  append(pre);
  if (msg.startsWith('[wasm:err]')) {
    const wasmPre = $('wasm-error-output');
    if (wasmPre) append(wasmPre);
  }
}
// ──────────────── machine / browser info ────────────────
// Short "<engine>-<majorVersion>" label for the current browser, e.g.
// "chromium-120". Order matters: Edge UAs also contain "Chrome/", and
// Chrome UAs also contain "Safari/", so the more specific tokens are
// checked first; "Version/β€¦Safari" only appears in real Safari.
// Fixes: previously used the deprecated, non-standard RegExp.$1 static
// (mutated globally by every regex evaluation); now reads the capture from
// the match result. `ua` is overridable for testing and defaults to the
// live user agent, so existing zero-arg callers are unaffected.
function browserInfo(ua = navigator.userAgent) {
  const rules = [
    [/Firefox\/(\d+)/, 'firefox'],
    [/Edg\/(\d+)/, 'edge'],
    [/Chrome\/(\d+)/, 'chromium'],
    [/Version\/(\d+).*Safari/, 'webkit'],
  ];
  for (const [re, label] of rules) {
    const m = ua.match(re);
    if (m) return `${label}-${m[1]}`;
  }
  return 'browser-unknown';
}
// Lower-case `s`, collapse every run of non-alphanumerics into a single
// '-', and trim leading/trailing dashes. An empty result (including
// all-symbol input) falls back to 'unknown'.
function slugify(s) {
  const slug = String(s)
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-|-$/g, '');
  return slug || 'unknown';
}
// ──────────────── user-reported submission fields ────────────────
// Best-effort default for the four user-reported inputs, derived from the
// auto-detected device + browser data. The user is expected to edit these
// before running β€” defaults exist only so the form isn't empty on first
// visit. Returns { machineName, gpuName, browser, os }.
// Derive default values for the four user-reported inputs from the
// auto-detected device + browser data. These are placeholders only β€” the
// user is expected to replace them with friendlier labels before running.
// Returns { machineName, gpuName, browser, os }.
function autoDetectedUserReported() {
  const device = state.device || {};
  const adapter = device.gpu || {};
  const gpuStr = [adapter.vendor, adapter.architecture, adapter.device, adapter.description]
    .filter(Boolean)
    .join(' ')
    .trim();
  const memGB = state.budget?.memGB;
  const browser = formatBrowser(device);
  const os = formatPlatform(device);
  // machineName preference order: "<gpu> Β· <memGB> GB" when both are known,
  // then whichever one is known, then the OS string as a last resort.
  let machineName;
  if (gpuStr && memGB) {
    machineName = `${gpuStr} Β· ${memGB} GB`;
  } else if (gpuStr) {
    machineName = gpuStr;
  } else if (memGB) {
    machineName = `${memGB} GB device`;
  } else {
    machineName = os;
  }
  return { machineName, gpuName: gpuStr, browser, os };
}
// Snapshot the four user-reported form inputs, trimmed. A missing input
// element reads as the empty string.
function readUserReportedFromInputs() {
  const read = (id) => ($(id)?.value ?? '').trim();
  return {
    machineName: read('ur-machine-name'),
    gpuName: read('ur-gpu-name'),
    browser: read('ur-browser'),
    os: read('ur-os'),
  };
}
// Re-validate the user-reported fields: toggle the `is-missing` class on
// each required input and update the #ur-hint text/tone. Submission (not
// running) requires all required fields to be filled.
function refreshUserReportedValidation() {
  const hint = $('ur-hint');
  const inputIdByKey = { machineName: 'ur-machine-name', browser: 'ur-browser', os: 'ur-os' };
  const missing = [];
  for (const key of USER_REPORTED_REQUIRED) {
    const filled = !!state.userReported[key];
    if (!filled) missing.push(key);
    const input = $(inputIdByKey[key]);
    if (input) input.classList.toggle('is-missing', !filled);
  }
  if (!hint) return;
  if (missing.length === 0) {
    hint.textContent = 'Looks good β€” these labels will be attached to every result you submit.';
    hint.classList.remove('is-warn');
  } else {
    hint.textContent = `Required: ${missing.join(', ')}. We'll still let you run, but submissions need these filled in.`;
    hint.classList.add('is-warn');
  }
}
// Initialize the user-reported form: seed state from persisted values
// (falling back to auto-detected defaults), mirror state into the inputs,
// and wire input listeners that keep state + storage + validation in sync.
function wireUserReported() {
  // Stored values win over auto-detected defaults so returning users keep
  // their edits; defaults only fill gaps so the form is never blank.
  const stored = loadUserReported();
  const auto = autoDetectedUserReported();
  state.userReported = {
    machineName: stored?.machineName?.trim() || auto.machineName,
    gpuName: stored?.gpuName?.trim() || auto.gpuName,
    browser: stored?.browser?.trim() || auto.browser,
    os: stored?.os?.trim() || auto.os,
  };
  const inputIds = {
    machineName: 'ur-machine-name',
    gpuName: 'ur-gpu-name',
    browser: 'ur-browser',
    os: 'ur-os',
  };
  for (const [key, id] of Object.entries(inputIds)) {
    const input = $(id);
    if (!input) continue;
    input.value = state.userReported[key] || '';
    input.addEventListener('input', () => {
      state.userReported = readUserReportedFromInputs();
      saveUserReported();
      refreshUserReportedValidation();
    });
  }
  // Persist the auto-detected defaults right away so a reload before the
  // user touches anything doesn't lose them.
  saveUserReported();
  refreshUserReportedValidation();
}
// Collect coarse machine metadata for result records: platform, arch,
// platform version (Chromium UA-Client-Hints only), a GPU-derived identity
// string, and reported memory. Returns the object consumed by makeRecord.
// Fix: platform detection now tests the mobile tokens first. iPhone/iPad
// UAs contain "like Mac OS X" and Android UAs contain "Linux", so the old
// order (/Mac/ and /Linux/ first) classified every iPhone as 'darwin' and
// every Android device as 'linux', leaving the mobile branches unreachable.
async function machineInfo() {
  const ua = navigator.userAgent;
  const platform = /iPhone|iPad|iOS/.test(ua) ? 'ios'
    : /Android/.test(ua) ? 'android'
    : /Mac/.test(ua) ? 'darwin'
    : /Win/.test(ua) ? 'win32'
    : /Linux/.test(ua) ? 'linux'
    : 'unknown';
  let arch = 'unknown';
  let platformVersion = '';
  try {
    // UA-Client-Hints (Chromium only) β€” gives the real architecture even
    // where the plain UA string is frozen/reduced.
    const uad = navigator.userAgentData;
    if (uad?.getHighEntropyValues) {
      const hev = await uad.getHighEntropyValues(['architecture', 'platformVersion']);
      arch = hev.architecture || arch;
      platformVersion = hev.platformVersion || '';
    }
  } catch { /* non-UA-Data browsers */ }
  if (arch === 'unknown') {
    // UA-string fallback for Firefox/Safari.
    arch = /arm/i.test(ua) ? 'arm64'
      : /x86_64|Win64|x64/i.test(ua) ? 'x64'
      : 'unknown';
  }
  const gpu = state.device?.gpu;
  const gpuStr = gpu
    ? [gpu.vendor, gpu.architecture, gpu.device, gpu.description].filter(Boolean).join(' ').trim()
    : '';
  // Browsers expose no CPU model; the WebGPU adapter string is the closest
  // stable hardware identifier, so it doubles as the `cpus` field.
  const cpus = gpuStr || 'browser';
  const totalMemoryGB = navigator.deviceMemory || 0;
  return {
    slug: slugify(`${cpus}-${totalMemoryGB}gb-${platform}`),
    platform,
    platformVersion,
    arch,
    cpus,
    totalMemoryGB,
    userAgent: ua,
  };
}
// ──────────────── Download ────────────────
// "Download" button handler: sequentially downloads every checked variant
// into OPFS, updating the progress table as it goes. Honors the global
// abort (via registerAbort) between and during downloads. On completion the
// cache inventory is re-read from OPFS so badges reflect reality even after
// partial/aborted downloads.
async function onDownloadClick() {
  const variants = getCheckedVariants();
  if (variants.length === 0) return;
  state.running = true;
  state.aborted = false;
  updateButtons();
  for (const v of variants) {
    if (state.aborted) break;
    const row = progressRowFor(v);
    row.setStatus('downloading', '');
    // Fresh AbortController per variant; registerAbort ties it to the
    // global abort button and unregister() detaches it when this variant
    // finishes (success, abort, or error).
    const ac = new AbortController();
    const unregister = registerAbort(() => ac.abort());
    try {
      const { size } = await state.source.opfsHandleForModel(
        v.repo, v.filename,
        (fr, downloaded, total) => row.setProgress(fr, downloaded, total),
        ac.signal,
      );
      // opfsHandleForModel may resolve even after an abort was requested;
      // re-check the signal before marking the variant cached.
      if (!ac.signal.aborted) {
        state.cacheStatus[cacheKey(v)] = { cachedBytes: size };
        refreshCacheBadge(v);
        row.setStatus('cached', formatSize(size / (1024 * 1024)));
      } else {
        row.setStatus('aborted', '');
      }
    } catch (err) {
      // Aborts surface as exceptions too β€” report them as 'aborted', not
      // as download failures.
      if (ac.signal.aborted) { row.setStatus('aborted', ''); }
      else { row.setStatus('error', err.message); logLine(`Download failed: ${v.filename}: ${err.message}`); }
    } finally {
      unregister();
    }
  }
  // Refresh cache inventory to reconcile any partial downloads.
  state.cacheStatus = await loadCacheStatus();
  document.querySelectorAll('.run-variant-row').forEach(row => {
    const v = state.variants.find(x => cacheKey(x) === row.dataset.key);
    if (v) refreshCacheBadge(v);
  });
  state.running = false;
  updateButtons();
}
// ──────────────── Run ────────────────
// Curated leaderboard study: focus model at several quants for a quant
// sweep, plus every other model at the standard quant as a single
// representative point. Selection rule lives in models.json
// (`studySelection`) so the CLI's --study flag and this button stay in
// sync. Variants that don't fit the device's memory budget are dropped
// silently β€” same rule the "All fit" button enforces.
// Whether variant `v` belongs to the curated leaderboard study selection
// declared in models.json (`studySelection`): explicit extras always match,
// the focus model matches its focusQuants sweep, and every other model
// matches only at the standard quant. False when there is no selection.
function isStudyVariant(v) {
  const sel = state.models?.studySelection;
  if (!v || !sel) return false;
  const isExtra = (sel.extras || []).some(
    (e) => e.model === v.modelName && e.quant === v.quant,
  );
  if (isExtra) return true;
  if (v.modelName === sel.focusModel) {
    return (sel.focusQuants || []).includes(v.quant);
  }
  return v.quant === sel.standardQuant;
}
// "Run study" button handler: applies the study selection to the variant
// checkboxes (dropping variants that don't fit the device budget), then
// kicks off a study-mode run. No-op while a run is already in flight.
async function onRunStudyClick() {
  if (state.running) return;
  // Check exactly the study variants that fit this device β€” same DOM/state
  // plumbing as wireBatchSelect.
  document.querySelectorAll('.run-variant-select').forEach((cb) => {
    const variant = state.variants.find((x) => cacheKey(x) === cb.dataset.key);
    cb.checked = !!variant && isStudyVariant(variant) && variantFitsDevice(variant);
  });
  document.querySelectorAll('.run-family').forEach((el) => {
    if (el.dataset.family) updateFamilySelectAllState(el.dataset.family);
  });
  updateButtons();
  const checked = getCheckedVariants();
  if (checked.length === 0) {
    logLine('Run study: no variants matched (none of the study quants fit this device).');
    return;
  }
  logLine(`Run study: selected ${checked.length} variants β€” starting run.`);
  // studyMode flips on the depth-pairing branch in runVariantWithIterations
  // so each variant produces both d=0 and d=N_DEPTH records (matches the
  // CLI runner's --study behavior).
  await onRunClick({ studyMode: true });
}
// Main run loop. For each checked variant: ensure the model is in OPFS
// (one-ahead prefetch on desktop, inline download on mobile), run the
// CPU/GPU benchmark passes (one or two depth passes depending on
// studyMode), record results, optionally save to the local server, then
// evict session-downloaded files and cool down before the next variant.
// Results accumulate in state.results and sessionStorage; abort is honored
// at every await boundary.
async function onRunClick({ studyMode = false } = {}) {
  // Run accepts any checked variant β€” uncached ones download just-in-time.
  const variants = getCheckedVariants();
  if (variants.length === 0) return;
  state.running = true;
  state.aborted = false;
  state.results = [];
  state.sessionDownloads = new Set();
  // Drive progress-table layout: study mode splits pp/tg into d=0 / d=N
  // columns so both depth passes' numbers stay visible.
  state.studyMode = !!studyMode;
  updateButtons();
  if (isMobileDevice()) {
    logLine(
      'Mobile device β€” sequential downloads (no parallel prefetch), ' +
      'forced eviction after each variant, ' +
      `${(MOBILE_YIELD_BETWEEN_RUNS_MS / 1000).toFixed(1)} s cooldown between runs ` +
      '(and between depth passes in study mode) ' +
      'so iOS can release WebGPU buffers before the next load.',
    );
    if (state.budget?.source) {
      logLine(`GPU budget: ${state.budget.source}`);
    }
  }
  // Machine/browser metadata is captured once per queue and stamped onto
  // every record by makeRecord.
  const machine = await machineInfo();
  const browser = browserInfo();
  // Mobile forces eviction regardless of the checkbox: keeping multiple
  // ~700 MB GGUFs in OPFS while the GPU process retains buffers from the
  // just-finished run is the fastest path to a Jetsam tab kill on iOS.
  const evictAfter = isMobileDevice() || !!$('evict-after-run')?.checked;
  // One-ahead prefetch: while variant i runs, we may have variant i+1
  // downloading. Only one prefetch in flight at a time.
  // On mobile, the overlap is a measurement hazard β€” concurrent download
  // contends with inference for SoC power, memory bandwidth, and OPFS
  // write queues. Skip the prefetch entirely; runBenchmarkInWorker's
  // opfsHandleForModel does the download inline (with the same progress
  // events the prefetch row would have shown).
  const skipPrefetch = isMobileDevice();
  const prefetchFor = async (v) => {
    // No-op when past the end of the queue, already cached, or on mobile.
    if (!v || isCached(v)) return;
    if (skipPrefetch) return;
    const row = progressRowFor(v);
    row.setStatus('prefetching', '');
    const ac = new AbortController();
    const unregister = registerAbort(() => ac.abort());
    try {
      const { size } = await state.source.opfsHandleForModel(
        v.repo, v.filename,
        (fr, downloaded, total) => row.setProgress(fr, downloaded, total),
        ac.signal,
      );
      state.cacheStatus[cacheKey(v)] = { cachedBytes: size };
      state.sessionDownloads.add(cacheKey(v));
      refreshCacheBadge(v);
      row.setStatus('cached', formatSize(size / (1024 * 1024)));
    } catch (err) {
      if (ac.signal.aborted) {
        row.setStatus('aborted', '');
        return;
      }
      row.setStatus('error', `prefetch: ${err.message}`);
      logLine(`Prefetch failed: ${v.filename}: ${err.message}`);
    } finally {
      unregister();
    }
  };
  // Seed the first prefetch before the loop so variant 0 starts downloading
  // while we set up. The loop awaits each prefetch completion before running.
  let prefetchPromise = prefetchFor(variants[0]);
  for (let i = 0; i < variants.length; i++) {
    if (state.aborted) break;
    const v = variants[i];
    const row = progressRowFor(v);
    // Wait for variant i to be cached (either via prefetch or pre-existing).
    await prefetchPromise;
    if (state.aborted) break;
    // When skipPrefetch is on (mobile), variants arrive uncached and
    // runBenchmarkInWorker β†’ opfsHandleForModel handles the inline
    // download. Skip the cache-check error path in that case.
    if (!skipPrefetch && !isCached(v)) {
      row.setStatus('error', 'not cached after prefetch');
      prefetchPromise = prefetchFor(variants[i + 1]);
      continue;
    }
    // Kick off prefetch of i+1 in parallel with the run of i.
    prefetchPromise = prefetchFor(variants[i + 1]);
    // Persist run intent so a tab crash leaves a breadcrumb.
    writeRunIntent(v);
    row.setStatus('running', '');
    // Depth schedule for this variant. Study mode pairs d=0 with the
    // configured d=N so the dashboard can compare cold-cache against
    // depth-loaded numbers; non-study runs do a single pass at the user's
    // configured depth (default 2048). Mirrors the runner.js depth loop.
    const baseDepth = Math.max(0, state.nDepth ?? DEFAULT_N_DEPTH);
    const depthsToRun = (studyMode && baseDepth > 0) ? [0, baseDepth] : [baseDepth];
    // CPU baseline result shared across the depth passes of this variant.
    let sharedCpu = null;
    for (let di = 0; di < depthsToRun.length; di++) {
      if (state.aborted) break;
      // Inter-depth cooldown β€” mirrors the inter-variant sleep below. In
      // study mode each variant spawns a fresh worker for d=0 and another
      // for d=N back-to-back; without a gap, the second worker requests a
      // GPUDevice and a larger KV cache while iOS Metal is still draining
      // the just-terminated first worker. On long study queues this is
      // the seam where cumulative pressure tips the tab into Jetsam,
      // typically on the last (largest) variant.
      if (di > 0) {
        const cooldownMs = isMobileDevice() ? MOBILE_YIELD_BETWEEN_RUNS_MS : YIELD_BETWEEN_RUNS_MS;
        row.setStatus('cooldown', `${(cooldownMs / 1000).toFixed(1)}s before d=${depthsToRun[di]}`);
        await sleep(cooldownMs);
        if (state.aborted) break;
      }
      const nDepth = depthsToRun[di];
      const start = performance.now();
      const variantResult = await runVariantWithIterations(v, row, {
        nDepth,
        cpuResult: sharedCpu,
      });
      const wallTimeMs = performance.now() - start;
      const record = makeRecord(v, variantResult, machine, browser, wallTimeMs);
      state.results.push(record);
      row.fillFromRecord(record);
      // Cache the CPU pass from the first depth so subsequent depth runs
      // skip it (CPU baseline is depth-independent).
      if (!sharedCpu && variantResult.cpu?.status === 'done') {
        sharedCpu = variantResult.cpu;
      }
      try {
        // sessionStorage so results survive in-tab navigations (the OAuth
        // sign-in redirect in particular) but reset when the user actually
        // closes the tab β€” they don't want stale results on a fresh visit.
        sessionStorage.setItem(RESULTS_STORAGE_KEY, JSON.stringify(state.results));
      } catch { /* quota */ }
      // Mobile: drop per-rep raw arrays from the in-memory record after
      // sessionStorage has the full copy. The dashboard only reads the
      // aggregates (avg_ts, stddev_ts) and on iOS Safari every byte that
      // isn't reclaimed between variants edges the tab toward Jetsam.
      // Trade-off: an HF submission in the same session loses per-rep
      // samples; a fresh page-load rehydrates from sessionStorage and
      // recovers them.
      if (isMobileDevice()) {
        if (record.metrics) {
          delete record.metrics.prefill_samples;
          delete record.metrics.decode_samples;
          for (const t of record.metrics.tests || []) {
            delete t.samples_ts;
            delete t.samples_ns;
          }
        }
        if (record.consistency) delete record.consistency.token_ids;
        record.output = '';
      }
      // Best-effort save to the dev server; only offered on localhost.
      if (state.surface === 'localhost' && $('save-local')?.checked) {
        fetch('/api/results', {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify(record),
        }).catch(err => logLine(`POST /api/results failed: ${err.message}`));
      }
    }
    clearRunIntent();
    // Evict if enabled and this variant was downloaded this session. Files
    // the user had cached before the run are always preserved.
    if (evictAfter && state.sessionDownloads.has(cacheKey(v))) {
      try {
        const res = await state.source.evictModel(v.repo, v.filename);
        if (res.ok) {
          logLine(`Evicted ${v.filename} (${formatSize(res.bytesFreed / (1024 * 1024))})`);
          delete state.cacheStatus[cacheKey(v)];
          state.sessionDownloads.delete(cacheKey(v));
          refreshCacheBadge(v);
        } else {
          logLine(`Eviction skipped (${v.filename}): ${res.reason}`);
        }
      } catch (err) {
        logLine(`Eviction error (${v.filename}): ${err.message}`);
      }
    }
    await sleep(isMobileDevice() ? MOBILE_YIELD_BETWEEN_RUNS_MS : YIELD_BETWEEN_RUNS_MS);
  }
  // Queue ended or aborted: make sure we don't leave a prefetch running.
  try { await prefetchPromise; } catch { /* already logged */ }
  renderOutput();
  state.running = false;
  updateButtons();
  renderHfSection();
}
// Spawn a dedicated worker, transfer the stream + params, relay events back
// into the provided callbacks, resolve with the worker's final record.
// The worker is terminated (and state.currentWorker cleared) when done.
// Spawn a dedicated worker, transfer the stream + params, relay events back
// into the provided callbacks, resolve with the worker's final record.
// The worker is terminated (and state.currentWorker cleared) when done.
// The returned promise always resolves (never rejects): failures resolve
// with { status: 'error' | 'aborted', error } records so the caller's run
// loop has a single result path.
function runInWorker({
  params,
  opfsPath,
  onStatus,
  onProgress,
  onLog,
}) {
  return new Promise((resolve) => {
    let worker;
    try {
      worker = new Worker(new URL('./bench-worker.js', import.meta.url));
    } catch (err) {
      resolve({ status: 'error', error: `worker construct failed: ${err.message}` });
      return;
    }
    state.currentWorker = worker;
    // `settled` makes finish() idempotent: result, error, message-error, and
    // abort can all race, but only the first one wins.
    let settled = false;
    let unregister = () => {};
    const finish = (record) => {
      if (settled) return;
      settled = true;
      try { worker.terminate(); } catch { /* noop */ }
      if (state.currentWorker === worker) state.currentWorker = null;
      unregister();
      resolve(record);
    };
    // Global abort button terminates this worker mid-run.
    unregister = registerAbort(() => finish({ status: 'aborted', error: 'aborted by user' }));
    worker.onmessage = (e) => {
      const msg = e.data || {};
      if (msg.type === 'status') onStatus?.(msg.status, msg.msg, msg.sinceMs);
      else if (msg.type === 'progress') onProgress?.(msg.fraction, msg.downloaded, msg.total);
      else if (msg.type === 'log') onLog?.(msg.line);
      else if (msg.type === 'result') finish(msg.record);
    };
    worker.onerror = (err) => {
      finish({
        status: 'error',
        error: describeError(err) || 'worker error (tab likely out of memory)',
      });
    };
    worker.onmessageerror = () => {
      finish({ status: 'error', error: 'worker message deserialization failed' });
    };
    // OPFS path is the only transport. We send the layout key only
    // (rootDir + repo + filename); the worker re-resolves to a
    // FileSystemFileHandle via navigator.storage.getDirectory() itself,
    // since FileSystemFileHandle structured-clone is missing on iOS Safari.
    try {
      worker.postMessage({ type: 'run', params, opfsPath });
    } catch (err) {
      finish({ status: 'error', error: `postMessage(opfsPath) failed: ${err.message}` });
    }
  });
}
// Download to OPFS on the main thread, then hand the OPFS layout key to a
// freshly-spawned worker. The worker opens a FileSystemSyncAccessHandle
// and routes MEMFS reads through it (use_mmap=0), never copying the model
// into the WASM heap. Supports models larger than the WASM heap budget.
// Download to OPFS on the main thread, then hand the OPFS layout key to a
// freshly-spawned worker. The worker opens a FileSystemSyncAccessHandle
// and routes MEMFS reads through it (use_mmap=0), never copying the model
// into the WASM heap. Supports models larger than the WASM heap budget.
// `v` is a variant row ({ repo, filename, ... }); `params` configures the
// worker's load/consistency/perf phases; `callbacks` is
// { onStatus, onProgress, onLog }. Resolves with the worker's result record
// (or an { status: 'error' | 'aborted' } record on failure).
async function runBenchmarkInWorker(v, params, callbacks) {
  const baseParams = {
    // JSPI builds need WebAssembly.Suspending; fall back to asyncify.
    buildType: 'Suspending' in WebAssembly ? 'jspi' : 'asyncify',
    // Model load
    nCtx: params.nCtx,
    nGpuLayers: params.nGpuLayers,
    // Consistency phase β€” empty consistencyPrompt skips it
    consistencyPrompt: params.consistencyPrompt || '',
    consistencyNPredict: params.consistencyNPredict || DEFAULT_N_PREDICT,
    refTokenIds: params.refTokenIds || null,
    // Perf phase β€” set both to 0 to skip
    nPrompt: params.nPrompt ?? 0,
    nGen: params.nGen ?? 0,
    nReps: params.nReps ?? DEFAULT_ITERATIONS,
    nDepth: params.nDepth ?? 0,
    noWarmup: !!params.noWarmup,
  };
  const ac = new AbortController();
  const unregister = registerAbort(() => ac.abort());
  try {
    callbacks.onStatus?.('downloading', 'Downloading model to OPFS...');
    const r = await state.source.opfsHandleForModel(
      v.repo, v.filename,
      callbacks.onProgress,
      ac.signal,
    );
    // When the prefetch is skipped (mobile path), the inline download
    // above is the variant's first arrival in OPFS. Mark it as
    // session-downloaded so the post-run eviction logic frees it before
    // the next variant starts β€” keeping disk usage flat.
    if (r.wasDownloaded) {
      state.sessionDownloads.add(cacheKey(v));
      state.cacheStatus[cacheKey(v)] = { cachedBytes: r.size };
      refreshCacheBadge(v);
    }
  } catch (err) {
    // Distinguish a user abort from a genuine download failure.
    if (ac.signal.aborted) {
      return { status: 'aborted', error: 'aborted by user' };
    }
    return { status: 'error', error: `opfsHandleForModel failed: ${err.message}` };
  } finally {
    unregister();
  }
  // Abort may have been requested between download completion and here.
  if (state.aborted) {
    return { status: 'aborted', error: 'aborted by user' };
  }
  // Pass the OPFS layout key (rootDir + repo + filename), not a
  // FileSystemFileHandle. iOS Safari can't structured-clone FileHandles,
  // so the worker re-resolves it locally via navigator.storage.getDirectory().
  return runInWorker({
    params: baseParams,
    opfsPath: { rootDir: OPFS_ROOT_NAME, repo: v.repo, filename: v.filename },
    onStatus: callbacks.onStatus,
    onProgress: callbacks.onProgress,
    onLog: callbacks.onLog,
  });
}
// Runs one variant: CPU consistency baseline (one model load, generates
// reference token IDs via bench_run), then GPU pass (one model load that
// does both consistency forced-decoding and the llama-bench-style perf
// sweep β€” pp + tg with warmup + nReps timed reps each).
// Returns an aggregate that makeRecord consumes.
//
// `opts.nDepth` overrides state.nDepth so the caller can sweep multiple
// depths per variant (study mode pairs d=0 with d=N).
// `opts.cpuResult` when provided short-circuits the CPU baseline phase β€”
// study mode runs CPU once on the d=0 pass and reuses it for d=N, since
// reference tokens and the 1-rep CPU comparator are depth-independent.
// Runs one variant: CPU consistency baseline (one model load, generates
// reference token IDs via bench_run), then GPU pass (one model load that
// does both consistency forced-decoding and the llama-bench-style perf
// sweep β€” pp + tg with warmup + nReps timed reps each).
// Returns an aggregate that makeRecord consumes:
//   { status: 'done' | 'error', error, cpu, gpu }.
//
// `opts.nDepth` overrides state.nDepth so the caller can sweep multiple
// depths per variant (study mode pairs d=0 with d=N).
// `opts.cpuResult` when provided short-circuits the CPU baseline phase β€”
// study mode runs CPU once on the d=0 pass and reuses it for d=N, since
// reference tokens and the 1-rep CPU comparator are depth-independent.
async function runVariantWithIterations(v, row, opts = {}) {
  const nReps = Math.max(1, state.iterations || DEFAULT_ITERATIONS);
  const nPrompt = Math.max(0, state.nPrompt ?? DEFAULT_N_PROMPT);
  const nGen = Math.max(0, state.nGen ?? DEFAULT_N_GEN);
  const nDepth = Math.max(0, opts.nDepth ?? state.nDepth ?? DEFAULT_N_DEPTH);
  const reuseCpu = opts.cpuResult || null;
  // Per-test n_ctx mirrors llama-bench (line 1211 of
  // tools/llama-bench/llama-bench.cpp): sized to fit prompt+gen+depth so a
  // raised depth doesn't silently overflow the cache.
  const nCtxFor = (depth) => Math.max(DEFAULT_N_CTX, nPrompt + nGen + depth);
  // Phase toggles from the run page. Both default OFF; combined effect:
  //   neither (default) β†’ only GPU perf, no CPU pass at all
  //   run CPU perf      β†’ CPU perf baseline + GPU perf, no token-id check
  //   run consistency   β†’ CPU consistency tokens + GPU consistency + GPU perf
  //   both              β†’ full CPU baseline (consistency + 1-rep perf) +
  //                       GPU consistency + GPU perf
  const runConsistency = !!state.runConsistency;
  const runCpuPerf = !!state.runCpuPerf;
  const needCpuPass = runConsistency || runCpuPerf;
  // ─── CPU baseline ───
  // Skipped entirely if both toggles disable it OR caller provided a cached
  // result from an earlier depth pass. Otherwise the pass mixes and matches:
  // consistency_run captures token_ids; perf phase runs at nReps=1 (single
  // warmup+timed rep β€” enough to populate the dashboard's CPU/GPU comparison
  // without doubling CPU runtime).
  let cpuResult;
  if (reuseCpu) {
    cpuResult = reuseCpu;
  } else if (needCpuPass) {
    const phaseLabel = runConsistency && runCpuPerf ? 'reference tokens + 1-rep perf'
      : runConsistency ? 'reference tokens'
      : '1-rep perf';
    row.setStatus('cpu-baseline', phaseLabel);
    try {
      cpuResult = await runBenchmarkInWorker(v, {
        consistencyPrompt: runConsistency ? CONSISTENCY_PROMPT : '',
        consistencyNPredict: DEFAULT_N_PREDICT,
        refTokenIds: null,
        nPrompt: runCpuPerf ? nPrompt : 0,
        nGen: runCpuPerf ? nGen : 0,
        // CPU baseline keeps depth=0 β€” its job is reference-token capture
        // and a single-rep perf comparator, not depth-loaded sweeping.
        nDepth: 0,
        nReps: 1,
        nCtx: nCtxFor(0),
        // nGpuLayers=0 keeps the entire model on CPU for this pass.
        nGpuLayers: 0,
      }, {
        onStatus: (status, msg, sinceMs) => row.setStatus(`cpu/${status}`, msg, sinceMs),
        onProgress: (fr, downloaded, total) => row.setProgress(fr, downloaded, total),
        onLog: logLine,
      });
    } catch (err) {
      cpuResult = { status: 'error', error: err.message || String(err) };
    }
  } else {
    cpuResult = { status: 'skipped' };
  }
  // CPU pass is best-effort. Failures (OOM, slow device, missing op) don't
  // block the GPU run β€” the user opted into resilience implicitly by the
  // phase being best-effort, and explicitly via the skip checkboxes.
  const cpuOk = cpuResult.status === 'done';
  if (cpuResult.status === 'error') {
    logLine(`CPU baseline failed (${cpuResult.error || 'unknown'}) β€” proceeding with GPU run.`);
    row.setStatus('cpu-skipped', 'continuing with GPU only');
  }
  // refTokenIds is the GPU pass's input for forced-decode consistency. Only
  // pass when we actually have tokens (consistency was requested AND CPU
  // produced tokens). Serialized as a comma-joined string for the worker.
  const refTokenIds = (cpuOk && runConsistency && cpuResult.consistency?.token_ids?.length)
    ? cpuResult.consistency.token_ids.join(',')
    : '';
  if (state.aborted) {
    return { status: 'error', error: 'aborted', cpu: cpuResult, gpu: null };
  }
  // ─── GPU pass: consistency (when not skipped) + perf in one model load ───
  row.setStatus('gpu-run', 'loading model');
  let gpuResult;
  try {
    gpuResult = await runBenchmarkInWorker(v, {
      consistencyPrompt: runConsistency ? CONSISTENCY_PROMPT : '',
      consistencyNPredict: DEFAULT_N_PREDICT,
      refTokenIds: refTokenIds || null,
      nPrompt,
      nGen,
      nDepth,
      nReps,
      nCtx: nCtxFor(nDepth),
      nGpuLayers: DEFAULT_N_GPU_LAYERS,
    }, {
      onStatus: (s, m, sinceMs) => row.setStatus(`gpu/${s}`, m, sinceMs),
      onProgress: (fr, d, t) => row.setProgress(fr, d, t),
      onLog: logLine,
    });
  } catch (err) {
    gpuResult = { status: 'error', error: err.message || String(err) };
  }
  // Overall status follows the GPU pass β€” the CPU baseline is advisory.
  return {
    status: gpuResult.status === 'done' ? 'done' : 'error',
    error: gpuResult.status === 'done' ? null : (gpuResult.error || 'GPU run failed'),
    cpu: cpuResult,
    gpu: gpuResult,
  };
}
// Round a finite number to two decimal places; non-finite inputs
// (NaN, Β±Infinity, non-numbers) collapse to 0 so records stay JSON-clean.
function round2(n) {
  if (!Number.isFinite(n)) return 0;
  return Number(n.toFixed(2));
}
// Render any thrown/recorded value as a human-readable one-line string.
// Primitives pass through, Errors yield their message, and structured
// objects (ErrorEvent-like shapes from worker.onerror in particular) are
// flattened field-by-field, falling back to JSON of own properties, then
// the [object X] tag, then a fixed placeholder.
function describeError(err) {
  if (err == null) return '';
  if (typeof err === 'string') return err;
  if (typeof err === 'number' || typeof err === 'boolean') return String(err);
  if (err instanceof Error) return err.message || String(err);
  if (typeof err !== 'object') return String(err);
  const pieces = [];
  const pushIfString = (value, render) => {
    if (typeof value === 'string' && value) pieces.push(render(value));
  };
  pushIfString(err.name, (v) => v);
  pushIfString(err.type, (v) => `type=${v}`);
  pushIfString(err.message, (v) => v);
  pushIfString(err.reason, (v) => `reason=${v}`);
  pushIfString(err.filename, (v) => `file=${v}`);
  if (typeof err.lineno === 'number' && err.lineno > 0) pieces.push(`line=${err.lineno}`);
  if (typeof err.colno === 'number' && err.colno > 0) pieces.push(`col=${err.colno}`);
  if (typeof err.error === 'string' && err.error) pieces.push(`error=${err.error}`);
  else if (err.error instanceof Error && err.error.message) pieces.push(`error=${err.error.message}`);
  if (pieces.length > 0) return pieces.join(' | ');
  // No recognized fields β€” try JSON of the object's own properties.
  try {
    const own = {};
    for (const key of Object.getOwnPropertyNames(err)) {
      own[key] = err[key];
    }
    const json = JSON.stringify(own);
    if (json && json !== '{}') return json;
  } catch {
    // fall through (cycles, BigInt, throwing getters)
  }
  // Last resorts: the [object X] tag, then a fixed placeholder.
  const tag = Object.prototype.toString.call(err);
  if (tag && tag !== '[object Object]') return tag;
  return 'unknown structured error';
}
// Pull pp/tg test results out of a metrics.tests array. Returns null if the
// requested test wasn't run (e.g. nPrompt=0 means no pp test).
// Pull pp/tg test results out of a metrics.tests array. Returns null if the
// requested test wasn't run (e.g. nPrompt=0 means no pp test) or if `tests`
// isn't an array at all.
function findTest(tests, prefix) {
  if (!Array.isArray(tests)) return null;
  for (const t of tests) {
    if (typeof t.name === 'string' && t.name.startsWith(prefix)) return t;
  }
  return null;
}
/**
 * Assemble the persisted result record for one variant run.
 *
 * @param {object} v - Variant row (modelName, quant, filename, repo, sizeMB).
 * @param {object} vr - Per-variant run outcome: { status, error, gpu, cpu }.
 *   `vr.gpu` / `vr.cpu` are presumably the bench worker payloads (metrics,
 *   consistency, output, memoryInfo, …) — shape inferred from usage here,
 *   confirm against the runner.
 * @param {object} machine - Auto-detected machine description.
 * @param {string} browser - Auto-detected browser label.
 * @param {number} wallTimeMs - Wall-clock duration of the whole variant run.
 * @returns {object} JSON-serializable record consumed by the dashboard
 *   table, the markdown/JSON output, and the HF dataset submission.
 */
function makeRecord(v, vr, machine, browser, wallTimeMs) {
  const gpu = vr.gpu;
  const tests = gpu?.metrics?.tests || null;
  const pp = findTest(tests, 'pp'); // prompt-processing (prefill) test
  const tg = findTest(tests, 'tg'); // token-generation (decode) test
  // Llama-bench shape lives under metrics.tests; flat prefill_tok_s /
  // decode_tok_s are kept for backward compat with the existing dashboard
  // table cells until those are migrated to read from tests directly.
  const metrics = tests ? {
    tests,
    n_prompt: gpu.metrics.n_prompt,
    n_gen: gpu.metrics.n_gen,
    n_reps: gpu.metrics.n_reps,
    iterations: gpu.metrics.n_reps, // legacy alias for n_reps
    prefill_tok_s: pp ? round2(pp.avg_ts) : 0,
    decode_tok_s: tg ? round2(tg.avg_ts) : 0,
    prefill_tok_s_stdev: pp ? round2(pp.stddev_ts) : 0,
    decode_tok_s_stdev: tg ? round2(tg.stddev_ts) : 0,
    prefill_samples: pp ? pp.samples_ts : [],
    decode_samples: tg ? tg.samples_ts : [],
    n_p_eval: pp ? pp.n_prompt : 0,
    n_eval: tg ? tg.n_gen : 0,
    // Average per-rep wall times, converted ns → ms.
    t_p_eval_ms: pp ? round2(pp.avg_ns / 1e6) : 0,
    t_eval_ms: tg ? round2(tg.avg_ns / 1e6) : 0,
  } : null;
  // CPU baseline now runs a 1-rep perf sweep alongside the consistency
  // pass, so we have CPU-vs-GPU numbers to compare on the dashboard.
  // n=1 means no stddev, so the dashboard cell renders just the avg.
  const cpuTests = vr.cpu?.metrics?.tests;
  const cpuPp = cpuTests?.find(t => t.name?.startsWith('pp')) || null;
  const cpuTg = cpuTests?.find(t => t.name?.startsWith('tg')) || null;
  const cpuBaseline = vr.cpu?.status === 'done' ? {
    prefill_tok_s: cpuPp ? round2(cpuPp.avg_ts) : null,
    decode_tok_s: cpuTg ? round2(cpuTg.avg_ts) : null,
  } : null;
  return {
    status: vr.status,
    // Normalized to a plain string (or null) so it survives JSON round-trips.
    error: describeError(vr.error) || null,
    model: v.modelName,
    variant: v.quant,
    filename: v.filename,
    repo: v.repo,
    sizeMB: v.sizeMB,
    browser,
    nCtx: DEFAULT_N_CTX,
    nPredict: DEFAULT_N_PREDICT,
    // Actual sweep parameters come from the GPU metrics; 0 when no GPU run.
    nPrompt: gpu?.metrics?.n_prompt ?? 0,
    nGen: gpu?.metrics?.n_gen ?? 0,
    nDepth: gpu?.metrics?.n_depth ?? 0,
    nReps: gpu?.metrics?.n_reps ?? 0,
    nGpuLayers: DEFAULT_N_GPU_LAYERS,
    timestamp: new Date().toISOString(),
    wallTimeMs,
    // Fall back to a live navigator.gpu probe when the worker didn't report.
    webgpuAvailable: gpu?.webgpuAvailable ?? !!navigator.gpu,
    gpuAdapterInfo: gpu?.gpuAdapterInfo ?? null,
    buildType: gpu?.buildType ?? null,
    // llama.cpp version stamped from build-info.json. Lets us correlate
    // result drift with llama.cpp upgrades over time.
    llamaCppCommit: state.buildInfo?.llamaCppCommit ?? null,
    llamaCppDescribe: state.buildInfo?.llamaCppDescribe ?? null,
    dawnTag: state.buildInfo?.dawnTag ?? null,
    metrics,
    consistency: gpu?.consistency ?? null,
    cpu_baseline: cpuBaseline,
    output: gpu?.output || '',
    machine,
    // Memory snapshot llama.cpp captured immediately after bench_load —
    // model_size, state_size, and per-device {free,total} from every ggml
    // backend. Useful for spotting memory-pressured runs and for sanity-
    // checking GPU memory headroom across machines.
    memoryInfo: gpu?.memoryInfo ?? null,
    // User-typed labels that override (or supplement) the auto-detected
    // machine/browser fields. Auto-detection is unreliable across UA-string
    // anonymization, deviceMemory rounding, and missing WebGPU adapter info.
    userReported: { ...state.userReported },
    source: `webgpu-bench/site (${state.surface})`,
  };
}
// Promise-based delay, used to pace back-to-back variant runs.
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
// ──────────────── crash-recovery trail ────────────────
//
// Mobile tabs often get reaped mid-run without warning — WebKit reloads the
// page and the user sees a silent reset. We stamp localStorage before each
// variant; if a stamp is present on page load and we can't match it against
// a successful result in lastRun, we assume a crash and surface a banner.
function writeRunIntent(v) {
  const stamp = {
    model: v.modelName,
    quant: v.quant,
    filename: v.filename,
    sizeMB: v.sizeMB,
    when: Date.now(),
  };
  try {
    localStorage.setItem(RUN_INTENT_STORAGE_KEY, JSON.stringify(stamp));
  } catch {
    // storage quota exceeded or storage disabled — best effort only
  }
}
// Drop the crash-trail stamp once a run completes (or the banner is dismissed).
function clearRunIntent() {
  try {
    localStorage.removeItem(RUN_INTENT_STORAGE_KEY);
  } catch {
    // storage disabled — nothing to clear
  }
}
// On page load, surface a banner when a previous run left a stale intent
// stamp behind — strong evidence the tab was killed mid-run (OOM reap).
function maybeShowCrashBanner() {
  const banner = $('run-crash-banner');
  const text = $('run-crash-banner-text');
  const dismiss = $('run-crash-banner-dismiss');
  if (!banner || !text || !dismiss) return;

  let intent = null;
  try {
    const raw = localStorage.getItem(RUN_INTENT_STORAGE_KEY);
    if (!raw) return;
    intent = JSON.parse(raw);
  } catch {
    clearRunIntent();
    return;
  }
  if (!intent || typeof intent.when !== 'number') {
    clearRunIntent();
    return;
  }
  const age = Date.now() - intent.when;
  if (age < CRASH_STALE_MS) {
    // Too fresh — another tab might still be running. Leave it alone.
    return;
  }

  // The stamp survived a reload and went stale; a clean finish would have
  // cleared it, so treat this as a crash and tell the user.
  const size = intent.sizeMB ? formatSize(intent.sizeMB) : 'unknown size';
  text.textContent =
    `A previous run on "${intent.model} ${intent.quant}" (${size}) did not complete — the tab was likely reaped by the OS (low memory). Try a smaller quant.`;
  banner.hidden = false;
  dismiss.addEventListener('click', () => {
    banner.hidden = true;
    clearRunIntent();
  }, { once: true });
}
// ──────────────── Output ────────────────
// Re-render the markdown output textarea from state.results and sync the
// collapsed/enabled state of the output card and its copy/download buttons.
function renderOutput() {
  const ta = $('output-textarea');
  if (ta) ta.value = generateMarkdown(state.results);
  const hasContent = !!ta?.value;
  // With nothing to show: collapse the card, disable copy and download.
  document.querySelector('.run-output')?.classList.toggle('is-empty', !hasContent);
  const copyBtn = $('btn-copy');
  if (copyBtn) copyBtn.disabled = !hasContent;
  const dlJson = $('btn-download-json');
  if (dlJson) dlJson.disabled = !hasContent;
}
/* Hide the Progress scaffolding at mount so we don't show an empty
   placeholder. `ensureProgressTable` un-hides it the moment a download or
   run row appears. */
function hideProgressUntilFirstRow() {
  const card = $('run-progress-wrapper')?.closest('.table-card');
  if (!card) return;
  card.hidden = true;
  const header = card.previousElementSibling;
  if (header?.classList?.contains('section-header')) header.hidden = true;
}
// Render state.results as a markdown report: machine header, llama-bench
// style table of passes, bullet list of failures, and the raw JSON inside
// a collapsed <details> block.
function generateMarkdown(results) {
  if (results.length === 0) return '';
  const m = results[0].machine || {};
  const headerLines = [
    `# WebGPU Benchmark Results`,
    ``,
    `- Machine: \`${m.cpus || 'unknown'}\` · ${m.totalMemoryGB || 0} GB · ${m.platform || 'unknown'} (${m.arch || '?'})`,
    `- Browser: \`${results[0].browser}\``,
    `- Build: \`${results[0].buildType || '?'}\``,
    `- WebGPU: ${results[0].webgpuAvailable ? 'yes' : 'no'}`,
    `- Timestamp: ${new Date().toISOString()}`,
    `- Variants run: ${results.length}`,
    '',
  ];
  const header = headerLines.join('\n');

  // Single pass partition into passed / failed.
  const passed = [];
  const failed = [];
  for (const r of results) {
    (r.status === 'done' ? passed : failed).push(r);
  }

  let body = '';
  if (passed.length) {
    // llama-bench-style markdown: separate pp / tg columns with avg ± stddev.
    const fmtTest = (tests, prefix) => {
      const t = tests?.find(x => x.name?.startsWith(prefix));
      if (!t) return '\u2014';
      return `${t.avg_ts.toFixed(2)} \u00b1 ${t.stddev_ts.toFixed(2)} (${t.name})`;
    };
    body += `## Passed (${passed.length})\n\n`;
    body += `| Model | Variant | Size | pp tok/s | tg tok/s | Wall s |\n`;
    body += `|---|---|---:|---:|---:|---:|\n`;
    for (const r of passed) {
      const cells = [
        r.model,
        r.variant,
        formatSize(r.sizeMB),
        fmtTest(r.metrics?.tests, 'pp'),
        fmtTest(r.metrics?.tests, 'tg'),
        (r.wallTimeMs / 1000).toFixed(1),
      ];
      body += `| ${cells.join(' | ')} |\n`;
    }
    body += `\n`;
  }
  if (failed.length) {
    body += `## Failed (${failed.length})\n\n`;
    for (const r of failed) {
      body += `- **${r.model}** ${r.variant}: \`${describeError(r.error) || 'unknown error'}\`\n`;
    }
    body += `\n`;
  }
  const json = JSON.stringify(results, null, 2);
  body += `<details>\n<summary>Raw JSON (click to expand)</summary>\n\n\`\`\`json\n${json}\n\`\`\`\n</details>\n`;
  return header + body;
}
// Wire the Copy and Download-JSON buttons under the output card.
// Fix: the copy handler used to dereference $('output-textarea') without a
// null check — inconsistent with renderOutput's `if (ta)` guard, and a
// missing textarea would reject the async handler unhandled.
function wireOutputHandlers() {
  $('btn-copy')?.addEventListener('click', async () => {
    const ta = $('output-textarea');
    if (!ta) return;
    try {
      await navigator.clipboard.writeText(ta.value);
      flashButton($('btn-copy'), 'Copied!');
    } catch {
      // Clipboard API can be denied (permissions / non-secure context);
      // fall back to the legacy execCommand path on the selected textarea.
      ta.select();
      try { document.execCommand('copy'); flashButton($('btn-copy'), 'Copied!'); } catch {}
    }
  });
  $('btn-download-json')?.addEventListener('click', () => {
    if (state.results.length === 0) return;
    const blob = new Blob([JSON.stringify(state.results, null, 2)], { type: 'application/json' });
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    // Timestamped filename, e.g. webgpu-bench-2024-05-01-12-30-00.json.
    const stamp = new Date().toISOString().replace(/[:T.]/g, '-').slice(0, 19);
    a.download = `webgpu-bench-${stamp}.json`;
    a.click();
    // Revoke after the click has been processed so the download isn't cut off.
    setTimeout(() => URL.revokeObjectURL(url), 1000);
  });
}
// Temporarily swap a button's label (e.g. "Copy" → "Copied!") and restore it
// 1.2 s later. Fix: a second click during the flash used to capture the
// flashed text as the "original", leaving the button permanently stuck on
// the flash label (and the first timer fired mid-flash). We stash the true
// original on first entry and reset the timer on re-entry instead.
function flashButton(el, msg) {
  if (el._flashTimer != null) {
    clearTimeout(el._flashTimer);
  } else {
    el._flashOriginal = el.textContent;
  }
  el.textContent = msg;
  el._flashTimer = setTimeout(() => {
    el.textContent = el._flashOriginal;
    el._flashTimer = null;
  }, 1200);
}
// ──────────────── Abort / Purge / Hub ────────────────
// Wire the Abort button: set the abort flag for the run loop and cancel
// every registered in-flight operation immediately.
function wireAbortHandler() {
  const abortBtn = $('btn-abort');
  if (!abortBtn) return;
  abortBtn.addEventListener('click', () => {
    state.aborted = true;
    abortBtn.disabled = true;
    // Fire every registered abort hook (worker terminate, fetch
    // AbortController): worker.terminate() alone leaves its Promise pending
    // forever, and a fetch without a signal can hang on slow connections.
    // Each hook is expected to also settle the promise its caller awaits.
    const count = state.abortHandlers.size;
    for (const cancel of state.abortHandlers) {
      try { cancel(); } catch { /* keep iterating */ }
    }
    state.abortHandlers.clear();
    if (count > 0) {
      logLine(`Abort requested — cancelled ${count} in-flight op${count === 1 ? '' : 's'}.`);
    } else {
      logLine('Abort requested — will stop between variants.');
    }
  });
}
// Wire the purge button: after user confirmation, delete every cached GGUF
// from OPFS and refresh the per-variant cache badges and run buttons.
function wirePurgeHandler() {
  const btn = $('btn-purge');
  if (!btn) return;
  btn.addEventListener('click', async () => {
    const ok = confirm('Delete all cached GGUF files from OPFS? This frees browser storage but re-downloads will be needed.');
    if (!ok) return;
    try {
      await purgeOpfs();
      state.cacheStatus = {};
      // Every visible variant row needs its cache badge re-rendered.
      for (const row of document.querySelectorAll('.run-variant-row')) {
        const v = state.variants.find(x => cacheKey(x) === row.dataset.key);
        if (v) refreshCacheBadge(v);
      }
      updateButtons();
      logLine('OPFS cache purged.');
    } catch (err) {
      logLine(`Purge failed: ${err.message}`);
    }
  });
}
/**
 * Wire the HF hub controls: the sign-in/sign-out toggle and the
 * "submit results to the dataset" button. Visibility of this row is
 * handled elsewhere (renderHfSection / surface gating).
 */
function wireHubHandlers() {
  const signinBtn = $('btn-signin');
  const submitBtn = $('btn-submit');
  if (signinBtn) {
    signinBtn.addEventListener('click', async () => {
      // Sign in / Sign out is disabled while a run is in flight; this guard
      // catches a stale-event-during-state-change race and keeps results safe.
      if (state.running) return;
      try {
        // With an active session the same button acts as "Sign out".
        if (state.hfSession) {
          signOutHF();
          state.hfSession = null;
          renderHfSection();
          return;
        }
        await beginHFSignIn();
        // beginHFSignIn redirects — unreachable after.
      } catch (err) {
        logLine(`Sign-in failed: ${err.message}`);
      }
    });
  }
  if (submitBtn) {
    submitBtn.addEventListener('click', async () => {
      if (!state.hfSession) return;
      const eligible = submittableResults();
      if (eligible.length === 0) return;
      // Required user-reported fields gate the submission so the leaderboard
      // doesn't accumulate anonymous rows. The Run buttons stay enabled
      // even when these are blank — we only block at submit time.
      const missing = USER_REPORTED_REQUIRED.filter(k => !state.userReported[k]);
      if (missing.length > 0) {
        // Open and scroll the "Your machine" card into view so the user
        // sees exactly which fields are blocking the submit.
        const card = $('user-reported-card');
        if (card) { card.open = true; card.scrollIntoView({ behavior: 'smooth', block: 'center' }); }
        refreshUserReportedValidation();
        logLine(`Submit blocked: fill in ${missing.join(', ')} in "Your machine".`);
        return;
      }
      // Disable while the request is in flight so a double-click can't
      // open two PRs.
      submitBtn.disabled = true;
      const original = submitBtn.textContent;
      submitBtn.textContent = 'Submitting…';
      try {
        // Machine/browser labels for the PR are taken from the first
        // eligible record — presumably shared across the run; confirm
        // against submittableResults if mixed-browser runs become possible.
        const first = eligible[0];
        const res = await submitResultsToDataset(eligible, {
          token: state.hfSession.accessToken,
          machineSlug: first.machine?.slug || 'unknown',
          browser: first.browser || 'unknown-browser',
          submittedBy: state.hfSession.userName ? {
            name: state.hfSession.userName,
            hubId: state.hfSession.hubId || null,
            avatarUrl: state.hfSession.avatarUrl || null,
          } : null,
        });
        const link = res.pullRequestUrl
          || `https://huggingface.co/datasets/${HF_DATASET_REPO}/discussions`;
        logLine(`Opened PR with ${eligible.length} variant(s): ${link}`);
        // Restore the real label before flashing so the post-flash revert
        // doesn't snap back to "Submitting…".
        submitBtn.textContent = original;
        flashButton(submitBtn, 'Submitted!');
      } catch (err) {
        logLine(`Submit failed: ${err.message}`);
        submitBtn.textContent = original;
      } finally {
        // Re-enable only when there is still something left to submit.
        submitBtn.disabled = submittableResults().length === 0;
      }
    });
  }
}
// Attach click handlers to the three run-entry buttons (each may be absent
// depending on the surface markup, hence the optional chaining).
function wireRunHandlers() {
  const bindings = [
    ['btn-download', onDownloadClick],
    ['btn-run', onRunClick],
    ['btn-run-study', onRunStudyClick],
  ];
  for (const [id, handler] of bindings) {
    $(id)?.addEventListener('click', handler);
  }
}
// ──────────────── Public API ────────────────
export async function mountRunSection() {
if (state.mounted) return;
state.mounted = true;
state.surface = await detectSurface();
state.source = ggufSource();
state.budget = await getDeviceBudgetMB();
state.device = await describeDevice();
// Don't block mount on the build-info fetch β€” it's non-critical and the
// first record will pick it up on the next render once it resolves.
loadBuildInfo().then(info => {
state.buildInfo = info;
renderHeader();
}).catch(() => { /* keep buildInfo null */ });
try {
state.models = await loadModels();
} catch (err) {
const panel = $('run-models');
if (panel) panel.innerHTML = `<div class="empty-state">Could not load models.json β€” ${escapeText(err.message)}</div>`;
console.error(err);
return;
}
state.cacheStatus = await loadCacheStatus();
state.variants = flattenVariants(state.models);
if (state.surface === 'space') {
try { state.hfSession = await resumeHFSession(); } catch { /* ignore */ }
}
// Evict-after-run default depends on surface: hosted OPFS quota is tight
// and worth clawing back between runs; localhost's cache/models/ is
// commonly shared with CLI workflows, so leaving it populated is helpful.
const evictCheckbox = $('evict-after-run');
if (evictCheckbox) {
evictCheckbox.checked = state.surface === 'space';
}
renderHeader();
renderModels();
wireSelectionHandlers();
wireFilters();
wireFamilySearch();
wireBatchSelect();
wirePerfInputs();
wireRunHandlers();
wireAbortHandler();
wirePurgeHandler();
wireHubHandlers();
wireOutputHandlers();
wireUserReported();
// Restore the last completed run from localStorage so it survives a page
// reload β€” including the OAuth redirect taking the user to HF and back.
// Must run before updateButtons/renderOutput/hideProgress so they pick up
// the rehydrated state.results.
restoreSavedResults();
updateButtons();
renderOutput();
if (state.results.length === 0) hideProgressUntilFirstRow();
maybeShowCrashBanner();
}
// Storage key for the JSON-serialized records of the last completed run.
// Canonical location is sessionStorage; localStorage is legacy (see below).
const RESULTS_STORAGE_KEY = 'webgpu-bench:lastRun';
/**
 * Rehydrate state.results (and the per-variant progress rows) after the HF
 * OAuth round trip. A plain page refresh intentionally does NOT restore —
 * only a reload carrying the OAuth-pending marker does.
 */
function restoreSavedResults() {
  // Clean up the pre-migration localStorage entry — earlier builds wrote
  // results there, which made them persist across full tab closes. The
  // canonical location is now sessionStorage.
  try { localStorage.removeItem(RESULTS_STORAGE_KEY); } catch { /* noop */ }
  // Only restore when we just round-tripped through HF for sign-in
  // (beginHFSignIn() sets HF_OAUTH_PENDING_KEY immediately before the
  // redirect). A plain refresh has no such marker and should land on a
  // clean progress table — old runs sticking around was the bug.
  let oauthPending = false;
  try { oauthPending = !!sessionStorage.getItem(HF_OAUTH_PENDING_KEY); } catch { /* noop */ }
  if (!oauthPending) {
    try { sessionStorage.removeItem(RESULTS_STORAGE_KEY); } catch { /* noop */ }
    return;
  }
  // Consume the marker now so the next plain refresh doesn't restore again.
  try { sessionStorage.removeItem(HF_OAUTH_PENDING_KEY); } catch { /* noop */ }
  let saved;
  try {
    const raw = sessionStorage.getItem(RESULTS_STORAGE_KEY);
    if (!raw) return;
    saved = JSON.parse(raw);
  } catch { return; }
  if (!Array.isArray(saved) || saved.length === 0) return;
  state.results = saved;
  // Detect study mode from the saved records: if any (model, variant) cell
  // has both nDepth=0 and nDepth>0 entries, the OAuth-round-tripped run
  // was a Run Study and should restore into the depth-split layout.
  const depthsByCell = new Map();
  for (const r of saved) {
    const k = `${r.model}::${r.variant}`;
    if (!depthsByCell.has(k)) depthsByCell.set(k, new Set());
    depthsByCell.get(k).add(r.nDepth ?? 0);
  }
  state.studyMode = [...depthsByCell.values()].some(s => s.has(0) && [...s].some(d => d > 0));
  // Re-fill the progress table rows; records whose variant no longer exists
  // in models.json are kept in state.results but get no visible row.
  for (const record of saved) {
    const v = state.variants.find(x => x.repo === record.repo && x.filename === record.filename);
    if (!v) continue;
    progressRowFor(v).fillFromRecord(record);
  }
}
/**
 * Counterpart to mountRunSection. No real teardown exists yet — we only
 * flip the abort flag so any in-flight run loop stops between variants.
 * Future: cancel in-flight downloads/workers, detach DOM listeners.
 */
export function teardownRunSection() {
  state.aborted = true;
}