// Thin adapter for runner.js (Playwright). Reads URL params, downloads the
// model into OPFS, hands it to bench-worker.js, and forwards the worker's
// progress/result onto window.__BENCH so the runner can poll. Inference
// orchestration lives in site/js/run/bench-worker.js — same worker the
// interactive Run page uses.

import { ggufSource, OPFS_ROOT_NAME } from './js/run/source.js';
import { CONSISTENCY_PROMPT } from './js/run/config.js';

/**
 * Read an integer URL parameter.
 *
 * @param {URLSearchParams} params - Parsed query string.
 * @param {string} name - Parameter name.
 * @param {number} fallback - Value used when the parameter is missing, empty,
 *   or does not start with a base-10 integer.
 * @returns {number} The parsed integer, or `fallback`.
 */
function intParam(params, name, fallback) {
  const parsed = Number.parseInt(params.get(name) ?? '', 10);
  // parseInt yields NaN for missing/empty/garbage input; never forward NaN to
  // the worker.
  return Number.isNaN(parsed) ? fallback : parsed;
}

/**
 * Produce a printable message for any thrown value (Error or otherwise).
 *
 * @param {unknown} err - The caught value.
 * @returns {string} `err.message` when present and non-empty, else `String(err)`.
 */
function describeError(err) {
  return err?.message || String(err);
}

// Global error handlers — catch Emscripten abort() which may not throw.
// Both leave a finished ('done') run untouched and keep the first recorded
// error message.
window.addEventListener('error', (e) => {
  if (window.__BENCH && window.__BENCH.status !== 'done') {
    window.__BENCH.error = window.__BENCH.error || e.message || 'Uncaught error';
    window.__BENCH.status = 'error';
  }
});

window.addEventListener('unhandledrejection', (e) => {
  if (window.__BENCH && window.__BENCH.status !== 'done') {
    // String(undefined) is the non-empty text "undefined", so a nullish reason
    // must be mapped to '' explicitly for the generic fallback to apply.
    const reasonText = e.reason == null ? '' : String(e.reason);
    window.__BENCH.error = window.__BENCH.error || reasonText || 'Unhandled rejection';
    window.__BENCH.status = 'error';
  }
});

(async function () {
  const params = new URLSearchParams(window.location.search);
  const modelFile = params.get('model') || '';
  const hfRepo = params.get('hfRepo') || 'unsloth/Llama-3.2-1B-Instruct-GGUF';
  const consistencyPrompt = CONSISTENCY_PROMPT;
  const consistencyNPredict = intParam(params, 'nPredict', 128);
  const nPrompt = intParam(params, 'nPrompt', 512);
  const nGen = intParam(params, 'nGen', 128);
  const nReps = intParam(params, 'nReps', 5);
  const nDepth = intParam(params, 'nDepth', 0);
  const nCtx = intParam(params, 'nCtx', 2048);
  const nGpuLayers = intParam(params, 'nGpuLayers', 999);
  const noWarmup = params.get('noWarmup') === '1';
  const refTokenIds = params.get('refTokenIds') || null;

  // mode=perf → skip consistency entirely (e.g. for the GPU perf-only pass).
  // mode=consistency → skip perf (e.g. CPU baseline pass that just needs token_ids).
  // default 'both' runs both phases in one model load.
  const mode = params.get('mode') || 'both';
  const runConsistency = mode !== 'perf';
  const runPerf = mode !== 'consistency';

  const hasJspi = 'Suspending' in WebAssembly;
  const buildType = hasJspi ? 'jspi' : 'asyncify';

  // Shared state object polled by the runner.
  window.__BENCH = {
    status: 'init',
    error: null,
    modelFile,
    buildType,
    webgpuAvailable: !!navigator.gpu,
    gpuAdapterInfo: null,
    downloadProgress: 0,
    metrics: null,
    output: '',
  };

  const statusEl = document.getElementById('status');
  const progressEl = document.getElementById('progress');
  const logEl = document.getElementById('log');

  // Record a status on window.__BENCH and mirror it into the status element.
  function onStatus(status, msg) {
    window.__BENCH.status = status;
    if (statusEl) {
      statusEl.textContent = msg || status;
      statusEl.className = status === 'error' ? 'err' : status === 'done' ? 'ok' : '';
    }
  }

  // Write a timestamped (HH:MM:SS.mmm, UTC) line to the console and log element.
  function onLog(msg) {
    const line = `[${new Date().toISOString().slice(11, 23)}] ${msg}`;
    console.log(line);
    if (logEl) logEl.textContent += line + '\n';
  }

  // Record download progress; the readout is only rendered once a total is known.
  function onProgress(fraction, downloaded, total) {
    window.__BENCH.downloadProgress = fraction;
    if (progressEl && total > 0) {
      const pct = (fraction * 100).toFixed(1);
      progressEl.textContent =
        `Downloaded: ${(downloaded / (1024 * 1024)).toFixed(1)} MB / ` +
        `${(total / (1024 * 1024)).toFixed(1)} MB (${pct}%)`;
    }
  }

  // Stage 1: download into OPFS on the main thread (sync access handles
  // are worker-only, but the downloading half runs fine here).
  let size;
  try {
    onStatus('downloading', `Downloading ${modelFile}...`);
    onLog(`Fetching ${hfRepo}/${modelFile} into OPFS`);
    const r = await ggufSource().opfsHandleForModel(hfRepo, modelFile, onProgress);
    size = r.size;
  } catch (err) {
    const message = `opfsHandleForModel failed: ${describeError(err)}`;
    window.__BENCH.error = message;
    onStatus('error', message);
    onLog(`ERROR: ${message}`);
    return;
  }

  // Stage 2: hand the OPFS layout key to the worker. The worker re-resolves
  // the FileHandle locally (FileHandles don't structured-clone reliably on
  // iOS Safari) and opens a sync access handle inside its own thread.
  const result = await new Promise((resolve) => {
    let worker = null;
    let settled = false;

    // Resolve exactly once and tear the worker down (best effort).
    const finish = (record) => {
      if (settled) return;
      settled = true;
      try {
        worker?.terminate();
      } catch {
        /* noop */
      }
      resolve(record);
    };

    // Failure detected on this side of the worker boundary: surface it in the
    // page and log (as the stage 1 failure path does), then resolve with an
    // error record so the runner sees a terminal status.
    const fail = (message) => {
      if (settled) return;
      onStatus('error', message);
      onLog(`ERROR: ${message}`);
      finish({ status: 'error', error: message });
    };

    try {
      worker = new Worker(new URL('./js/run/bench-worker.js', import.meta.url));
    } catch (err) {
      fail(`worker construct failed: ${describeError(err)}`);
      return;
    }

    worker.onmessage = (e) => {
      const msg = e.data || {};
      if (msg.type === 'status') {
        onStatus(msg.status, msg.msg);
      } else if (msg.type === 'progress') {
        onProgress(msg.fraction, msg.downloaded, msg.total);
      } else if (msg.type === 'log') {
        onLog(msg.line);
      } else if (msg.type === 'result') {
        // A result without a record would merge nothing into window.__BENCH,
        // leaving the status non-terminal; treat it as a failure instead.
        if (msg.record == null) fail('worker returned no result record');
        else finish(msg.record);
      }
    };
    worker.onerror = (err) => {
      fail(err?.message || 'worker error');
    };
    worker.onmessageerror = () => {
      fail('worker message deserialization failed');
    };

    worker.postMessage({
      type: 'run',
      params: {
        buildType,
        nCtx,
        nGpuLayers,
        // Empty prompt / zero counts switch the corresponding phase off.
        consistencyPrompt: runConsistency ? consistencyPrompt : '',
        consistencyNPredict,
        refTokenIds,
        nPrompt: runPerf ? nPrompt : 0,
        nGen: runPerf ? nGen : 0,
        nReps,
        nDepth: runPerf ? nDepth : 0,
        noWarmup,
      },
      opfsPath: { rootDir: OPFS_ROOT_NAME, repo: hfRepo, filename: modelFile },
    });
  });

  // Merge worker result into window.__BENCH. downloadProgress was set
  // during stage 1 and is preserved.
  Object.assign(window.__BENCH, result);
  window.__BENCH._opfsSize = size;
})();