Spaces:
Running
Running
// Thin adapter for runner.js (Playwright). Reads URL params, downloads the
// model into OPFS, hands its OPFS location to bench-worker.js, and forwards
// the worker's progress/result onto window.__BENCH so the runner can poll.
// Inference orchestration lives in site/js/run/bench-worker.js — the same
// worker the interactive Run page uses.
| import { ggufSource, OPFS_ROOT_NAME } from './js/run/source.js'; | |
| import { CONSISTENCY_PROMPT } from './js/run/config.js'; | |
// Page-level safety net — an Emscripten abort() may not throw, so it can only
// be observed here as an `error` event. Mirror it onto window.__BENCH.
window.addEventListener('error', (event) => {
  const bench = window.__BENCH;
  // Nothing to record before the bench state exists or once the run is done.
  if (!bench || bench.status === 'done') return;
  // Keep the first recorded error; later ones only re-assert the status.
  if (!bench.error) bench.error = event.message || 'Uncaught error';
  bench.status = 'error';
});
// Record promise rejections that nothing handled onto window.__BENCH, unless
// the run has already completed. The first recorded error is preserved.
window.addEventListener('unhandledrejection', (e) => {
  if (window.__BENCH && window.__BENCH.status !== 'done') {
    // String(undefined) / String(null) are the truthy texts "undefined" /
    // "null", which would defeat the generic fallback below — treat a
    // missing reason as "no text" instead.
    const reasonText = e.reason == null ? '' : String(e.reason);
    window.__BENCH.error = window.__BENCH.error || reasonText || 'Unhandled rejection';
    window.__BENCH.status = 'error';
  }
});
// Entry point: parse URL params, publish window.__BENCH, download the model
// into OPFS, run bench-worker.js, and merge the worker's result record back
// into window.__BENCH.
(async function () {
  const params = new URLSearchParams(window.location.search);

  /**
   * Read an integer URL parameter.
   * @param {string} name - Query-string key.
   * @param {number} fallback - Value used when the key is missing, empty, or
   *   does not start with a parseable integer.
   * @returns {number} The parsed integer, never NaN.
   */
  function intParam(name, fallback) {
    const parsed = Number.parseInt(params.get(name) ?? '', 10);
    return Number.isNaN(parsed) ? fallback : parsed;
  }

  /**
   * Best-effort text for a caught value, which may not be an Error instance.
   * @param {unknown} err - The caught value.
   * @returns {string} `err.message` when present, otherwise `String(err)`.
   */
  function errorText(err) {
    return err?.message || String(err);
  }

  const modelFile = params.get('model') || '';
  const hfRepo = params.get('hfRepo') || 'unsloth/Llama-3.2-1B-Instruct-GGUF';
  const consistencyPrompt = CONSISTENCY_PROMPT;
  const consistencyNPredict = intParam('nPredict', 128);
  const nPrompt = intParam('nPrompt', 512);
  const nGen = intParam('nGen', 128);
  const nReps = intParam('nReps', 5);
  const nDepth = intParam('nDepth', 0);
  const nCtx = intParam('nCtx', 2048);
  const nGpuLayers = intParam('nGpuLayers', 999);
  const noWarmup = params.get('noWarmup') === '1';
  const refTokenIds = params.get('refTokenIds') || null;

  // mode=perf        → skip consistency entirely (e.g. for the GPU perf-only pass).
  // mode=consistency → skip perf (e.g. CPU baseline pass that just needs token_ids).
  // default 'both' runs both phases in one model load.
  const mode = params.get('mode') || 'both';
  const runConsistency = mode !== 'perf';
  const runPerf = mode !== 'consistency';

  // Build flavour is chosen by feature-detecting WebAssembly.Suspending.
  const hasJspi = 'Suspending' in WebAssembly;
  const buildType = hasJspi ? 'jspi' : 'asyncify';

  // State object published on window; the global error listeners and the
  // code below write to it.
  window.__BENCH = {
    status: 'init',
    error: null,
    modelFile,
    buildType,
    webgpuAvailable: !!navigator.gpu,
    gpuAdapterInfo: null,
    downloadProgress: 0,
    metrics: null,
    output: '',
  };

  // All three elements are optional; every use below is null-guarded.
  const statusEl = document.getElementById('status');
  const progressEl = document.getElementById('progress');
  const logEl = document.getElementById('log');

  /**
   * Record the current status on window.__BENCH and mirror it in the page.
   * @param {string} status - Status key; 'error' and 'done' get CSS classes.
   * @param {string} [msg] - Human-readable text; defaults to the status key.
   */
  function onStatus(status, msg) {
    window.__BENCH.status = status;
    if (statusEl) {
      statusEl.textContent = msg || status;
      statusEl.className = status === 'error' ? 'err' : status === 'done' ? 'ok' : '';
    }
  }

  /**
   * Write a timestamped (HH:MM:SS.mmm, UTC) line to the console and the log element.
   * @param {string} msg - Message to log.
   */
  function onLog(msg) {
    const line = `[${new Date().toISOString().slice(11, 23)}] ${msg}`;
    console.log(line);
    if (logEl) logEl.textContent += line + '\n';
  }

  /**
   * Record download progress and render it as MB / percent.
   * @param {number} fraction - Completed fraction, stored as-is on window.__BENCH.
   * @param {number} downloaded - Bytes received so far.
   * @param {number} total - Total bytes; the text is only rendered when > 0.
   */
  function onProgress(fraction, downloaded, total) {
    window.__BENCH.downloadProgress = fraction;
    if (progressEl && total > 0) {
      const pct = (fraction * 100).toFixed(1);
      progressEl.textContent =
        `Downloaded: ${(downloaded / (1024 * 1024)).toFixed(1)} MB / ` +
        `${(total / (1024 * 1024)).toFixed(1)} MB (${pct}%)`;
    }
  }

  // Stage 1: download into OPFS on the main thread (sync access handles
  // are worker-only, but the downloading half runs fine here).
  let size;
  try {
    onStatus('downloading', `Downloading ${modelFile}...`);
    onLog(`Fetching ${hfRepo}/${modelFile} into OPFS`);
    const resolved = await ggufSource().opfsHandleForModel(hfRepo, modelFile, onProgress);
    size = resolved.size;
  } catch (err) {
    window.__BENCH.error = `opfsHandleForModel failed: ${errorText(err)}`;
    onStatus('error', window.__BENCH.error);
    onLog(`ERROR: ${window.__BENCH.error}`);
    return;
  }

  // Stage 2: hand the OPFS layout key to the worker. The worker re-resolves
  // the FileHandle locally (FileHandles don't structured-clone reliably on
  // iOS Safari) and opens a sync access handle inside its own thread.
  const result = await new Promise((resolve) => {
    let worker;
    let settled = false;

    // Resolve exactly once and always release the worker.
    const finish = (record) => {
      if (settled) return;
      settled = true;
      try { worker?.terminate(); } catch { /* best-effort cleanup */ }
      resolve(record);
    };

    // Failures detected on this side of the worker boundary: surface them in
    // the page as well, since no worker status message will arrive for them.
    const fail = (message) => {
      if (settled) return;
      window.__BENCH.error = message;
      onStatus('error', message);
      onLog(`ERROR: ${message}`);
      finish({ status: 'error', error: message });
    };

    try {
      worker = new Worker(new URL('./js/run/bench-worker.js', import.meta.url));
    } catch (err) {
      fail(`worker construct failed: ${errorText(err)}`);
      return;
    }

    worker.onmessage = (e) => {
      const msg = e.data || {};
      switch (msg.type) {
        case 'status':
          onStatus(msg.status, msg.msg);
          break;
        case 'progress':
          onProgress(msg.fraction, msg.downloaded, msg.total);
          break;
        case 'log':
          onLog(msg.line);
          break;
        case 'result':
          // A missing record would make the merge below a no-op and leave a
          // stale status for the runner to poll — report it instead.
          if (msg.record && typeof msg.record === 'object') finish(msg.record);
          else fail('worker returned no result record');
          break;
        default:
          break;
      }
    };
    worker.onerror = (err) => {
      fail(err?.message || 'worker error');
    };
    worker.onmessageerror = () => {
      fail('worker message deserialization failed');
    };

    try {
      worker.postMessage({
        type: 'run',
        params: {
          buildType,
          nCtx,
          nGpuLayers,
          // An empty prompt / zeroed sizes are how a phase is switched off.
          consistencyPrompt: runConsistency ? consistencyPrompt : '',
          consistencyNPredict,
          refTokenIds,
          nPrompt: runPerf ? nPrompt : 0,
          nGen: runPerf ? nGen : 0,
          nReps,
          nDepth: runPerf ? nDepth : 0,
          noWarmup,
        },
        opfsPath: { rootDir: OPFS_ROOT_NAME, repo: hfRepo, filename: modelFile },
      });
    } catch (err) {
      // Without this the executor would throw, rejecting the promise and
      // leaving the worker running.
      fail(`worker postMessage failed: ${errorText(err)}`);
    }
  });

  // Merge worker result into window.__BENCH. downloadProgress was set
  // during stage 1 and is preserved.
  Object.assign(window.__BENCH, result);
  window.__BENCH._opfsSize = size;
})();