// Thin adapter for runner.js (Playwright). Reads URL params, downloads the
// model into OPFS, hands it to bench-worker.js, and forwards the worker's
// progress/result onto window.__BENCH so the runner can poll. Inference
// orchestration lives in site/js/run/bench-worker.js — same worker the
// interactive Run page uses.
import { ggufSource, OPFS_ROOT_NAME } from './js/run/source.js';
import { CONSISTENCY_PROMPT } from './js/run/config.js';
// Global error handlers: record failures that never surface as a thrown
// exception in our own code (e.g. an Emscripten abort(), which may not throw).
// A run that already finished ('done') is left untouched, and the first
// recorded error message is kept.
window.addEventListener('error', (event) => {
  const bench = window.__BENCH;
  if (!bench || bench.status === 'done') return;
  bench.error = bench.error || event.message || 'Uncaught error';
  bench.status = 'error';
});
// Record promise rejections that nothing handled onto window.__BENCH, unless
// the run already finished ('done'). An existing error message is preserved.
// A rejection with no reason (null/undefined) is reported as
// 'Unhandled rejection' instead of the literal text "undefined"/"null".
window.addEventListener('unhandledrejection', (e) => {
  if (window.__BENCH && window.__BENCH.status !== 'done') {
    if (!window.__BENCH.error) {
      // Only stringify when we actually need a message.
      const reasonText = e.reason == null ? '' : String(e.reason);
      window.__BENCH.error = reasonText || 'Unhandled rejection';
    }
    window.__BENCH.status = 'error';
  }
});
(async function () {
const params = new URLSearchParams(window.location.search);
// Reads an integer URL parameter. Falls back to `fallback` when the parameter
// is missing, empty, or not parseable as a base-10 integer, so a malformed
// value (e.g. `nCtx=abc`) does not turn into NaN.
const intParam = (name, fallback) => {
  const parsed = Number.parseInt(params.get(name) || '', 10);
  return Number.isNaN(parsed) ? fallback : parsed;
};
const modelFile = params.get('model') || '';
const hfRepo = params.get('hfRepo') || 'unsloth/Llama-3.2-1B-Instruct-GGUF';
const consistencyPrompt = CONSISTENCY_PROMPT;
const consistencyNPredict = intParam('nPredict', 128);
const nPrompt = intParam('nPrompt', 512);
const nGen = intParam('nGen', 128);
const nReps = intParam('nReps', 5);
const nDepth = intParam('nDepth', 0);
const nCtx = intParam('nCtx', 2048);
const nGpuLayers = intParam('nGpuLayers', 999);
const noWarmup = params.get('noWarmup') === '1';
const refTokenIds = params.get('refTokenIds') || null;
// mode=perf → skip consistency entirely (e.g. for the GPU perf-only pass).
// mode=consistency → skip perf (e.g. CPU baseline pass that just needs token_ids).
// Any other value (default 'both') runs both phases in one model load.
const mode = params.get('mode') || 'both';
const runConsistency = mode !== 'perf';
const runPerf = mode !== 'consistency';
// Build flavour is chosen by feature detection: 'jspi' when the WebAssembly
// namespace exposes `Suspending`, otherwise 'asyncify'.
const hasJspi = 'Suspending' in WebAssembly;
const buildType = hasJspi ? 'jspi' : 'asyncify';
// Shared state object; later stages update it in place.
window.__BENCH = {
  status: 'init',
  error: null,
  modelFile,
  buildType,
  webgpuAvailable: !!navigator.gpu,
  gpuAdapterInfo: null,
  downloadProgress: 0,
  metrics: null,
  output: '',
};
// Optional page elements; each may be null when the page does not define it.
const statusEl = document.getElementById('status');
const progressEl = document.getElementById('progress');
const logEl = document.getElementById('log');
// Records the current phase on window.__BENCH and, when a status element
// exists, shows `msg` (or the raw status when `msg` is falsy) and picks the
// CSS class: 'err' for 'error', 'ok' for 'done', none otherwise.
function onStatus(status, msg) {
  window.__BENCH.status = status;
  if (!statusEl) return;

  let cssClass = '';
  if (status === 'error') {
    cssClass = 'err';
  } else if (status === 'done') {
    cssClass = 'ok';
  }

  statusEl.textContent = msg || status;
  statusEl.className = cssClass;
}
// Writes one timestamped line to the console and, when a log element exists,
// appends the same line plus a newline to it. The timestamp is the
// HH:MM:SS.mmm slice of the current ISO-8601 (UTC) time.
function onLog(msg) {
  const isoNow = new Date().toISOString();
  const clock = isoNow.slice(11, 23);
  const entry = `[${clock}] ${msg}`;
  console.log(entry);
  if (!logEl) return;
  logEl.textContent += `${entry}\n`;
}
// Stores the download fraction on window.__BENCH and, when a progress element
// exists and the total size is known (> 0), renders
// "Downloaded: X MB / Y MB (Z%)" with one decimal place each.
function onProgress(fraction, downloaded, total) {
  window.__BENCH.downloadProgress = fraction;
  if (!progressEl || !(total > 0)) return;

  const BYTES_PER_MB = 1024 * 1024;
  const asMb = (bytes) => (bytes / BYTES_PER_MB).toFixed(1);
  const percent = (fraction * 100).toFixed(1);
  progressEl.textContent = `Downloaded: ${asMb(downloaded)} MB / ${asMb(total)} MB (${percent}%)`;
}
// Stage 1: download into OPFS on the main thread (sync access handles
// are worker-only, but the downloading half runs fine here).
let size;
try {
onStatus('downloading', `Downloading ${modelFile}...`);
onLog(`Fetching ${hfRepo}/${modelFile} into OPFS`);
const r = await ggufSource().opfsHandleForModel(hfRepo, modelFile, onProgress);
size = r.size;
} catch (err) {
window.__BENCH.error = `opfsHandleForModel failed: ${err.message}`;
window.__BENCH.status = 'error';
onStatus('error', window.__BENCH.error);
onLog(`ERROR: ${window.__BENCH.error}`);
return;
}
// Stage 2: pass the OPFS layout key (root dir / repo / filename) to the
// worker rather than a FileHandle. FileHandles don't structured-clone
// reliably on iOS Safari, so the worker re-resolves the handle itself and
// opens the sync access handle on its own thread.
// The promise only ever resolves (never rejects) with a result record;
// failures are expressed as { status: 'error', error }.
const result = await new Promise((resolve) => {
  let benchWorker;
  try {
    benchWorker = new Worker(new URL('./js/run/bench-worker.js', import.meta.url));
  } catch (err) {
    resolve({ status: 'error', error: `worker construct failed: ${err.message}` });
    return;
  }

  // First caller wins: terminate the worker and resolve exactly once.
  let isSettled = false;
  function settle(record) {
    if (isSettled) return;
    isSettled = true;
    try {
      benchWorker.terminate();
    } catch {
      /* noop */
    }
    resolve(record);
  }

  // Route worker messages by type; unknown types are ignored.
  benchWorker.onmessage = (event) => {
    const payload = event.data || {};
    switch (payload.type) {
      case 'status':
        onStatus(payload.status, payload.msg);
        break;
      case 'progress':
        onProgress(payload.fraction, payload.downloaded, payload.total);
        break;
      case 'log':
        onLog(payload.line);
        break;
      case 'result':
        settle(payload.record);
        break;
      default:
        break;
    }
  };
  benchWorker.onerror = (err) => {
    settle({ status: 'error', error: err?.message || 'worker error' });
  };
  benchWorker.onmessageerror = () => {
    settle({ status: 'error', error: 'worker message deserialization failed' });
  };

  // A skipped phase is signalled by an empty prompt (consistency) or zeroed
  // sizes (perf) rather than by a separate flag.
  const runParams = {
    buildType,
    nCtx,
    nGpuLayers,
    consistencyPrompt: runConsistency ? consistencyPrompt : '',
    consistencyNPredict,
    refTokenIds,
    nPrompt: runPerf ? nPrompt : 0,
    nGen: runPerf ? nGen : 0,
    nReps,
    nDepth: runPerf ? nDepth : 0,
    noWarmup,
  };
  benchWorker.postMessage({
    type: 'run',
    params: runParams,
    opfsPath: { rootDir: OPFS_ROOT_NAME, repo: hfRepo, filename: modelFile },
  });
});
// Merge worker result into window.__BENCH. downloadProgress was set
// during stage 1 and is preserved unless the record itself carries one.
// A missing record is turned into an explicit error so the status always
// reaches a terminal value instead of staying on the last progress phase.
const finalRecord = result ?? { status: 'error', error: 'worker returned no result record' };
// If the status was already 'error' before the merge, it has been recorded
// earlier; remember that so the failure is not logged twice.
const wasAlreadyError = window.__BENCH.status === 'error';
Object.assign(window.__BENCH, finalRecord);
window.__BENCH._opfsSize = size;
// Failures produced on this side (worker construction, onerror,
// onmessageerror) only exist in the record; mirror them to the status
// element and the log the same way a stage-1 download failure is reported.
if (window.__BENCH.status === 'error' && !wasAlreadyError) {
  const failure = window.__BENCH.error || 'benchmark failed';
  onStatus('error', failure);
  onLog(`ERROR: ${failure}`);
}
})();
|