Spaces:
Running
Running
GitHub Actions committed on
Commit ·
44a16ab
1
Parent(s): b861182
sync from abhijitramesh/webgpu-bench@43e1f069db
Browse files- css/style.css +0 -6
- js/run/bench-worker.js +40 -14
- js/run/controller.js +27 -16
- js/run/core.js +35 -13
css/style.css
CHANGED
|
@@ -1918,12 +1918,6 @@ a:hover { color: var(--info); }
|
|
| 1918 |
font-size: 12px;
|
| 1919 |
font-family: var(--font-mono);
|
| 1920 |
}
|
| 1921 |
-
.run-family-warning {
|
| 1922 |
-
margin-left: auto;
|
| 1923 |
-
color: var(--accent-amber);
|
| 1924 |
-
font-size: 11px;
|
| 1925 |
-
font-weight: 600;
|
| 1926 |
-
}
|
| 1927 |
|
| 1928 |
.run-variant-list {
|
| 1929 |
display: flex;
|
|
|
|
| 1918 |
font-size: 12px;
|
| 1919 |
font-family: var(--font-mono);
|
| 1920 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1921 |
|
| 1922 |
.run-variant-list {
|
| 1923 |
display: flex;
|
js/run/bench-worker.js
CHANGED
|
@@ -35,7 +35,22 @@
|
|
| 35 |
|
| 36 |
const post = (msg) => self.postMessage(msg);
|
| 37 |
const log = (line) => post({ type: 'log', line });
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
// ─── OPFS-backed model loading (wllama-style) ───
|
| 41 |
// For >2GB GGUFs we can't put the whole file on the WASM heap (TypedArray
|
|
@@ -283,7 +298,7 @@ async function runOne({ params, stream, buffer, opfsPath }) {
|
|
| 283 |
const Module = await self.createBenchModule({
|
| 284 |
locateFile: (filename) => `/build/${buildType}/${filename}`,
|
| 285 |
print: (text) => log(`[wasm] ${text}`),
|
| 286 |
-
printErr: (text) => log(`
|
| 287 |
onAbort: (reason) => {
|
| 288 |
const msg = `WASM aborted: ${reason}`;
|
| 289 |
result.error = msg;
|
|
@@ -405,7 +420,7 @@ async function runOne({ params, stream, buffer, opfsPath }) {
|
|
| 405 |
// useful pp/tg numbers via synthetic-token paths.
|
| 406 |
if (consistencyPrompt) {
|
| 407 |
try {
|
| 408 |
-
status('consistency', 'Running consistency check...');
|
| 409 |
log(`bench_run("...", ${consistencyNPredict}) — consistency phase`);
|
| 410 |
const raw = await Module.ccall(
|
| 411 |
'bench_run', 'string',
|
|
@@ -427,11 +442,18 @@ async function runOne({ params, stream, buffer, opfsPath }) {
|
|
| 427 |
);
|
| 428 |
const ev = parseBenchResult('bench_eval_tokens', evalRaw);
|
| 429 |
result.consistency = { ...result.consistency, ...ev };
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 435 |
}
|
| 436 |
} catch (err) {
|
| 437 |
log(`Consistency phase failed: ${err.message} — continuing to perf phase`);
|
|
@@ -450,14 +472,14 @@ async function runOne({ params, stream, buffer, opfsPath }) {
|
|
| 450 |
if (wantPp) {
|
| 451 |
try {
|
| 452 |
if (!noWarmup) {
|
| 453 |
-
status('perf', `warmup pp${nPrompt}`);
|
| 454 |
log(`bench_pp(${nPrompt}) — warmup`);
|
| 455 |
const raw = await Module.ccall('bench_pp', 'string', ['number'], [nPrompt], { async: true });
|
| 456 |
parseBenchResult('bench_pp warmup', raw);
|
| 457 |
}
|
| 458 |
const samples_ns = [];
|
| 459 |
for (let i = 0; i < nReps; i++) {
|
| 460 |
-
status('perf', `pp${nPrompt} ${i + 1}/${nReps}`);
|
| 461 |
const t0 = performance.now();
|
| 462 |
const raw = await Module.ccall('bench_pp', 'string', ['number'], [nPrompt], { async: true });
|
| 463 |
const t_ns = (performance.now() - t0) * 1e6;
|
|
@@ -474,14 +496,18 @@ async function runOne({ params, stream, buffer, opfsPath }) {
|
|
| 474 |
if (wantTg) {
|
| 475 |
try {
|
| 476 |
if (!noWarmup) {
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 480 |
parseBenchResult('bench_tg warmup', raw);
|
| 481 |
}
|
| 482 |
const samples_ns = [];
|
| 483 |
for (let i = 0; i < nReps; i++) {
|
| 484 |
-
status('perf', `tg${nGen} ${i + 1}/${nReps}`);
|
| 485 |
const t0 = performance.now();
|
| 486 |
const raw = await Module.ccall('bench_tg', 'string', ['number'], [nGen], { async: true });
|
| 487 |
const t_ns = (performance.now() - t0) * 1e6;
|
|
|
|
| 35 |
|
| 36 |
const post = (msg) => self.postMessage(msg);
|
| 37 |
const log = (line) => post({ type: 'log', line });
|
| 38 |
+
// sinceMs: optional epoch ms. Forwarded to controller so the row ticks an
|
| 39 |
+
// elapsed counter while a long-running ccall (warmup, big-model rep) is in
|
| 40 |
+
// flight — JSPI doesn't yield often enough on CPU paths to drive ticks here.
|
| 41 |
+
const status = (s, msg, sinceMs) => post({ type: 'status', status: s, msg, sinceMs });
|
| 42 |
+
|
| 43 |
+
// Below this many compared tokens, the consistency agreement rate is
|
| 44 |
+
// statistical noise (e.g. early-EOS models that produce 1 token always
|
| 45 |
+
// report 100%). Mirror of CONSISTENCY_MIN_TOKENS in core.js.
|
| 46 |
+
const CONSISTENCY_MIN_TOKENS = 8;
|
| 47 |
+
|
| 48 |
+
// llama.cpp/ggml emit info, warnings, AND errors all to stderr. Tag only the
|
| 49 |
+
// actually-bad lines as :err so real failures stand out. Mirror in core.js.
|
| 50 |
+
function classifyWasmStderr(text) {
|
| 51 |
+
return /\b(error|abort(ed)?|failed|fatal|panic|assert)\b|GGML_ASSERT/i.test(text)
|
| 52 |
+
? '[wasm:err]' : '[wasm]';
|
| 53 |
+
}
|
| 54 |
|
| 55 |
// ─── OPFS-backed model loading (wllama-style) ───
|
| 56 |
// For >2GB GGUFs we can't put the whole file on the WASM heap (TypedArray
|
|
|
|
| 298 |
const Module = await self.createBenchModule({
|
| 299 |
locateFile: (filename) => `/build/${buildType}/${filename}`,
|
| 300 |
print: (text) => log(`[wasm] ${text}`),
|
| 301 |
+
printErr: (text) => log(`${classifyWasmStderr(text)} ${text}`),
|
| 302 |
onAbort: (reason) => {
|
| 303 |
const msg = `WASM aborted: ${reason}`;
|
| 304 |
result.error = msg;
|
|
|
|
| 420 |
// useful pp/tg numbers via synthetic-token paths.
|
| 421 |
if (consistencyPrompt) {
|
| 422 |
try {
|
| 423 |
+
status('consistency', 'Running consistency check...', Date.now());
|
| 424 |
log(`bench_run("...", ${consistencyNPredict}) — consistency phase`);
|
| 425 |
const raw = await Module.ccall(
|
| 426 |
'bench_run', 'string',
|
|
|
|
| 442 |
);
|
| 443 |
const ev = parseBenchResult('bench_eval_tokens', evalRaw);
|
| 444 |
result.consistency = { ...result.consistency, ...ev };
|
| 445 |
+
if (ev.n_tokens < CONSISTENCY_MIN_TOKENS) {
|
| 446 |
+
log(
|
| 447 |
+
`Consistency: insufficient samples (${ev.n_tokens} token` +
|
| 448 |
+
`${ev.n_tokens === 1 ? '' : 's'} before EOS) — agreement rate not meaningful`
|
| 449 |
+
);
|
| 450 |
+
} else {
|
| 451 |
+
log(
|
| 452 |
+
`Consistency: ${(ev.agreement_rate * 100).toFixed(1)}% top-1 agreement (` +
|
| 453 |
+
`${ev.n_agree}/${ev.n_tokens})` +
|
| 454 |
+
(ev.first_disagreement >= 0 ? ` — first diverge @ ${ev.first_disagreement}` : '')
|
| 455 |
+
);
|
| 456 |
+
}
|
| 457 |
}
|
| 458 |
} catch (err) {
|
| 459 |
log(`Consistency phase failed: ${err.message} — continuing to perf phase`);
|
|
|
|
| 472 |
if (wantPp) {
|
| 473 |
try {
|
| 474 |
if (!noWarmup) {
|
| 475 |
+
status('perf', `warmup pp${nPrompt}`, Date.now());
|
| 476 |
log(`bench_pp(${nPrompt}) — warmup`);
|
| 477 |
const raw = await Module.ccall('bench_pp', 'string', ['number'], [nPrompt], { async: true });
|
| 478 |
parseBenchResult('bench_pp warmup', raw);
|
| 479 |
}
|
| 480 |
const samples_ns = [];
|
| 481 |
for (let i = 0; i < nReps; i++) {
|
| 482 |
+
status('perf', `pp${nPrompt} ${i + 1}/${nReps}`, Date.now());
|
| 483 |
const t0 = performance.now();
|
| 484 |
const raw = await Module.ccall('bench_pp', 'string', ['number'], [nPrompt], { async: true });
|
| 485 |
const t_ns = (performance.now() - t0) * 1e6;
|
|
|
|
| 496 |
if (wantTg) {
|
| 497 |
try {
|
| 498 |
if (!noWarmup) {
|
| 499 |
+
// Run the full nGen-token decode loop as warmup (was bench_tg(1)).
|
| 500 |
+
// A 1-token warmup exercises the decode kernel once, which leaves
|
| 501 |
+
// the first timed rep absorbing pipeline-cache / shader-specialize
|
| 502 |
+
// cost on every subsequent step.
|
| 503 |
+
status('perf', `warmup tg${nGen}`, Date.now());
|
| 504 |
+
log(`bench_tg(${nGen}) — warmup`);
|
| 505 |
+
const raw = await Module.ccall('bench_tg', 'string', ['number'], [nGen], { async: true });
|
| 506 |
parseBenchResult('bench_tg warmup', raw);
|
| 507 |
}
|
| 508 |
const samples_ns = [];
|
| 509 |
for (let i = 0; i < nReps; i++) {
|
| 510 |
+
status('perf', `tg${nGen} ${i + 1}/${nReps}`, Date.now());
|
| 511 |
const t0 = performance.now();
|
| 512 |
const raw = await Module.ccall('bench_tg', 'string', ['number'], [nGen], { async: true });
|
| 513 |
const t_ns = (performance.now() - t0) * 1e6;
|
js/run/controller.js
CHANGED
|
@@ -168,10 +168,10 @@ function isQuickVariant(v) {
|
|
| 168 |
}
|
| 169 |
|
| 170 |
function computeWarnings(modelName, quant) {
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
return
|
| 175 |
}
|
| 176 |
|
| 177 |
function cacheKey(v) { return `${v.repo}/${v.filename}`; }
|
|
@@ -432,13 +432,6 @@ function renderModels() {
|
|
| 432 |
stats.textContent = `${variants.length} variants · ${fitsCount} fit · ${quickFitCount} quick`;
|
| 433 |
|
| 434 |
header.append(toggleBtn, selectAll, nameLabel, paramChip, stats);
|
| 435 |
-
|
| 436 |
-
if (/^granite-4/i.test(family)) {
|
| 437 |
-
const w = document.createElement('span');
|
| 438 |
-
w.className = 'run-family-warning';
|
| 439 |
-
w.textContent = '⚠ needs SSM_SCAN in llama.cpp';
|
| 440 |
-
header.appendChild(w);
|
| 441 |
-
}
|
| 442 |
familyEl.appendChild(header);
|
| 443 |
|
| 444 |
const list = document.createElement('div');
|
|
@@ -856,12 +849,29 @@ function progressRowFor(v) {
|
|
| 856 |
`;
|
| 857 |
tbody.appendChild(tr);
|
| 858 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 859 |
return {
|
| 860 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 861 |
tr.className = `run-row-${rowClassFor(status)}`;
|
| 862 |
-
tr.querySelector('.status')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 863 |
},
|
| 864 |
setProgress(fraction, downloaded, total) {
|
|
|
|
| 865 |
const pct = (fraction * 100).toFixed(1);
|
| 866 |
const detail = total > 0
|
| 867 |
? `${pct}% (${formatSize(downloaded / (1024 * 1024))} / ${formatSize(total / (1024 * 1024))})`
|
|
@@ -869,6 +879,7 @@ function progressRowFor(v) {
|
|
| 869 |
tr.querySelector('.status').textContent = detail ? `downloading ${detail}` : 'downloading';
|
| 870 |
},
|
| 871 |
fillFromRecord(record) {
|
|
|
|
| 872 |
tr.className = `run-row-${record.status === 'done' ? 'ok' : 'error'}`;
|
| 873 |
tr.querySelector('.status').textContent = record.status;
|
| 874 |
// Format llama-bench style: "avg \u00b1 stddev" with the test name as
|
|
@@ -1219,7 +1230,7 @@ function runInWorker({
|
|
| 1219 |
|
| 1220 |
worker.onmessage = (e) => {
|
| 1221 |
const msg = e.data || {};
|
| 1222 |
-
if (msg.type === 'status') onStatus?.(msg.status, msg.msg);
|
| 1223 |
else if (msg.type === 'progress') onProgress?.(msg.fraction, msg.downloaded, msg.total);
|
| 1224 |
else if (msg.type === 'log') onLog?.(msg.line);
|
| 1225 |
else if (msg.type === 'result') finish(msg.record);
|
|
@@ -1454,7 +1465,7 @@ async function runVariantWithIterations(v, row) {
|
|
| 1454 |
nCtx: DEFAULT_N_CTX,
|
| 1455 |
nGpuLayers: 0,
|
| 1456 |
}, {
|
| 1457 |
-
onStatus: (status, msg) => row.setStatus(`cpu/${status}`, msg),
|
| 1458 |
onProgress: (fr, downloaded, total) => row.setProgress(fr, downloaded, total),
|
| 1459 |
onLog: logLine,
|
| 1460 |
});
|
|
@@ -1499,7 +1510,7 @@ async function runVariantWithIterations(v, row) {
|
|
| 1499 |
nCtx: DEFAULT_N_CTX,
|
| 1500 |
nGpuLayers: DEFAULT_N_GPU_LAYERS,
|
| 1501 |
}, {
|
| 1502 |
-
onStatus: (s, m) => row.setStatus(`gpu/${s}`, m),
|
| 1503 |
onProgress: (fr, d, t) => row.setProgress(fr, d, t),
|
| 1504 |
onLog: logLine,
|
| 1505 |
});
|
|
|
|
| 168 |
}
|
| 169 |
|
| 170 |
function computeWarnings(modelName, quant) {
|
| 171 |
+
// SSM_SCAN and Q1_0 are both supported in the bundled llama.cpp
|
| 172 |
+
// (ggml-webgpu.cpp). granite-4 ran cleanly in the apr-30 run; Q1_0 is
|
| 173 |
+
// wired into the fast-path dequant table. No warnings to surface today.
|
| 174 |
+
return [];
|
| 175 |
}
|
| 176 |
|
| 177 |
function cacheKey(v) { return `${v.repo}/${v.filename}`; }
|
|
|
|
| 432 |
stats.textContent = `${variants.length} variants · ${fitsCount} fit · ${quickFitCount} quick`;
|
| 433 |
|
| 434 |
header.append(toggleBtn, selectAll, nameLabel, paramChip, stats);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 435 |
familyEl.appendChild(header);
|
| 436 |
|
| 437 |
const list = document.createElement('div');
|
|
|
|
| 849 |
`;
|
| 850 |
tbody.appendChild(tr);
|
| 851 |
}
|
| 852 |
+
let tickInterval = null;
|
| 853 |
+
const stopTicker = () => {
|
| 854 |
+
if (tickInterval !== null) { clearInterval(tickInterval); tickInterval = null; }
|
| 855 |
+
};
|
| 856 |
return {
|
| 857 |
+
// sinceMs: optional epoch ms. When set, the cell ticks once a second so
|
| 858 |
+
// long-running phases (CPU pp512 warmup, big-model rep calls) show
|
| 859 |
+
// wall-clock progress instead of looking hung. Cleared on next setStatus.
|
| 860 |
+
setStatus(status, msg, sinceMs) {
|
| 861 |
+
stopTicker();
|
| 862 |
tr.className = `run-row-${rowClassFor(status)}`;
|
| 863 |
+
const cell = tr.querySelector('.status');
|
| 864 |
+
const render = () => {
|
| 865 |
+
const base = msg ? `${status} — ${msg}` : status;
|
| 866 |
+
cell.textContent = sinceMs
|
| 867 |
+
? `${base} (${Math.floor((Date.now() - sinceMs) / 1000)}s)`
|
| 868 |
+
: base;
|
| 869 |
+
};
|
| 870 |
+
render();
|
| 871 |
+
if (sinceMs) tickInterval = setInterval(render, 1000);
|
| 872 |
},
|
| 873 |
setProgress(fraction, downloaded, total) {
|
| 874 |
+
stopTicker();
|
| 875 |
const pct = (fraction * 100).toFixed(1);
|
| 876 |
const detail = total > 0
|
| 877 |
? `${pct}% (${formatSize(downloaded / (1024 * 1024))} / ${formatSize(total / (1024 * 1024))})`
|
|
|
|
| 879 |
tr.querySelector('.status').textContent = detail ? `downloading ${detail}` : 'downloading';
|
| 880 |
},
|
| 881 |
fillFromRecord(record) {
|
| 882 |
+
stopTicker();
|
| 883 |
tr.className = `run-row-${record.status === 'done' ? 'ok' : 'error'}`;
|
| 884 |
tr.querySelector('.status').textContent = record.status;
|
| 885 |
// Format llama-bench style: "avg \u00b1 stddev" with the test name as
|
|
|
|
| 1230 |
|
| 1231 |
worker.onmessage = (e) => {
|
| 1232 |
const msg = e.data || {};
|
| 1233 |
+
if (msg.type === 'status') onStatus?.(msg.status, msg.msg, msg.sinceMs);
|
| 1234 |
else if (msg.type === 'progress') onProgress?.(msg.fraction, msg.downloaded, msg.total);
|
| 1235 |
else if (msg.type === 'log') onLog?.(msg.line);
|
| 1236 |
else if (msg.type === 'result') finish(msg.record);
|
|
|
|
| 1465 |
nCtx: DEFAULT_N_CTX,
|
| 1466 |
nGpuLayers: 0,
|
| 1467 |
}, {
|
| 1468 |
+
onStatus: (status, msg, sinceMs) => row.setStatus(`cpu/${status}`, msg, sinceMs),
|
| 1469 |
onProgress: (fr, downloaded, total) => row.setProgress(fr, downloaded, total),
|
| 1470 |
onLog: logLine,
|
| 1471 |
});
|
|
|
|
| 1510 |
nCtx: DEFAULT_N_CTX,
|
| 1511 |
nGpuLayers: DEFAULT_N_GPU_LAYERS,
|
| 1512 |
}, {
|
| 1513 |
+
onStatus: (s, m, sinceMs) => row.setStatus(`gpu/${s}`, m, sinceMs),
|
| 1514 |
onProgress: (fr, d, t) => row.setProgress(fr, d, t),
|
| 1515 |
onLog: logLine,
|
| 1516 |
});
|
js/run/core.js
CHANGED
|
@@ -10,6 +10,17 @@ const DEFAULT_N_PROMPT = 512;
|
|
| 10 |
const DEFAULT_N_GEN = 128;
|
| 11 |
const DEFAULT_N_REPS = 5;
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
async function loadBenchScriptOnce(buildType) {
|
| 14 |
if (typeof globalThis.createBenchModule === 'function') return;
|
| 15 |
const script = document.createElement('script');
|
|
@@ -96,7 +107,7 @@ async function runBenchActions(Module, {
|
|
| 96 |
// aborting the whole run.
|
| 97 |
if (consistencyPrompt) {
|
| 98 |
try {
|
| 99 |
-
onStatus?.('consistency', 'Running consistency check...');
|
| 100 |
onLog?.(`bench_run("...", ${consistencyNPredict}) — consistency phase`);
|
| 101 |
const raw = await Module.ccall(
|
| 102 |
'bench_run', 'string',
|
|
@@ -118,11 +129,18 @@ async function runBenchActions(Module, {
|
|
| 118 |
);
|
| 119 |
const ev = parseBenchResult('bench_eval_tokens', evalRaw);
|
| 120 |
out.consistency = { ...out.consistency, ...ev };
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
}
|
| 127 |
} catch (err) {
|
| 128 |
onLog?.(`Consistency phase failed: ${err.message} — continuing to perf phase`);
|
|
@@ -142,14 +160,14 @@ async function runBenchActions(Module, {
|
|
| 142 |
if (wantPp) {
|
| 143 |
try {
|
| 144 |
if (!noWarmup) {
|
| 145 |
-
onStatus?.('perf', `warmup pp${nPrompt}`);
|
| 146 |
onLog?.(`bench_pp(${nPrompt}) — warmup`);
|
| 147 |
const raw = await Module.ccall('bench_pp', 'string', ['number'], [nPrompt], { async: true });
|
| 148 |
parseBenchResult('bench_pp warmup', raw);
|
| 149 |
}
|
| 150 |
const samples_ns = [];
|
| 151 |
for (let i = 0; i < nReps; i++) {
|
| 152 |
-
onStatus?.('perf', `pp${nPrompt} ${i + 1}/${nReps}`);
|
| 153 |
const t0 = performance.now();
|
| 154 |
const raw = await Module.ccall('bench_pp', 'string', ['number'], [nPrompt], { async: true });
|
| 155 |
const t_ns = (performance.now() - t0) * 1e6;
|
|
@@ -166,14 +184,18 @@ async function runBenchActions(Module, {
|
|
| 166 |
if (wantTg) {
|
| 167 |
try {
|
| 168 |
if (!noWarmup) {
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
parseBenchResult('bench_tg warmup', raw);
|
| 173 |
}
|
| 174 |
const samples_ns = [];
|
| 175 |
for (let i = 0; i < nReps; i++) {
|
| 176 |
-
onStatus?.('perf', `tg${nGen} ${i + 1}/${nReps}`);
|
| 177 |
const t0 = performance.now();
|
| 178 |
const raw = await Module.ccall('bench_tg', 'string', ['number'], [nGen], { async: true });
|
| 179 |
const t_ns = (performance.now() - t0) * 1e6;
|
|
@@ -269,7 +291,7 @@ export async function runBenchmarkCore({
|
|
| 269 |
|
| 270 |
Module = await globalThis.createBenchModule({
|
| 271 |
print: (text) => onLog(`[wasm] ${text}`),
|
| 272 |
-
printErr: (text) => onLog(`
|
| 273 |
onAbort: (reason) => {
|
| 274 |
const msg = `WASM aborted: ${reason}`;
|
| 275 |
result.error = msg;
|
|
|
|
| 10 |
const DEFAULT_N_GEN = 128;
|
| 11 |
const DEFAULT_N_REPS = 5;
|
| 12 |
|
| 13 |
+
// Below this many compared tokens, the agreement rate is statistical noise
|
| 14 |
+
// (e.g. early-EOS models that produce 1 token will always report 100%).
|
| 15 |
+
const CONSISTENCY_MIN_TOKENS = 8;
|
| 16 |
+
|
| 17 |
+
// llama.cpp/ggml emit info, warnings, AND errors all to stderr. Tag only the
|
| 18 |
+
// actually-bad lines as :err so real failures stand out from routine output.
|
| 19 |
+
function classifyWasmStderr(text) {
|
| 20 |
+
return /\b(error|abort(ed)?|failed|fatal|panic|assert)\b|GGML_ASSERT/i.test(text)
|
| 21 |
+
? '[wasm:err]' : '[wasm]';
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
async function loadBenchScriptOnce(buildType) {
|
| 25 |
if (typeof globalThis.createBenchModule === 'function') return;
|
| 26 |
const script = document.createElement('script');
|
|
|
|
| 107 |
// aborting the whole run.
|
| 108 |
if (consistencyPrompt) {
|
| 109 |
try {
|
| 110 |
+
onStatus?.('consistency', 'Running consistency check...', Date.now());
|
| 111 |
onLog?.(`bench_run("...", ${consistencyNPredict}) — consistency phase`);
|
| 112 |
const raw = await Module.ccall(
|
| 113 |
'bench_run', 'string',
|
|
|
|
| 129 |
);
|
| 130 |
const ev = parseBenchResult('bench_eval_tokens', evalRaw);
|
| 131 |
out.consistency = { ...out.consistency, ...ev };
|
| 132 |
+
if (ev.n_tokens < CONSISTENCY_MIN_TOKENS) {
|
| 133 |
+
onLog?.(
|
| 134 |
+
`Consistency: insufficient samples (${ev.n_tokens} token` +
|
| 135 |
+
`${ev.n_tokens === 1 ? '' : 's'} before EOS) — agreement rate not meaningful`
|
| 136 |
+
);
|
| 137 |
+
} else {
|
| 138 |
+
onLog?.(
|
| 139 |
+
`Consistency: ${(ev.agreement_rate * 100).toFixed(1)}% top-1 agreement (` +
|
| 140 |
+
`${ev.n_agree}/${ev.n_tokens})` +
|
| 141 |
+
(ev.first_disagreement >= 0 ? ` — first diverge @ ${ev.first_disagreement}` : '')
|
| 142 |
+
);
|
| 143 |
+
}
|
| 144 |
}
|
| 145 |
} catch (err) {
|
| 146 |
onLog?.(`Consistency phase failed: ${err.message} — continuing to perf phase`);
|
|
|
|
| 160 |
if (wantPp) {
|
| 161 |
try {
|
| 162 |
if (!noWarmup) {
|
| 163 |
+
onStatus?.('perf', `warmup pp${nPrompt}`, Date.now());
|
| 164 |
onLog?.(`bench_pp(${nPrompt}) — warmup`);
|
| 165 |
const raw = await Module.ccall('bench_pp', 'string', ['number'], [nPrompt], { async: true });
|
| 166 |
parseBenchResult('bench_pp warmup', raw);
|
| 167 |
}
|
| 168 |
const samples_ns = [];
|
| 169 |
for (let i = 0; i < nReps; i++) {
|
| 170 |
+
onStatus?.('perf', `pp${nPrompt} ${i + 1}/${nReps}`, Date.now());
|
| 171 |
const t0 = performance.now();
|
| 172 |
const raw = await Module.ccall('bench_pp', 'string', ['number'], [nPrompt], { async: true });
|
| 173 |
const t_ns = (performance.now() - t0) * 1e6;
|
|
|
|
| 184 |
if (wantTg) {
|
| 185 |
try {
|
| 186 |
if (!noWarmup) {
|
| 187 |
+
// Run the full nGen-token decode loop as warmup (was bench_tg(1)).
|
| 188 |
+
// A 1-token warmup exercises the decode kernel once, which leaves
|
| 189 |
+
// the first timed rep absorbing pipeline-cache / shader-specialize
|
| 190 |
+
// cost on every subsequent step.
|
| 191 |
+
onStatus?.('perf', `warmup tg${nGen}`, Date.now());
|
| 192 |
+
onLog?.(`bench_tg(${nGen}) — warmup`);
|
| 193 |
+
const raw = await Module.ccall('bench_tg', 'string', ['number'], [nGen], { async: true });
|
| 194 |
parseBenchResult('bench_tg warmup', raw);
|
| 195 |
}
|
| 196 |
const samples_ns = [];
|
| 197 |
for (let i = 0; i < nReps; i++) {
|
| 198 |
+
onStatus?.('perf', `tg${nGen} ${i + 1}/${nReps}`, Date.now());
|
| 199 |
const t0 = performance.now();
|
| 200 |
const raw = await Module.ccall('bench_tg', 'string', ['number'], [nGen], { async: true });
|
| 201 |
const t_ns = (performance.now() - t0) * 1e6;
|
|
|
|
| 291 |
|
| 292 |
Module = await globalThis.createBenchModule({
|
| 293 |
print: (text) => onLog(`[wasm] ${text}`),
|
| 294 |
+
printErr: (text) => onLog(`${classifyWasmStderr(text)} ${text}`),
|
| 295 |
onAbort: (reason) => {
|
| 296 |
const msg = `WASM aborted: ${reason}`;
|
| 297 |
result.error = msg;
|