Spaces:
Running
Running
GitHub Actions committed on
Commit ·
43a358a
1
Parent(s): 9f7edbf
sync from abhijitramesh/webgpu-bench@bcae90cf03
Browse files- js/run/bench-worker.js +26 -5
- js/run/controller.js +60 -16
- js/run/device.js +229 -45
- js/run/source.js +11 -4
js/run/bench-worker.js
CHANGED
|
@@ -102,6 +102,23 @@ function patchMEMFS(Module) {
|
|
| 102 |
m.MEMFS.ops_table.file.stream.mmap = m.MEMFS.stream_ops.mmap;
|
| 103 |
}
|
| 104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
async function opfsAlloc(Module, name, fileHandle) {
|
| 106 |
// createSyncAccessHandle is worker-only and exclusive — only one writer
|
| 107 |
// per OPFS file at a time. Caller must ensure no createWritable session
|
|
@@ -189,7 +206,7 @@ self.onmessage = async (e) => {
|
|
| 189 |
}
|
| 190 |
};
|
| 191 |
|
| 192 |
-
async function runOne({ params, stream, buffer,
|
| 193 |
const {
|
| 194 |
buildType,
|
| 195 |
contentLength,
|
|
@@ -206,13 +223,16 @@ async function runOne({ params, stream, buffer, fileHandle }) {
|
|
| 206 |
noWarmup,
|
| 207 |
} = params;
|
| 208 |
// Three input modes are supported:
|
| 209 |
-
//
|
|
|
|
|
|
|
|
|
|
| 210 |
// stream — heap-stream mode (zero-copy WASM-heap, transferable)
|
| 211 |
// buffer — buffered fallback for browsers without transferable streams
|
| 212 |
// Exactly one must be provided.
|
| 213 |
-
const inputCount = (
|
| 214 |
if (inputCount !== 1) {
|
| 215 |
-
throw new Error('runOne: exactly one of `
|
| 216 |
}
|
| 217 |
|
| 218 |
const result = {
|
|
@@ -282,10 +302,11 @@ async function runOne({ params, stream, buffer, fileHandle }) {
|
|
| 282 |
// in, register a heap-backed MEMFS file. Faster (mmap'd
|
| 283 |
// zero-copy at load time) but caps at ~2GB.
|
| 284 |
let modelPtr = 0; // tracks heap-path allocation for cleanup
|
| 285 |
-
const useOpfsPath = !!
|
| 286 |
|
| 287 |
if (useOpfsPath) {
|
| 288 |
status('opfs', 'Linking OPFS-backed model into MEMFS...');
|
|
|
|
| 289 |
patchMEMFS(Module);
|
| 290 |
const size = await opfsAlloc(Module, 'model.gguf', fileHandle);
|
| 291 |
log(`OPFS-backed model.gguf registered (${(size / (1024 * 1024)).toFixed(1)} MB)`);
|
|
|
|
| 102 |
m.MEMFS.ops_table.file.stream.mmap = m.MEMFS.stream_ops.mmap;
|
| 103 |
}
|
| 104 |
|
| 105 |
+
/**
 * Resolve an OPFS path (rootDir + repo segments + filename) to a
 * FileSystemFileHandle inside this worker.
 *
 * Works around the iOS Safari limitation that FileSystemFileHandle isn't
 * structured-cloneable across postMessage: the main thread sends only this
 * plain layout key, and the worker opens the handle locally.
 *
 * @param {{ rootDir: string, repo: string, filename: string }} opfsPath
 *   `rootDir` is the directory directly under the OPFS root, `repo` is a
 *   '/'-separated list of nested directory names (empty segments are
 *   ignored), and `filename` is the file inside the innermost directory.
 * @returns {Promise<FileSystemFileHandle>} handle for the existing file.
 * @throws {Error} when OPFS is not available in this worker.
 * @throws {TypeError} when `rootDir`, `repo`, or `filename` is missing.
 *   Rejections from the directory/file lookups themselves propagate
 *   unchanged; nothing is created because every lookup passes
 *   `{ create: false }`.
 */
async function resolveOpfsHandle({ rootDir, repo, filename } = {}) {
  if (!self.navigator?.storage?.getDirectory) {
    throw new Error('OPFS not available in this worker');
  }

  // Validate up front: without this, a missing field is string-coerced and
  // the lookup goes hunting for an entry literally named "undefined".
  if (typeof rootDir !== 'string' || rootDir === '') {
    throw new TypeError('resolveOpfsHandle: `rootDir` must be a non-empty string');
  }
  if (repo == null) {
    throw new TypeError('resolveOpfsHandle: `repo` must be provided');
  }
  if (typeof filename !== 'string' || filename === '') {
    throw new TypeError('resolveOpfsHandle: `filename` must be a non-empty string');
  }

  let dir = await self.navigator.storage.getDirectory();
  dir = await dir.getDirectoryHandle(rootDir, { create: false });
  // filter(Boolean) drops the empty segments produced by leading, trailing,
  // or doubled slashes in the repo id.
  for (const seg of String(repo).split('/').filter(Boolean)) {
    dir = await dir.getDirectoryHandle(seg, { create: false });
  }
  return dir.getFileHandle(filename, { create: false });
}
|
| 121 |
+
|
| 122 |
async function opfsAlloc(Module, name, fileHandle) {
|
| 123 |
// createSyncAccessHandle is worker-only and exclusive — only one writer
|
| 124 |
// per OPFS file at a time. Caller must ensure no createWritable session
|
|
|
|
| 206 |
}
|
| 207 |
};
|
| 208 |
|
| 209 |
+
async function runOne({ params, stream, buffer, opfsPath }) {
|
| 210 |
const {
|
| 211 |
buildType,
|
| 212 |
contentLength,
|
|
|
|
| 223 |
noWarmup,
|
| 224 |
} = params;
|
| 225 |
// Three input modes are supported:
|
| 226 |
+
// opfsPath — wllama-style OPFS-streaming load (preferred for >2GB).
|
| 227 |
+
// Resolved to a FileSystemFileHandle inside the worker
|
| 228 |
+
// via navigator.storage.getDirectory() — FileHandles
|
| 229 |
+
// themselves don't structured-clone reliably (iOS Safari).
|
| 230 |
// stream — heap-stream mode (zero-copy WASM-heap, transferable)
|
| 231 |
// buffer — buffered fallback for browsers without transferable streams
|
| 232 |
// Exactly one must be provided.
|
| 233 |
+
const inputCount = (opfsPath ? 1 : 0) + (stream ? 1 : 0) + (buffer ? 1 : 0);
|
| 234 |
if (inputCount !== 1) {
|
| 235 |
+
throw new Error('runOne: exactly one of `opfsPath`, `stream`, or `buffer` must be provided');
|
| 236 |
}
|
| 237 |
|
| 238 |
const result = {
|
|
|
|
| 302 |
// in, register a heap-backed MEMFS file. Faster (mmap'd
|
| 303 |
// zero-copy at load time) but caps at ~2GB.
|
| 304 |
let modelPtr = 0; // tracks heap-path allocation for cleanup
|
| 305 |
+
const useOpfsPath = !!opfsPath;
|
| 306 |
|
| 307 |
if (useOpfsPath) {
|
| 308 |
status('opfs', 'Linking OPFS-backed model into MEMFS...');
|
| 309 |
+
const fileHandle = await resolveOpfsHandle(opfsPath);
|
| 310 |
patchMEMFS(Module);
|
| 311 |
const size = await opfsAlloc(Module, 'model.gguf', fileHandle);
|
| 312 |
log(`OPFS-backed model.gguf registered (${(size / (1024 * 1024)).toFixed(1)} MB)`);
|
js/run/controller.js
CHANGED
|
@@ -3,8 +3,8 @@
|
|
| 3 |
// classes. Detects `surface` (localhost / space / pages) to gate the
|
| 4 |
// server save checkbox and the HF hub sign-in/submit row.
|
| 5 |
|
| 6 |
-
import { localSource, hostedSource, inventoryOpfs, purgeOpfs } from './source.js';
|
| 7 |
-
import { getDeviceBudgetMB, variantFits, describeDevice } from './device.js';
|
| 8 |
import {
|
| 9 |
resumeHFSession, beginHFSignIn, signOutHF, submitResultsToDataset,
|
| 10 |
HF_OAUTH_PENDING_KEY,
|
|
@@ -14,7 +14,6 @@ import { isHubConfigured, HF_DATASET_REPO } from './config.js';
|
|
| 14 |
const RUN_INTENT_STORAGE_KEY = 'webgpu-bench:runIntent';
|
| 15 |
const CRASH_STALE_MS = 10_000;
|
| 16 |
|
| 17 |
-
const OVERHEAD = 1.5;
|
| 18 |
const DEFAULT_PROMPT =
|
| 19 |
'Explain quantum computing to a software engineer in four concise paragraphs. ' +
|
| 20 |
'Cover superposition, entanglement, quantum gates, and one practical use case.';
|
|
@@ -176,7 +175,15 @@ function computeWarnings(modelName, quant) {
|
|
| 176 |
}
|
| 177 |
|
| 178 |
function cacheKey(v) { return `${v.repo}/${v.filename}`; }
|
| 179 |
-
function variantFitsDevice(v) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
function isCached(v) {
|
| 181 |
const entry = state.cacheStatus[cacheKey(v)];
|
| 182 |
return !!entry && entry.cachedBytes > 0;
|
|
@@ -272,9 +279,14 @@ function renderHeader() {
|
|
| 272 |
const memStr = b.memGB !== null ? `${b.memGB} GB` : 'β';
|
| 273 |
$('device-memory').textContent = memStr;
|
| 274 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
const budgetGB = (b.budgetMB / 1024).toFixed(1);
|
|
|
|
| 276 |
$('device-budget').textContent = `${budgetGB} GB`;
|
| 277 |
-
$('device-budget-source').textContent = `
|
| 278 |
|
| 279 |
const webgpuCell = $('device-webgpu');
|
| 280 |
if (webgpuCell) {
|
|
@@ -1047,14 +1059,28 @@ async function onRunClick() {
|
|
| 1047 |
state.sessionDownloads = new Set();
|
| 1048 |
updateButtons();
|
| 1049 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1050 |
const machine = await machineInfo();
|
| 1051 |
const browser = browserInfo();
|
| 1052 |
const evictAfter = !!$('evict-after-run')?.checked;
|
| 1053 |
|
| 1054 |
// One-ahead prefetch: while variant i runs, we may have variant i+1
|
| 1055 |
// downloading. Only one prefetch in flight at a time.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1056 |
const prefetchFor = async (v) => {
|
| 1057 |
if (!v || isCached(v)) return;
|
|
|
|
| 1058 |
const row = progressRowFor(v);
|
| 1059 |
row.setStatus('prefetching', '');
|
| 1060 |
try {
|
|
@@ -1090,7 +1116,10 @@ async function onRunClick() {
|
|
| 1090 |
// Wait for variant i to be cached (either via prefetch or pre-existing).
|
| 1091 |
await prefetchPromise;
|
| 1092 |
if (state.aborted) break;
|
| 1093 |
-
|
|
|
|
|
|
|
|
|
|
| 1094 |
row.setStatus('error', 'not cached after prefetch');
|
| 1095 |
prefetchPromise = prefetchFor(variants[i + 1]);
|
| 1096 |
continue;
|
|
@@ -1164,7 +1193,7 @@ async function onRunClick() {
|
|
| 1164 |
function runInWorker({
|
| 1165 |
params,
|
| 1166 |
stream,
|
| 1167 |
-
|
| 1168 |
onStatus,
|
| 1169 |
onProgress,
|
| 1170 |
onLog,
|
|
@@ -1206,14 +1235,16 @@ function runInWorker({
|
|
| 1206 |
};
|
| 1207 |
|
| 1208 |
// Three transport modes — see bench-worker.js runOne() for matching shape.
|
| 1209 |
-
if (
|
| 1210 |
-
// OPFS path:
|
| 1211 |
-
//
|
| 1212 |
-
//
|
|
|
|
|
|
|
| 1213 |
try {
|
| 1214 |
-
worker.postMessage({ type: 'run', params,
|
| 1215 |
} catch (err) {
|
| 1216 |
-
finish({ status: 'error', error: `postMessage(
|
| 1217 |
}
|
| 1218 |
return;
|
| 1219 |
}
|
|
@@ -1333,21 +1364,34 @@ async function runBenchmarkInWorker(v, params, callbacks) {
|
|
| 1333 |
};
|
| 1334 |
|
| 1335 |
if (useOpfs) {
|
| 1336 |
-
let
|
| 1337 |
try {
|
| 1338 |
callbacks.onStatus?.('downloading', 'Downloading model to OPFS...');
|
| 1339 |
const r = await state.source.opfsHandleForModel(
|
| 1340 |
v.repo, v.filename,
|
| 1341 |
callbacks.onProgress,
|
| 1342 |
);
|
| 1343 |
-
fileHandle = r.handle;
|
| 1344 |
contentLength = r.size;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1345 |
} catch (err) {
|
| 1346 |
return { status: 'error', error: `opfsHandleForModel failed: ${err.message}` };
|
| 1347 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1348 |
return runInWorker({
|
| 1349 |
params: { ...baseParams, contentLength },
|
| 1350 |
-
|
| 1351 |
onStatus: callbacks.onStatus,
|
| 1352 |
onProgress: callbacks.onProgress,
|
| 1353 |
onLog: callbacks.onLog,
|
|
|
|
| 3 |
// classes. Detects `surface` (localhost / space / pages) to gate the
|
| 4 |
// server save checkbox and the HF hub sign-in/submit row.
|
| 5 |
|
| 6 |
+
import { localSource, hostedSource, inventoryOpfs, purgeOpfs, OPFS_ROOT_NAME } from './source.js';
|
| 7 |
+
import { getDeviceBudgetMB, variantFits, describeDevice, isMobileDevice } from './device.js';
|
| 8 |
import {
|
| 9 |
resumeHFSession, beginHFSignIn, signOutHF, submitResultsToDataset,
|
| 10 |
HF_OAUTH_PENDING_KEY,
|
|
|
|
| 14 |
const RUN_INTENT_STORAGE_KEY = 'webgpu-bench:runIntent';
|
| 15 |
const CRASH_STALE_MS = 10_000;
|
| 16 |
|
|
|
|
| 17 |
const DEFAULT_PROMPT =
|
| 18 |
'Explain quantum computing to a software engineer in four concise paragraphs. ' +
|
| 19 |
'Cover superposition, entanglement, quantum gates, and one practical use case.';
|
|
|
|
| 175 |
}
|
| 176 |
|
| 177 |
function cacheKey(v) { return `${v.repo}/${v.filename}`; }
|
| 178 |
+
/**
 * Check whether variant `v` fits on the current device.
 *
 * Forwards the variant's size together with both budgets held in
 * `state.budget` to `variantFits`, which tests the GPU-memory budget and the
 * WASM-heap budget; see device.js for the rationale behind the two checks.
 *
 * @param {{ sizeMB: number }} v variant record; only `sizeMB` is read here.
 * @returns {boolean} the result of `variantFits` for this variant.
 */
function variantFitsDevice(v) {
  const { sizeMB } = v;
  const { gpuBudgetMB, heapBudgetMB } = state.budget;
  return variantFits(sizeMB, { gpuBudgetMB, heapBudgetMB });
}
|
| 187 |
function isCached(v) {
|
| 188 |
const entry = state.cacheStatus[cacheKey(v)];
|
| 189 |
return !!entry && entry.cachedBytes > 0;
|
|
|
|
| 279 |
const memStr = b.memGB !== null ? `${b.memGB} GB` : 'β';
|
| 280 |
$('device-memory').textContent = memStr;
|
| 281 |
|
| 282 |
+
// budgetMB is now the GPU-memory budget (per device.js _computeBudget),
|
| 283 |
+
// since with OPFS streaming the model lives in WebGPU buffers, not the
|
| 284 |
+
// WASM heap. We surface the heap budget separately in the source line so
|
| 285 |
+
// a curious reader can see both probes' results.
|
| 286 |
const budgetGB = (b.budgetMB / 1024).toFixed(1);
|
| 287 |
+
const heapGB = (b.heapBudgetMB / 1024).toFixed(1);
|
| 288 |
$('device-budget').textContent = `${budgetGB} GB`;
|
| 289 |
+
$('device-budget-source').textContent = `GPU memory Β· WASM heap: ${heapGB} GB`;
|
| 290 |
|
| 291 |
const webgpuCell = $('device-webgpu');
|
| 292 |
if (webgpuCell) {
|
|
|
|
| 1059 |
state.sessionDownloads = new Set();
|
| 1060 |
updateButtons();
|
| 1061 |
|
| 1062 |
+
if (isMobileDevice()) {
|
| 1063 |
+
logLine(
|
| 1064 |
+
'Mobile device β running with sequential downloads (no parallel prefetch). ' +
|
| 1065 |
+
'Each variant downloads, runs, evicts, then the next begins.',
|
| 1066 |
+
);
|
| 1067 |
+
}
|
| 1068 |
+
|
| 1069 |
const machine = await machineInfo();
|
| 1070 |
const browser = browserInfo();
|
| 1071 |
const evictAfter = !!$('evict-after-run')?.checked;
|
| 1072 |
|
| 1073 |
// One-ahead prefetch: while variant i runs, we may have variant i+1
|
| 1074 |
// downloading. Only one prefetch in flight at a time.
|
| 1075 |
+
// On mobile, the overlap is a measurement hazard — concurrent download
|
| 1076 |
+
// contends with inference for SoC power, memory bandwidth, and OPFS
|
| 1077 |
+
// write queues. Skip the prefetch entirely; runBenchmarkInWorker's
|
| 1078 |
+
// opfsHandleForModel does the download inline (with the same progress
|
| 1079 |
+
// events the prefetch row would have shown).
|
| 1080 |
+
const skipPrefetch = isMobileDevice();
|
| 1081 |
const prefetchFor = async (v) => {
|
| 1082 |
if (!v || isCached(v)) return;
|
| 1083 |
+
if (skipPrefetch) return;
|
| 1084 |
const row = progressRowFor(v);
|
| 1085 |
row.setStatus('prefetching', '');
|
| 1086 |
try {
|
|
|
|
| 1116 |
// Wait for variant i to be cached (either via prefetch or pre-existing).
|
| 1117 |
await prefetchPromise;
|
| 1118 |
if (state.aborted) break;
|
| 1119 |
+
// When skipPrefetch is on (mobile), variants arrive uncached and
|
| 1120 |
+
// runBenchmarkInWorker → opfsHandleForModel handles the inline
|
| 1121 |
+
// download. Skip the cache-check error path in that case.
|
| 1122 |
+
if (!skipPrefetch && !isCached(v)) {
|
| 1123 |
row.setStatus('error', 'not cached after prefetch');
|
| 1124 |
prefetchPromise = prefetchFor(variants[i + 1]);
|
| 1125 |
continue;
|
|
|
|
| 1193 |
function runInWorker({
|
| 1194 |
params,
|
| 1195 |
stream,
|
| 1196 |
+
opfsPath,
|
| 1197 |
onStatus,
|
| 1198 |
onProgress,
|
| 1199 |
onLog,
|
|
|
|
| 1235 |
};
|
| 1236 |
|
| 1237 |
// Three transport modes — see bench-worker.js runOne() for matching shape.
|
| 1238 |
+
if (opfsPath) {
|
| 1239 |
+
// OPFS path: send the layout key only (rootDir + repo + filename).
|
| 1240 |
+
// The worker re-resolves to a FileSystemFileHandle via
|
| 1241 |
+
// navigator.storage.getDirectory() itself. Plain JSON-serializable —
|
| 1242 |
+
// works on iOS Safari, where FileSystemFileHandle structured-clone
|
| 1243 |
+
// is not implemented.
|
| 1244 |
try {
|
| 1245 |
+
worker.postMessage({ type: 'run', params, opfsPath });
|
| 1246 |
} catch (err) {
|
| 1247 |
+
finish({ status: 'error', error: `postMessage(opfsPath) failed: ${err.message}` });
|
| 1248 |
}
|
| 1249 |
return;
|
| 1250 |
}
|
|
|
|
| 1364 |
};
|
| 1365 |
|
| 1366 |
if (useOpfs) {
|
| 1367 |
+
let contentLength;
|
| 1368 |
try {
|
| 1369 |
callbacks.onStatus?.('downloading', 'Downloading model to OPFS...');
|
| 1370 |
const r = await state.source.opfsHandleForModel(
|
| 1371 |
v.repo, v.filename,
|
| 1372 |
callbacks.onProgress,
|
| 1373 |
);
|
|
|
|
| 1374 |
contentLength = r.size;
|
| 1375 |
+
// When the prefetch is skipped (mobile path), the inline download
|
| 1376 |
+
// above is the variant's first arrival in OPFS. Mark it as
|
| 1377 |
+
// session-downloaded so the post-run eviction logic frees it before
|
| 1378 |
+
// the next variant starts — keeping disk usage flat.
|
| 1379 |
+
if (r.wasDownloaded) {
|
| 1380 |
+
state.sessionDownloads.add(cacheKey(v));
|
| 1381 |
+
state.cacheStatus[cacheKey(v)] = { cachedBytes: r.size };
|
| 1382 |
+
refreshCacheBadge(v);
|
| 1383 |
+
}
|
| 1384 |
} catch (err) {
|
| 1385 |
return { status: 'error', error: `opfsHandleForModel failed: ${err.message}` };
|
| 1386 |
}
|
| 1387 |
+
// Pass the OPFS path components, not the FileHandle. iOS Safari
|
| 1388 |
+
// (and some older Chromium/Firefox versions) can't structured-clone
|
| 1389 |
+
// FileSystemFileHandle across postMessage. The worker re-resolves the
|
| 1390 |
+
// handle via navigator.storage.getDirectory() itself, which works
|
| 1391 |
+
// everywhere OPFS is supported.
|
| 1392 |
return runInWorker({
|
| 1393 |
params: { ...baseParams, contentLength },
|
| 1394 |
+
opfsPath: { rootDir: OPFS_ROOT_NAME, repo: v.repo, filename: v.filename },
|
| 1395 |
onStatus: callbacks.onStatus,
|
| 1396 |
onProgress: callbacks.onProgress,
|
| 1397 |
onLog: callbacks.onLog,
|
js/run/device.js
CHANGED
|
@@ -1,29 +1,66 @@
|
|
| 1 |
// Device-fit helpers for the interactive bench page.
|
| 2 |
//
|
| 3 |
-
//
|
| 4 |
-
// growth on this device, mirroring how llama.cpp itself allocates (a single
|
| 5 |
-
// WebAssembly.Memory grown in pages). The probe runs in a worker so an
|
| 6 |
-
// allocation failure dies harmlessly. We fall back to deviceMemory /
|
| 7 |
-
// storage.estimate heuristics if the probe can't run.
|
| 8 |
//
|
| 9 |
-
//
|
| 10 |
-
//
|
| 11 |
-
//
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
//
|
| 13 |
// On wasm32 the linear memory caps at 4 GiB no matter how much physical
|
| 14 |
-
// RAM the device has, so probe results above 4096 MB cannot exist.
|
| 15 |
|
| 16 |
const DEFAULT_BUDGET_MB = 2 * 1024;
|
| 17 |
const HOSTED_QUOTA_FRACTION = 0.4;
|
| 18 |
const HOSTED_QUOTA_CAP_MB = 8 * 1024;
|
| 19 |
|
| 20 |
-
// Hard ceiling on mobile regardless of probe result. iOS/Android
|
| 21 |
-
// the tab under system memory pressure without raising a JS error
|
| 22 |
-
// probe could observe, so an "ok at 4 GiB" result is not safe to trust
|
| 23 |
-
// a phone
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
const PROBE_TIMEOUT_MS = 15_000;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
export function isMobileDevice() {
|
| 29 |
if (typeof navigator === 'undefined') return false;
|
|
@@ -32,6 +69,8 @@ export function isMobileDevice() {
|
|
| 32 |
return /iPhone|iPad|iPod|Android.*Mobile/.test(ua);
|
| 33 |
}
|
| 34 |
|
|
|
|
|
|
|
| 35 |
// Spawn the probe worker, wait for a result, clean up. Returns
|
| 36 |
// { probedMB } on success, or { probedMB: 0, error } on any failure mode
|
| 37 |
// (timeout, worker construct error, worker onerror — typically the probe
|
|
@@ -67,9 +106,107 @@ export function probeHeapBudgetMB({ stepPages, maxPages, timeoutMs = PROBE_TIMEO
|
|
| 67 |
});
|
| 68 |
}
|
| 69 |
|
| 70 |
-
//
|
| 71 |
-
|
| 72 |
-
//
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
let _budgetPromise = null;
|
| 74 |
|
| 75 |
export async function getDeviceBudgetMB() {
|
|
@@ -90,49 +227,96 @@ async function _computeBudget() {
|
|
| 90 |
|
| 91 |
const isMobile = isMobileDevice();
|
| 92 |
|
| 93 |
-
//
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
let
|
| 101 |
-
let
|
| 102 |
-
if (probedMB > 0) {
|
| 103 |
-
|
| 104 |
-
|
| 105 |
} else if (memGB !== null) {
|
| 106 |
-
|
| 107 |
-
|
| 108 |
} else if (quotaMB !== null) {
|
| 109 |
-
|
| 110 |
-
|
| 111 |
} else {
|
| 112 |
-
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
}
|
| 115 |
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
}
|
| 120 |
|
| 121 |
return {
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
memGB,
|
| 124 |
quotaMB,
|
| 125 |
-
probedMB,
|
| 126 |
-
|
|
|
|
|
|
|
| 127 |
isMobile,
|
| 128 |
-
source
|
|
|
|
|
|
|
|
|
|
| 129 |
};
|
| 130 |
}
|
| 131 |
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
if (typeof sizeMB !== 'number' || sizeMB <= 0) return false;
|
| 134 |
-
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
}
|
| 137 |
|
| 138 |
export async function describeDevice() {
|
|
|
|
| 1 |
// Device-fit helpers for the interactive bench page.
|
| 2 |
//
|
| 3 |
+
// Two budget probes drive the per-variant fit decision:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
//
|
| 5 |
+
// getDeviceBudgetMB() — empirical WASM heap probe. Grows a
|
| 6 |
+
// WebAssembly.Memory page-by-page in a worker until it fails. Caps
|
| 7 |
+
// the working set (KV cache + compute scratch + JS heap headroom)
|
| 8 |
+
// llama.cpp consumes during inference.
|
| 9 |
+
//
|
| 10 |
+
// probeGpuBudgetMB() — empirical WebGPU memory probe. Allocates real
|
| 11 |
+
// buffers with mappedAtCreation=true on the actual adapter until OOM.
|
| 12 |
+
// Caps the size of model weights llama.cpp can hold in GPU buffers,
|
| 13 |
+
// since OPFS-streaming keeps model bytes off the WASM heap.
|
| 14 |
+
//
|
| 15 |
+
// variantFits() then checks both: model size + GPU overhead ≤ GPU budget,
|
| 16 |
+
// AND heap working-set floor ≤ heap budget. wllama doesn't probe at all
|
| 17 |
+
// — they let load attempts fail naturally — but our auto-select buttons
|
| 18 |
+
// ("All fit", "Run study") need a fit predicate, so we err on the side
|
| 19 |
+
// of measuring rather than guessing.
|
| 20 |
//
|
| 21 |
// On wasm32 the linear memory caps at 4 GiB no matter how much physical
|
| 22 |
+
// RAM the device has, so heap probe results above 4096 MB cannot exist.
|
| 23 |
|
| 24 |
const DEFAULT_BUDGET_MB = 2 * 1024;
|
| 25 |
const HOSTED_QUOTA_FRACTION = 0.4;
|
| 26 |
const HOSTED_QUOTA_CAP_MB = 8 * 1024;
|
| 27 |
|
| 28 |
+
// Hard ceiling on mobile WASM heap regardless of probe result. iOS/Android
|
| 29 |
+
// can reap the tab under system memory pressure without raising a JS error
|
| 30 |
+
// the probe could observe, so an "ok at 4 GiB" result is not safe to trust
|
| 31 |
+
// on a phone.
|
| 32 |
+
//
|
| 33 |
+
// Empirically iOS Safari tabs get reaped well below the WebAssembly.Memory
|
| 34 |
+
// engine cap (~1 GiB on iPhone), and Android Chrome on mid-range devices
|
| 35 |
+
// behaves similarly. Below 500 MB heap usage tends to be safe across
|
| 36 |
+
// modern phones; above that we start seeing tab kills mid-run. The OPFS-
|
| 37 |
+
// streaming model load means model bytes no longer live on the WASM heap,
|
| 38 |
+
// so this budget caps the per-step working set, not the model file.
|
| 39 |
+
const MOBILE_HEAP_CEILING_MB = 500;
|
| 40 |
+
|
| 41 |
+
// Hard ceiling on mobile GPU memory probe result. Even when the probe
|
| 42 |
+
// succeeds at higher numbers, the OS may evict the GPU process or the tab
|
| 43 |
+
// before we can actually use it. iPhone WebGPU (Metal-3 under the hood)
|
| 44 |
+
// typically gives a tab 1.5–3 GB usable depending on device class; cap at
|
| 45 |
+
// 3 GB as a conservative ceiling that won't reject anything reasonable.
|
| 46 |
+
const MOBILE_GPU_CEILING_MB = 3 * 1024;
|
| 47 |
|
| 48 |
const PROBE_TIMEOUT_MS = 15_000;
|
| 49 |
+
const GPU_PROBE_STEP_MB = 256;
|
| 50 |
+
const GPU_PROBE_MAX_MB = 8 * 1024;
|
| 51 |
+
const GPU_PROBE_TIMEOUT_MS = 8_000;
|
| 52 |
+
|
| 53 |
+
// Working-set floor in the WASM heap. KV cache + compute buffers + JS heap
|
| 54 |
+
// headroom for a typical 1B model at n_ctx=2048 add up to ~400 MB; we
|
| 55 |
+
// require 500 to leave a margin. Bigger contexts scale this up — not
|
| 56 |
+
// modeled yet (worth revisiting if we benchmark at n_ctx >> 2048).
|
| 57 |
+
const HEAP_WORKING_SET_FLOOR_MB = 500;
|
| 58 |
+
|
| 59 |
+
// Per-variant overhead added on top of the model file size when checking
|
| 60 |
+
// GPU fit. Covers compute buffers, alignment padding, and the KV cache
|
| 61 |
+
// mirror that the WebGPU backend keeps. A flat 200 MB is a conservative
|
| 62 |
+
// approximation; in practice it scales somewhat with model + context size.
|
| 63 |
+
const GPU_VARIANT_OVERHEAD_MB = 200;
|
| 64 |
|
| 65 |
export function isMobileDevice() {
|
| 66 |
if (typeof navigator === 'undefined') return false;
|
|
|
|
| 69 |
return /iPhone|iPad|iPod|Android.*Mobile/.test(ua);
|
| 70 |
}
|
| 71 |
|
| 72 |
+
// ──────────────── WASM heap probe ────────────────
|
| 73 |
+
|
| 74 |
// Spawn the probe worker, wait for a result, clean up. Returns
|
| 75 |
// { probedMB } on success, or { probedMB: 0, error } on any failure mode
|
| 76 |
// (timeout, worker construct error, worker onerror — typically the probe
|
|
|
|
| 106 |
});
|
| 107 |
}
|
| 108 |
|
| 109 |
+
// ──────────────── GPU memory probe ────────────────
|
| 110 |
+
|
| 111 |
+
/**
 * Empirically probe how much GPU memory this page can commit through WebGPU.
 *
 * Allocates STORAGE buffers of `stepMB` each until an allocation fails, the
 * device is lost, `maxMB` is reached, or `timeoutMs` elapses, then releases
 * everything and reports the total that was successfully committed.
 * Buffers are created with mappedAtCreation=true and written to once, to
 * force a real commit (some drivers lazy-allocate until first use), and
 * failures are captured through an 'out-of-memory' error scope with
 * device.lost as a backstop.
 *
 * Caveats:
 *   - The GPU process is shared with other tabs; memory they hold makes the
 *     probe undercount.
 *   - Some drivers lazy-fail at dispatch time rather than at createBuffer,
 *     so the result is an upper bound, not a guarantee.
 *
 * @param {object} [options]
 * @param {number} [options.stepMB] size of each probe buffer in MB; must be
 *   a positive finite number.
 * @param {number} [options.maxMB] stop once this many MB are committed.
 * @param {number} [options.timeoutMs] wall-clock limit, checked before each
 *   allocation step.
 * @returns {Promise<{ probedMB: number, error?: string }>} `probedMB` is the
 *   committed total; `error` is present only when the probe could not run
 *   (no WebGPU, no adapter, device init failure, invalid `stepMB`).
 */
export async function probeGpuBudgetMB({
  stepMB = GPU_PROBE_STEP_MB,
  maxMB = GPU_PROBE_MAX_MB,
  timeoutMs = GPU_PROBE_TIMEOUT_MS,
} = {}) {
  // Guard `navigator` itself: a bare reference throws a ReferenceError in
  // environments that do not define it at all.
  if (typeof navigator === 'undefined' || !navigator.gpu) {
    return { probedMB: 0, error: 'WebGPU not available' };
  }
  // A zero or NaN step would never advance and would spin until the timeout.
  if (!Number.isFinite(stepMB) || stepMB <= 0) {
    return { probedMB: 0, error: `invalid stepMB: ${stepMB}` };
  }

  let adapter;
  let device;
  try {
    adapter = await navigator.gpu.requestAdapter();
    if (!adapter) return { probedMB: 0, error: 'no WebGPU adapter' };
    // Request the maximum the adapter can give us; defaults are often
    // smaller than what the hardware supports.
    const requiredLimits = {};
    for (const key of ['maxBufferSize', 'maxStorageBufferBindingSize']) {
      const value = adapter.limits?.[key];
      if (typeof value === 'number') requiredLimits[key] = value;
    }
    device = await adapter.requestDevice({ requiredLimits });
  } catch (err) {
    return { probedMB: 0, error: `adapter/device init failed: ${err?.message ?? err}` };
  }

  let deviceLost = false;
  device.lost.then(() => { deviceLost = true; }).catch(() => {});

  const BYTES_PER_MB = 1024 * 1024;
  const stepBytes = stepMB * BYTES_PER_MB;
  const maxBytes = maxMB * BYTES_PER_MB;
  const buffers = [];
  let totalBytes = 0;
  const start = performance.now();

  try {
    while (totalBytes + stepBytes <= maxBytes) {
      if (deviceLost) break;
      if (performance.now() - start > timeoutMs) break;

      device.pushErrorScope('out-of-memory');
      let buffer;
      let threw = false;
      try {
        buffer = device.createBuffer({
          size: stepBytes,
          usage: GPUBufferUsage.STORAGE,
          mappedAtCreation: true,
        });
        // Touch the start of the mapped range to force a real commit.
        // Drivers can lazy-back the allocation until first write, which
        // would fool the probe into thinking it has more headroom than it
        // really does.
        const touchBytes = Math.min(stepBytes, 64 * 1024);
        new Uint8Array(buffer.getMappedRange(0, touchBytes))[0] = 1;
        buffer.unmap();
      } catch {
        threw = true;
      }

      // Always pop the scope pushed above so scopes stay balanced.
      const error = await device.popErrorScope().catch(() => null);
      // Also re-check deviceLost: a buffer "created" on a device that was
      // lost during this step is not real memory and must not be counted.
      if (threw || error || deviceLost) {
        // `buffer` is set when createBuffer succeeded but mapping failed;
        // release it explicitly rather than leaving it to device.destroy().
        try { buffer?.destroy(); } catch { /* noop */ }
        break;
      }

      buffers.push(buffer);
      totalBytes += stepBytes;

      // Yield so we don't starve the main thread / GC.
      await new Promise((resolve) => setTimeout(resolve, 0));
    }
  } finally {
    for (const b of buffers) {
      try { b.destroy(); } catch { /* noop */ }
    }
    try { device.destroy(); } catch { /* noop */ }
  }

  return { probedMB: Math.floor(totalBytes / BYTES_PER_MB) };
}
|
| 205 |
+
|
| 206 |
+
// ──────────────── public budget API ────────────────
|
| 207 |
+
|
| 208 |
+
// Cache the full budget for the lifetime of the page load. Both probes
|
| 209 |
+
// take 1–8 s; we don't want to pay that twice for the same surface.
|
| 210 |
let _budgetPromise = null;
|
| 211 |
|
| 212 |
export async function getDeviceBudgetMB() {
|
|
|
|
| 227 |
|
| 228 |
const isMobile = isMobileDevice();
|
| 229 |
|
| 230 |
+
// Run both probes in parallel.
|
| 231 |
+
const [heapProbe, gpuProbe] = await Promise.all([
|
| 232 |
+
probeHeapBudgetMB(),
|
| 233 |
+
probeGpuBudgetMB(),
|
| 234 |
+
]);
|
| 235 |
+
|
| 236 |
+
// ── Heap budget ──
|
| 237 |
+
let heapBudgetMB;
|
| 238 |
+
let heapSource;
|
| 239 |
+
if (heapProbe.probedMB > 0) {
|
| 240 |
+
heapBudgetMB = heapProbe.probedMB;
|
| 241 |
+
heapSource = `probe (WASM heap, ${heapProbe.probedMB} MB committed)`;
|
| 242 |
} else if (memGB !== null) {
|
| 243 |
+
heapBudgetMB = memGB * 1024 * 0.6;
|
| 244 |
+
heapSource = 'navigator.deviceMemory (heap probe failed)';
|
| 245 |
} else if (quotaMB !== null) {
|
| 246 |
+
heapBudgetMB = Math.min(quotaMB * HOSTED_QUOTA_FRACTION, HOSTED_QUOTA_CAP_MB);
|
| 247 |
+
heapSource = 'navigator.storage.estimate().quota (heap probe failed)';
|
| 248 |
} else {
|
| 249 |
+
heapBudgetMB = DEFAULT_BUDGET_MB;
|
| 250 |
+
heapSource = 'default (heap probe failed)';
|
| 251 |
+
}
|
| 252 |
+
if (isMobile && heapBudgetMB > MOBILE_HEAP_CEILING_MB) {
|
| 253 |
+
heapBudgetMB = MOBILE_HEAP_CEILING_MB;
|
| 254 |
+
heapSource += ' β mobile-capped';
|
| 255 |
}
|
| 256 |
|
| 257 |
+
// ── GPU budget ──
|
| 258 |
+
let gpuBudgetMB = gpuProbe.probedMB;
|
| 259 |
+
let gpuSource = gpuProbe.probedMB > 0
|
| 260 |
+
? `probe (WebGPU buffers, ${gpuProbe.probedMB} MB allocated)`
|
| 261 |
+
: `probe failed: ${gpuProbe.error || 'unknown'}`;
|
| 262 |
+
if (isMobile && gpuBudgetMB > MOBILE_GPU_CEILING_MB) {
|
| 263 |
+
gpuBudgetMB = MOBILE_GPU_CEILING_MB;
|
| 264 |
+
gpuSource += ' β mobile-capped';
|
| 265 |
}
|
| 266 |
|
| 267 |
return {
|
| 268 |
+
// Combined headline budget β what the UI shows as "Max model size".
|
| 269 |
+
// GPU memory is now the constraint that varies per device; heap
|
| 270 |
+
// budget is a separate floor check.
|
| 271 |
+
budgetMB: gpuBudgetMB,
|
| 272 |
+
gpuBudgetMB,
|
| 273 |
+
heapBudgetMB,
|
| 274 |
memGB,
|
| 275 |
quotaMB,
|
| 276 |
+
probedMB: heapProbe.probedMB,
|
| 277 |
+
gpuProbedMB: gpuProbe.probedMB,
|
| 278 |
+
probeError: heapProbe.error || null,
|
| 279 |
+
gpuProbeError: gpuProbe.error || null,
|
| 280 |
isMobile,
|
| 281 |
+
// Two-line source string so the UI stays compact while still
|
| 282 |
+
// surfacing both probes in the device card tooltip.
|
| 283 |
+
source: gpuSource,
|
| 284 |
+
heapSource,
|
| 285 |
};
|
| 286 |
}
|
| 287 |
|
| 288 |
+
/**
 * Decide whether a model file of `sizeMB` megabytes can be loaded and run on
 * this device. Two checks must pass:
 *
 *   1. sizeMB + GPU_VARIANT_OVERHEAD_MB ≤ gpuBudgetMB
 *      Model weights live in WebGPU buffers (OPFS streaming keeps them off
 *      the WASM heap). The overhead covers compute scratch, alignment, and
 *      the KV cache mirror.
 *
 *   2. heapBudgetMB ≥ HEAP_WORKING_SET_FLOOR_MB
 *      The WASM heap still has to fit the working set: KV cache, ggml
 *      compute buffers, and JS heap headroom.
 *
 * Backwards-compat: if `budget` is a plain number it is treated as the
 * legacy heap-only budget and the prior 1.5× sizeMB overhead is applied.
 * New callers should pass { gpuBudgetMB, heapBudgetMB }.
 *
 * @param {number} sizeMB model file size in MB; must be finite and > 0.
 * @param {number | { gpuBudgetMB: number, heapBudgetMB: number }} budget
 * @returns {boolean} true only when every applicable check passes. Any
 *   missing, non-numeric, or NaN input yields false.
 */
export function variantFits(sizeMB, budget) {
  // Number.isFinite also rejects NaN, which `typeof x === 'number'` lets
  // through; NaN would otherwise fail every comparison below and be
  // reported as fitting.
  if (!Number.isFinite(sizeMB) || sizeMB <= 0) return false;

  if (typeof budget === 'number') {
    return budget > 0 && sizeMB * 1.5 <= budget;
  }
  if (!budget || typeof budget !== 'object') return false;

  const { gpuBudgetMB, heapBudgetMB } = budget;
  // Reject NaN budgets explicitly for the same reason as above. Infinity is
  // deliberately still accepted as "unbounded".
  if (typeof gpuBudgetMB !== 'number' || Number.isNaN(gpuBudgetMB)) return false;
  if (typeof heapBudgetMB !== 'number' || Number.isNaN(heapBudgetMB)) return false;

  if (sizeMB + GPU_VARIANT_OVERHEAD_MB > gpuBudgetMB) return false;
  return heapBudgetMB >= HEAP_WORKING_SET_FLOOR_MB;
}
|
| 321 |
|
| 322 |
export async function describeDevice() {
|
js/run/source.js
CHANGED
|
@@ -56,7 +56,12 @@ export function localSource() {
|
|
| 56 |
|
| 57 |
// ──────────────── hosted / OPFS ────────────────
|
| 58 |
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
async function getOpfsRoot() {
|
| 62 |
if (!navigator.storage?.getDirectory) {
|
|
@@ -100,14 +105,16 @@ export function hostedSource() {
|
|
| 100 |
// path: the worker opens a sync access handle on this FileHandle and
|
| 101 |
// routes MEMFS reads through it, never copying the model into the
|
| 102 |
// WASM heap. onProgress is called during the download leg with
|
| 103 |
-
// (fraction, downloaded, total).
|
|
|
|
|
|
|
| 104 |
async opfsHandleForModel(repo, file, onProgress) {
|
| 105 |
const cached = await getOpfsFileHandle(repo, file, { create: false }).catch(() => null);
|
| 106 |
if (cached) {
|
| 107 |
const f = await cached.getFile();
|
| 108 |
if (f.size > 0) {
|
| 109 |
onProgress?.(1, f.size, f.size);
|
| 110 |
-
return { handle: cached, size: f.size };
|
| 111 |
}
|
| 112 |
}
|
| 113 |
|
|
@@ -136,7 +143,7 @@ export function hostedSource() {
|
|
| 136 |
if (contentLength > 0) onProgress?.(downloaded / contentLength, downloaded, contentLength);
|
| 137 |
}
|
| 138 |
await writable.close();
|
| 139 |
-
return { handle, size: downloaded };
|
| 140 |
} catch (err) {
|
| 141 |
try { await writable.abort(err); } catch { /* ignore */ }
|
| 142 |
throw err;
|
|
|
|
| 56 |
|
| 57 |
// ──────────────── hosted / OPFS ────────────────
|
| 58 |
|
| 59 |
+
// Exported so bench-worker.js can re-resolve the OPFS file handle inside
|
| 60 |
+
// the worker. We can't transfer FileSystemFileHandle directly across
|
| 61 |
+
// postMessage on every browser (iOS Safari structured-clone is missing
|
| 62 |
+
// the implementation), so instead we send the layout key (rootDir +
|
| 63 |
+
// repo segments + filename) and let the worker open it itself.
|
| 64 |
+
export const OPFS_ROOT_NAME = 'models';
|
| 65 |
|
| 66 |
async function getOpfsRoot() {
|
| 67 |
if (!navigator.storage?.getDirectory) {
|
|
|
|
| 105 |
// path: the worker opens a sync access handle on this FileHandle and
|
| 106 |
// routes MEMFS reads through it, never copying the model into the
|
| 107 |
// WASM heap. onProgress is called during the download leg with
|
| 108 |
+
// (fraction, downloaded, total). The returned `wasDownloaded` flag
|
| 109 |
+
// distinguishes a fresh download from a cache hit so the caller can
|
| 110 |
+
// decide whether to mark the variant for post-run eviction.
|
| 111 |
async opfsHandleForModel(repo, file, onProgress) {
|
| 112 |
const cached = await getOpfsFileHandle(repo, file, { create: false }).catch(() => null);
|
| 113 |
if (cached) {
|
| 114 |
const f = await cached.getFile();
|
| 115 |
if (f.size > 0) {
|
| 116 |
onProgress?.(1, f.size, f.size);
|
| 117 |
+
return { handle: cached, size: f.size, wasDownloaded: false };
|
| 118 |
}
|
| 119 |
}
|
| 120 |
|
|
|
|
| 143 |
if (contentLength > 0) onProgress?.(downloaded / contentLength, downloaded, contentLength);
|
| 144 |
}
|
| 145 |
await writable.close();
|
| 146 |
+
return { handle, size: downloaded, wasDownloaded: true };
|
| 147 |
} catch (err) {
|
| 148 |
try { await writable.abort(err); } catch { /* ignore */ }
|
| 149 |
throw err;
|