GitHub Actions committed on
Commit
a474e4b
·
1 Parent(s): ba19f79

sync from abhijitramesh/webgpu-bench@f59fbdb982

Browse files
Files changed (1) hide show
  1. js/run/device.js +41 -123
js/run/device.js CHANGED
@@ -62,59 +62,28 @@ const ANDROID_HEAP_BUDGET_MB = 800;
62
 
63
  // GPU budgets = available GPU-buffer capacity for model weights + KV
64
  // mirror, sized below the Jetsam tab ceiling minus working-set headroom.
65
- // These are *fallback* values. On mobile we run a bounded GPU probe
66
- // (capped well below the Jetsam ceiling, with yields between steps) and
67
- // only fall back to the static value when the probe trips, returns less
68
- // than the static floor, or maxBufferSize is too small to bother.
 
 
 
69
  //
70
  // iPhone: empirical — 1200 MB caused tab reloads on first variant of a
71
  // Run study (Llama-3.2-1B Q2_K, 554 MB) on iPhone 17 Pro Max. 700 MB
72
  // keeps Llama-1B variants out of variantFits while still allowing the
73
- // 250–500 MB tier (gemma-3-270m Q8, Qwen3-0.6B Q4, etc.) — the band
74
- // that was missing under the old 450 MB shared cap.
 
 
 
 
 
75
  const IPHONE_GPU_BUDGET_MB = 700;
76
- const IPAD_GPU_BUDGET_MB = 2500;
77
  const ANDROID_GPU_BUDGET_MB = 1500;
78
 
79
- // Bounded mobile GPU probe — small steps + yields keep allocation rate
80
- // below the spike threshold that triggers Jetsam, and a tier-based hard
81
- // cap keeps the probe ceiling well below the device's known crash point.
82
- const MOBILE_PROBE_STEP_MB = 128;
83
- const MOBILE_PROBE_TIMEOUT_MS = 10_000;
84
- const MOBILE_PROBE_YIELD_MS = 50;
85
- const MOBILE_PROBE_SAFETY_MARGIN_MB = 150;
86
-
87
- // SessionStorage sentinel: written before the probe, cleared after. If
88
- // we see it on the next page load, the previous probe crashed the tab —
89
- // skip probing and use the static fallback so we don't loop forever
90
- // with the user staring at a tab that keeps reloading. Cleared at end
91
- // of probe so subsequent loads in the same session re-probe normally.
92
- const MOBILE_PROBE_SENTINEL_KEY = 'webgpu-bench:mobile-gpu-probe-in-progress';
93
-
94
- // Probe ceiling per family × maxBufferSize tier. Caps are deliberately
95
- // conservative — a probe that completes successfully gives `cap - margin`,
96
- // while a probe that OOMs partway gives `probed - margin`. We never
97
- // exceed `cap`, so even a successful probe sits below the empirical
98
- // crash point on the worst-case device we've seen for that tier.
99
- function getMobileProbeCapMB(family, maxBufferSizeMB) {
100
- if (family === 'iphone') {
101
- if (maxBufferSizeMB >= 900) return 1000;
102
- if (maxBufferSizeMB >= 500) return 800;
103
- return 400;
104
- }
105
- if (family === 'ipad') {
106
- if (maxBufferSizeMB >= 900) return 3000;
107
- if (maxBufferSizeMB >= 500) return 1800;
108
- return 1000;
109
- }
110
- if (family === 'android') {
111
- if (maxBufferSizeMB >= 900) return 2000;
112
- if (maxBufferSizeMB >= 500) return 1500;
113
- return 800;
114
- }
115
- return 700;
116
- }
117
-
118
  function detectMobileFamily() {
119
  if (typeof navigator === 'undefined') return null;
120
  const ua = navigator.userAgent || '';
@@ -319,25 +288,31 @@ async function _computeBudget() {
319
  const mobileFamily = detectMobileFamily();
320
  const isMobile = mobileFamily !== null;
321
 
322
- // ── Mobile path: static heap budget, bounded GPU probe ──
323
  //
324
- // Heap stays static — the heap probe itself can trip Jetsam (commit
325
- // 6f33b5d), and the working-set floor matters more than a precise
326
- // number anyway.
 
 
 
 
 
 
 
327
  //
328
- // GPU runs a *bounded* probe: we read maxBufferSize from the adapter
329
- // (free, no allocation), pick a per-tier hard cap from
330
- // getMobileProbeCapMB, then probe with small 128 MB steps and 50 ms
331
- // yields up to that cap. This gives us a real measurement on capable
332
- // devices (e.g. iPhone 17 Pro Max gets ~850 MB instead of the 700 MB
333
- // static fallback) without risking the unbounded behavior that tripped
334
- // Jetsam in commit 4f567a5. If the probe OOMs partway, we use
335
- // `probed - margin`. If it returns less than the static fallback or
336
- // fails entirely, we use the static fallback.
337
  if (isMobile) {
338
- const { heap: heapBudgetMB, gpu: staticGpuBudgetMB } = getMobileBudgetMB(mobileFamily);
339
 
340
- // Read adapter limits without allocating a device buffer.
 
341
  let maxBufferSizeMB = 0;
342
  let adapterReadError = null;
343
  try {
@@ -354,67 +329,10 @@ async function _computeBudget() {
354
  adapterReadError = err.message;
355
  }
356
 
357
- const probeCap = getMobileProbeCapMB(mobileFamily, maxBufferSizeMB);
358
- const probeBestCase = probeCap - MOBILE_PROBE_SAFETY_MARGIN_MB;
359
-
360
- // Skip the probe if even a successful run can't beat the static
361
- // fallback — allocating ~probeCap of GPU buffers on a low-RAM iPhone
362
- // (e.g. iPhone 13 with 6 GB) can itself trip Jetsam, and there's
363
- // no payoff if we'd have used staticGpuBudgetMB regardless.
364
- const probeWorthIt = probeBestCase > staticGpuBudgetMB;
365
-
366
- // Crash-loop guard: if a previous probe in this session crashed the
367
- // tab, we never made it back to the post-probe sentinel clear, so the
368
- // sentinel is still set on this load. Skip the probe until the user
369
- // closes the tab (clears sessionStorage).
370
- let prevProbeCrashed = false;
371
- try {
372
- prevProbeCrashed = sessionStorage.getItem(MOBILE_PROBE_SENTINEL_KEY) === '1';
373
- } catch { /* sessionStorage may be disabled */ }
374
-
375
- let gpuProbe = { probedMB: 0, error: null };
376
- let probeSkipReason = null;
377
- if (prevProbeCrashed) {
378
- probeSkipReason = 'previous probe crashed tab';
379
- } else if (!probeWorthIt) {
380
- probeSkipReason = `probe ceiling ${probeBestCase} MB ≀ static ${staticGpuBudgetMB} MB`;
381
- } else {
382
- try { sessionStorage.setItem(MOBILE_PROBE_SENTINEL_KEY, '1'); } catch { /* noop */ }
383
- gpuProbe = await probeGpuBudgetMB({
384
- stepMB: MOBILE_PROBE_STEP_MB,
385
- maxMB: probeCap,
386
- timeoutMs: MOBILE_PROBE_TIMEOUT_MS,
387
- yieldMs: MOBILE_PROBE_YIELD_MS,
388
- });
389
- try { sessionStorage.removeItem(MOBILE_PROBE_SENTINEL_KEY); } catch { /* noop */ }
390
- }
391
-
392
- const margined = gpuProbe.probedMB - MOBILE_PROBE_SAFETY_MARGIN_MB;
393
- let gpuBudgetMB;
394
- let source;
395
- if (probeSkipReason) {
396
- gpuBudgetMB = staticGpuBudgetMB;
397
- source = `mobile probe skipped (${probeSkipReason}), using static ${staticGpuBudgetMB} MB for ${mobileFamily}`;
398
- } else if (gpuProbe.probedMB > 0 && margined > staticGpuBudgetMB) {
399
- gpuBudgetMB = margined;
400
- const hitCap = gpuProbe.probedMB + MOBILE_PROBE_STEP_MB > probeCap;
401
- const detail = hitCap
402
- ? `hit cap ${probeCap} MB`
403
- : `stopped at ${gpuProbe.probedMB} MB (OOM)`;
404
- source = `mobile probe β€” ${mobileFamily}, ${detail}, using ${gpuBudgetMB} MB (βˆ’ ${MOBILE_PROBE_SAFETY_MARGIN_MB} MB margin)`;
405
- } else {
406
- gpuBudgetMB = staticGpuBudgetMB;
407
- if (gpuProbe.probedMB > 0) {
408
- source = `mobile probe β€” ${mobileFamily}, only ${gpuProbe.probedMB} MB measured (below static floor), using static ${staticGpuBudgetMB} MB`;
409
- } else {
410
- source = `mobile probe failed (${gpuProbe.error || 'unknown'}), using static ${staticGpuBudgetMB} MB for ${mobileFamily}`;
411
- }
412
- }
413
-
414
  const adapterDetail = adapterReadError
415
  ? ` (adapter read failed: ${adapterReadError})`
416
  : maxBufferSizeMB > 0
417
- ? ` (maxBufferSize ${maxBufferSizeMB} MB β†’ probe cap ${probeCap} MB)`
418
  : '';
419
 
420
  return {
@@ -424,12 +342,12 @@ async function _computeBudget() {
424
  memGB,
425
  quotaMB,
426
  probedMB: 0,
427
- gpuProbedMB: gpuProbe.probedMB,
428
- probeError: 'skipped on mobile (heap probe can trip Jetsam)',
429
- gpuProbeError: gpuProbe.error || (probeSkipReason ? `skipped: ${probeSkipReason}` : null),
430
  isMobile: true,
431
  mobileFamily,
432
- source: source + adapterDetail,
433
  heapSource: `mobile static budget β€” ${mobileFamily} (WASM heap ${heapBudgetMB} MB for KV + compute scratch)`,
434
  };
435
  }
 
62
 
63
  // GPU budgets = available GPU-buffer capacity for model weights + KV
64
  // mirror, sized below the Jetsam tab ceiling minus working-set headroom.
65
+ // Static per-family numbers — we don't probe on mobile because the
66
+ // probe's allocation pulse itself triggers Jetsam on lower-RAM devices,
67
+ // and WebKit doesn't expose a signal that distinguishes (e.g.) iPhone 13
68
+ // from iPhone 17 Pro Max (same maxBufferSize 1024 MB on both, same
69
+ // deviceMemory clamp). See "mobile probe" history in git: bounded
70
+ // probe shipped, then disabled because the iPhone 13 / mid-RAM iPad
71
+ // classes still Jetsamed during or right after the probe pulse.
72
  //
73
  // iPhone: empirical — 1200 MB caused tab reloads on first variant of a
74
  // Run study (Llama-3.2-1B Q2_K, 554 MB) on iPhone 17 Pro Max. 700 MB
75
  // keeps Llama-1B variants out of variantFits while still allowing the
76
+ // 250–500 MB tier (gemma-3-270m Q8, Qwen3-0.6B Q4, etc.).
77
+ //
78
+ // iPad: empirical — 2500 MB Jetsamed on Llama-3.2-1B (likely Q4_K_M
79
+ // = 770 MB or Q8_0 = 1259 MB). 1500 MB excludes Llama-1B Q8_0 (1459 MB
80
+ // after overhead) but allows Q4_K_M (970 MB), keeping the standard
81
+ // study quant runnable. High-end iPad Pro M-class probably tolerates
82
+ // more, but we have no way to detect device class.
83
  const IPHONE_GPU_BUDGET_MB = 700;
84
+ const IPAD_GPU_BUDGET_MB = 1500;
85
  const ANDROID_GPU_BUDGET_MB = 1500;
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  function detectMobileFamily() {
88
  if (typeof navigator === 'undefined') return null;
89
  const ua = navigator.userAgent || '';
 
288
  const mobileFamily = detectMobileFamily();
289
  const isMobile = mobileFamily !== null;
290
 
291
+ // ── Mobile path: pure static budgets ──
292
  //
293
+ // No probes on mobile. Both the heap probe and the GPU probe have been
294
+ // shown to themselves trigger Jetsam:
295
+ // - Heap probe: commit 6f33b5d (terminated worker).
296
+ // - GPU probe (unbounded): commit 4f567a5.
297
+ // - GPU probe (bounded): the 1000 MB peak allocation pulse on
298
+ // iPhone 13 / mid-RAM iPad classes still pushed the WebContent
299
+ // process over the Jetsam threshold during or right after the
300
+ // probe — even though the probe itself completed cleanly,
301
+ // subsequent OPFS writes hit "unknown transient" errors and the
302
+ // next inference allocation tipped the tab over.
303
  //
304
+ // We can't distinguish iPhone 13 (6 GB) from iPhone 17 Pro Max (12 GB)
305
+ // via WebGPU adapter info or navigator.deviceMemory, so we err on the
306
+ // side of the lower-RAM device. The budgets are tuned to admit the
307
+ // 250–500 MB tier (gemma-3-270m, Qwen3-0.6B, LFM2.5-350M) and to
308
+ // exclude variants that empirically caused crashes on the smallest
309
+ // device in each family. We still surface adapter.limits.maxBufferSize
310
+ // in the source string for diagnostics.
 
 
311
  if (isMobile) {
312
+ const { heap: heapBudgetMB, gpu: gpuBudgetMB } = getMobileBudgetMB(mobileFamily);
313
 
314
+ // Read adapter limits without allocating a device buffer — purely
315
+ // informational for the device card / log line.
316
  let maxBufferSizeMB = 0;
317
  let adapterReadError = null;
318
  try {
 
329
  adapterReadError = err.message;
330
  }
331
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
  const adapterDetail = adapterReadError
333
  ? ` (adapter read failed: ${adapterReadError})`
334
  : maxBufferSizeMB > 0
335
+ ? ` (maxBufferSize ${maxBufferSizeMB} MB)`
336
  : '';
337
 
338
  return {
 
342
  memGB,
343
  quotaMB,
344
  probedMB: 0,
345
+ gpuProbedMB: 0,
346
+ probeError: 'skipped on mobile (probes themselves trigger Jetsam)',
347
+ gpuProbeError: 'skipped on mobile (probes themselves trigger Jetsam)',
348
  isMobile: true,
349
  mobileFamily,
350
+ source: `mobile static budget β€” ${mobileFamily} (GPU ${gpuBudgetMB} MB for OPFS-streamed weights)${adapterDetail}`,
351
  heapSource: `mobile static budget β€” ${mobileFamily} (WASM heap ${heapBudgetMB} MB for KV + compute scratch)`,
352
  };
353
  }