GitHub Actions committed on
Commit
abf5e96
·
1 Parent(s): 4aa88a2

sync from abhijitramesh/webgpu-bench@93d50bbc3c

Browse files
Files changed (1) hide show
  1. js/run/device.js +52 -48
js/run/device.js CHANGED
@@ -25,36 +25,48 @@ const DEFAULT_BUDGET_MB = 2 * 1024;
25
  const HOSTED_QUOTA_FRACTION = 0.4;
26
  const HOSTED_QUOTA_CAP_MB = 8 * 1024;
27
 
28
- // Mobile per-tab budgets. iOS Jetsam reaps the tab when the total of
29
- // (WASM heap + WebGPU buffers + JS heap + native overhead) crosses the
30
- // device's threshold — heap and GPU memory aren't separately budgeted
31
- // the way they are on desktop, they share one tab-process pool. So on
32
- // mobile we use a single budget number for both heapBudgetMB and
33
- // gpuBudgetMB; variantFits then checks the model fits within that
34
- // shared pool.
35
  //
36
- // Numbers below are derived from public reports / Apple docs:
 
 
 
 
 
 
37
  //
38
- // - iPhone WASM practical limit: 300–450 MB
 
 
39
  // lapcatsoftware.com/articles/2026/1/7.html
40
  // news.ycombinator.com/item?id=39039593
41
  // github.com/emscripten-core/emscripten/issues/19374
42
- // github.com/godotengine/godot/issues/70621 (Godot reduced
43
- // WASM_MEM_MAX to 256 MB to eliminate iOS OOM)
44
  //
45
  // - iOS Safari WebGPU maxBufferSize: 256 MB on iPhone 6 / older,
46
  // 993 MB on iPad Pro M-class. Per-buffer cap, not total.
47
  // Apple WWDC 2025 "Unlock GPU computing with WebGPU"
48
  //
49
- // - iPhone 12 Pro reports OOM around 1.5–3 GB but this is the upper
50
- // bound; Jetsam typically intervenes much earlier.
51
  // developer.apple.com/forums/thread/761666
52
  //
53
- // We undershoot to leave margin for KV cache + compute scratch +
54
- // JS/native overhead inside the same budget.
55
- const IPHONE_TAB_BUDGET_MB = 450;
56
- const IPAD_TAB_BUDGET_MB = 1500;
57
- const ANDROID_TAB_BUDGET_MB = 800;
 
 
 
 
 
 
 
58
 
59
  function detectMobileFamily() {
60
  if (typeof navigator === 'undefined') return null;
@@ -69,11 +81,11 @@ function detectMobileFamily() {
69
  return null;
70
  }
71
 
72
- function getMobileTabBudgetMB(family) {
73
- if (family === 'ipad') return IPAD_TAB_BUDGET_MB;
74
- if (family === 'iphone') return IPHONE_TAB_BUDGET_MB;
75
- if (family === 'android') return ANDROID_TAB_BUDGET_MB;
76
- return IPHONE_TAB_BUDGET_MB; // safest default
77
  }
78
 
79
  const PROBE_TIMEOUT_MS = 15_000;
@@ -81,14 +93,10 @@ const GPU_PROBE_STEP_MB = 256;
81
  const GPU_PROBE_MAX_MB = 8 * 1024;
82
  const GPU_PROBE_TIMEOUT_MS = 8_000;
83
 
84
- // Working-set floor in the WASM heap on desktop. KV cache + compute
85
- // buffers + JS heap headroom for a typical 1B model at n_ctx=2048 add
86
- // up to ~400 MB; we require 500 to leave a margin. On mobile the
87
- // heapBudgetMB === gpuBudgetMB === tabBudget so this floor is checked
88
- // against the same number — works as long as tabBudget ≥ 500, but iPhone
89
- // is below that, which means iPhone always fails this check. That's
90
- // intentional: variantFits also has the GPU-fit check, and the floor
91
- // here is just preventing absurdly-tiny working sets.
92
  const HEAP_WORKING_SET_FLOOR_MB = 256;
93
 
94
  // Per-variant overhead added on top of the model file size when checking
@@ -260,23 +268,19 @@ async function _computeBudget() {
260
  const mobileFamily = detectMobileFamily();
261
  const isMobile = mobileFamily !== null;
262
 
263
- // ── Mobile path: single shared tab budget ──
264
- // iOS Jetsam (and Android equivalents) cap the entire tab process —
265
- // WASM heap + WebGPU buffers + JS heap + native overhead all draw from
266
- // one pool. The heap probe and GPU probe both burn capacity from that
267
- // same pool and can themselves trip Jetsam (the GPU probe especially
268
- // — see commit history for the refresh-loop bug). So on mobile we
269
- // skip both probes and use a fixed per-device-class budget derived
270
- // from public reports of WebKit/Jetsam thresholds (see comments at
271
- // top of this file). heapBudgetMB === gpuBudgetMB === tabBudget; the
272
- // variantFits check ends up being effectively `model + overhead ≤
273
- // tabBudget`.
274
  if (isMobile) {
275
- const tabBudget = getMobileTabBudgetMB(mobileFamily);
276
  return {
277
- budgetMB: tabBudget,
278
- gpuBudgetMB: tabBudget,
279
- heapBudgetMB: tabBudget,
280
  memGB,
281
  quotaMB,
282
  probedMB: 0,
@@ -285,8 +289,8 @@ async function _computeBudget() {
285
  gpuProbeError: 'skipped on mobile (probes can themselves trip Jetsam)',
286
  isMobile: true,
287
  mobileFamily,
288
- source: `mobile shared tab budget β€” ${mobileFamily} (${tabBudget} MB total for heap + GPU)`,
289
- heapSource: 'same pool as GPU on mobile',
290
  };
291
  }
292
 
 
25
  const HOSTED_QUOTA_FRACTION = 0.4;
26
  const HOSTED_QUOTA_CAP_MB = 8 * 1024;
27
 
28
+ // Mobile per-device budgets. Two independent caps, mirroring the desktop
29
+ // path — model weights stream from OPFS into WebGPU buffers (see
30
+ // bench-worker.js:patchMEMFS / opfsAlloc), so the model size constrains
31
+ // `gpuBudgetMB`, not `heapBudgetMB`. The WASM heap only has to hold the
32
+ // working set (KV cache + ggml compute scratch + JS heap headroom).
 
 
33
  //
34
+ // Earlier we collapsed both into a single tab budget on the theory that
35
+ // iOS Jetsam treats the whole tab process as one pool, so any allocation
36
+ // counts the same. That's true for Jetsam — but it conflates *where* the
37
+ // memory lives with *how much* the platform can hand out: the WASM heap
38
+ // has a much tighter practical ceiling than the GPU side, and counting
39
+ // model bytes against the heap ceiling rejected models that load fine
40
+ // via OPFS streaming.
41
  //
42
+ // Numbers come from public reports / Apple docs:
43
+ //
44
+ // - iPhone WASM practical limit: 300–450 MB → heap budget
45
  // lapcatsoftware.com/articles/2026/1/7.html
46
  // news.ycombinator.com/item?id=39039593
47
  // github.com/emscripten-core/emscripten/issues/19374
48
+ // github.com/godotengine/godot/issues/70621
 
49
  //
50
  // - iOS Safari WebGPU maxBufferSize: 256 MB on iPhone 6 / older,
51
  // 993 MB on iPad Pro M-class. Per-buffer cap, not total.
52
  // Apple WWDC 2025 "Unlock GPU computing with WebGPU"
53
  //
54
+ // - iPhone 12 Pro reports tab OOM around 1.5–3 GB; Jetsam intervenes
55
+ // earlier under pressure. We undershoot the lower bound for headroom.
56
  // developer.apple.com/forums/thread/761666
57
  //
58
+ // Heap budgets = WASM heap practical limits.
59
+ const IPHONE_HEAP_BUDGET_MB = 450;
60
+ const IPAD_HEAP_BUDGET_MB = 1500;
61
+ const ANDROID_HEAP_BUDGET_MB = 800;
62
+
63
+ // GPU budgets = available GPU-buffer capacity for model weights + KV
64
+ // mirror, sized below the Jetsam tab ceiling minus working-set headroom.
65
+ // We can't probe on mobile (the GPU probe itself trips Jetsam — see
66
+ // commit 4f567a5), so these are static per-family estimates.
67
+ const IPHONE_GPU_BUDGET_MB = 1200;
68
+ const IPAD_GPU_BUDGET_MB = 2500;
69
+ const ANDROID_GPU_BUDGET_MB = 1500;
70
 
71
  function detectMobileFamily() {
72
  if (typeof navigator === 'undefined') return null;
 
81
  return null;
82
  }
83
 
84
+ function getMobileBudgetMB(family) {
85
+ if (family === 'ipad') return { heap: IPAD_HEAP_BUDGET_MB, gpu: IPAD_GPU_BUDGET_MB };
86
+ if (family === 'iphone') return { heap: IPHONE_HEAP_BUDGET_MB, gpu: IPHONE_GPU_BUDGET_MB };
87
+ if (family === 'android') return { heap: ANDROID_HEAP_BUDGET_MB, gpu: ANDROID_GPU_BUDGET_MB };
88
+ return { heap: IPHONE_HEAP_BUDGET_MB, gpu: IPHONE_GPU_BUDGET_MB }; // safest default
89
  }
90
 
91
  const PROBE_TIMEOUT_MS = 15_000;
 
93
  const GPU_PROBE_MAX_MB = 8 * 1024;
94
  const GPU_PROBE_TIMEOUT_MS = 8_000;
95
 
96
+ // Working-set floor in the WASM heap. KV cache + compute buffers + JS
97
+ // heap headroom for a typical 1B model at n_ctx=2048 add up to a few
98
+ // hundred MB. Floor at 256 so an absurdly-tiny heap (or a probe failure
99
+ // that returned 0) doesn't pass variantFits.
 
 
 
 
100
  const HEAP_WORKING_SET_FLOOR_MB = 256;
101
 
102
  // Per-variant overhead added on top of the model file size when checking
 
268
  const mobileFamily = detectMobileFamily();
269
  const isMobile = mobileFamily !== null;
270
 
271
+ // ── Mobile path: static per-family budgets, separate heap and GPU ──
272
+ // Same shape as desktop (independent gpuBudgetMB / heapBudgetMB) so
273
+ // variantFits checks `model + overhead ≤ gpuBudget` against the GPU-
274
+ // resident weights and `heapBudget ≥ working-set floor` against the
275
+ // WASM-heap working set. We can't probe on mobile (both probes can
276
+ // themselves trip Jetsam — see commits 4f567a5 and 6f33b5d), so we use
277
+ // researched per-family numbers from the constants block above.
 
 
 
 
278
  if (isMobile) {
279
+ const { heap: heapBudgetMB, gpu: gpuBudgetMB } = getMobileBudgetMB(mobileFamily);
280
  return {
281
+ budgetMB: gpuBudgetMB,
282
+ gpuBudgetMB,
283
+ heapBudgetMB,
284
  memGB,
285
  quotaMB,
286
  probedMB: 0,
 
289
  gpuProbeError: 'skipped on mobile (probes can themselves trip Jetsam)',
290
  isMobile: true,
291
  mobileFamily,
292
+ source: `mobile static budget β€” ${mobileFamily} (GPU ${gpuBudgetMB} MB for OPFS-streamed weights)`,
293
+ heapSource: `mobile static budget β€” ${mobileFamily} (WASM heap ${heapBudgetMB} MB for KV + compute scratch)`,
294
  };
295
  }
296