GitHub Actions committed on
Commit
fad27cb
·
1 Parent(s): 07d904e

sync from abhijitramesh/webgpu-bench@28c26b866a

Browse files
Files changed (2) hide show
  1. js/run/controller.js +3 -0
  2. js/run/device.js +114 -18
js/run/controller.js CHANGED
@@ -1275,6 +1275,9 @@ async function onRunClick({ studyMode = false } = {}) {
1275
  `${(MOBILE_YIELD_BETWEEN_RUNS_MS / 1000).toFixed(1)} s cooldown between runs ` +
1276
  'so iOS can release WebGPU buffers before the next load.',
1277
  );
 
 
 
1278
  }
1279
 
1280
  const machine = await machineInfo();
 
1275
  `${(MOBILE_YIELD_BETWEEN_RUNS_MS / 1000).toFixed(1)} s cooldown between runs ` +
1276
  'so iOS can release WebGPU buffers before the next load.',
1277
  );
1278
+ if (state.budget?.source) {
1279
+ logLine(`GPU budget: ${state.budget.source}`);
1280
+ }
1281
  }
1282
 
1283
  const machine = await machineInfo();
js/run/device.js CHANGED
@@ -62,19 +62,52 @@ const ANDROID_HEAP_BUDGET_MB = 800;
62
 
63
  // GPU budgets = available GPU-buffer capacity for model weights + KV
64
  // mirror, sized below the Jetsam tab ceiling minus working-set headroom.
65
- // We can't probe on mobile (the GPU probe itself trips Jetsam — see
66
- // commit 4f567a5), so these are static per-family estimates.
 
 
67
  //
68
  // iPhone: empirical — 1200 MB caused tab reloads on first variant of a
69
  // Run study (Llama-3.2-1B Q2_K, 554 MB) on iPhone 17 Pro Max. 700 MB
70
  // keeps Llama-1B variants out of variantFits while still allowing the
71
  // 250–500 MB tier (gemma-3-270m Q8, Qwen3-0.6B Q4, etc.) — the band
72
- // that was missing under the old 450 MB shared cap. Bump back up only
73
- // when we have data showing a specific device tolerates more.
74
  const IPHONE_GPU_BUDGET_MB = 700;
75
  const IPAD_GPU_BUDGET_MB = 2500;
76
  const ANDROID_GPU_BUDGET_MB = 1500;
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  function detectMobileFamily() {
79
  if (typeof navigator === 'undefined') return null;
80
  const ua = navigator.userAgent || '';
@@ -172,6 +205,7 @@ export async function probeGpuBudgetMB({
172
  stepMB = GPU_PROBE_STEP_MB,
173
  maxMB = GPU_PROBE_MAX_MB,
174
  timeoutMs = GPU_PROBE_TIMEOUT_MS,
 
175
  } = {}) {
176
  if (!navigator.gpu) {
177
  return { probedMB: 0, error: 'WebGPU not available' };
@@ -237,8 +271,11 @@ export async function probeGpuBudgetMB({
237
  buffers.push(buffer);
238
  totalBytes += stepBytes;
239
 
240
- // Yield so we don't starve the main thread / GC.
241
- await new Promise((r) => setTimeout(r, 0));
 
 
 
242
  }
243
  } finally {
244
  for (const b of buffers) {
@@ -275,15 +312,74 @@ async function _computeBudget() {
275
  const mobileFamily = detectMobileFamily();
276
  const isMobile = mobileFamily !== null;
277
 
278
- // ── Mobile path: static per-family budgets, separate heap and GPU ──
279
- // Same shape as desktop (independent gpuBudgetMB / heapBudgetMB) so
280
- // variantFits checks `model + overhead ≤ gpuBudget` against the GPU-
281
- // resident weights and `heapBudget ≥ working-set floor` against the
282
- // WASM-heap working set. We can't probe on mobile (both probes can
283
- // themselves trip Jetsam — see commits 4f567a5 and 6f33b5d), so we use
284
- // researched per-family numbers from the constants block above.
 
 
 
 
 
 
 
 
285
  if (isMobile) {
286
- const { heap: heapBudgetMB, gpu: gpuBudgetMB } = getMobileBudgetMB(mobileFamily);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  return {
288
  budgetMB: gpuBudgetMB,
289
  gpuBudgetMB,
@@ -291,12 +387,12 @@ async function _computeBudget() {
291
  memGB,
292
  quotaMB,
293
  probedMB: 0,
294
- gpuProbedMB: 0,
295
- probeError: 'skipped on mobile (probes can themselves trip Jetsam)',
296
- gpuProbeError: 'skipped on mobile (probes can themselves trip Jetsam)',
297
  isMobile: true,
298
  mobileFamily,
299
- source: `mobile static budget β€” ${mobileFamily} (GPU ${gpuBudgetMB} MB for OPFS-streamed weights)`,
300
  heapSource: `mobile static budget β€” ${mobileFamily} (WASM heap ${heapBudgetMB} MB for KV + compute scratch)`,
301
  };
302
  }
 
62
 
63
  // GPU budgets = available GPU-buffer capacity for model weights + KV
64
  // mirror, sized below the Jetsam tab ceiling minus working-set headroom.
65
+ // These are *fallback* values. On mobile we run a bounded GPU probe
66
+ // (capped well below the Jetsam ceiling, with yields between steps) and
67
+ // only fall back to the static value when the probe trips, returns less
68
+ // than the static floor, or maxBufferSize is too small to bother.
69
  //
70
  // iPhone: empirical — 1200 MB caused tab reloads on first variant of a
71
  // Run study (Llama-3.2-1B Q2_K, 554 MB) on iPhone 17 Pro Max. 700 MB
72
  // keeps Llama-1B variants out of variantFits while still allowing the
73
  // 250–500 MB tier (gemma-3-270m Q8, Qwen3-0.6B Q4, etc.) — the band
74
+ // that was missing under the old 450 MB shared cap.
 
75
  const IPHONE_GPU_BUDGET_MB = 700;
76
  const IPAD_GPU_BUDGET_MB = 2500;
77
  const ANDROID_GPU_BUDGET_MB = 1500;
78
 
79
+ // Bounded mobile GPU probe — small steps + yields keep allocation rate
80
+ // below the spike threshold that triggers Jetsam, and a tier-based hard
81
+ // cap keeps the probe ceiling well below the device's known crash point.
82
+ const MOBILE_PROBE_STEP_MB = 128;
83
+ const MOBILE_PROBE_TIMEOUT_MS = 10_000;
84
+ const MOBILE_PROBE_YIELD_MS = 50;
85
+ const MOBILE_PROBE_SAFETY_MARGIN_MB = 150;
86
+
87
+ // Probe ceiling per family × maxBufferSize tier. Caps are deliberately
88
+ // conservative — a probe that completes successfully gives `cap - margin`,
89
+ // while a probe that OOMs partway gives `probed - margin`. We never
90
+ // exceed `cap`, so even a successful probe sits below the empirical
91
+ // crash point on the worst-case device we've seen for that tier.
92
+ function getMobileProbeCapMB(family, maxBufferSizeMB) {
93
+ if (family === 'iphone') {
94
+ if (maxBufferSizeMB >= 900) return 1000;
95
+ if (maxBufferSizeMB >= 500) return 800;
96
+ return 400;
97
+ }
98
+ if (family === 'ipad') {
99
+ if (maxBufferSizeMB >= 900) return 3000;
100
+ if (maxBufferSizeMB >= 500) return 1800;
101
+ return 1000;
102
+ }
103
+ if (family === 'android') {
104
+ if (maxBufferSizeMB >= 900) return 2000;
105
+ if (maxBufferSizeMB >= 500) return 1500;
106
+ return 800;
107
+ }
108
+ return 700;
109
+ }
110
+
111
  function detectMobileFamily() {
112
  if (typeof navigator === 'undefined') return null;
113
  const ua = navigator.userAgent || '';
 
205
  stepMB = GPU_PROBE_STEP_MB,
206
  maxMB = GPU_PROBE_MAX_MB,
207
  timeoutMs = GPU_PROBE_TIMEOUT_MS,
208
+ yieldMs = 0,
209
  } = {}) {
210
  if (!navigator.gpu) {
211
  return { probedMB: 0, error: 'WebGPU not available' };
 
271
  buffers.push(buffer);
272
  totalBytes += stepBytes;
273
 
274
+ // Yield so we don't starve the main thread / GC. On mobile a
275
+ // longer yield also gives the OS a chance to update its memory
276
+ // accounting between steps so a fast burst doesn't look like a
277
+ // spike to Jetsam.
278
+ await new Promise((r) => setTimeout(r, yieldMs));
279
  }
280
  } finally {
281
  for (const b of buffers) {
 
312
  const mobileFamily = detectMobileFamily();
313
  const isMobile = mobileFamily !== null;
314
 
315
+ // ── Mobile path: static heap budget, bounded GPU probe ──
316
+ //
317
+ // Heap stays static — the heap probe itself can trip Jetsam (commit
318
+ // 6f33b5d), and the working-set floor matters more than a precise
319
+ // number anyway.
320
+ //
321
+ // GPU runs a *bounded* probe: we read maxBufferSize from the adapter
322
+ // (free, no allocation), pick a per-tier hard cap from
323
+ // getMobileProbeCapMB, then probe with small 128 MB steps and 50 ms
324
+ // yields up to that cap. This gives us a real measurement on capable
325
+ // devices (e.g. iPhone 17 Pro Max gets ~850 MB instead of the 700 MB
326
+ // static fallback) without risking the unbounded behavior that tripped
327
+ // Jetsam in commit 4f567a5. If the probe OOMs partway, we use
328
+ // `probed - margin`. If it returns less than the static fallback or
329
+ // fails entirely, we use the static fallback.
330
  if (isMobile) {
331
+ const { heap: heapBudgetMB, gpu: staticGpuBudgetMB } = getMobileBudgetMB(mobileFamily);
332
+
333
+ // Read adapter limits without allocating a device buffer.
334
+ let maxBufferSizeMB = 0;
335
+ let adapterReadError = null;
336
+ try {
337
+ if (navigator.gpu) {
338
+ const adapter = await navigator.gpu.requestAdapter();
339
+ const lim = adapter?.limits?.maxBufferSize;
340
+ if (typeof lim === 'number') {
341
+ maxBufferSizeMB = Math.floor(lim / (1024 * 1024));
342
+ }
343
+ } else {
344
+ adapterReadError = 'WebGPU not available';
345
+ }
346
+ } catch (err) {
347
+ adapterReadError = err.message;
348
+ }
349
+
350
+ const probeCap = getMobileProbeCapMB(mobileFamily, maxBufferSizeMB);
351
+ const gpuProbe = await probeGpuBudgetMB({
352
+ stepMB: MOBILE_PROBE_STEP_MB,
353
+ maxMB: probeCap,
354
+ timeoutMs: MOBILE_PROBE_TIMEOUT_MS,
355
+ yieldMs: MOBILE_PROBE_YIELD_MS,
356
+ });
357
+
358
+ const margined = gpuProbe.probedMB - MOBILE_PROBE_SAFETY_MARGIN_MB;
359
+ let gpuBudgetMB;
360
+ let source;
361
+ if (gpuProbe.probedMB > 0 && margined > staticGpuBudgetMB) {
362
+ gpuBudgetMB = margined;
363
+ const hitCap = gpuProbe.probedMB + MOBILE_PROBE_STEP_MB > probeCap;
364
+ const detail = hitCap
365
+ ? `hit cap ${probeCap} MB`
366
+ : `stopped at ${gpuProbe.probedMB} MB (OOM)`;
367
+ source = `mobile probe β€” ${mobileFamily}, ${detail}, using ${gpuBudgetMB} MB (βˆ’ ${MOBILE_PROBE_SAFETY_MARGIN_MB} MB margin)`;
368
+ } else {
369
+ gpuBudgetMB = staticGpuBudgetMB;
370
+ if (gpuProbe.probedMB > 0) {
371
+ source = `mobile probe β€” ${mobileFamily}, only ${gpuProbe.probedMB} MB measured (below static floor), using static ${staticGpuBudgetMB} MB`;
372
+ } else {
373
+ source = `mobile probe failed (${gpuProbe.error || 'unknown'}), using static ${staticGpuBudgetMB} MB for ${mobileFamily}`;
374
+ }
375
+ }
376
+
377
+ const adapterDetail = adapterReadError
378
+ ? ` (adapter read failed: ${adapterReadError})`
379
+ : maxBufferSizeMB > 0
380
+ ? ` (maxBufferSize ${maxBufferSizeMB} MB β†’ probe cap ${probeCap} MB)`
381
+ : '';
382
+
383
  return {
384
  budgetMB: gpuBudgetMB,
385
  gpuBudgetMB,
 
387
  memGB,
388
  quotaMB,
389
  probedMB: 0,
390
+ gpuProbedMB: gpuProbe.probedMB,
391
+ probeError: 'skipped on mobile (heap probe can trip Jetsam)',
392
+ gpuProbeError: gpuProbe.error || null,
393
  isMobile: true,
394
  mobileFamily,
395
+ source: source + adapterDetail,
396
  heapSource: `mobile static budget β€” ${mobileFamily} (WASM heap ${heapBudgetMB} MB for KV + compute scratch)`,
397
  };
398
  }