Spaces:
Running
Running
GitHub Actions committed on
Commit ·
fad27cb
1
Parent(s): 07d904e
sync from abhijitramesh/webgpu-bench@28c26b866a
Browse files- js/run/controller.js +3 -0
- js/run/device.js +114 -18
js/run/controller.js
CHANGED
|
@@ -1275,6 +1275,9 @@ async function onRunClick({ studyMode = false } = {}) {
|
|
| 1275 |
`${(MOBILE_YIELD_BETWEEN_RUNS_MS / 1000).toFixed(1)} s cooldown between runs ` +
|
| 1276 |
'so iOS can release WebGPU buffers before the next load.',
|
| 1277 |
);
|
|
|
|
|
|
|
|
|
|
| 1278 |
}
|
| 1279 |
|
| 1280 |
const machine = await machineInfo();
|
|
|
|
| 1275 |
`${(MOBILE_YIELD_BETWEEN_RUNS_MS / 1000).toFixed(1)} s cooldown between runs ` +
|
| 1276 |
'so iOS can release WebGPU buffers before the next load.',
|
| 1277 |
);
|
| 1278 |
+
if (state.budget?.source) {
|
| 1279 |
+
logLine(`GPU budget: ${state.budget.source}`);
|
| 1280 |
+
}
|
| 1281 |
}
|
| 1282 |
|
| 1283 |
const machine = await machineInfo();
|
js/run/device.js
CHANGED
|
@@ -62,19 +62,52 @@ const ANDROID_HEAP_BUDGET_MB = 800;
|
|
| 62 |
|
| 63 |
// GPU budgets = available GPU-buffer capacity for model weights + KV
|
| 64 |
// mirror, sized below the Jetsam tab ceiling minus working-set headroom.
|
| 65 |
-
//
|
| 66 |
-
//
|
|
|
|
|
|
|
| 67 |
//
|
| 68 |
// iPhone: empirical — 1200 MB caused tab reloads on first variant of a
|
| 69 |
// Run study (Llama-3.2-1B Q2_K, 554 MB) on iPhone 17 Pro Max. 700 MB
|
| 70 |
// keeps Llama-1B variants out of variantFits while still allowing the
|
| 71 |
// 250–500 MB tier (gemma-3-270m Q8, Qwen3-0.6B Q4, etc.) — the band
|
| 72 |
-
// that was missing under the old 450 MB shared cap.
|
| 73 |
-
// when we have data showing a specific device tolerates more.
|
| 74 |
const IPHONE_GPU_BUDGET_MB = 700;
|
| 75 |
const IPAD_GPU_BUDGET_MB = 2500;
|
| 76 |
const ANDROID_GPU_BUDGET_MB = 1500;
|
| 77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
function detectMobileFamily() {
|
| 79 |
if (typeof navigator === 'undefined') return null;
|
| 80 |
const ua = navigator.userAgent || '';
|
|
@@ -172,6 +205,7 @@ export async function probeGpuBudgetMB({
|
|
| 172 |
stepMB = GPU_PROBE_STEP_MB,
|
| 173 |
maxMB = GPU_PROBE_MAX_MB,
|
| 174 |
timeoutMs = GPU_PROBE_TIMEOUT_MS,
|
|
|
|
| 175 |
} = {}) {
|
| 176 |
if (!navigator.gpu) {
|
| 177 |
return { probedMB: 0, error: 'WebGPU not available' };
|
|
@@ -237,8 +271,11 @@ export async function probeGpuBudgetMB({
|
|
| 237 |
buffers.push(buffer);
|
| 238 |
totalBytes += stepBytes;
|
| 239 |
|
| 240 |
-
// Yield so we don't starve the main thread / GC.
|
| 241 |
-
|
|
|
|
|
|
|
|
|
|
| 242 |
}
|
| 243 |
} finally {
|
| 244 |
for (const b of buffers) {
|
|
@@ -275,15 +312,74 @@ async function _computeBudget() {
|
|
| 275 |
const mobileFamily = detectMobileFamily();
|
| 276 |
const isMobile = mobileFamily !== null;
|
| 277 |
|
| 278 |
-
// ββ Mobile path: static
|
| 279 |
-
//
|
| 280 |
-
//
|
| 281 |
-
//
|
| 282 |
-
//
|
| 283 |
-
//
|
| 284 |
-
//
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
if (isMobile) {
|
| 286 |
-
const { heap: heapBudgetMB, gpu:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
return {
|
| 288 |
budgetMB: gpuBudgetMB,
|
| 289 |
gpuBudgetMB,
|
|
@@ -291,12 +387,12 @@ async function _computeBudget() {
|
|
| 291 |
memGB,
|
| 292 |
quotaMB,
|
| 293 |
probedMB: 0,
|
| 294 |
-
gpuProbedMB:
|
| 295 |
-
probeError: 'skipped on mobile (
|
| 296 |
-
gpuProbeError:
|
| 297 |
isMobile: true,
|
| 298 |
mobileFamily,
|
| 299 |
-
source:
|
| 300 |
heapSource: `mobile static budget β ${mobileFamily} (WASM heap ${heapBudgetMB} MB for KV + compute scratch)`,
|
| 301 |
};
|
| 302 |
}
|
|
|
|
| 62 |
|
| 63 |
// GPU budgets = available GPU-buffer capacity for model weights + KV
|
| 64 |
// mirror, sized below the Jetsam tab ceiling minus working-set headroom.
|
| 65 |
+
// These are *fallback* values. On mobile we run a bounded GPU probe
|
| 66 |
+
// (capped well below the Jetsam ceiling, with yields between steps) and
|
| 67 |
+
// only fall back to the static value when the probe trips, returns less
|
| 68 |
+
// than the static floor, or maxBufferSize is too small to bother.
|
| 69 |
//
|
| 70 |
// iPhone: empirical — 1200 MB caused tab reloads on first variant of a
|
| 71 |
// Run study (Llama-3.2-1B Q2_K, 554 MB) on iPhone 17 Pro Max. 700 MB
|
| 72 |
// keeps Llama-1B variants out of variantFits while still allowing the
|
| 73 |
// 250–500 MB tier (gemma-3-270m Q8, Qwen3-0.6B Q4, etc.) — the band
|
| 74 |
+
// that was missing under the old 450 MB shared cap.
|
|
|
|
| 75 |
const IPHONE_GPU_BUDGET_MB = 700;
|
| 76 |
const IPAD_GPU_BUDGET_MB = 2500;
|
| 77 |
const ANDROID_GPU_BUDGET_MB = 1500;
|
| 78 |
|
| 79 |
+
// Bounded mobile GPU probe — small steps + yields keep allocation rate
|
| 80 |
+
// below the spike threshold that triggers Jetsam, and a tier-based hard
|
| 81 |
+
// cap keeps the probe ceiling well below the device's known crash point.
|
| 82 |
+
const MOBILE_PROBE_STEP_MB = 128;
|
| 83 |
+
const MOBILE_PROBE_TIMEOUT_MS = 10_000;
|
| 84 |
+
const MOBILE_PROBE_YIELD_MS = 50;
|
| 85 |
+
const MOBILE_PROBE_SAFETY_MARGIN_MB = 150;
|
| 86 |
+
|
| 87 |
+
// Probe ceiling per family × maxBufferSize tier. Caps are deliberately
|
| 88 |
+
// conservative — a probe that completes successfully gives `cap - margin`,
|
| 89 |
+
// while a probe that OOMs partway gives `probed - margin`. We never
|
| 90 |
+
// exceed `cap`, so even a successful probe sits below the empirical
|
| 91 |
+
// crash point on the worst-case device we've seen for that tier.
|
| 92 |
+
/**
 * Pick the hard ceiling, in MB, for the bounded mobile GPU probe.
 *
 * The ceiling depends on the device family and on which size tier the
 * adapter's reported maxBufferSize falls into:
 *   - large  tier: maxBufferSizeMB >= 900
 *   - medium tier: maxBufferSizeMB >= 500
 *   - small  tier: everything else (including 0, NaN or undefined, since
 *     both threshold comparisons are false for those values)
 *
 * @param {string} family - Device family: 'iphone', 'ipad' or 'android'.
 *   Any other value falls through to the generic 700 MB cap.
 * @param {number} maxBufferSizeMB - Adapter maxBufferSize expressed in MB.
 * @returns {number} Probe ceiling in MB.
 */
function getMobileProbeCapMB(family, maxBufferSizeMB) {
  // Per-family caps, ordered [large tier, medium tier, small tier].
  let caps;
  switch (family) {
    case 'iphone':
      caps = [1000, 800, 400];
      break;
    case 'ipad':
      caps = [3000, 1800, 1000];
      break;
    case 'android':
      caps = [2000, 1500, 800];
      break;
    default:
      // Unrecognised family: single conservative cap, independent of tier.
      return 700;
  }

  const [largeCap, mediumCap, smallCap] = caps;
  if (maxBufferSizeMB >= 900) return largeCap;
  if (maxBufferSizeMB >= 500) return mediumCap;
  return smallCap;
}
|
| 110 |
+
|
| 111 |
function detectMobileFamily() {
|
| 112 |
if (typeof navigator === 'undefined') return null;
|
| 113 |
const ua = navigator.userAgent || '';
|
|
|
|
| 205 |
stepMB = GPU_PROBE_STEP_MB,
|
| 206 |
maxMB = GPU_PROBE_MAX_MB,
|
| 207 |
timeoutMs = GPU_PROBE_TIMEOUT_MS,
|
| 208 |
+
yieldMs = 0,
|
| 209 |
} = {}) {
|
| 210 |
if (!navigator.gpu) {
|
| 211 |
return { probedMB: 0, error: 'WebGPU not available' };
|
|
|
|
| 271 |
buffers.push(buffer);
|
| 272 |
totalBytes += stepBytes;
|
| 273 |
|
| 274 |
+
// Yield so we don't starve the main thread / GC. On mobile a
|
| 275 |
+
// longer yield also gives the OS a chance to update its memory
|
| 276 |
+
// accounting between steps so a fast burst doesn't look like a
|
| 277 |
+
// spike to Jetsam.
|
| 278 |
+
await new Promise((r) => setTimeout(r, yieldMs));
|
| 279 |
}
|
| 280 |
} finally {
|
| 281 |
for (const b of buffers) {
|
|
|
|
| 312 |
const mobileFamily = detectMobileFamily();
|
| 313 |
const isMobile = mobileFamily !== null;
|
| 314 |
|
| 315 |
+
// ── Mobile path: static heap budget, bounded GPU probe ──
|
| 316 |
+
//
|
| 317 |
+
// Heap stays static — the heap probe itself can trip Jetsam (commit
|
| 318 |
+
// 6f33b5d), and the working-set floor matters more than a precise
|
| 319 |
+
// number anyway.
|
| 320 |
+
//
|
| 321 |
+
// GPU runs a *bounded* probe: we read maxBufferSize from the adapter
|
| 322 |
+
// (free, no allocation), pick a per-tier hard cap from
|
| 323 |
+
// getMobileProbeCapMB, then probe with small 128 MB steps and 50 ms
|
| 324 |
+
// yields up to that cap. This gives us a real measurement on capable
|
| 325 |
+
// devices (e.g. iPhone 17 Pro Max gets ~850 MB instead of the 700 MB
|
| 326 |
+
// static fallback) without risking the unbounded behavior that tripped
|
| 327 |
+
// Jetsam in commit 4f567a5. If the probe OOMs partway, we use
|
| 328 |
+
// `probed - margin`. If it returns less than the static fallback or
|
| 329 |
+
// fails entirely, we use the static fallback.
|
| 330 |
if (isMobile) {
|
| 331 |
+
const { heap: heapBudgetMB, gpu: staticGpuBudgetMB } = getMobileBudgetMB(mobileFamily);
|
| 332 |
+
|
| 333 |
+
// Read adapter limits without allocating a device buffer.
|
| 334 |
+
let maxBufferSizeMB = 0;
|
| 335 |
+
let adapterReadError = null;
|
| 336 |
+
try {
|
| 337 |
+
if (navigator.gpu) {
|
| 338 |
+
const adapter = await navigator.gpu.requestAdapter();
|
| 339 |
+
const lim = adapter?.limits?.maxBufferSize;
|
| 340 |
+
if (typeof lim === 'number') {
|
| 341 |
+
maxBufferSizeMB = Math.floor(lim / (1024 * 1024));
|
| 342 |
+
}
|
| 343 |
+
} else {
|
| 344 |
+
adapterReadError = 'WebGPU not available';
|
| 345 |
+
}
|
| 346 |
+
} catch (err) {
|
| 347 |
+
adapterReadError = err.message;
|
| 348 |
+
}
|
| 349 |
+
|
| 350 |
+
const probeCap = getMobileProbeCapMB(mobileFamily, maxBufferSizeMB);
|
| 351 |
+
const gpuProbe = await probeGpuBudgetMB({
|
| 352 |
+
stepMB: MOBILE_PROBE_STEP_MB,
|
| 353 |
+
maxMB: probeCap,
|
| 354 |
+
timeoutMs: MOBILE_PROBE_TIMEOUT_MS,
|
| 355 |
+
yieldMs: MOBILE_PROBE_YIELD_MS,
|
| 356 |
+
});
|
| 357 |
+
|
| 358 |
+
const margined = gpuProbe.probedMB - MOBILE_PROBE_SAFETY_MARGIN_MB;
|
| 359 |
+
let gpuBudgetMB;
|
| 360 |
+
let source;
|
| 361 |
+
if (gpuProbe.probedMB > 0 && margined > staticGpuBudgetMB) {
|
| 362 |
+
gpuBudgetMB = margined;
|
| 363 |
+
const hitCap = gpuProbe.probedMB + MOBILE_PROBE_STEP_MB > probeCap;
|
| 364 |
+
const detail = hitCap
|
| 365 |
+
? `hit cap ${probeCap} MB`
|
| 366 |
+
: `stopped at ${gpuProbe.probedMB} MB (OOM)`;
|
| 367 |
+
source = `mobile probe β ${mobileFamily}, ${detail}, using ${gpuBudgetMB} MB (β ${MOBILE_PROBE_SAFETY_MARGIN_MB} MB margin)`;
|
| 368 |
+
} else {
|
| 369 |
+
gpuBudgetMB = staticGpuBudgetMB;
|
| 370 |
+
if (gpuProbe.probedMB > 0) {
|
| 371 |
+
source = `mobile probe β ${mobileFamily}, only ${gpuProbe.probedMB} MB measured (below static floor), using static ${staticGpuBudgetMB} MB`;
|
| 372 |
+
} else {
|
| 373 |
+
source = `mobile probe failed (${gpuProbe.error || 'unknown'}), using static ${staticGpuBudgetMB} MB for ${mobileFamily}`;
|
| 374 |
+
}
|
| 375 |
+
}
|
| 376 |
+
|
| 377 |
+
const adapterDetail = adapterReadError
|
| 378 |
+
? ` (adapter read failed: ${adapterReadError})`
|
| 379 |
+
: maxBufferSizeMB > 0
|
| 380 |
+
? ` (maxBufferSize ${maxBufferSizeMB} MB β probe cap ${probeCap} MB)`
|
| 381 |
+
: '';
|
| 382 |
+
|
| 383 |
return {
|
| 384 |
budgetMB: gpuBudgetMB,
|
| 385 |
gpuBudgetMB,
|
|
|
|
| 387 |
memGB,
|
| 388 |
quotaMB,
|
| 389 |
probedMB: 0,
|
| 390 |
+
gpuProbedMB: gpuProbe.probedMB,
|
| 391 |
+
probeError: 'skipped on mobile (heap probe can trip Jetsam)',
|
| 392 |
+
gpuProbeError: gpuProbe.error || null,
|
| 393 |
isMobile: true,
|
| 394 |
mobileFamily,
|
| 395 |
+
source: source + adapterDetail,
|
| 396 |
heapSource: `mobile static budget β ${mobileFamily} (WASM heap ${heapBudgetMB} MB for KV + compute scratch)`,
|
| 397 |
};
|
| 398 |
}
|