Spaces:
Running
Running
GitHub Actions committed on
Commit ·
abf5e96
1
Parent(s): 4aa88a2
sync from abhijitramesh/webgpu-bench@93d50bbc3c
Browse files- js/run/device.js +52 -48
js/run/device.js
CHANGED
|
@@ -25,36 +25,48 @@ const DEFAULT_BUDGET_MB = 2 * 1024;
|
|
| 25 |
const HOSTED_QUOTA_FRACTION = 0.4;
|
| 26 |
const HOSTED_QUOTA_CAP_MB = 8 * 1024;
|
| 27 |
|
| 28 |
-
// Mobile per-
|
| 29 |
-
//
|
| 30 |
-
//
|
| 31 |
-
//
|
| 32 |
-
//
|
| 33 |
-
// gpuBudgetMB; variantFits then checks the model fits within that
|
| 34 |
-
// shared pool.
|
| 35 |
//
|
| 36 |
-
//
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
//
|
| 38 |
-
//
|
|
|
|
|
|
|
| 39 |
// lapcatsoftware.com/articles/2026/1/7.html
|
| 40 |
// news.ycombinator.com/item?id=39039593
|
| 41 |
// github.com/emscripten-core/emscripten/issues/19374
|
| 42 |
-
// github.com/godotengine/godot/issues/70621
|
| 43 |
-
// WASM_MEM_MAX to 256 MB to eliminate iOS OOM)
|
| 44 |
//
|
| 45 |
// - iOS Safari WebGPU maxBufferSize: 256 MB on iPhone 6 / older,
|
| 46 |
// 993 MB on iPad Pro M-class. Per-buffer cap, not total.
|
| 47 |
// Apple WWDC 2025 "Unlock GPU computing with WebGPU"
|
| 48 |
//
|
| 49 |
-
// - iPhone 12 Pro reports OOM around 1.5–3 GB
|
| 50 |
-
//
|
| 51 |
// developer.apple.com/forums/thread/761666
|
| 52 |
//
|
| 53 |
-
//
|
| 54 |
-
|
| 55 |
-
const
|
| 56 |
-
const
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
function detectMobileFamily() {
|
| 60 |
if (typeof navigator === 'undefined') return null;
|
|
@@ -69,11 +81,11 @@ function detectMobileFamily() {
|
|
| 69 |
return null;
|
| 70 |
}
|
| 71 |
|
| 72 |
-
function
|
| 73 |
-
if (family === 'ipad') return
|
| 74 |
-
if (family === 'iphone') return
|
| 75 |
-
if (family === 'android') return
|
| 76 |
-
return
|
| 77 |
}
|
| 78 |
|
| 79 |
const PROBE_TIMEOUT_MS = 15_000;
|
|
@@ -81,14 +93,10 @@ const GPU_PROBE_STEP_MB = 256;
|
|
| 81 |
const GPU_PROBE_MAX_MB = 8 * 1024;
|
| 82 |
const GPU_PROBE_TIMEOUT_MS = 8_000;
|
| 83 |
|
| 84 |
-
// Working-set floor in the WASM heap
|
| 85 |
-
//
|
| 86 |
-
//
|
| 87 |
-
//
|
| 88 |
-
// against the same number — works as long as tabBudget ≥ 500, but iPhone
|
| 89 |
-
// is below that, which means iPhone always fails this check. That's
|
| 90 |
-
// intentional: variantFits also has the GPU-fit check, and the floor
|
| 91 |
-
// here is just preventing absurdly-tiny working sets.
|
| 92 |
const HEAP_WORKING_SET_FLOOR_MB = 256;
|
| 93 |
|
| 94 |
// Per-variant overhead added on top of the model file size when checking
|
|
@@ -260,23 +268,19 @@ async function _computeBudget() {
|
|
| 260 |
const mobileFamily = detectMobileFamily();
|
| 261 |
const isMobile = mobileFamily !== null;
|
| 262 |
|
| 263 |
-
// ── Mobile path:
|
| 264 |
-
//
|
| 265 |
-
//
|
| 266 |
-
//
|
| 267 |
-
//
|
| 268 |
-
//
|
| 269 |
-
//
|
| 270 |
-
// from public reports of WebKit/Jetsam thresholds (see comments at
|
| 271 |
-
// top of this file). heapBudgetMB === gpuBudgetMB === tabBudget; the
|
| 272 |
-
// variantFits check ends up being effectively `model + overhead ≤
|
| 273 |
-
// tabBudget`.
|
| 274 |
if (isMobile) {
|
| 275 |
-
const
|
| 276 |
return {
|
| 277 |
-
budgetMB:
|
| 278 |
-
gpuBudgetMB
|
| 279 |
-
heapBudgetMB
|
| 280 |
memGB,
|
| 281 |
quotaMB,
|
| 282 |
probedMB: 0,
|
|
@@ -285,8 +289,8 @@ async function _computeBudget() {
|
|
| 285 |
gpuProbeError: 'skipped on mobile (probes can themselves trip Jetsam)',
|
| 286 |
isMobile: true,
|
| 287 |
mobileFamily,
|
| 288 |
-
source: `mobile
|
| 289 |
-
heapSource:
|
| 290 |
};
|
| 291 |
}
|
| 292 |
|
|
|
|
| 25 |
const HOSTED_QUOTA_FRACTION = 0.4;
|
| 26 |
const HOSTED_QUOTA_CAP_MB = 8 * 1024;
|
| 27 |
|
| 28 |
+
// Mobile per-device budgets. Two independent caps, mirroring the desktop
|
| 29 |
+
// path — model weights stream from OPFS into WebGPU buffers (see
|
| 30 |
+
// bench-worker.js:patchMEMFS / opfsAlloc), so the model size constrains
|
| 31 |
+
// `gpuBudgetMB`, not `heapBudgetMB`. The WASM heap only has to hold the
|
| 32 |
+
// working set (KV cache + ggml compute scratch + JS heap headroom).
|
|
|
|
|
|
|
| 33 |
//
|
| 34 |
+
// Earlier we collapsed both into a single tab budget on the theory that
|
| 35 |
+
// iOS Jetsam treats the whole tab process as one pool, so any allocation
|
| 36 |
+
// counts the same. That's true for Jetsam — but it conflates *where* the
|
| 37 |
+
// memory lives with *how much* the platform can hand out: the WASM heap
|
| 38 |
+
// has a much tighter practical ceiling than the GPU side, and counting
|
| 39 |
+
// model bytes against the heap ceiling rejected models that load fine
|
| 40 |
+
// via OPFS streaming.
|
| 41 |
//
|
| 42 |
+
// Numbers come from public reports / Apple docs:
|
| 43 |
+
//
|
| 44 |
+
// - iPhone WASM practical limit: 300–450 MB → heap budget
|
| 45 |
// lapcatsoftware.com/articles/2026/1/7.html
|
| 46 |
// news.ycombinator.com/item?id=39039593
|
| 47 |
// github.com/emscripten-core/emscripten/issues/19374
|
| 48 |
+
// github.com/godotengine/godot/issues/70621
|
|
|
|
| 49 |
//
|
| 50 |
// - iOS Safari WebGPU maxBufferSize: 256 MB on iPhone 6 / older,
|
| 51 |
// 993 MB on iPad Pro M-class. Per-buffer cap, not total.
|
| 52 |
// Apple WWDC 2025 "Unlock GPU computing with WebGPU"
|
| 53 |
//
|
| 54 |
+
// - iPhone 12 Pro reports tab OOM around 1.5–3 GB; Jetsam intervenes
|
| 55 |
+
// earlier under pressure. We undershoot the lower bound for headroom.
|
| 56 |
// developer.apple.com/forums/thread/761666
|
| 57 |
//
|
| 58 |
+
// Heap budgets = WASM heap practical limits.
|
| 59 |
+
const IPHONE_HEAP_BUDGET_MB = 450;
|
| 60 |
+
const IPAD_HEAP_BUDGET_MB = 1500;
|
| 61 |
+
const ANDROID_HEAP_BUDGET_MB = 800;
|
| 62 |
+
|
| 63 |
+
// GPU budgets = available GPU-buffer capacity for model weights + KV
|
| 64 |
+
// mirror, sized below the Jetsam tab ceiling minus working-set headroom.
|
| 65 |
+
// We can't probe on mobile (the GPU probe itself trips Jetsam — see
|
| 66 |
+
// commit 4f567a5), so these are static per-family estimates.
|
| 67 |
+
const IPHONE_GPU_BUDGET_MB = 1200;
|
| 68 |
+
const IPAD_GPU_BUDGET_MB = 2500;
|
| 69 |
+
const ANDROID_GPU_BUDGET_MB = 1500;
|
| 70 |
|
| 71 |
function detectMobileFamily() {
|
| 72 |
if (typeof navigator === 'undefined') return null;
|
|
|
|
| 81 |
return null;
|
| 82 |
}
|
| 83 |
|
| 84 |
+
function getMobileBudgetMB(family) {
|
| 85 |
+
if (family === 'ipad') return { heap: IPAD_HEAP_BUDGET_MB, gpu: IPAD_GPU_BUDGET_MB };
|
| 86 |
+
if (family === 'iphone') return { heap: IPHONE_HEAP_BUDGET_MB, gpu: IPHONE_GPU_BUDGET_MB };
|
| 87 |
+
if (family === 'android') return { heap: ANDROID_HEAP_BUDGET_MB, gpu: ANDROID_GPU_BUDGET_MB };
|
| 88 |
+
return { heap: IPHONE_HEAP_BUDGET_MB, gpu: IPHONE_GPU_BUDGET_MB }; // safest default
|
| 89 |
}
|
| 90 |
|
| 91 |
const PROBE_TIMEOUT_MS = 15_000;
|
|
|
|
| 93 |
const GPU_PROBE_MAX_MB = 8 * 1024;
|
| 94 |
const GPU_PROBE_TIMEOUT_MS = 8_000;
|
| 95 |
|
| 96 |
+
// Working-set floor in the WASM heap. KV cache + compute buffers + JS
|
| 97 |
+
// heap headroom for a typical 1B model at n_ctx=2048 add up to a few
|
| 98 |
+
// hundred MB. Floor at 256 so an absurdly-tiny heap (or a probe failure
|
| 99 |
+
// that returned 0) doesn't pass variantFits.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
const HEAP_WORKING_SET_FLOOR_MB = 256;
|
| 101 |
|
| 102 |
// Per-variant overhead added on top of the model file size when checking
|
|
|
|
| 268 |
const mobileFamily = detectMobileFamily();
|
| 269 |
const isMobile = mobileFamily !== null;
|
| 270 |
|
| 271 |
+
// ── Mobile path: static per-family budgets, separate heap and GPU ──
|
| 272 |
+
// Same shape as desktop (independent gpuBudgetMB / heapBudgetMB) so
|
| 273 |
+
// variantFits checks `model + overhead ≤ gpuBudget` against the GPU-
|
| 274 |
+
// resident weights and `heapBudget ≥ working-set floor` against the
|
| 275 |
+
// WASM-heap working set. We can't probe on mobile (both probes can
|
| 276 |
+
// themselves trip Jetsam — see commits 4f567a5 and 6f33b5d), so we use
|
| 277 |
+
// researched per-family numbers from the constants block above.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
if (isMobile) {
|
| 279 |
+
const { heap: heapBudgetMB, gpu: gpuBudgetMB } = getMobileBudgetMB(mobileFamily);
|
| 280 |
return {
|
| 281 |
+
budgetMB: gpuBudgetMB,
|
| 282 |
+
gpuBudgetMB,
|
| 283 |
+
heapBudgetMB,
|
| 284 |
memGB,
|
| 285 |
quotaMB,
|
| 286 |
probedMB: 0,
|
|
|
|
| 289 |
gpuProbeError: 'skipped on mobile (probes can themselves trip Jetsam)',
|
| 290 |
isMobile: true,
|
| 291 |
mobileFamily,
|
| 292 |
+
source: `mobile static budget — ${mobileFamily} (GPU ${gpuBudgetMB} MB for OPFS-streamed weights)`,
|
| 293 |
+
heapSource: `mobile static budget — ${mobileFamily} (WASM heap ${heapBudgetMB} MB for KV + compute scratch)`,
|
| 294 |
};
|
| 295 |
}
|
| 296 |
|