File size: 25,468 Bytes
4721a6e
 
 
 
 
 
2dc46fb
4721a6e
 
 
df975ba
67d4061
4721a6e
 
 
 
 
 
 
 
 
 
 
 
 
67d4061
4721a6e
 
 
2dc46fb
67d4061
 
df975ba
4721a6e
 
 
 
 
 
 
 
 
 
 
 
 
 
2dc46fb
 
4721a6e
 
 
c104969
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d19cea
c104969
1d19cea
 
c104969
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d19cea
 
 
 
c104969
 
 
 
 
 
 
 
 
4748b05
 
 
 
 
 
 
 
 
 
c104969
 
 
78b069f
 
4721a6e
78b069f
4721a6e
 
2ee9bac
 
 
 
4748b05
2ee9bac
 
 
 
 
 
 
 
 
 
4721a6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2174987
4721a6e
 
 
 
 
 
 
 
 
 
 
bc8e1d3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4721a6e
2dc46fb
4721a6e
df975ba
 
 
 
 
 
 
4721a6e
 
 
 
 
 
 
 
2dc46fb
 
 
 
 
 
 
 
4721a6e
e72601b
 
 
 
 
 
 
ee944ff
 
 
 
e72601b
 
4721a6e
 
629e542
1d19cea
629e542
1d19cea
 
629e542
 
2dc46fb
 
 
df975ba
 
 
 
 
 
 
2dc46fb
 
df975ba
 
 
 
 
 
 
 
 
 
 
4721a6e
 
9f7edbf
4721a6e
 
 
 
 
 
 
 
 
2dc46fb
4721a6e
 
 
 
 
00eca1c
 
 
 
 
 
 
 
 
 
 
 
 
 
4721a6e
 
2dc46fb
4721a6e
 
 
 
 
 
2ee9bac
 
 
 
2dc46fb
4721a6e
 
 
 
 
 
 
 
 
 
 
20b03c5
 
 
 
4721a6e
 
 
 
 
67d4061
4721a6e
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
<!DOCTYPE html>
<html lang="en" data-theme="light">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <meta name="color-scheme" content="light dark">
  <!-- Theme bootstrap. A classic (non-module) inline script in <head>, so it
       runs synchronously while the document is parsed and sets data-theme on
       <html> before <body> exists. Order of preference: value saved under the
       "theme" localStorage key, then the OS colour-scheme preference, then
       "light". -->
  <script>
    (function () {
      var theme = null;
      try {
        theme = localStorage.getItem('theme');
      } catch (e) {
        // Storage access can throw (e.g. SecurityError when storage is blocked
        // for this origin). Fall through to the system preference instead of
        // aborting and leaving the hard-coded default in place.
      }
      if (!theme) {
        theme = (window.matchMedia && matchMedia('(prefers-color-scheme: dark)').matches) ? 'dark' : 'light';
      }
      document.documentElement.setAttribute('data-theme', theme);
    })();
  </script>
  <title>Run — WebGPU Bench</title>
  <link rel="preconnect" href="https://fonts.googleapis.com">
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  <link href="https://fonts.googleapis.com/css2?family=Bricolage+Grotesque:opsz,wght@12..96,400;12..96,500;12..96,600;12..96,700;12..96,800&family=Geist+Mono:wght@400;500;600&display=swap" rel="stylesheet">
  <link rel="stylesheet" href="css/style.css">
  <!-- Import map so `@huggingface/hub` resolves in the browser via esm.sh.
       Must appear before any <script type="module">.
       NOTE(review): the URL below carries no version, so presumably esm.sh
       serves whatever release is current at load time. Consider pinning an
       explicit version so the page does not change behaviour on upstream
       releases. -->
  <script type="importmap">
    {
      "imports": {
        "@huggingface/hub": "https://esm.sh/@huggingface/hub"
      }
    }
  </script>
</head>
<body>
  <!-- Site header: brand link, primary navigation, theme toggle, GitHub link.
       Every inline SVG here is decorative (the adjacent text or the button's
       aria-label already names the control), so each one is hidden from
       assistive technology with aria-hidden="true". -->
  <header class="header">
    <div class="header-inner">
      <a href="index.html" class="header-brand">
        <svg class="header-logo" width="22" height="22" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><rect x="4" y="4" width="16" height="16" rx="2"/><rect x="9" y="9" width="6" height="6"/><line x1="9" y1="1" x2="9" y2="4"/><line x1="15" y1="1" x2="15" y2="4"/><line x1="9" y1="20" x2="9" y2="23"/><line x1="15" y1="20" x2="15" y2="23"/><line x1="20" y1="9" x2="23" y2="9"/><line x1="20" y1="14" x2="23" y2="14"/><line x1="1" y1="9" x2="4" y2="9"/><line x1="1" y1="14" x2="4" y2="14"/></svg>
        <span class="header-title">WebGPU Bench</span>
      </a>
      <nav class="header-nav" aria-label="Primary">
        <a href="index.html" class="header-link">Dashboard</a>
        <a href="methodology.html" class="header-link">Methodology</a>
        <!-- Icon-only button: the accessible name comes from aria-label. -->
        <button id="theme-toggle" class="header-link theme-toggle-btn" type="button" title="Toggle theme" aria-label="Toggle dark mode">
          <svg class="icon-sun" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><circle cx="12" cy="12" r="5"/><line x1="12" y1="1" x2="12" y2="3"/><line x1="12" y1="21" x2="12" y2="23"/><line x1="4.22" y1="4.22" x2="5.64" y2="5.64"/><line x1="18.36" y1="18.36" x2="19.78" y2="19.78"/><line x1="1" y1="12" x2="3" y2="12"/><line x1="21" y1="12" x2="23" y2="12"/><line x1="4.22" y1="19.78" x2="5.64" y2="18.36"/><line x1="18.36" y1="5.64" x2="19.78" y2="4.22"/></svg>
          <svg class="icon-moon" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><path d="M21 12.79A9 9 0 1 1 11.21 3 7 7 0 0 0 21 12.79z"/></svg>
        </button>
        <a href="https://github.com/abhijitramesh/webgpu-bench" target="_blank" rel="noopener" class="header-link">
          <svg width="16" height="16" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true"><path d="M12 0C5.37 0 0 5.37 0 12c0 5.3 3.44 9.8 8.2 11.39.6.11.82-.26.82-.58v-2.03c-3.34.73-4.04-1.61-4.04-1.61-.55-1.39-1.34-1.76-1.34-1.76-1.09-.74.08-.73.08-.73 1.2.09 1.84 1.24 1.84 1.24 1.07 1.83 2.81 1.3 3.5 1 .11-.78.42-1.3.76-1.6-2.67-.3-5.47-1.33-5.47-5.93 0-1.31.47-2.38 1.24-3.22-.13-.3-.54-1.52.12-3.18 0 0 1.01-.32 3.3 1.23a11.5 11.5 0 0 1 6.02 0c2.28-1.55 3.29-1.23 3.29-1.23.66 1.66.25 2.88.12 3.18.77.84 1.24 1.91 1.24 3.22 0 4.61-2.81 5.63-5.48 5.92.43.37.81 1.1.81 2.22v3.29c0 .32.22.7.82.58C20.57 21.8 24 17.3 24 12c0-6.63-5.37-12-12-12z"/></svg>
          GitHub
        </a>
      </nav>
    </div>
  </header>

  <main>
    <section id="run-section" class="dash-section">
      <div class="container">
        <div class="run-hero">
          <h1 class="run-hero-title">Run a benchmark</h1>
          <span id="run-mode-badge" class="badge run-mode-badge"></span>
        </div>

        <!-- How-to-use guide — collapsible so it doesn't dominate the page
             for repeat users, but open by default so first-timers see it. -->
        <details class="card run-instructions" open>
          <summary><strong>How to use this page</strong> — quick start, run modes, and configuration knobs</summary>
          <div class="run-instructions-body">
            <section>
              <h3>Quick start</h3>
              <ol>
                <li>The three cards below this guide show your <strong>device</strong>, its <strong>capabilities</strong>, and the safe <strong>model budget</strong> (largest variant that should fit without exhausting GPU/WASM memory).</li>
                <li>Pick variants in the <strong>Models</strong> panel below — or use the <em>Quick set</em> / <em>All fit</em> selection buttons.</li>
                <li>Click <strong>Run study</strong> for a curated comparison set, <strong>Run benchmarks</strong> for whatever you've checked, or <strong>Download selected</strong> to just cache files for later.</li>
              </ol>
            </section>

            <section>
              <h3>Run modes</h3>
              <dl class="run-instructions-dl">
                <dt><strong>Run study</strong></dt>
                <dd>Curated, opinionated set: <code>Llama-3.2-1B-Instruct</code> at <code>Q2_K</code> / <code>Q4_K_M</code> / <code>Q8_0</code> / <code>F16</code>, plus every other model at <code>Q4_K_M</code>, filtered to what fits this device. Use this for apples-to-apples comparison across machines.</dd>
                <dt><strong>Run benchmarks</strong></dt>
                <dd>Runs whatever variants you've explicitly checked in the Models panel. Use this when you want to drill into a specific model or quant family.</dd>
                <dt><strong>Download selected</strong></dt>
                <dd>Caches the checked variants to OPFS without running them. Useful for pre-warming before a flight or running offline later.</dd>
                <dt><strong>Abort</strong></dt>
                <dd>Cancels the in-flight worker AND any concurrent download. Aborted variants get an "aborted" status; subsequent variants do not run.</dd>
                <dt><strong>Purge OPFS cache</strong></dt>
                <dd>Wipes every cached GGUF from browser storage. Only shown on hosted surfaces; helpful when the device is low on disk.</dd>
              </dl>
            </section>

            <section>
              <h3>What each run does</h3>
              <p>By default each variant runs the GPU perf pass only. The CPU baseline is opt-in via the <em>Run</em> toggles below — flip them on if you want consistency or CPU-vs-GPU comparison numbers.</p>
              <ol>
                <li><strong>GPU pass (always)</strong> — runs a llama-bench-style perf sweep: one warmup + <code>reps</code> timed reps for both prompt processing (<code>pp</code>) and token generation (<code>tg</code>). 1-second cooldown between reps lets the GPU clock recover, so reps stay comparable instead of decaying across the sweep.</li>
                <li><strong>CPU baseline (opt-in)</strong> — when <em>Run: Consistency</em> or <em>Run: CPU perf</em> is on, the CPU pass runs first to capture reference token IDs (for the GPU forced-decode agreement check) and/or a 1-rep CPU perf sample (for CPU vs GPU dashboard cells).</li>
              </ol>
            </section>

            <section>
              <h3>Configuration</h3>
              <dl class="run-instructions-dl">
                <dt><strong>Search / Hide</strong></dt>
                <dd>Filter the Models panel. <em>Hide</em> toggles drop UD (Unsloth dynamic), IQ (i-quants), and BF16/F16 high-precision variants — useful when you only care about the "standard" K-quant lineup.</dd>
                <dt><strong>Select: Quick set / All fit / None</strong></dt>
                <dd><em>Quick set</em> = the same variants Run study uses. <em>All fit</em> = every variant under your device budget. <em>None</em> = clear the selection.</dd>
                <dt><strong>Prompt tokens (-p)</strong></dt>
                <dd>How many synthetic tokens go through prompt processing for the <code>pp</code> test. Default <code>512</code>. Larger = more compute-bound.</dd>
                <dt><strong>Gen tokens (-n)</strong></dt>
                <dd>How many tokens are generated for the <code>tg</code> test. Default <code>128</code>. Larger values stress sustained decode bandwidth.</dd>
                <dt><strong>Reps (-r)</strong></dt>
                <dd>Timed repetitions of each test (after warmup). Default <code>5</code>. The reported figure is mean ± stddev across the reps.</dd>
                <dt><strong>Run: Consistency</strong></dt>
                <dd>Off by default. Turns on the CPU-baseline + GPU-forced-decode agreement check. Opt in when you want to verify CPU and GPU produce matching tokens for a given variant.</dd>
                <dt><strong>Run: CPU perf</strong></dt>
                <dd>Off by default. Turns on a 1-rep CPU perf baseline alongside the GPU pass so the dashboard's CPU vs GPU cells have data. Opt in when you want apples-to-apples CPU↔GPU numbers; leave off to keep runs short.</dd>
                <dt><strong>Evict cached GGUFs after each run</strong></dt>
                <dd>Frees OPFS storage as the queue moves through variants. Only evicts files this session downloaded — files you cached before the run are preserved.</dd>
              </dl>
            </section>

            <section>
              <h3>Reading the output</h3>
              <p>The <strong>Progress</strong> table updates per-variant with live status (download → cpu → gpu → done). Final per-variant numbers fill the <code>pp tok/s</code> and <code>tg tok/s</code> columns as <code>mean ± stddev</code>. The <strong>Output</strong> textarea below contains the full JSON record — copy it, download it, or (on the HF Space) sign in to submit to the leaderboard.</p>
            </section>

            <section>
              <h3>Known issues</h3>
              <dl class="run-instructions-dl">
                <dt><strong>Safari Private Browsing</strong></dt>
                <dd>Don't use it. Private mode caps OPFS storage at a few hundred MB per tab, denies <code>navigator.storage.persist()</code>, and routes the file system through an ephemeral backend with stricter sync-handle limits. Downloads fail mid-stream with <em>"operation failed for an unknown transient reason"</em> and retries don't help — the cause isn't transient, it's the browsing mode. Switch to a regular Safari window.</dd>
                <dt><strong>Mobile tabs (iOS/Android)</strong></dt>
                <dd>iOS Jetsam and Android's low-memory killer reap tabs that approach the per-process memory ceiling, often silently reloading the page. The Run page applies tighter budgets and forced-eviction on mobile to mitigate, but variants near the budget edge can still crash the tab. Run from a desktop for stable numbers.</dd>
              </dl>
            </section>
          </div>
        </details>

        <!-- Read-only banner (shown on any hosted surface that isn't the HF
             Space — e.g. a mirror or preview deploy where OAuth isn't set up). -->
        <div id="run-pages-banner" class="run-pages-banner" hidden>
          <span>Read-only mode — to submit benchmarks, open the canonical <a href="https://abhijitramesh-webgpu-bench.static.hf.space/run.html" target="_blank" rel="noopener">HF Space</a>.</span>
        </div>

        <!-- Mobile-device warning: tabs on iOS/Android get reaped under
             memory pressure, so benchmarks often crash and the tab silently
             reloads. Shown by the controller when isMobileDevice() is true. -->
        <div id="run-mobile-banner" class="run-pages-banner" hidden>
          <span>Mobile device detected — the per-tab memory budget is tight, and larger quants will likely crash this tab. <strong>If you're on Safari, do not use Private Browsing</strong>: it caps OPFS storage at a few hundred MB and disables persistent storage, so model downloads fail with "operation failed for an unknown transient reason." For representative numbers, run from a laptop or desktop.</span>
        </div>

        <!-- Crash-recovery banner: set by the controller when a previous
             Run started but never posted a success, suggesting the tab got
             reaped mid-run. Appears on the next page load. -->
        <div id="run-crash-banner" class="run-pages-banner" hidden>
          <span id="run-crash-banner-text"></span>
          <button id="run-crash-banner-dismiss" class="btn btn-secondary btn-xs" type="button">Dismiss</button>
        </div>

        <!-- Device & budget -->
        <!-- Three stat cards: Device, Capability, Model budget. Every value
             element (the device-* ids) is empty in the markup; presumably the
             run controller fills them in after mount. Confirm against
             js/run/controller.js before renaming any id. -->
        <div class="summary-grid run-device-grid">
          <div class="stat-card run-device-card">
            <span class="stat-card-label">Device</span>
            <div class="run-device-rows">
              <div class="run-device-row"><span class="run-device-row-label">Browser</span><span class="run-device-row-value" id="device-browser"></span></div>
              <div class="run-device-row"><span class="run-device-row-label">Platform</span><span class="run-device-row-value" id="device-platform"></span></div>
              <div class="run-device-row"><span class="run-device-row-label">GPU</span><span class="run-device-row-value" id="device-gpu"></span></div>
            </div>
          </div>
          <div class="stat-card run-device-card">
            <span class="stat-card-label">Capability</span>
            <div class="run-device-rows">
              <div class="run-device-row"><span class="run-device-row-label">deviceMemory</span><span class="run-device-row-value" id="device-memory"></span></div>
              <div class="run-device-row"><span class="run-device-row-label">WebGPU</span><span class="run-device-row-value" id="device-webgpu"></span></div>
              <div class="run-device-row"><span class="run-device-row-label">llama.cpp</span><span class="run-device-row-value" id="device-llamacpp"></span></div>
            </div>
          </div>
          <div class="stat-card run-device-card">
            <span class="stat-card-label">Model budget</span>
            <div class="run-device-rows">
              <div class="run-device-row"><span class="run-device-row-label">Max size</span><span class="run-device-row-value" id="device-budget"></span></div>
              <!-- Free-text note under the budget figure; empty until populated. -->
              <div class="run-device-note" id="device-budget-source"></div>
            </div>
          </div>
        </div>

        <!-- User-reported machine identity. The auto-detected values in the
             cards above are unreliable (UA strings lie, deviceMemory is
             coarse, GPU adapter info is often empty). We ship these
             user-typed fields alongside the auto-detected ones so the
             leaderboard can attribute submissions correctly. Persisted to
             localStorage between visits. -->
        <!-- "Your machine" card: four free-text fields typed by the user.
             Required fields are marked visually with an asterisk that is
             aria-hidden, so each required input also carries
             aria-required="true" to expose the same information to assistive
             technology. aria-required is used rather than the native
             `required` attribute because these inputs are not inside a <form>
             and native constraint validation / :invalid styling is not wanted. -->
        <details class="card run-machine-card" id="user-reported-card" open>
          <summary>
            <strong>Your machine</strong> — labels the auto-detected device data on submission. Saved between visits.
          </summary>
          <div class="run-machine-grid">
            <label class="run-machine-field">
              <span class="run-machine-label">Machine name <span class="run-machine-req" aria-hidden="true">*</span></span>
              <input type="text" id="ur-machine-name" class="run-machine-input" placeholder="e.g. MacBook Pro M3 16GB" autocomplete="off" spellcheck="false" aria-required="true">
            </label>
            <label class="run-machine-field">
              <span class="run-machine-label">GPU name <span class="run-machine-opt">(optional)</span></span>
              <input type="text" id="ur-gpu-name" class="run-machine-input" placeholder="e.g. Apple M3 Pro" autocomplete="off" spellcheck="false">
            </label>
            <label class="run-machine-field">
              <span class="run-machine-label">Browser <span class="run-machine-req" aria-hidden="true">*</span></span>
              <input type="text" id="ur-browser" class="run-machine-input" placeholder="e.g. Chrome 138 dev" autocomplete="off" spellcheck="false" aria-required="true">
            </label>
            <label class="run-machine-field">
              <span class="run-machine-label">Operating system <span class="run-machine-req" aria-hidden="true">*</span></span>
              <input type="text" id="ur-os" class="run-machine-input" placeholder="e.g. macOS 15.4" autocomplete="off" spellcheck="false" aria-required="true">
            </label>
          </div>
          <p class="run-machine-hint" id="ur-hint">Required fields marked <span class="run-machine-req">*</span>. Defaults are filled in from your browser; edit anything that's wrong before running.</p>
        </details>

        <!-- Search, hide filters, selection shortcuts, benchmark parameters, and run toggles -->
        <!-- Filter bar. Single inputs are tied to their caption with
             <label for>. The three multi-control clusters (Hide, Select, Run)
             are captioned by a plain <span>, which has no programmatic
             association, so each cluster wrapper is exposed as a named group
             (role="group" + aria-label matching the visible caption). -->
        <div class="filter-bar run-controls">
          <div class="filter-bar-inner run-filters">
            <div class="filter-group filter-group--search">
              <label class="filter-label" for="family-search">Search</label>
              <div class="run-search-wrapper">
                <svg class="run-search-icon" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><circle cx="11" cy="11" r="8"/><line x1="21" y1="21" x2="16.65" y2="16.65"/></svg>
                <input type="search" id="family-search" class="run-search-input" placeholder="Filter models…" autocomplete="off" spellcheck="false">
              </div>
            </div>
            <div class="filter-group">
              <span class="filter-label">Hide</span>
              <div class="run-filters-checks" role="group" aria-label="Hide">
                <label class="run-hide-label"><input type="checkbox" id="hide-ud"> UD</label>
                <label class="run-hide-label"><input type="checkbox" id="hide-iq"> IQ</label>
                <label class="run-hide-label"><input type="checkbox" id="hide-hifp"> BF16/F16</label>
              </div>
            </div>
            <div class="filter-group">
              <span class="filter-label">Select</span>
              <div class="run-filters-checks" role="group" aria-label="Select">
                <button class="btn btn-secondary btn-xs" id="btn-select-quick" type="button">Quick set</button>
                <button class="btn btn-secondary btn-xs" id="btn-select-fit" type="button">All fit</button>
                <button class="btn btn-secondary btn-xs" id="btn-select-none" type="button">None</button>
              </div>
            </div>
            <div class="filter-group">
              <label class="filter-label" for="n-prompt-input">Prompt tokens (-p)</label>
              <input type="number" id="n-prompt-input" class="filter-select run-iter-input" value="512" min="0" max="4096" step="1">
            </div>
            <div class="filter-group">
              <label class="filter-label" for="n-gen-input">Gen tokens (-n)</label>
              <input type="number" id="n-gen-input" class="filter-select run-iter-input" value="128" min="0" max="4096" step="1">
            </div>
            <div class="filter-group">
              <label class="filter-label" for="n-depth-input">KV depth (-d)</label>
              <input type="number" id="n-depth-input" class="filter-select run-iter-input" value="2048" min="0" max="32768" step="1">
            </div>
            <div class="filter-group">
              <label class="filter-label" for="iterations-input">Reps (-r)</label>
              <input type="number" id="iterations-input" class="filter-select run-iter-input" value="5" min="1" max="50" step="1">
            </div>
            <div class="filter-group">
              <span class="filter-label">Run</span>
              <div class="run-filters-checks" role="group" aria-label="Run">
                <label class="run-hide-label" title="Run the consistency check (CPU baseline + GPU forced-decode agreement). Off by default — opt in to verify CPU and GPU produce matching tokens."><input type="checkbox" id="run-consistency"> Consistency</label>
                <label class="run-hide-label" title="Run a single-rep CPU perf baseline alongside the GPU pass. Off by default — opt in for CPU vs GPU comparison numbers."><input type="checkbox" id="run-cpu-perf"> CPU perf</label>
              </div>
            </div>
          </div>
        </div>

        <!-- Family / variant list -->
        <div id="run-models" class="run-models-stack">
          <div class="empty-state">Loading models…</div>
        </div>

        <!-- Action bar: lives just above Progress so the Run/Abort/Download
             controls are co-located with the live status they affect. -->
        <!-- Initial states as shipped in the markup: #run-budget,
             #queue-status, #btn-abort and #btn-purge start hidden;
             #btn-download and #btn-run start disabled; #btn-run-study is
             enabled from the first paint. Presumably the run controller flips
             these as the selection and run state change. -->
        <div class="run-action-bar">
          <div class="run-action-bar-inner">
            <div class="run-budget" id="run-budget" hidden>
              <div class="run-budget-row">
                <span class="run-budget-label">Selected</span>
                <span class="run-budget-text" id="run-budget-text"></span>
              </div>
              <!-- No aria-valuenow in the markup. NOTE(review): confirm the
                   controller sets it, otherwise the bar is exposed as
                   indeterminate. -->
              <div class="run-budget-bar" role="progressbar" aria-labelledby="run-budget-text">
                <div class="run-budget-bar-fill" id="run-budget-fill"></div>
              </div>
              <div class="run-budget-meta" id="run-budget-meta"></div>
            </div>
            <span id="queue-status" class="run-queue-status" hidden></span>
            <div class="run-actions">
              <button class="btn btn-secondary" id="btn-download" type="button" disabled>Download selected</button>
              <button class="btn btn-primary" id="btn-run-study" type="button" title="Llama-3.2-1B-Instruct at Q2_K / Q4_K_M / Q8_0 / F16, plus every other model at Q4_K_M, filtered to what fits this device.">Run study</button>
              <button class="btn btn-primary" id="btn-run" type="button" disabled>Run benchmarks</button>
              <button class="btn btn-danger" id="btn-abort" type="button" hidden>Abort</button>
              <button class="btn btn-secondary" id="btn-purge" type="button" hidden>Purge OPFS cache</button>
            </div>
          </div>
        </div>

        <!-- Progress -->
        <div class="section-header">
          <h2 class="subsection-title">Progress</h2>
        </div>
        <div class="table-card">
          <div id="run-progress-wrapper" class="results-wrapper"></div>
        </div>

        <!-- HF sign-in + submit (space surface only) — sits between Progress
             and Output so users land on it once they have results to push. -->
        <div id="hub-row" class="card hub-row" hidden>
          <div class="hub-row-inner">
            <div class="hub-row-info">
              <span id="hf-user"></span>
            </div>
            <div class="hub-row-actions">
              <button id="btn-signin" class="btn btn-secondary" type="button">Sign in with Hugging Face</button>
              <button id="btn-submit" class="btn btn-primary" type="button" disabled hidden>Submit to leaderboard</button>
            </div>
          </div>
        </div>

        <!-- Output -->
        <div class="section-header" style="margin-top: 32px;">
          <h2 class="subsection-title">Output</h2>
        </div>
        <div class="card run-output">
          <label id="save-local-row" class="run-output-toggle" hidden>
            <input type="checkbox" id="save-local" checked>
            Save to <code>results/results.json</code> on this server
          </label>
          <label id="evict-after-row" class="run-output-toggle">
            <input type="checkbox" id="evict-after-run">
            Evict cached GGUFs after each run (frees disk/OPFS as the queue moves)
          </label>
          <textarea id="output-textarea" class="run-output-textarea" readonly spellcheck="false" aria-label="Benchmark results output (JSON)" placeholder="Run benchmarks to generate output here…"></textarea>
          <div class="run-output-buttons">
            <button class="btn btn-secondary" id="btn-copy" type="button">Copy</button>
            <button class="btn btn-secondary" id="btn-download-json" type="button">Download JSON</button>
          </div>
        </div>

        <!-- Log -->
        <details id="run-log" class="card run-log" style="margin-top: 16px;">
          <summary>Run log</summary>
          <pre id="log-output" class="run-log-pre"></pre>
        </details>
        <details id="run-wasm-errors" class="card run-log" style="margin-top: 12px;">
          <summary>WASM errors</summary>
          <pre id="wasm-error-output" class="run-log-pre" aria-label="WASM error log"></pre>
        </details>
      </div>
    </section>
  </main>

  <script type="module">
    import { mountRunSection } from './js/run/controller.js';

    // Theme toggle wiring (kept here since app.js no longer runs on this page).
    // Flips data-theme on <html> between "dark" and "light" and remembers the
    // choice under the "theme" localStorage key, which the bootstrap script in
    // <head> reads on the next load.
    document.getElementById('theme-toggle')?.addEventListener('click', () => {
      const root = document.documentElement;
      const next = root.getAttribute('data-theme') === 'dark' ? 'light' : 'dark';
      root.setAttribute('data-theme', next);
      try {
        localStorage.setItem('theme', next);
      } catch {
        // Persisting is best-effort: storage may be blocked or full. The theme
        // has already been applied for this page view, so there is nothing to
        // roll back and no reason to surface an uncaught error per click.
      }
    });

    // Hand the rest of the page over to the run controller.
    mountRunSection();
  </script>
</body>
</html>