GitHub Actions committed on
Commit
e6a49d5
·
1 Parent(s): ecef386

sync from abhijitramesh/webgpu-bench@99feb5a71b

Browse files
Files changed (2) hide show
  1. js/app.js +11 -6
  2. js/data.js +39 -16
js/app.js CHANGED
@@ -1,4 +1,4 @@
1
- import { loadData, filterResults, selectBestResults, expandCpuRows } from './data.js';
2
  import { initFilters, populateQuantOptions, getFilters, resetFilters } from './filters.js';
3
  import { renderDecodeChart, renderPrefillChart, renderSizeChart, renderMachineChart, renderCpuGpuChart, renderSpeedupChart } from './charts.js';
4
  import { renderResultsTable, renderErrorTable, renderMachineInfo, renderCpuGpuTable } from './tables.js';
@@ -77,11 +77,16 @@ function render() {
77
 
78
  const filters = getFilters();
79
  // Filter, then collapse to one canonical row per
80
- // (machine, browser, model, variant). Multiple users may submit results
81
- // for the same hardware bucket; this keeps the row with the highest
82
- // iteration count (tiebreak: most recent) so the leaderboard shows the
83
- // most reliable number per cell rather than averaging noisy duplicates.
84
- const filtered = selectBestResults(filterResults(appData.results, filters));
 
 
 
 
 
85
 
86
  // Summary cards — counts tween from previous value to new on filter changes
87
  // and from 0 on first paint (since `data-value` defaults to "0").
 
1
+ import { loadData, filterResults, selectBestResults, expandCpuRows, withSyntheticCpuRows } from './data.js';
2
  import { initFilters, populateQuantOptions, getFilters, resetFilters } from './filters.js';
3
  import { renderDecodeChart, renderPrefillChart, renderSizeChart, renderMachineChart, renderCpuGpuChart, renderSpeedupChart } from './charts.js';
4
  import { renderResultsTable, renderErrorTable, renderMachineInfo, renderCpuGpuTable } from './tables.js';
 
77
 
78
  const filters = getFilters();
79
  // Filter, then collapse to one canonical row per
80
+ // (machine, browser, model, variant, backend). Multiple users may submit
81
+ // results for the same hardware bucket; this keeps the row with the
82
+ // highest iteration count (tiebreak: most recent) so the leaderboard
83
+ // shows the most reliable number per cell rather than averaging noisy
84
+ // duplicates. withSyntheticCpuRows expands each browser-flow record's
85
+ // cpu_baseline_* into a sibling CPU row so CPU runs (CLI or browser)
86
+ // appear as their own row in the main table.
87
+ const filtered = selectBestResults(
88
+ withSyntheticCpuRows(filterResults(appData.results, filters)),
89
+ );
90
 
91
  // Summary cards — counts tween from previous value to new on filter changes
92
  // and from 0 on first paint (since `data-value` defaults to "0").
js/data.js CHANGED
@@ -129,14 +129,19 @@ function writeSessionCache(data) {
129
  }
130
 
131
  /* Reduce a flat result set down to one canonical row per
132
- (machineSlug, browser, model, variant) cell. Picks the row with the most
133
- iterations; ties break on latest timestamp. This is the leaderboard view —
134
- "best representative number per cell" — and is what the dashboard renders
135
- in the table, charts, and stat cards. */
 
 
 
 
136
  export function selectBestResults(records) {
137
  const bestByCell = new Map();
138
  for (const r of records) {
139
- const key = `${r.machineSlug}|${r.browser}|${r.model}|${r.variant}`;
 
140
  const cur = bestByCell.get(key);
141
  if (!cur) {
142
  bestByCell.set(key, r);
@@ -155,27 +160,45 @@ export function selectBestResults(records) {
155
  return [...bestByCell.values()];
156
  }
157
 
158
- /* Synthesize "CPU only" rows for the CPU-vs-GPU views.
159
- Two record sources contribute:
160
- 1) Real CPU runs (nGpuLayers === 0) β€” produced by the CLI runner which
161
- alternates CPU and GPU passes.
162
- 2) The cpu_baseline_* fields on every browser-flow record β€” the in-page
163
- bench measures one CPU pass per variant alongside the GPU iterations
164
- and stamps the result on the same record. We turn each of those into
165
- a synthetic CPU row so the comparison view sees both data shapes.
166
- */
167
  export function expandCpuRows(results) {
168
  const real = results.filter(r => r.nGpuLayers === 0);
169
- const synthetic = results
 
 
 
 
 
 
 
 
 
 
 
 
170
  .filter(r => r.nGpuLayers !== 0
171
  && (r.cpu_baseline_decode_tok_s != null || r.cpu_baseline_prefill_tok_s != null))
172
  .map(r => ({
173
  ...r,
174
  decode_tok_s: r.cpu_baseline_decode_tok_s,
175
  prefill_tok_s: r.cpu_baseline_prefill_tok_s,
 
 
 
 
 
 
 
 
 
 
 
176
  nGpuLayers: 0,
177
  }));
178
- return [...real, ...synthetic];
179
  }
180
 
181
  export function filterResults(results, filters) {
 
129
  }
130
 
131
  /* Reduce a flat result set down to one canonical row per
132
+ (machineSlug, browser, model, variant, backend) cell. Picks the row with
133
+ the most iterations; ties break on latest timestamp. This is the
134
+ leaderboard view — "best representative number per cell" — and is what
135
+ the dashboard renders in the table, charts, and stat cards.
136
+
137
+ `backend` (CPU vs GPU, derived from nGpuLayers) is part of the key so
138
+ CLI CPU+GPU pairs and browser-flow synthetic CPU rows don't collapse
139
+ into the GPU row. */
140
  export function selectBestResults(records) {
141
  const bestByCell = new Map();
142
  for (const r of records) {
143
+ const backend = r.nGpuLayers === 0 ? 'cpu' : 'gpu';
144
+ const key = `${r.machineSlug}|${r.browser}|${r.model}|${r.variant}|${backend}`;
145
  const cur = bestByCell.get(key);
146
  if (!cur) {
147
  bestByCell.set(key, r);
 
160
  return [...bestByCell.values()];
161
  }
162
 
163
+ /* Synthesize a CPU row for every browser-flow GPU record (the in-page
164
+ bench measures one CPU pass per variant alongside the GPU iterations
165
+ and stamps the result on the same record via cpu_baseline_*). Returns
166
+ only CPU rows — combine real (nGpuLayers === 0) and synthetic ones.
167
+ Used by the CPU-vs-GPU views which want the CPU subset only. */
 
 
 
 
168
  export function expandCpuRows(results) {
169
  const real = results.filter(r => r.nGpuLayers === 0);
170
+ const synthetic = synthesizeCpuRowsFromBaseline(results);
171
+ return [...real, ...synthetic];
172
+ }
173
+
174
+ /* Same synthesis as expandCpuRows but returns the originals plus the
175
+ synthesized CPU rows — for the main results table where we want both
176
+ GPU and CPU rows visible. */
177
+ export function withSyntheticCpuRows(results) {
178
+ return [...results, ...synthesizeCpuRowsFromBaseline(results)];
179
+ }
180
+
181
+ function synthesizeCpuRowsFromBaseline(results) {
182
+ return results
183
  .filter(r => r.nGpuLayers !== 0
184
  && (r.cpu_baseline_decode_tok_s != null || r.cpu_baseline_prefill_tok_s != null))
185
  .map(r => ({
186
  ...r,
187
  decode_tok_s: r.cpu_baseline_decode_tok_s,
188
  prefill_tok_s: r.cpu_baseline_prefill_tok_s,
189
+ // CPU baseline runs have no t_eval / n_eval breakdowns — null those
190
+ // out so the table doesn't show stale GPU numbers in CPU rows.
191
+ n_eval: null,
192
+ t_eval_ms: null,
193
+ n_p_eval: null,
194
+ t_p_eval_ms: null,
195
+ // Strip the embedded baseline from synthetic CPU rows so the
196
+ // "CPU decode tok/s" column doesn't duplicate the row's own metric.
197
+ cpu_baseline_decode_tok_s: null,
198
+ cpu_baseline_prefill_tok_s: null,
199
+ cpu_baseline: null,
200
  nGpuLayers: 0,
201
  }));
 
202
  }
203
 
204
  export function filterResults(results, filters) {