Spaces:
Running
Running
GitHub Actions committed on
Commit ·
e6a49d5
1
Parent(s): ecef386
sync from abhijitramesh/webgpu-bench@99feb5a71b
Browse files- js/app.js +11 -6
- js/data.js +39 -16
js/app.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
import { loadData, filterResults, selectBestResults, expandCpuRows } from './data.js';
|
| 2 |
import { initFilters, populateQuantOptions, getFilters, resetFilters } from './filters.js';
|
| 3 |
import { renderDecodeChart, renderPrefillChart, renderSizeChart, renderMachineChart, renderCpuGpuChart, renderSpeedupChart } from './charts.js';
|
| 4 |
import { renderResultsTable, renderErrorTable, renderMachineInfo, renderCpuGpuTable } from './tables.js';
|
|
@@ -77,11 +77,16 @@ function render() {
|
|
| 77 |
|
| 78 |
const filters = getFilters();
|
| 79 |
// Filter, then collapse to one canonical row per
|
| 80 |
-
// (machine, browser, model, variant). Multiple users may submit
|
| 81 |
-
// for the same hardware bucket; this keeps the row with the
|
| 82 |
-
// iteration count (tiebreak: most recent) so the leaderboard
|
| 83 |
-
// most reliable number per cell rather than averaging noisy
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
// Summary cards — counts tween from previous value to new on filter changes
|
| 87 |
// and from 0 on first paint (since `data-value` defaults to "0").
|
|
|
|
| 1 |
+
import { loadData, filterResults, selectBestResults, expandCpuRows, withSyntheticCpuRows } from './data.js';
|
| 2 |
import { initFilters, populateQuantOptions, getFilters, resetFilters } from './filters.js';
|
| 3 |
import { renderDecodeChart, renderPrefillChart, renderSizeChart, renderMachineChart, renderCpuGpuChart, renderSpeedupChart } from './charts.js';
|
| 4 |
import { renderResultsTable, renderErrorTable, renderMachineInfo, renderCpuGpuTable } from './tables.js';
|
|
|
|
| 77 |
|
| 78 |
const filters = getFilters();
|
| 79 |
// Filter, then collapse to one canonical row per
|
| 80 |
+
// (machine, browser, model, variant, backend). Multiple users may submit
|
| 81 |
+
// results for the same hardware bucket; this keeps the row with the
|
| 82 |
+
// highest iteration count (tiebreak: most recent) so the leaderboard
|
| 83 |
+
// shows the most reliable number per cell rather than averaging noisy
|
| 84 |
+
// duplicates. withSyntheticCpuRows expands each browser-flow record's
|
| 85 |
+
// cpu_baseline_* into a sibling CPU row so CPU runs (CLI or browser)
|
| 86 |
+
// appear as their own row in the main table.
|
| 87 |
+
const filtered = selectBestResults(
|
| 88 |
+
withSyntheticCpuRows(filterResults(appData.results, filters)),
|
| 89 |
+
);
|
| 90 |
|
| 91 |
// Summary cards — counts tween from previous value to new on filter changes
|
| 92 |
// and from 0 on first paint (since `data-value` defaults to "0").
|
js/data.js
CHANGED
|
@@ -129,14 +129,19 @@ function writeSessionCache(data) {
|
|
| 129 |
}
|
| 130 |
|
| 131 |
/* Reduce a flat result set down to one canonical row per
|
| 132 |
-
(machineSlug, browser, model, variant) cell. Picks the row with
|
| 133 |
-
iterations; ties break on latest timestamp. This is the
|
| 134 |
-
"best representative number per cell" — and is what
|
| 135 |
-
in the table, charts, and stat cards.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
export function selectBestResults(records) {
|
| 137 |
const bestByCell = new Map();
|
| 138 |
for (const r of records) {
|
| 139 |
-
const
|
|
|
|
| 140 |
const cur = bestByCell.get(key);
|
| 141 |
if (!cur) {
|
| 142 |
bestByCell.set(key, r);
|
|
@@ -155,27 +160,45 @@ export function selectBestResults(records) {
|
|
| 155 |
return [...bestByCell.values()];
|
| 156 |
}
|
| 157 |
|
| 158 |
-
/* Synthesize
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
bench measures one CPU pass per variant alongside the GPU iterations
|
| 164 |
-
and stamps the result on the same record. We turn each of those into
|
| 165 |
-
a synthetic CPU row so the comparison view sees both data shapes.
|
| 166 |
-
*/
|
| 167 |
export function expandCpuRows(results) {
|
| 168 |
const real = results.filter(r => r.nGpuLayers === 0);
|
| 169 |
-
const synthetic = results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
.filter(r => r.nGpuLayers !== 0
|
| 171 |
&& (r.cpu_baseline_decode_tok_s != null || r.cpu_baseline_prefill_tok_s != null))
|
| 172 |
.map(r => ({
|
| 173 |
...r,
|
| 174 |
decode_tok_s: r.cpu_baseline_decode_tok_s,
|
| 175 |
prefill_tok_s: r.cpu_baseline_prefill_tok_s,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
nGpuLayers: 0,
|
| 177 |
}));
|
| 178 |
-
return [...real, ...synthetic];
|
| 179 |
}
|
| 180 |
|
| 181 |
export function filterResults(results, filters) {
|
|
|
|
| 129 |
}
|
| 130 |
|
| 131 |
/* Reduce a flat result set down to one canonical row per
|
| 132 |
+
(machineSlug, browser, model, variant, backend) cell. Picks the row with
|
| 133 |
+
the most iterations; ties break on latest timestamp. This is the
|
| 134 |
+
leaderboard view — "best representative number per cell" — and is what
|
| 135 |
+
the dashboard renders in the table, charts, and stat cards.
|
| 136 |
+
|
| 137 |
+
`backend` (CPU vs GPU, derived from nGpuLayers) is part of the key so
|
| 138 |
+
CLI CPU+GPU pairs and browser-flow synthetic CPU rows don't collapse
|
| 139 |
+
into the GPU row. */
|
| 140 |
export function selectBestResults(records) {
|
| 141 |
const bestByCell = new Map();
|
| 142 |
for (const r of records) {
|
| 143 |
+
const backend = r.nGpuLayers === 0 ? 'cpu' : 'gpu';
|
| 144 |
+
const key = `${r.machineSlug}|${r.browser}|${r.model}|${r.variant}|${backend}`;
|
| 145 |
const cur = bestByCell.get(key);
|
| 146 |
if (!cur) {
|
| 147 |
bestByCell.set(key, r);
|
|
|
|
| 160 |
return [...bestByCell.values()];
|
| 161 |
}
|
| 162 |
|
| 163 |
+
/* Synthesize a CPU row for every browser-flow GPU record (the in-page
|
| 164 |
+
bench measures one CPU pass per variant alongside the GPU iterations
|
| 165 |
+
and stamps the result on the same record via cpu_baseline_*). Returns
|
| 166 |
+
only CPU rows — combine real (nGpuLayers === 0) and synthetic ones.
|
| 167 |
+
Used by the CPU-vs-GPU views which want the CPU subset only. */
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
export function expandCpuRows(results) {
|
| 169 |
const real = results.filter(r => r.nGpuLayers === 0);
|
| 170 |
+
const synthetic = synthesizeCpuRowsFromBaseline(results);
|
| 171 |
+
return [...real, ...synthetic];
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
/* Same synthesis as expandCpuRows but returns the originals plus the
|
| 175 |
+
synthesized CPU rows — for the main results table where we want both
|
| 176 |
+
GPU and CPU rows visible. */
|
| 177 |
+
export function withSyntheticCpuRows(results) {
|
| 178 |
+
return [...results, ...synthesizeCpuRowsFromBaseline(results)];
|
| 179 |
+
}
|
| 180 |
+
|
| 181 |
+
function synthesizeCpuRowsFromBaseline(results) {
|
| 182 |
+
return results
|
| 183 |
.filter(r => r.nGpuLayers !== 0
|
| 184 |
&& (r.cpu_baseline_decode_tok_s != null || r.cpu_baseline_prefill_tok_s != null))
|
| 185 |
.map(r => ({
|
| 186 |
...r,
|
| 187 |
decode_tok_s: r.cpu_baseline_decode_tok_s,
|
| 188 |
prefill_tok_s: r.cpu_baseline_prefill_tok_s,
|
| 189 |
+
// CPU baseline runs have no t_eval / n_eval breakdowns — null those
|
| 190 |
+
// out so the table doesn't show stale GPU numbers in CPU rows.
|
| 191 |
+
n_eval: null,
|
| 192 |
+
t_eval_ms: null,
|
| 193 |
+
n_p_eval: null,
|
| 194 |
+
t_p_eval_ms: null,
|
| 195 |
+
// Strip the embedded baseline from synthetic CPU rows so the
|
| 196 |
+
// "CPU decode tok/s" column doesn't duplicate the row's own metric.
|
| 197 |
+
cpu_baseline_decode_tok_s: null,
|
| 198 |
+
cpu_baseline_prefill_tok_s: null,
|
| 199 |
+
cpu_baseline: null,
|
| 200 |
nGpuLayers: 0,
|
| 201 |
}));
|
|
|
|
| 202 |
}
|
| 203 |
|
| 204 |
export function filterResults(results, filters) {
|