Spaces:

abhijitramesh
/

webgpu-bench

Running

App Files Files Community

GitHub Actions committed about 1 month ago

Commit

1683f65

1 Parent(s): 92ad589

sync from abhijitramesh/webgpu-bench@f0e4a60c2d

Browse files

Files changed (3) hide show

js/app.js +8 -11
js/data.js +34 -0
js/tables.js +1 -10

js/app.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { loadData, filterResults, selectBestResults, expandCpuRows, withSyntheticCpuRows } from './data.js';
 import { initFilters, populateQuantOptions, getFilters, resetFilters } from './filters.js';
 import { renderDecodeChart, renderPrefillChart, renderSizeChart, renderMachineChart, renderCpuGpuChart, renderSpeedupChart } from './charts.js';
 import { renderResultsTable, renderErrorTable, renderMachineInfo, renderCpuGpuTable } from './tables.js';
@@ -76,17 +76,14 @@ function render() {
   Chart.defaults.plugins.tooltip.bodyColor = isDark ? '#a1a1aa' : '#71717a';
   const filters = getFilters();
-  // Filter, then collapse to one canonical row per
-  // (machine, browser, model, variant, backend). Multiple users may submit
-  // results for the same hardware bucket; this keeps the row with the
-  // highest iteration count (tiebreak: most recent) so the leaderboard
-  // shows the most reliable number per cell rather than averaging noisy
-  // duplicates. withSyntheticCpuRows expands each browser-flow record's
-  // cpu_baseline_* into a sibling CPU row so CPU runs (CLI or browser)
-  // appear as their own row in the main table.
   const filtered = selectBestResults(
-    withSyntheticCpuRows(filterResults(appData.results, filters)),
-  );
   // Summary cards — counts tween from previous value to new on filter changes
   // and from 0 on first paint (since `data-value` defaults to "0").

+import { loadData, filterResults, selectBestResults, expandCpuRows, attachCpuBaselineFromCpuRecords } from './data.js';
 import { initFilters, populateQuantOptions, getFilters, resetFilters } from './filters.js';
 import { renderDecodeChart, renderPrefillChart, renderSizeChart, renderMachineChart, renderCpuGpuChart, renderSpeedupChart } from './charts.js';
 import { renderResultsTable, renderErrorTable, renderMachineInfo, renderCpuGpuTable } from './tables.js';
   Chart.defaults.plugins.tooltip.bodyColor = isDark ? '#a1a1aa' : '#71717a';
   const filters = getFilters();
+  // Filter, attach CPU baseline values (folds CLI-flow CPU records onto
+  // their GPU sibling so both submission paths produce one row per cell),
+  // collapse to one canonical row per (machine, browser, model, variant,
+  // backend), then drop every CPU row (nGpuLayers === 0), including any
+  // that had no GPU sibling to fold onto. CPU numbers that were attached
+  // stay visible via the cpu_baseline_* columns on each GPU row.
   const filtered = selectBestResults(
+    attachCpuBaselineFromCpuRecords(filterResults(appData.results, filters)),
+  ).filter(r => r.nGpuLayers !== 0);
   // Summary cards — counts tween from previous value to new on filter changes
   // and from 0 on first paint (since `data-value` defaults to "0").

js/data.js CHANGED Viewed

@@ -160,6 +160,40 @@ export function selectBestResults(records) {
   return [...bestByCell.values()];
 }
 /* Synthesize a CPU row for every browser-flow GPU record (the in-page
    bench measures one CPU pass per variant alongside the GPU iterations
    and stamps the result on the same record via cpu_baseline_*). Returns

   return [...bestByCell.values()];
 }
+/* For CLI-flow records that ship CPU and GPU as separate dataset entries,
+   look up each GPU record's matching CPU companion (same machine, browser,
+   model, variant) and copy its perf into cpu_baseline_*. After this pass,
+   GPU records from both submission paths (browser, CLI) carry their CPU
+   baseline inline, so the main table can render a single row per cell with
+   both numbers side-by-side. No-op on records that already have
+   cpu_baseline_* (e.g. browser-flow records, where controller.makeRecord
+   embeds it at write time).
+
+   Parameters:
+     results - iterable-and-mappable list of result records. The fields read
+               here are nGpuLayers, status, decode_tok_s, prefill_tok_s,
+               timestamp, machineSlug, browser, model, variant and the two
+               cpu_baseline_*_tok_s fields.
+
+   Returns a new array of the same length and order as `results`. Records
+   are passed through by reference unless a baseline is attached, in which
+   case a shallow copy carrying the two cpu_baseline_*_tok_s fields is
+   returned; input records are never mutated.
+
+   Selection rules, as implemented below:
+     - A record counts as a CPU record only when nGpuLayers === 0; anything
+       else (including a missing nGpuLayers) is treated as a GPU record.
+     - A CPU record is eligible as a baseline source only when its status is
+       'done' and at least one of decode_tok_s / prefill_tok_s is non-null.
+     - When several eligible CPU records share a cell, the one with the
+       greatest timestamp is used; a missing timestamp compares as ''.
+     - A GPU record that already has either cpu_baseline_* field non-null is
+       left untouched, even if the other field is missing. */
+export function attachCpuBaselineFromCpuRecords(results) {
+  const cpuByCell = new Map(); // cell key -> chosen CPU record
+  for (const r of results) {
+    if (r.nGpuLayers === 0 && r.status === 'done' && (r.decode_tok_s != null || r.prefill_tok_s != null)) {
+      const key = `${r.machineSlug}|${r.browser}|${r.model}|${r.variant}`;
+      const cur = cpuByCell.get(key);
+      // Latest timestamp wins outright; on an exact tie the first-seen record is kept. NOTE(review): confirm this matches selectBestResults().
+      if (!cur || (r.timestamp || '') > (cur.timestamp || '')) { // presumably ISO-8601 strings, so lexical order is chronological — verify
+        cpuByCell.set(key, r);
+      }
+    }
+  }
+  return results.map(r => {
+    if (r.nGpuLayers === 0) return r; // CPU records pass through unchanged
+    if (r.cpu_baseline_decode_tok_s != null || r.cpu_baseline_prefill_tok_s != null) return r; // baseline already present
+    const key = `${r.machineSlug}|${r.browser}|${r.model}|${r.variant}`;
+    const cpu = cpuByCell.get(key);
+    if (!cpu) return r; // no eligible CPU companion for this cell
+    return {
+      ...r,
+      cpu_baseline_decode_tok_s: cpu.decode_tok_s ?? null, // null when the CPU record lacks this metric
+      cpu_baseline_prefill_tok_s: cpu.prefill_tok_s ?? null,
+    };
+  });
+}
 /* Synthesize a CPU row for every browser-flow GPU record (the in-page
    bench measures one CPU pass per variant alongside the GPU iterations
    and stamps the result on the same record via cpu_baseline_*). Returns

js/tables.js CHANGED Viewed

@@ -8,7 +8,7 @@ const NUM_KEYS = new Set([
   'sizeMB', 'decode_tok_s', 'prefill_tok_s',
   'cpu_baseline_decode_tok_s', 'cpu_baseline_prefill_tok_s',
   'n_eval', 't_eval_ms',
-  'n_p_eval', 't_p_eval_ms', 'wallTimeMs', 'consistency_rate', 'nGpuLayers',
 ]);
 function sortResults(results, key, dir) {
@@ -72,7 +72,6 @@ export function renderResultsTable(results) {
     { key: 'sizeMB', label: 'Size (MB)', priority: 3 },
     { key: 'browser', label: 'Browser', priority: 2 },
     { key: 'submittedBy', label: 'Submitter', priority: 2 },
-    { key: 'nGpuLayers', label: 'Backend', priority: 2 },
     { key: 'status', label: 'Status', priority: 1 },
     { key: 'buildType', label: 'Build', priority: 3 },
     { key: 'webgpuAvailable', label: 'WebGPU', priority: 3 },
@@ -117,14 +116,6 @@ export function renderResultsTable(results) {
             ? '<span class="badge badge--pass">PASS</span>'
             : '<span class="badge badge--fail">FAIL</span>';
           break;
-        case 'nGpuLayers':
-          if (r.nGpuLayers != null) {
-            const isCpu = r.nGpuLayers === 0;
-            html += `<span class="badge ${isCpu ? 'badge--cpu' : 'badge--webgpu'}">${isCpu ? 'CPU' : 'WebGPU'}</span>`;
-          } else {
-            html += '<span class="text-muted">\u2014</span>';
-          }
-          break;
         case 'webgpuAvailable':
           html += r.webgpuAvailable
             ? '<span class="badge badge--yes">Yes</span>'

   'sizeMB', 'decode_tok_s', 'prefill_tok_s',
   'cpu_baseline_decode_tok_s', 'cpu_baseline_prefill_tok_s',
   'n_eval', 't_eval_ms',
+  'n_p_eval', 't_p_eval_ms', 'wallTimeMs', 'consistency_rate',
 ]);
 function sortResults(results, key, dir) {
     { key: 'sizeMB', label: 'Size (MB)', priority: 3 },
     { key: 'browser', label: 'Browser', priority: 2 },
     { key: 'submittedBy', label: 'Submitter', priority: 2 },
     { key: 'status', label: 'Status', priority: 1 },
     { key: 'buildType', label: 'Build', priority: 3 },
     { key: 'webgpuAvailable', label: 'WebGPU', priority: 3 },
             ? '<span class="badge badge--pass">PASS</span>'
             : '<span class="badge badge--fail">FAIL</span>';
           break;
         case 'webgpuAvailable':
           html += r.webgpuAvailable
             ? '<span class="badge badge--yes">Yes</span>'