joelniklaus HF Staff committed on
Commit
c94a38b
·
1 Parent(s): f9f4445

support different setups so that we can reduce the number of charts a bit

Browse files
app/src/content/embeds/d3-benchmark-comparison.html CHANGED
@@ -3,7 +3,8 @@
3
 
4
  Configuration via data-config attribute:
5
  {
6
- "datasetNames": { "raw_name": "Display Name", ... }, // required
 
7
  "pinnedColors": { "DCLM": "#333", "FineWeb-Edu (HQ)": "#86a1a9" }, // optional
8
  "baselines": ["dclm", "fw_edu_hq"], // optional, raw keys for baseline datasets (dashed lines, striped bars). Default: ["dclm", "fw_edu_hq"]
9
  "defaultMetric": "agg_score_macro", // optional, default: "agg_score_macro"
@@ -184,7 +185,15 @@
184
  } catch (_) {}
185
 
186
  // Configurable settings with defaults
187
- const DATASET_NAMES = cfg.datasetNames || {};
 
 
 
 
 
 
 
 
188
  const RUN_COL = cfg.runColumn || 'runname';
189
  const STEP_COL = cfg.stepColumn || 'steps';
190
  const TOKENS_PER_STEP = cfg.tokensPerStep || 2.1e6;
@@ -286,6 +295,78 @@
286
  unpinned.forEach((raw, i) => { colorMap[raw] = palette[i % palette.length]; });
287
  }
288
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  function showTip(html, x, y) {
290
  tipInner.innerHTML = html;
291
  const tipW = tip.offsetWidth || 180;
@@ -545,6 +626,26 @@
545
  function buildUI() {
546
  const controls = document.createElement('div'); controls.className = 'controls';
547
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
548
  // View toggle
549
  const viewGroup = document.createElement('div'); viewGroup.className = 'control-group';
550
  const viewLabel = document.createElement('label'); viewLabel.setAttribute('for', 'view-' + uid); viewLabel.textContent = 'View';
@@ -644,10 +745,16 @@
644
  try {
645
  const text = await fetchFirstAvailable(csvPaths);
646
  const parsed = d3.csvParse(text);
 
 
 
 
 
 
 
 
647
  // Filter to only datasets with configured display names
648
- const knownNames = Object.keys(DATASET_NAMES);
649
- allData = knownNames.length ? parsed.filter(r => knownNames.includes(r[RUN_COL])) : parsed;
650
- allData.columns = parsed.columns;
651
  metricKeys = detectMetrics(allData.columns);
652
  // Ensure defaultMetric is valid; fall back to first available
653
  if (!metricKeys.includes(currentMetric)) currentMetric = metricKeys[0];
 
3
 
4
  Configuration via data-config attribute:
5
  {
6
+ "datasetNames": { "raw_name": "Display Name", ... }, // required (unless using setups)
7
+ "setups": { "Setup Label": { "datasetNames": {...} }, ... }, // optional, multi-setup mode with dropdown + average
8
  "pinnedColors": { "DCLM": "#333", "FineWeb-Edu (HQ)": "#86a1a9" }, // optional
9
  "baselines": ["dclm", "fw_edu_hq"], // optional, raw keys for baseline datasets (dashed lines, striped bars). Default: ["dclm", "fw_edu_hq"]
10
  "defaultMetric": "agg_score_macro", // optional, default: "agg_score_macro"
 
185
  } catch (_) {}
186
 
187
  // Configurable settings with defaults
188
// ─── SETUP SUPPORT ───
// Multi-setup mode is enabled only when cfg.setups contains at least one entry.
// An empty object ("setups": {}) is treated like "no setups" so that the
// lookup of the first setup below can never dereference undefined; in that
// case the chart falls back to the single cfg.datasetNames mapping.
const SETUPS = cfg.setups && Object.keys(cfg.setups).length > 0 ? cfg.setups : null;
// Setup labels in config order; the first one is the initially selected setup.
const setupNames = SETUPS ? Object.keys(SETUPS) : [];
let currentSetup = SETUPS ? setupNames[0] : null;
// Active raw-name -> display-name mapping. Copied so later mutation
// (e.g. re-adding baselines) never touches the config object itself.
let DATASET_NAMES = SETUPS
  ? { ...(SETUPS[setupNames[0]].datasetNames || {}) }
  : (cfg.datasetNames || {});
// Label (and selector value) of the synthetic "average over setups" entry.
const AVG_SETUP_KEY = 'Average (all setups)';
// Raw-name -> display-name mapping for the synthetic average rows.
let avgDatasetNames = {};
// All parsed CSV rows (plus synthetic average rows once computed).
let parsedData = [];
196
+
197
  const RUN_COL = cfg.runColumn || 'runname';
198
  const STEP_COL = cfg.stepColumn || 'steps';
199
  const TOKENS_PER_STEP = cfg.tokensPerStep || 2.1e6;
 
295
  unpinned.forEach((raw, i) => { colorMap[raw] = palette[i % palette.length]; });
296
  }
297
 
298
+ // ─── SETUP HELPERS ───
299
/**
 * Recompute `allData` from `parsedData` for the active `DATASET_NAMES` mapping.
 *
 * Rows are kept when their RUN_COL value is a key of DATASET_NAMES. When the
 * mapping is empty, every row is kept (allData then aliases parsedData).
 * The `columns` property of parsedData is carried over to the result.
 */
function filterData() {
  // A Set gives O(1) membership tests instead of scanning the key list per row.
  const knownNames = new Set(Object.keys(DATASET_NAMES));
  allData = knownNames.size > 0
    ? parsedData.filter((row) => knownNames.has(row[RUN_COL]))
    : parsedData;
  // Array.prototype.filter returns a plain array, so the column list
  // has to be re-attached explicitly.
  allData.columns = parsedData.columns;
}
304
+
305
/**
 * Build synthetic rows that average each dataset across all configured setups.
 *
 * Datasets are matched across setups by display name. A display name is only
 * averaged when it occurs in every setup. For each such name and each step
 * present in `rawData`, one row is emitted whose run name is
 * `'__avg__' + sanitizedDisplayName` and whose other columns hold the mean of
 * the numeric values found in the contributing rows.
 *
 * @param {Array<Object>} rawData - parsed CSV rows; an optional `columns`
 *   property lists the column names (otherwise the keys of the first row are used).
 * @returns {{data: Array<Object>, datasetNames: Object}} the synthetic rows and
 *   a mapping from synthetic run name to display name. Both are empty when
 *   fewer than two setups are configured.
 */
function computeAverageData(rawData) {
  if (!SETUPS || setupNames.length < 2) return { data: [], datasetNames: {} };

  // displayName -> { raws: contributing raw run names, setups: setups that define it }
  const byDisplay = new Map();
  for (const sName of setupNames) {
    const names = (SETUPS[sName] && SETUPS[sName].datasetNames) || {};
    for (const [raw, display] of Object.entries(names)) {
      let entry = byDisplay.get(display);
      if (!entry) {
        entry = { raws: [], setups: new Set() };
        byDisplay.set(display, entry);
      }
      entry.raws.push(raw);
      entry.setups.add(sName);
    }
  }

  // Index rows by run name + numeric step. Normalising the step through
  // Number keeps "1000" and "1000.0" on the same key as the lookup below.
  const byRunStep = new Map();
  for (const row of rawData) {
    byRunStep.set(`${row[RUN_COL]}|${+row[STEP_COL]}`, row);
  }
  const steps = Array.from(new Set(rawData.map((r) => +r[STEP_COL]))).sort((a, b) => a - b);
  const cols = rawData.columns || Object.keys(rawData[0] || {});

  // Blank or absent cells are "missing", not zero: unary plus would turn ''
  // into 0 and silently drag the average down.
  const toNumber = (cell) => (cell == null || String(cell).trim() === '' ? NaN : +cell);

  const result = [];
  const dnMap = {};
  for (const [display, entry] of byDisplay) {
    // Only average display names defined by every setup (counted per setup,
    // so two raw names sharing a display name inside one setup do not qualify).
    if (entry.setups.size < setupNames.length) continue;

    const avgRaw = '__avg__' + String(display).replace(/[^a-zA-Z0-9]/g, '_');
    dnMap[avgRaw] = display;

    for (const step of steps) {
      const rows = entry.raws.map((raw) => byRunStep.get(`${raw}|${step}`)).filter(Boolean);
      if (!rows.length) continue;

      const avgRow = { [RUN_COL]: avgRaw, [STEP_COL]: String(step) };
      for (const col of cols) {
        if (col === RUN_COL || col === STEP_COL) continue;
        const vals = rows.map((r) => toNumber(r[col])).filter((v) => !Number.isNaN(v));
        // Columns without any numeric value keep the historical fallback of 0.
        avgRow[col] = vals.length ? vals.reduce((a, b) => a + b, 0) / vals.length : 0;
      }
      result.push(avgRow);
    }
  }
  return { data: result, datasetNames: dnMap };
}
343
+
344
/**
 * Activate a setup (or the synthetic average view) and redraw the chart.
 *
 * Replaces DATASET_NAMES with a copy of the chosen setup's mapping, re-adds
 * baseline runs that exist in the data but are missing from that mapping,
 * resets the colour map, and then re-filters, re-colours, re-renders and
 * rebuilds the legend.
 *
 * @param {string} name - a key of SETUPS, or AVG_SETUP_KEY for the average view.
 *   Unknown names are ignored (with a console warning) and leave state untouched.
 */
function switchSetup(name) {
  const isAverage = name === AVG_SETUP_KEY;
  const setup = !isAverage && SETUPS ? SETUPS[name] : null;
  if (!isAverage && !setup) {
    // Bail out before touching any state so the UI stays consistent.
    console.warn('Unknown setup: ' + name);
    return;
  }

  currentSetup = name;
  DATASET_NAMES = isAverage ? { ...avgDatasetNames } : { ...setup.datasetNames };

  // Re-add baselines that may be shared across setups.
  // NOTE(review): in the average view a baseline defined in every setup may
  // end up listed twice (raw run + '__avg__' run) under the same display
  // name — confirm whether that is intended.
  const baselineNames = cfg.baselines || ['dclm', 'fw_edu_hq'];
  for (const bRaw of baselineNames) {
    if (DATASET_NAMES[bRaw]) continue;
    if (!parsedData.some((r) => r[RUN_COL] === bRaw)) continue;
    // Use the display name from the first setup that defines one, else the raw key.
    let bDisplay = bRaw;
    for (const sName of setupNames) {
      const candidate = (SETUPS[sName].datasetNames || {})[bRaw];
      if (candidate) {
        bDisplay = candidate;
        break;
      }
    }
    DATASET_NAMES[bRaw] = bDisplay;
  }

  // Colours are assigned per visible dataset, so start from a clean map.
  colorMap = {};
  filterData();
  initColors();
  render();
  buildLegend();
}
369
+
370
  function showTip(html, x, y) {
371
  tipInner.innerHTML = html;
372
  const tipW = tip.offsetWidth || 180;
 
626
  function buildUI() {
627
  const controls = document.createElement('div'); controls.className = 'controls';
628
 
629
+ // Setup selector (only shown when setups config is present)
630
+ if (SETUPS && setupNames.length > 0) {
631
+ const setupGroup = document.createElement('div'); setupGroup.className = 'control-group';
632
+ const setupLabel = document.createElement('label'); setupLabel.setAttribute('for', 'setup-' + uid); setupLabel.textContent = 'Setup';
633
+ const setupSelect = document.createElement('select'); setupSelect.id = 'setup-' + uid;
634
+ setupNames.forEach(name => {
635
+ const opt = document.createElement('option'); opt.value = name; opt.textContent = name;
636
+ if (name === currentSetup) opt.selected = true;
637
+ setupSelect.appendChild(opt);
638
+ });
639
+ // Add Average option
640
+ if (setupNames.length >= 2) {
641
+ const avgOpt = document.createElement('option'); avgOpt.value = AVG_SETUP_KEY; avgOpt.textContent = AVG_SETUP_KEY;
642
+ setupSelect.appendChild(avgOpt);
643
+ }
644
+ setupSelect.addEventListener('change', () => { switchSetup(setupSelect.value); });
645
+ setupGroup.appendChild(setupLabel); setupGroup.appendChild(setupSelect);
646
+ controls.appendChild(setupGroup);
647
+ }
648
+
649
  // View toggle
650
  const viewGroup = document.createElement('div'); viewGroup.className = 'control-group';
651
  const viewLabel = document.createElement('label'); viewLabel.setAttribute('for', 'view-' + uid); viewLabel.textContent = 'View';
 
745
  try {
746
  const text = await fetchFirstAvailable(csvPaths);
747
  const parsed = d3.csvParse(text);
748
+ parsedData = parsed;
749
+ // Compute average data for setup mode
750
+ if (SETUPS && setupNames.length >= 2) {
751
+ const avg = computeAverageData(parsed);
752
+ avgDatasetNames = avg.datasetNames;
753
+ parsedData = parsed.concat(avg.data);
754
+ parsedData.columns = parsed.columns;
755
+ }
756
  // Filter to only datasets with configured display names
757
+ filterData();
 
 
758
  metricKeys = detectMetrics(allData.columns);
759
  // Ensure defaultMetric is valid; fall back to first available
760
  if (!metricKeys.includes(currentMetric)) currentMetric = metricKeys[0];