Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Commit ·
c94a38b
1
Parent(s): f9f4445
support different setups so that we can reduce the number of charts a bit
Browse files
app/src/content/embeds/d3-benchmark-comparison.html
CHANGED
|
@@ -3,7 +3,8 @@
|
|
| 3 |
|
| 4 |
Configuration via data-config attribute:
|
| 5 |
{
|
| 6 |
-
"datasetNames": { "raw_name": "Display Name", ... }, // required
|
|
|
|
| 7 |
"pinnedColors": { "DCLM": "#333", "FineWeb-Edu (HQ)": "#86a1a9" }, // optional
|
| 8 |
"baselines": ["dclm", "fw_edu_hq"], // optional, raw keys for baseline datasets (dashed lines, striped bars). Default: ["dclm", "fw_edu_hq"]
|
| 9 |
"defaultMetric": "agg_score_macro", // optional, default: "agg_score_macro"
|
|
@@ -184,7 +185,15 @@
|
|
| 184 |
} catch (_) {}
|
| 185 |
|
| 186 |
// Configurable settings with defaults
|
| 187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
const RUN_COL = cfg.runColumn || 'runname';
|
| 189 |
const STEP_COL = cfg.stepColumn || 'steps';
|
| 190 |
const TOKENS_PER_STEP = cfg.tokensPerStep || 2.1e6;
|
|
@@ -286,6 +295,78 @@
|
|
| 286 |
unpinned.forEach((raw, i) => { colorMap[raw] = palette[i % palette.length]; });
|
| 287 |
}
|
| 288 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
function showTip(html, x, y) {
|
| 290 |
tipInner.innerHTML = html;
|
| 291 |
const tipW = tip.offsetWidth || 180;
|
|
@@ -545,6 +626,26 @@
|
|
| 545 |
function buildUI() {
|
| 546 |
const controls = document.createElement('div'); controls.className = 'controls';
|
| 547 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 548 |
// View toggle
|
| 549 |
const viewGroup = document.createElement('div'); viewGroup.className = 'control-group';
|
| 550 |
const viewLabel = document.createElement('label'); viewLabel.setAttribute('for', 'view-' + uid); viewLabel.textContent = 'View';
|
|
@@ -644,10 +745,16 @@
|
|
| 644 |
try {
|
| 645 |
const text = await fetchFirstAvailable(csvPaths);
|
| 646 |
const parsed = d3.csvParse(text);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 647 |
// Filter to only datasets with configured display names
|
| 648 |
-
|
| 649 |
-
allData = knownNames.length ? parsed.filter(r => knownNames.includes(r[RUN_COL])) : parsed;
|
| 650 |
-
allData.columns = parsed.columns;
|
| 651 |
metricKeys = detectMetrics(allData.columns);
|
| 652 |
// Ensure defaultMetric is valid; fall back to first available
|
| 653 |
if (!metricKeys.includes(currentMetric)) currentMetric = metricKeys[0];
|
|
|
|
| 3 |
|
| 4 |
Configuration via data-config attribute:
|
| 5 |
{
|
| 6 |
+
"datasetNames": { "raw_name": "Display Name", ... }, // required (unless using setups)
|
| 7 |
+
"setups": { "Setup Label": { "datasetNames": {...} }, ... }, // optional, multi-setup mode with dropdown + average
|
| 8 |
"pinnedColors": { "DCLM": "#333", "FineWeb-Edu (HQ)": "#86a1a9" }, // optional
|
| 9 |
"baselines": ["dclm", "fw_edu_hq"], // optional, raw keys for baseline datasets (dashed lines, striped bars). Default: ["dclm", "fw_edu_hq"]
|
| 10 |
"defaultMetric": "agg_score_macro", // optional, default: "agg_score_macro"
|
|
|
|
| 185 |
} catch (_) {}
|
| 186 |
|
| 187 |
// Configurable settings with defaults
|
| 188 |
+
// ─── SETUP SUPPORT ───
|
| 189 |
+
// Multi-setup mode: `cfg.setups` maps a setup label to an object holding that
// setup's own `datasetNames`. An empty `setups` object is treated as "no setups"
// so that the first-setup lookup below never dereferences `SETUPS[undefined]`.
const SETUPS = cfg.setups && Object.keys(cfg.setups).length > 0 ? cfg.setups : null;
const setupNames = SETUPS ? Object.keys(SETUPS) : [];
// Label of the setup currently shown; the first configured setup is the default.
let currentSetup = SETUPS ? setupNames[0] : null;
// raw run name -> display name for the active view. Copied so that later
// mutation of DATASET_NAMES never writes back into the config object.
let DATASET_NAMES = SETUPS ? { ...SETUPS[currentSetup].datasetNames } : (cfg.datasetNames || {});
// Label (and option value) of the synthetic "average over setups" entry.
const AVG_SETUP_KEY = 'Average (all setups)';
// synthetic raw name -> display name for the averaged series.
let avgDatasetNames = {};
// Every parsed CSV row, before filtering down to the active setup.
let parsedData = [];
|
| 196 |
+
|
| 197 |
const RUN_COL = cfg.runColumn || 'runname';
|
| 198 |
const STEP_COL = cfg.stepColumn || 'steps';
|
| 199 |
const TOKENS_PER_STEP = cfg.tokensPerStep || 2.1e6;
|
|
|
|
| 295 |
unpinned.forEach((raw, i) => { colorMap[raw] = palette[i % palette.length]; });
|
| 296 |
}
|
| 297 |
|
| 298 |
+
// ─── SETUP HELPERS ───
|
| 299 |
+
/**
 * Rebuilds `allData` from `parsedData`, keeping only the rows whose run name
 * (column `RUN_COL`) is a key of `DATASET_NAMES`.
 *
 * When `DATASET_NAMES` has no keys nothing is filtered and `allData` becomes the
 * same array object as `parsedData`.
 */
function filterData() {
  // A Set gives constant-time membership tests instead of scanning the name
  // list once per row.
  const knownNames = new Set(Object.keys(DATASET_NAMES));
  allData = knownNames.size > 0
    ? parsedData.filter((row) => knownNames.has(row[RUN_COL]))
    : parsedData;
  // `filter` returns a plain array, so the `columns` property has to be carried over.
  allData.columns = parsedData.columns;
}
|
| 304 |
+
|
| 305 |
+
/**
 * Builds synthetic "average across setups" rows.
 *
 * For every display name that is configured in each entry of `setupNames`, and
 * for every step found in `rawData`, one row is emitted whose run name is
 * `'__avg__' + <sanitised display name>` and whose remaining columns hold the
 * mean of the numeric values found in the matching source rows.
 *
 * @param {Array<Object>} rawData Parsed rows; an optional `columns` array
 *   property lists the column names (otherwise the first row's keys are used).
 * @returns {{data: Array<Object>, datasetNames: Object}} The averaged rows and a
 *   map from synthetic run name to display name. Both are empty when fewer than
 *   two setups are configured.
 */
function computeAverageData(rawData) {
  if (!SETUPS || setupNames.length < 2) return { data: [], datasetNames: {} };

  // Blank or missing cells must not be read as 0: `Number('')` is 0, which
  // would silently drag an average down.
  const toNumber = (value) => {
    if (value == null) return NaN;
    if (typeof value === 'string' && value.trim() === '') return NaN;
    return Number(value);
  };
  const keyOf = (run, step) => `${run}|${step}`;

  // display name -> raw run names that map to it, plus the setups that define it.
  const byDisplay = new Map();
  for (const sName of setupNames) {
    const names = SETUPS[sName].datasetNames || {};
    for (const [raw, display] of Object.entries(names)) {
      let entry = byDisplay.get(display);
      if (!entry) {
        entry = { raws: [], setups: new Set() };
        byDisplay.set(display, entry);
      }
      entry.raws.push(raw);
      entry.setups.add(sName);
    }
  }

  // Index rows by run name + numeric step. The key uses the parsed number so
  // that it matches the lookup below even when the CSV writes a step as
  // "1000.0" or with surrounding spaces.
  const byRunStep = new Map();
  const stepSet = new Set();
  for (const row of rawData) {
    const step = toNumber(row[STEP_COL]);
    if (Number.isNaN(step)) continue;
    byRunStep.set(keyOf(row[RUN_COL], step), row);
    stepSet.add(step);
  }
  const steps = [...stepSet].sort((a, b) => a - b);

  const cols = rawData.columns || Object.keys(rawData[0] || {});
  const valueCols = cols.filter((col) => col !== RUN_COL && col !== STEP_COL);

  const data = [];
  const datasetNames = {};
  for (const [display, { raws, setups }] of byDisplay) {
    // Only average display names present in ALL setups. Counting setups rather
    // than raw names stops two aliases in one setup from standing in for a
    // setup that lacks the dataset.
    if (setups.size < setupNames.length) continue;

    const avgRaw = '__avg__' + String(display).replace(/[^a-zA-Z0-9]/g, '_');
    datasetNames[avgRaw] = display;

    for (const step of steps) {
      const rows = raws.map((raw) => byRunStep.get(keyOf(raw, step))).filter(Boolean);
      if (!rows.length) continue;

      const avgRow = { [RUN_COL]: avgRaw, [STEP_COL]: String(step) };
      for (const col of valueCols) {
        const vals = rows.map((row) => toNumber(row[col])).filter((v) => !Number.isNaN(v));
        // Columns with no numeric value at this step keep the 0 placeholder.
        avgRow[col] = vals.length ? vals.reduce((a, b) => a + b, 0) / vals.length : 0;
      }
      data.push(avgRow);
    }
  }
  return { data, datasetNames };
}
|
| 343 |
+
|
| 344 |
+
/**
 * Makes `name` the active setup and redraws the chart.
 *
 * `name` is either a key of `SETUPS` or `AVG_SETUP_KEY` (the synthetic average
 * view). Any other value is ignored and leaves the current state untouched.
 *
 * @param {string} name Setup label to activate.
 */
function switchSetup(name) {
  const isAverage = name === AVG_SETUP_KEY;
  const isKnownSetup = Boolean(SETUPS) && Object.prototype.hasOwnProperty.call(SETUPS, name);
  // Validate before mutating anything so a bad name cannot leave `currentSetup`
  // pointing at a setup whose dataset names were never loaded.
  if (!isAverage && !isKnownSetup) return;

  currentSetup = name;
  DATASET_NAMES = isAverage ? { ...avgDatasetNames } : { ...SETUPS[name].datasetNames };

  // Re-add baselines that may be shared across setups: a baseline run that has
  // rows in the data but is not listed for this setup is still shown.
  const baselineNames = cfg.baselines || ['dclm', 'fw_edu_hq'];
  const presentRuns = new Set(parsedData.map((row) => row[RUN_COL]));
  for (const bRaw of baselineNames) {
    if (DATASET_NAMES[bRaw] || !presentRuns.has(bRaw)) continue;
    // Use the display name from the first setup that defines one, else the raw key.
    const owner = setupNames.find((sName) => (SETUPS[sName].datasetNames || {})[bRaw]);
    DATASET_NAMES[bRaw] = owner !== undefined ? SETUPS[owner].datasetNames[bRaw] : bRaw;
  }

  // Colours are assigned per visible dataset, so start from a clean map.
  colorMap = {};
  filterData();
  initColors();
  render();
  buildLegend();
}
|
| 369 |
+
|
| 370 |
function showTip(html, x, y) {
|
| 371 |
tipInner.innerHTML = html;
|
| 372 |
const tipW = tip.offsetWidth || 180;
|
|
|
|
| 626 |
function buildUI() {
|
| 627 |
const controls = document.createElement('div'); controls.className = 'controls';
|
| 628 |
|
| 629 |
+
// Setup selector (only shown when setups config is present)
|
| 630 |
+
if (SETUPS && setupNames.length > 0) {
|
| 631 |
+
const setupGroup = document.createElement('div'); setupGroup.className = 'control-group';
|
| 632 |
+
const setupLabel = document.createElement('label'); setupLabel.setAttribute('for', 'setup-' + uid); setupLabel.textContent = 'Setup';
|
| 633 |
+
const setupSelect = document.createElement('select'); setupSelect.id = 'setup-' + uid;
|
| 634 |
+
setupNames.forEach(name => {
|
| 635 |
+
const opt = document.createElement('option'); opt.value = name; opt.textContent = name;
|
| 636 |
+
if (name === currentSetup) opt.selected = true;
|
| 637 |
+
setupSelect.appendChild(opt);
|
| 638 |
+
});
|
| 639 |
+
// Add Average option
|
| 640 |
+
if (setupNames.length >= 2) {
|
| 641 |
+
const avgOpt = document.createElement('option'); avgOpt.value = AVG_SETUP_KEY; avgOpt.textContent = AVG_SETUP_KEY;
|
| 642 |
+
setupSelect.appendChild(avgOpt);
|
| 643 |
+
}
|
| 644 |
+
setupSelect.addEventListener('change', () => { switchSetup(setupSelect.value); });
|
| 645 |
+
setupGroup.appendChild(setupLabel); setupGroup.appendChild(setupSelect);
|
| 646 |
+
controls.appendChild(setupGroup);
|
| 647 |
+
}
|
| 648 |
+
|
| 649 |
// View toggle
|
| 650 |
const viewGroup = document.createElement('div'); viewGroup.className = 'control-group';
|
| 651 |
const viewLabel = document.createElement('label'); viewLabel.setAttribute('for', 'view-' + uid); viewLabel.textContent = 'View';
|
|
|
|
| 745 |
try {
|
| 746 |
const text = await fetchFirstAvailable(csvPaths);
|
| 747 |
const parsed = d3.csvParse(text);
|
| 748 |
+
parsedData = parsed;
|
| 749 |
+
// Compute average data for setup mode
|
| 750 |
+
if (SETUPS && setupNames.length >= 2) {
|
| 751 |
+
const avg = computeAverageData(parsed);
|
| 752 |
+
avgDatasetNames = avg.datasetNames;
|
| 753 |
+
parsedData = parsed.concat(avg.data);
|
| 754 |
+
parsedData.columns = parsed.columns;
|
| 755 |
+
}
|
| 756 |
// Filter to only datasets with configured display names
|
| 757 |
+
filterData();
|
|
|
|
|
|
|
| 758 |
metricKeys = detectMetrics(allData.columns);
|
| 759 |
// Ensure defaultMetric is valid; fall back to first available
|
| 760 |
if (!metricKeys.includes(currentMetric)) currentMetric = metricKeys[0];
|