Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
| <!-- | |
| Reusable bar/line chart for benchmark comparisons. | |
| Configuration via data-config attribute: | |
| { | |
| "datasetNames": { "raw_name": "Display Name", ... }, // required (unless using setups) | |
| "setups": { "Setup Label": { "datasetNames": {...} }, ... }, // optional, multi-setup mode with dropdown + average | |
| "pinnedColors": { "DCLM": "#333", "FineWeb-Edu (HQ)": "#86a1a9" }, // optional | |
| "baselines": ["dclm", "fw_edu_hq"], // optional, raw keys for baseline datasets (dashed lines, striped bars). Default: ["dclm", "fw_edu_hq"] | |
| "defaultMetric": "agg_score_macro", // optional, default: "agg_score_macro" | |
| "defaultView": "bar", // optional, "bar" | "line", default: "bar" | |
| "tokensPerStep": 2100000, // optional, default: 2.1e6 | |
| "runColumn": "runname", // optional, CSV column for series, default: "runname" | |
| "stepColumn": "steps" // optional, CSV column for x-axis, default: "steps" | |
| } | |
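| Data: uses benchmark-results.csv by default (one CSV with all runs); set the data-datafiles attribute (a file name or a JSON array of paths) to load a different CSV. | |
| Only rows whose run column matches a key in datasetNames are displayed; if datasetNames is empty, all rows are shown. | |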
| Example usage in MDX: | |
| <HtmlEmbed | |
| src="d3-benchmark-comparison.html" | |
| title="Baseline Comparison" | |
| config={{ | |
| datasetNames: { | |
| cosmopedia: "Cosmopedia", | |
| dclm: "DCLM", | |
| fw_edu_hq: "FineWeb-Edu (HQ)" | |
| } | |
| }} | |
| /> | |
| --> | |
| <div class="d3-benchmark-comparison"></div> | |
| <style> | |
| .d3-benchmark-comparison { position: relative; } | |
| .d3-benchmark-comparison .controls { | |
| display: flex; | |
| gap: 16px; | |
| align-items: flex-end; | |
| justify-content: center; | |
| margin: 10px 0 0 0; | |
| } | |
| .d3-benchmark-comparison .controls .control-group { | |
| display: flex; | |
| flex-direction: column; | |
| align-items: flex-start; | |
| gap: 6px; | |
| } | |
| .d3-benchmark-comparison .controls label { | |
| font-size: 12px; | |
| font-weight: 700; | |
| color: var(--text-color); | |
| } | |
| .d3-benchmark-comparison .controls select { | |
| appearance: none; | |
| -webkit-appearance: none; | |
| -moz-appearance: none; | |
| border: 1px solid var(--border-color); | |
| border-radius: 8px; | |
| padding: 6px 28px 6px 10px; | |
| background-color: var(--surface-bg); | |
| color: var(--text-color); | |
| font-size: 13px; | |
| line-height: 1.2; | |
| background-image: url("data:image/svg+xml,%3Csvg width='12' height='8' viewBox='0 0 12 8' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M1.41 1.59L6 6.17l4.59-4.58L12 3 6 9 0 3z' fill='%23999'/%3E%3C/svg%3E"); | |
| background-repeat: no-repeat; | |
| background-position: right 8px center; | |
| } | |
| .d3-benchmark-comparison .controls select:focus-visible { | |
| outline: 2px solid var(--primary-color); | |
| outline-offset: 2px; | |
| } | |
| .d3-benchmark-comparison .legend { | |
| display: flex; | |
| flex-direction: column; | |
| align-items: flex-start; | |
| gap: 6px; | |
| margin: 8px 0 0 0; | |
| padding-bottom: 4px; | |
| } | |
| .d3-benchmark-comparison .legend .legend-title { | |
| font-size: 12px; | |
| font-weight: 700; | |
| color: var(--text-color); | |
| } | |
| .d3-benchmark-comparison .legend .items { | |
| display: flex; | |
| flex-wrap: wrap; | |
| gap: 8px 14px; | |
| } | |
| .d3-benchmark-comparison .legend .item { | |
| display: inline-flex; | |
| align-items: center; | |
| gap: 6px; | |
| white-space: nowrap; | |
| font-size: 12px; | |
| color: var(--text-color); | |
| cursor: pointer; | |
| } | |
| .d3-benchmark-comparison .legend .item.ghost { opacity: .25; } | |
| .d3-benchmark-comparison .legend .swatch { | |
| width: 14px; | |
| height: 14px; | |
| border-radius: 3px; | |
| border: 1px solid var(--border-color); | |
| } | |
| .d3-benchmark-comparison .bar.ghost { opacity: .25; } | |
| .d3-benchmark-comparison .value-label.ghost { opacity: .25; } | |
| .d3-benchmark-comparison .line-path { fill: none; stroke-width: 2; opacity: 0.85; } | |
| .d3-benchmark-comparison .line-path.baseline { stroke-dasharray: 6,4; opacity: 0.5; } | |
| .d3-benchmark-comparison .line-path.baseline.ghost { opacity: .1; } | |
| .d3-benchmark-comparison .line-path.ghost { opacity: .15; } | |
| .d3-benchmark-comparison .line-dot.baseline { opacity: 0.5; } | |
| .d3-benchmark-comparison .line-dot.baseline.ghost { opacity: .1; } | |
| .d3-benchmark-comparison .line-dot.ghost { opacity: .15; } | |
| .d3-benchmark-comparison .axes path { display: none; } | |
| .d3-benchmark-comparison .axes line { stroke: var(--axis-color); } | |
| .d3-benchmark-comparison .axes text { fill: var(--tick-color); } | |
| .d3-benchmark-comparison .grid line { stroke: var(--grid-color); } | |
| .d3-benchmark-comparison .hover-line { | |
| stroke: var(--text-color); | |
| stroke-opacity: 0.25; | |
| stroke-width: 1; | |
| pointer-events: none; | |
| } | |
| .d3-benchmark-comparison .d3-tooltip { | |
| position: absolute; | |
| top: 0px; | |
| left: 0px; | |
| transform: translate(-9999px, -9999px); | |
| pointer-events: none; | |
| padding: 8px 10px; | |
| border-radius: 8px; | |
| font-size: 12px; | |
| line-height: 1.35; | |
| border: 1px solid var(--border-color); | |
| background: var(--surface-bg); | |
| color: var(--text-color); | |
| box-shadow: 0 4px 24px rgba(0,0,0,.18); | |
| opacity: 0; | |
| transition: opacity .12s ease; | |
| text-align: left; | |
| z-index: 10; | |
| } | |
| .d3-benchmark-comparison .d3-tooltip .tip-dot { | |
| display: inline-block; | |
| width: 10px; | |
| height: 10px; | |
| border-radius: 3px; | |
| border: 1px solid var(--border-color); | |
| margin-right: 6px; | |
| vertical-align: middle; | |
| } | |
| </style> | |
| <script> | |
| (() => { | |
/**
 * Invokes `cb` once a usable D3 build (one exposing `d3.select`) is on `window`.
 *
 * When D3 is already present the callback runs synchronously and its return
 * value is passed through. Otherwise a shared <script id="d3-cdn-script"> tag is
 * reused or injected into <head>, and `cb` runs from that tag's `load` event.
 *
 * @param {Function} cb - Called with no arguments once D3 is usable.
 */
const ensureD3 = (cb) => {
  const d3IsUsable = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (d3IsUsable()) return cb();

  const SCRIPT_ID = 'd3-cdn-script';
  let loader = document.getElementById(SCRIPT_ID);
  if (!loader) {
    loader = document.createElement('script');
    loader.id = SCRIPT_ID;
    loader.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(loader);
  }

  const runWhenUsable = () => {
    if (d3IsUsable()) cb();
  };
  loader.addEventListener('load', runWhenUsable, { once: true });
  // Covers a `window.d3` that appeared between the first check and here.
  if (window.d3) runWhenUsable();
};
| const bootstrap = () => { | |
| const scriptEl = document.currentScript; | |
| let container = scriptEl ? scriptEl.previousElementSibling : null; | |
| if (!(container && container.classList && container.classList.contains('d3-benchmark-comparison'))) { | |
| const cs = Array.from(document.querySelectorAll('.d3-benchmark-comparison')).filter(el => !(el.dataset && el.dataset.mounted === 'true')); | |
| container = cs[cs.length - 1] || null; | |
| } | |
| if (!container) return; | |
| if (container.dataset) { if (container.dataset.mounted === 'true') return; container.dataset.mounted = 'true'; } | |
| container.style.position = container.style.position || 'relative'; | |
// ─── READ CONFIG ───
// Walk up from the container to the nearest ancestor carrying a data-config attribute.
let mountEl = container;
while (mountEl && !mountEl.getAttribute?.('data-config')) { mountEl = mountEl.parentElement; }
let cfg = {};
try {
  const raw = mountEl && mountEl.getAttribute ? mountEl.getAttribute('data-config') : null;
  // Only JSON objects are accepted; anything else keeps the defaults.
  if (raw && raw.trim().startsWith('{')) cfg = JSON.parse(raw) || {};
} catch (err) {
  // Best effort: a malformed config falls back to defaults, but say so.
  console.warn('d3-benchmark-comparison: ignoring invalid data-config', err);
}
// Configurable settings with defaults
// ─── SETUP SUPPORT ───
// An empty or non-object `setups` is treated as "no setups" so that single-setup
// mode is used instead of dereferencing a missing first setup.
const hasSetups = Boolean(cfg.setups) && typeof cfg.setups === 'object' && Object.keys(cfg.setups).length > 0;
const SETUPS = hasSetups ? cfg.setups : null;
const setupNames = SETUPS ? Object.keys(SETUPS) : [];
let currentSetup = SETUPS ? setupNames[0] : null;
let DATASET_NAMES = SETUPS ? { ...SETUPS[setupNames[0]]?.datasetNames } : (cfg.datasetNames || {});
const AVG_SETUP_KEY = 'Average (all setups)';
let avgDatasetNames = {};
let parsedData = [];
const RUN_COL = cfg.runColumn || 'runname';
const STEP_COL = cfg.stepColumn || 'steps';
const TOKENS_PER_STEP = cfg.tokensPerStep || 2.1e6;
const defaultMetric = cfg.defaultMetric || 'agg_score_macro';
// Only 'bar' and 'line' exist; anything else falls back to 'bar' so the rendered
// view always agrees with the option shown in the View dropdown.
const defaultView = cfg.defaultView === 'line' ? 'line' : 'bar';
// Stable baseline colors (keyed by display name), merged with per-chart overrides
const PINNED_COLORS = Object.assign({ 'DCLM': '#333', 'FineWeb-Edu (HQ)': '#86a1a9' }, cfg.pinnedColors || {});
// Unique ID suffix for multiple instances on same page
const uid = Math.random().toString(36).slice(2, 8);
// Baseline datasets (raw keys): dashed lines, striped bars, reduced opacity
const BASELINES = new Set(cfg.baselines || ['dclm', 'fw_edu_hq']);
/** True when the raw run key is configured as a baseline dataset. */
function isBaseline(raw) {
  return BASELINES.has(raw);
}

/** Builds the per-instance SVG pattern id used for a baseline's striped fill. */
function stripePatternId(raw) {
  const safeKey = raw.replace(/[^a-zA-Z0-9]/g, '_');
  return `stripe-${uid}-${safeKey}`;
}

/**
 * Fill for a bar datum: baselines reference their stripe pattern, every other
 * dataset uses its mapped color, falling back to the theme's primary color.
 */
function barFill(d) {
  const key = d.rawName;
  if (!isBaseline(key)) {
    return colorMap[key] || 'var(--primary-color)';
  }
  return `url(#${stripePatternId(key)})`;
}
// Human-readable labels for known metric columns. Columns without an entry are
// shown under their raw column name (see metricName).
const METRIC_NAMES = {
  // Aggregate scores
  agg_score_macro: 'Aggregate Score (Macro)',
  agg_score_micro: 'Aggregate Score (Micro)',
  agg_score_RC: 'Reading Comprehension',
  agg_score_GK: 'General Knowledge',
  agg_score_NLU: 'Natural Language Understanding',
  agg_score_MATH: 'Math',
  agg_score_TABLE: 'Table Understanding',
  agg_score_RES: 'Reasoning',
  // Individual benchmarks
  'lighteval|arc_cf:easy|3/prob_norm_token': 'ARC-Easy',
  'lighteval|drop|3/prob_norm_token': 'DROP',
  'lighteval|gsm8k|3/prob_norm_token': 'GSM8K',
  'lighteval|hellaswag_cf|3/prob_norm_token': 'HellaSwag',
  'lighteval|openbookqa_cf|3/prob_norm_token': 'OpenBookQA',
  'lighteval|piqa_cf|3/prob_norm_token': 'PIQA',
  'lighteval|squad_v2|3/prob_norm_token': 'SQuAD v2',
  'lighteval|treb_qa|3/prob_norm_token': 'TriviaQA',
  'lighteval|wikitablequestions|3/prob_norm_token': 'WikiTableQuestions',
  'lighteval|winogrande_cf|3/prob_norm_token': 'Winogrande',
  'lighteval|xcsqa_cf|3/prob_norm_token': 'XCSQA',
  'lighteval|mmlu_redux_cf:_average|3/prob_norm_token': 'MMLU Redux',
};
// Tooltip: reuse one already inside the container, otherwise create it.
let tip = container.querySelector('.d3-tooltip');
let tipInner;
if (tip) {
  // An existing tooltip without an inner wrapper receives the content directly.
  tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
} else {
  tip = document.createElement('div');
  tip.className = 'd3-tooltip';
  tipInner = document.createElement('div');
  tipInner.className = 'd3-tooltip__inner';
  tip.appendChild(tipInner);
  container.appendChild(tip);
}

// SVG scaffold: gRoot holds the chart content, defs holds the stripe patterns.
const svg = d3.select(container)
  .append('svg')
  .attr('width', '100%')
  .style('display', 'block');
const gRoot = svg.append('g');
const defs = svg.append('defs');

// Mutable chart state shared by the functions below.
let allData = [];                  // rows currently displayed (after filterData)
let metricKeys = [];               // auto-detected from CSV columns
let currentMetric = defaultMetric; // metric column being plotted
let currentView = defaultView;     // 'bar' or 'line'
let colorMap = {};                 // raw run key -> color
let highlight = null;              // display name being hovered, or null
// ─── HELPERS ───
/** Display label for a raw run key; the raw key itself when none is configured. */
function displayName(raw) {
  const label = DATASET_NAMES[raw];
  return label || raw;
}

/** Display label for a metric column; the column name itself when unknown. */
function metricName(key) {
  const label = METRIC_NAMES[key];
  return label || key;
}

/** Converts a training step count into a token count. */
function stepsToTokens(step) {
  return TOKENS_PER_STEP * step;
}

/** Formats a token count as e.g. "2.10B", "4.2M" or "1,234". */
function formatTokens(tokens) {
  const BILLION = 1e9;
  const MILLION = 1e6;
  if (tokens >= BILLION) {
    return `${d3.format('.2f')(tokens / BILLION)}B`;
  }
  if (tokens >= MILLION) {
    return `${d3.format('.1f')(tokens / MILLION)}M`;
  }
  return d3.format(',')(tokens);
}

/** Formats a step count, abbreviating thousands as "K" (rounded to whole K). */
function formatStep(step) {
  if (step < 1000) return String(step);
  return `${d3.format('.0f')(step / 1000)}K`;
}

/** Compact axis label: "<tokens> (<steps>)". */
function stepLabelShort(step) {
  const tokens = formatTokens(stepsToTokens(step));
  return `${tokens} (${formatStep(step)})`;
}

/** Verbose tooltip label: "<tokens> Tokens (<steps> Steps)". */
function stepLabelLong(step) {
  const tokens = formatTokens(stepsToTokens(step));
  return `${tokens} Tokens (${formatStep(step)} Steps)`;
}
/**
 * Returns up to `n` categorical colors.
 *
 * Prefers the host page's `window.ColorPalettes.getColors('categorical', n)`
 * when available; otherwise uses d3.schemeTableau10, or a hard-coded copy of
 * that scheme, truncated to `n` entries.
 */
function getCategoricalColors(n) {
  try {
    const palettes = window.ColorPalettes;
    if (palettes && typeof palettes.getColors === 'function') {
      return palettes.getColors('categorical', n);
    }
  } catch (_) {
    // The host palette is optional; any failure falls through to the defaults.
  }
  const fallback = d3.schemeTableau10 || [
    '#4e79a7', '#f28e2b', '#e15759', '#76b7b2', '#59a14f',
    '#edc948', '#b07aa1', '#ff9da7', '#9c755f', '#bab0ac',
  ];
  return fallback.slice(0, n);
}
/**
 * Populates `colorMap` (raw run key -> color) for the runs in `allData`.
 *
 * Does nothing when `colorMap` already has entries. Runs whose display name has
 * a pinned color get it; the remaining runs, in sorted raw-key order, take
 * colors from the categorical palette, cycling if the palette is shorter.
 */
function initColors() {
  if (Object.keys(colorMap).length > 0) return;

  const rawKeys = [...d3.group(allData, row => row[RUN_COL]).keys()];
  rawKeys.sort();

  // Pinned colors are keyed by display name, not by raw key.
  const needsPalette = [];
  for (const raw of rawKeys) {
    const pinned = PINNED_COLORS[displayName(raw)];
    if (pinned) {
      colorMap[raw] = pinned;
    } else {
      needsPalette.push(raw);
    }
  }

  const palette = getCategoricalColors(needsPalette.length);
  for (let i = 0; i < needsPalette.length; i += 1) {
    colorMap[needsPalette[i]] = palette[i % palette.length];
  }
}
// ─── SETUP HELPERS ───
/**
 * Recomputes `allData` from `parsedData`: keeps only rows whose run column is a
 * key of DATASET_NAMES, or every row when DATASET_NAMES is empty. The CSV column
 * list is carried over onto the result.
 */
function filterData() {
  const wanted = new Set(Object.keys(DATASET_NAMES));
  if (wanted.size === 0) {
    allData = parsedData;
  } else {
    allData = parsedData.filter(row => wanted.has(row[RUN_COL]));
  }
  allData.columns = parsedData.columns;
}
/**
 * Builds synthetic "average across setups" rows.
 *
 * For every display name that is configured in all setups, one row is produced
 * per step, under the synthetic run key `__avg__<sanitized display name>`. Each
 * remaining column holds the mean of that column over the matching raw runs that
 * have a row at that step (0 when none of them has a usable number).
 *
 * @param {Array<Object>} rawData - Parsed CSV rows; `rawData.columns` is used
 *   for the column list when present.
 * @returns {{data: Array<Object>, datasetNames: Object}} The synthetic rows and a
 *   map from synthetic run key to display name. Both are empty when fewer than
 *   two setups are configured.
 */
function computeAverageData(rawData) {
  if (!SETUPS || setupNames.length < 2) return { data: [], datasetNames: {} };

  // displayName -> [rawName1, rawName2, ...]. A Map keeps display names such as
  // "constructor" from colliding with Object.prototype members.
  const displayToRaws = new Map();
  for (const sName of setupNames) {
    const dn = (SETUPS[sName] && SETUPS[sName].datasetNames) || {};
    for (const [raw, display] of Object.entries(dn)) {
      if (!displayToRaws.has(display)) displayToRaws.set(display, []);
      displayToRaws.get(display).push(raw);
    }
  }
  // Only average display names that appear in ALL setups
  const fullDisplay = [...displayToRaws].filter(([, raws]) => raws.length >= setupNames.length);

  // Index rows by run + NUMERIC step. The lookup below uses numeric steps, so
  // indexing by the raw CSV string would miss rows written as e.g. "1000.0".
  const rowKey = (run, step) => `${run}|${Number(step)}`;
  const byRunStep = new Map();
  for (const row of rawData) byRunStep.set(rowKey(row[RUN_COL], row[STEP_COL]), row);

  const steps = Array.from(new Set(rawData.map(r => +r[STEP_COL])))
    .filter(Number.isFinite)
    .sort((a, b) => a - b);
  const cols = rawData.columns || Object.keys(rawData[0] || {});

  const result = [];
  const dnMap = {};
  for (const [display, raws] of fullDisplay) {
    const avgRaw = '__avg__' + display.replace(/[^a-zA-Z0-9]/g, '_');
    dnMap[avgRaw] = display;
    for (const step of steps) {
      const rows = raws.map(r => byRunStep.get(rowKey(r, step))).filter(Boolean);
      if (!rows.length) continue;
      const avgRow = { [RUN_COL]: avgRaw, [STEP_COL]: String(step) };
      for (const col of cols) {
        if (col === RUN_COL || col === STEP_COL) continue;
        // Blank cells are missing values, not zeros: leave them out of the mean.
        const vals = rows
          .map(r => r[col])
          .filter(v => v != null && String(v).trim() !== '')
          .map(Number)
          .filter(v => !Number.isNaN(v));
        avgRow[col] = vals.length ? vals.reduce((a, b) => a + b, 0) / vals.length : 0;
      }
      result.push(avgRow);
    }
  }
  return { data: result, datasetNames: dnMap };
}
/**
 * Switches the chart to another setup (or to the cross-setup average) and
 * redraws it.
 *
 * Rebuilds DATASET_NAMES for the chosen setup, re-adds configured baselines that
 * exist in the data but are not part of that setup, resets the color map, then
 * refilters, renders and rebuilds the legend.
 *
 * @param {string} name - A key of the configured setups, or AVG_SETUP_KEY.
 *   Unknown names are ignored.
 */
function switchSetup(name) {
  const isAverage = name === AVG_SETUP_KEY;
  // Ignore names that are neither the average entry nor a configured setup.
  if (!isAverage && !(SETUPS && SETUPS[name])) return;

  currentSetup = name;
  DATASET_NAMES = isAverage ? { ...avgDatasetNames } : { ...SETUPS[name].datasetNames };

  // Re-add baselines that may be shared across setups
  const shownDisplays = new Set(Object.values(DATASET_NAMES));
  const rawsInData = new Set(parsedData.map(r => r[RUN_COL]));
  const baselineNames = cfg.baselines || ['dclm', 'fw_edu_hq'];
  for (const bRaw of baselineNames) {
    if (!rawsInData.has(bRaw) || DATASET_NAMES[bRaw]) continue;
    // Find display name from any setup or use raw
    let bDisplay = bRaw;
    for (const sName of setupNames) {
      const names = (SETUPS[sName] && SETUPS[sName].datasetNames) || {};
      if (names[bRaw]) { bDisplay = names[bRaw]; break; }
    }
    // The charts key bars, series and legend items by display name. In Average
    // mode a baseline configured in every setup is already present as an averaged
    // series, so adding the raw run as well would produce two entries with the
    // same name.
    if (shownDisplays.has(bDisplay)) continue;
    DATASET_NAMES[bRaw] = bDisplay;
    shownDisplays.add(bDisplay);
  }

  colorMap = {};
  filterData();
  initColors();
  render();
  buildLegend();
}
/**
 * Shows the tooltip with the given HTML near container-relative point (x, y).
 * The tooltip sits to the right of the point unless it would overflow the
 * container, in which case it flips to the left. It is never placed above the
 * container's top edge.
 */
function showTip(html, x, y) {
  tipInner.innerHTML = html;

  const tooltipWidth = tip.offsetWidth || 180;
  const containerWidth = container.clientWidth || 800;
  const overflowsRight = x + tooltipWidth + 20 > containerWidth;
  const left = overflowsRight ? x - tooltipWidth - 12 : x + 12;
  const top = Math.max(0, y - 20);

  tip.style.transform = `translate(${left}px, ${top}px)`;
  tip.style.opacity = '1';
}

/** Hides the tooltip and parks it far off-screen. */
function hideTip() {
  tip.style.opacity = '0';
  tip.style.transform = 'translate(-9999px, -9999px)';
}
/**
 * Applies the current `highlight` (a display name, or null) to the chart marks
 * and legend items: while something is highlighted, everything with a different
 * name gets the `ghost` class; otherwise `ghost` is cleared everywhere.
 */
function updateHighlight() {
  // Falsy when nothing is highlighted, true for names other than the highlighted one.
  const isGhost = (name) => highlight && name !== highlight;

  const markSelectors = ['rect.bar', 'text.value-label', '.line-path', '.line-dot'];
  for (const selector of markSelectors) {
    gRoot.selectAll(selector).classed('ghost', (d) => isGhost(d.name));
  }

  for (const item of container.querySelectorAll('.legend .item')) {
    item.classList.toggle('ghost', isGhost(item.getAttribute('data-name')));
  }
}
// ─── AUTO-DETECT METRICS from CSV columns ───
/**
 * Determines which CSV columns are plottable metrics.
 *
 * Known aggregate columns come first, in a fixed order, whenever they are
 * present in `columns`. Every other column (excluding the run column, the step
 * column and `seed`) is included when the displayed rows hold at least one
 * non-blank value for it and all of its non-blank values parse as numbers.
 *
 * @param {string[]} columns - Column names of the loaded CSV.
 * @returns {string[]} Metric column names, aggregates first.
 */
function detectMetrics(columns) {
  const skip = new Set([RUN_COL, STEP_COL, 'seed']);
  // Ordered: aggregate first, then individual
  const aggOrder = ['agg_score_macro', 'agg_score_micro', 'agg_score_RC', 'agg_score_GK', 'agg_score_NLU', 'agg_score_MATH', 'agg_score_TABLE', 'agg_score_RES'];
  const agg = aggOrder.filter(k => columns.includes(k));
  const aggSet = new Set(agg);

  // Look at every displayed row rather than only the first one: the first row
  // may be absent (nothing matched the filter) or blank for this column.
  const isNumericColumn = (col) => {
    let sawValue = false;
    for (const row of allData) {
      const cell = row[col];
      if (cell == null || String(cell).trim() === '') continue;
      if (Number.isNaN(Number(cell))) return false;
      sawValue = true;
    }
    return sawValue;
  };

  const ind = columns.filter(k => !skip.has(k) && !aggSet.has(k) && isNumericColumn(k));
  return [...agg, ...ind];
}
// ─── BAR CHART ───
/**
 * Draws the "Final Score" view: one horizontal bar per run, showing the value of
 * `currentMetric` at that run's largest step, sorted from highest to lowest.
 *
 * Runs whose final value is missing or not a finite number are left out, since
 * they would otherwise produce NaN bar widths and an undefined sort order.
 * Baseline runs are filled with a striped pattern registered in `defs`.
 * Reads `allData`, `currentMetric` and `colorMap`; writes into `svg`, `gRoot`
 * and `defs`.
 */
function renderBar() {
  // Names come from config/CSV and are interpolated into tooltip HTML below.
  const escapeHtml = (s) => String(s).replace(/[&<>"']/g, (ch) => (
    { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch]
  ));

  const width = container.clientWidth || 800;
  const margin = { top: 12, right: 56, bottom: 32, left: 190 };

  // One datum per run: the metric value at the run's last step.
  const grouped = d3.group(allData, d => d[RUN_COL]);
  const finalData = [];
  for (const [raw, rows] of grouped) {
    const maxStep = d3.max(rows, r => +r[STEP_COL]);
    const row = rows.find(r => +r[STEP_COL] === maxStep);
    if (!row) continue;
    const cell = row[currentMetric];
    const value = (cell == null || cell === '') ? NaN : +cell;
    if (!Number.isFinite(value)) continue;
    finalData.push({ name: displayName(raw), rawName: raw, value });
  }
  finalData.sort((a, b) => b.value - a.value);

  const barHeight = 28, barGap = 8;
  const height = margin.top + margin.bottom + finalData.length * (barHeight + barGap);
  svg.attr('width', width).attr('height', height);
  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);
  const innerWidth = Math.max(0, width - margin.left - margin.right);
  const innerHeight = height - margin.top - margin.bottom;

  // Keep the domain non-degenerate when there is no data or the maximum is <= 0.
  const maxValue = d3.max(finalData, d => d.value);
  const x = d3.scaleLinear().domain([0, maxValue > 0 ? maxValue * 1.05 : 1]).range([0, innerWidth]);
  const y = d3.scaleBand().domain(finalData.map(d => d.name)).range([0, innerHeight]).padding(0.2);
  const barWidth = d => Math.max(0, x(d.value));

  // Grid
  gRoot.selectAll('.grid').data([0]).join('g').attr('class', 'grid').call(g => {
    g.selectAll('line').data(x.ticks(5)).join('line')
      .attr('x1', d => x(d)).attr('x2', d => x(d)).attr('y1', 0).attr('y2', innerHeight);
  });
  // X axis
  gRoot.selectAll('.axis-x').data([0]).join('g').attr('class', 'axes axis-x')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(x).ticks(5).tickFormat(d3.format('.3f')).tickSizeOuter(0))
    .call(g => {
      g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '11px');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });
  // Y axis
  gRoot.selectAll('.axis-y').data([0]).join('g').attr('class', 'axes axis-y')
    .call(d3.axisLeft(y).tickSizeOuter(0))
    .call(g => {
      g.selectAll('text').attr('fill', 'var(--text-color)').style('font-size', '12px').style('font-weight', '500');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });

  // Stripe patterns for baseline bars
  finalData.forEach(d => {
    if (!isBaseline(d.rawName)) return;
    const c = colorMap[d.rawName] || '#999';
    const pat = defs.append('pattern').attr('id', stripePatternId(d.rawName))
      .attr('width', 6).attr('height', 6).attr('patternUnits', 'userSpaceOnUse').attr('patternTransform', 'rotate(45)');
    pat.append('rect').attr('width', 6).attr('height', 6).attr('fill', c).attr('opacity', 0.35);
    pat.append('line').attr('x1', 0).attr('y1', 0).attr('x2', 0).attr('y2', 6).attr('stroke', c).attr('stroke-width', 2.5);
  });

  // Hover behaviour shared by entering and updating bars.
  const barTip = (ev, d) => {
    const [mx, my] = d3.pointer(ev, container);
    showTip(`<strong>${escapeHtml(d.name)}</strong><br/>${escapeHtml(metricName(currentMetric))}: <strong>${d.value.toFixed(4)}</strong>`, mx, my);
  };
  const bindHover = sel => sel
    .on('mouseenter', (ev, d) => { highlight = d.name; updateHighlight(); })
    .on('mousemove', barTip)
    .on('mouseleave', () => { hideTip(); highlight = null; updateHighlight(); });

  // Bars
  gRoot.selectAll('rect.bar').data(finalData, d => d.name).join(
    enter => enter.append('rect').attr('class', 'bar')
      .attr('x', 0).attr('y', d => y(d.name)).attr('height', y.bandwidth()).attr('rx', 3)
      .attr('fill', d => barFill(d))
      .attr('width', 0)
      .call(bindHover)
      .transition().duration(300).attr('width', barWidth),
    update => update
      .call(bindHover)
      .transition().duration(300)
      .attr('y', d => y(d.name)).attr('height', y.bandwidth())
      .attr('width', barWidth)
      .attr('fill', d => barFill(d)),
    exit => exit.transition().duration(200).attr('width', 0).remove()
  );

  // Value labels, placed just past the end of each bar
  gRoot.selectAll('text.value-label').data(finalData, d => d.name).join(
    enter => enter.append('text').attr('class', 'value-label')
      .attr('x', d => barWidth(d) + 5).attr('y', d => y(d.name) + y.bandwidth() / 2)
      .attr('dy', '0.35em').attr('fill', 'var(--text-color)').attr('font-size', 11)
      .text(d => d.value.toFixed(4)),
    update => update.transition().duration(300)
      .attr('x', d => barWidth(d) + 5).attr('y', d => y(d.name) + y.bandwidth() / 2)
      .text(d => d.value.toFixed(4)),
    exit => exit.remove()
  );
}
// ─── LINE CHART ───
/**
 * Draws the "Training Progression" view: one line (plus dots) per run showing
 * `currentMetric` over training steps, with a hover overlay that snaps to the
 * nearest step and lists every run's value at that step in the tooltip.
 *
 * Points whose step or value is missing or not a finite number are dropped, so
 * they cannot inject NaN coordinates into paths, dots or the y domain.
 * Baseline runs get the `baseline` class (dashed, dimmed via CSS).
 * Reads `allData`, `currentMetric` and `colorMap`; writes into `svg` and `gRoot`.
 */
function renderLine() {
  // Names and colors come from config/CSV and are interpolated into tooltip HTML.
  const escapeHtml = (s) => String(s).replace(/[&<>"']/g, (ch) => (
    { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch]
  ));
  const toNumber = (cell) => ((cell == null || cell === '') ? NaN : +cell);
  const seriesColor = (rawName) => colorMap[rawName] || 'var(--primary-color)';
  const markClass = (base) => (d) => base + (isBaseline(d.rawName) ? ' baseline' : '');

  const width = container.clientWidth || 800;
  const margin = { top: 16, right: 50, bottom: 48, left: 60 };
  const height = Math.max(300, Math.round(width / 2.5));
  svg.attr('width', width).attr('height', height);
  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);
  const innerWidth = Math.max(0, width - margin.left - margin.right);
  const innerHeight = height - margin.top - margin.bottom;

  // Build series
  const grouped = d3.group(allData, d => d[RUN_COL]);
  const series = [];
  for (const [raw, rows] of grouped) {
    const pts = rows
      .map(r => ({ step: +r[STEP_COL], value: toNumber(r[currentMetric]) }))
      .filter(p => Number.isFinite(p.step) && Number.isFinite(p.value))
      .sort((a, b) => a.step - b.step);
    series.push({ name: displayName(raw), rawName: raw, values: pts });
  }
  const allSteps = Array.from(new Set(allData.map(r => +r[STEP_COL])))
    .filter(Number.isFinite)
    .sort((a, b) => a - b);
  const allValues = series.flatMap(s => s.values.map(v => v.value));

  const x = d3.scaleLinear().domain(allSteps.length ? d3.extent(allSteps) : [0, 1]).range([0, innerWidth]);
  // Fall back to [0, 1] when no run has a usable value for this metric.
  const yMin = allValues.length ? d3.min(allValues) : 0;
  const yMax = allValues.length ? d3.max(allValues) : 1;
  const yPad = (yMax - yMin) * 0.08;
  const y = d3.scaleLinear().domain([yMin - yPad, yMax + yPad]).range([innerHeight, 0]).nice();

  // Grid
  gRoot.selectAll('.grid').data([0]).join('g').attr('class', 'grid').call(g => {
    g.selectAll('line').data(y.ticks(6)).join('line')
      .attr('x1', 0).attr('x2', innerWidth).attr('y1', d => y(d)).attr('y2', d => y(d));
  });
  // X axis
  gRoot.selectAll('.axis-x').data([0]).join('g').attr('class', 'axes axis-x')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(x).ticks(6).tickFormat(d => stepLabelShort(d)).tickSizeOuter(0))
    .call(g => {
      g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '10px');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });
  // Y axis
  gRoot.selectAll('.axis-y').data([0]).join('g').attr('class', 'axes axis-y')
    .call(d3.axisLeft(y).ticks(6).tickFormat(d3.format('.3f')).tickSizeOuter(0))
    .call(g => {
      g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '11px');
      g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    });
  // Axis labels
  gRoot.selectAll('.x-label').data([0]).join('text').attr('class', 'x-label')
    .attr('x', innerWidth / 2).attr('y', innerHeight + 38)
    .attr('text-anchor', 'middle').attr('fill', 'var(--text-color)').attr('font-size', 12)
    .text('Tokens (Steps)');
  gRoot.selectAll('.y-label').data([0]).join('text').attr('class', 'y-label')
    .attr('transform', 'rotate(-90)').attr('x', -innerHeight / 2).attr('y', -44)
    .attr('text-anchor', 'middle').attr('fill', 'var(--text-color)').attr('font-size', 12)
    .text(metricName(currentMetric));

  // Lines
  const line = d3.line().x(d => x(d.step)).y(d => y(d.value)).curve(d3.curveMonotoneX);
  gRoot.selectAll('.line-path').data(series, d => d.name).join(
    enter => enter.append('path').attr('class', markClass('line-path'))
      .attr('stroke', d => seriesColor(d.rawName))
      .attr('d', d => line(d.values)),
    update => update.attr('class', markClass('line-path'))
      .transition().duration(300)
      .attr('stroke', d => seriesColor(d.rawName))
      .attr('d', d => line(d.values)),
    exit => exit.remove()
  );

  // Dots
  const dotData = series.flatMap(s => s.values.map(v => ({ name: s.name, rawName: s.rawName, step: v.step, value: v.value })));
  gRoot.selectAll('.line-dot').data(dotData, d => d.name + '-' + d.step).join(
    enter => enter.append('circle').attr('class', markClass('line-dot'))
      .attr('cx', d => x(d.step)).attr('cy', d => y(d.value)).attr('r', 3)
      .attr('fill', d => seriesColor(d.rawName))
      .attr('stroke', 'var(--surface-bg)').attr('stroke-width', 1),
    update => update.attr('class', markClass('line-dot'))
      .transition().duration(300)
      .attr('cx', d => x(d.step)).attr('cy', d => y(d.value))
      .attr('fill', d => seriesColor(d.rawName)),
    exit => exit.remove()
  );

  // Hover overlay: a transparent rect that captures the pointer over the plot area
  gRoot.selectAll('.hover-line').data([0]).join('line').attr('class', 'hover-line')
    .attr('y1', 0).attr('y2', innerHeight).style('display', 'none');
  gRoot.selectAll('.hover-overlay').data([0]).join('rect').attr('class', 'hover-overlay')
    .attr('width', innerWidth).attr('height', innerHeight)
    .attr('fill', 'none').attr('pointer-events', 'all')
    .on('mousemove', (ev) => {
      if (!allSteps.length) return;
      const [mx] = d3.pointer(ev, gRoot.node());
      // Snap to the recorded step closest to the pointer.
      const target = x.invert(mx);
      const nearest = allSteps.reduce((best, s) => (Math.abs(s - target) < Math.abs(best - target) ? s : best), allSteps[0]);
      gRoot.select('.hover-line').attr('x1', x(nearest)).attr('x2', x(nearest)).style('display', null);
      const entries = series.map(s => {
        const pt = s.values.find(v => v.step === nearest);
        return pt ? { name: s.name, rawName: s.rawName, value: pt.value } : null;
      }).filter(Boolean).sort((a, b) => b.value - a.value);
      let html = `<div style="font-weight:700;margin-bottom:4px;">${stepLabelLong(nearest)}</div>`;
      entries.forEach(e => {
        html += `<div><span class="tip-dot" style="background:${escapeHtml(seriesColor(e.rawName))}"></span>${escapeHtml(e.name)}: <strong>${e.value.toFixed(4)}</strong></div>`;
      });
      const [cx, cy] = d3.pointer(ev, container);
      showTip(html, cx, cy);
    })
    .on('mouseleave', () => {
      gRoot.select('.hover-line').style('display', 'none');
      hideTip();
    });
}
// ─── RENDER ───
/**
 * Redraws the chart from scratch for the current view. Does nothing while no
 * rows are available; otherwise ensures colors are assigned, clears previous
 * chart content and stripe patterns, then draws the bar or the line view.
 */
function render() {
  if (allData.length === 0) return;

  initColors();
  gRoot.selectAll('*').remove();
  defs.selectAll('*').remove();

  const draw = currentView === 'bar' ? renderBar : renderLine;
  draw();
}
// ─── UI ───
/**
 * Builds the controls row (optional Setup select, View select, Metric select)
 * and the empty legend, and appends both to the container. The Metric select
 * is left without options here; it is filled by populateMetricSelect.
 */
function buildUI() {
  // A labelled <select> wrapped in a .control-group; ids carry the instance uid.
  const makeControl = (idPrefix, labelText) => {
    const group = document.createElement('div');
    group.className = 'control-group';

    const label = document.createElement('label');
    label.setAttribute('for', idPrefix + uid);
    label.textContent = labelText;

    const select = document.createElement('select');
    select.id = idPrefix + uid;

    group.appendChild(label);
    group.appendChild(select);
    return { group, select };
  };
  const addOption = (select, value, text, isSelected) => {
    const opt = document.createElement('option');
    opt.value = value;
    opt.textContent = text;
    if (isSelected) opt.selected = true;
    select.appendChild(opt);
  };

  const controls = document.createElement('div');
  controls.className = 'controls';

  // Setup selector (only shown when setups config is present)
  if (SETUPS && setupNames.length > 0) {
    const setup = makeControl('setup-', 'Setup');
    for (const name of setupNames) {
      addOption(setup.select, name, name, name === currentSetup);
    }
    // The average entry only makes sense with at least two setups.
    if (setupNames.length >= 2) {
      addOption(setup.select, AVG_SETUP_KEY, AVG_SETUP_KEY, false);
    }
    setup.select.addEventListener('change', () => { switchSetup(setup.select.value); });
    controls.appendChild(setup.group);
  }

  // View toggle
  const view = makeControl('view-', 'View');
  const viewChoices = [['bar', 'Final Score'], ['line', 'Training Progression']];
  for (const [val, text] of viewChoices) {
    addOption(view.select, val, text, val === currentView);
  }
  view.select.addEventListener('change', () => {
    currentView = view.select.value;
    render();
  });
  controls.appendChild(view.group);

  // Metric select (populated after data load)
  const metric = makeControl('metric-', 'Metric');
  controls.appendChild(metric.group);

  container.appendChild(controls);

  // Legend shell; items are filled in by buildLegend
  const legend = document.createElement('div');
  legend.className = 'legend';
  legend.innerHTML = '<div class="legend-title">Legend</div><div class="items"></div>';
  container.appendChild(legend);
}
/**
 * Fills this instance's Metric <select> from `metricKeys`, grouping options
 * into "Aggregate Scores" (keys starting with `agg_score`) and "Individual
 * Benchmarks", pre-selecting `currentMetric`, and wiring a change handler that
 * switches the metric and re-renders. Empty groups are not added.
 */
function populateMetricSelect() {
  const sel = container.querySelector('#metric-' + uid);
  if (!sel) return;
  sel.innerHTML = '';

  const makeGroup = (label) => {
    const group = document.createElement('optgroup');
    group.label = label;
    return group;
  };
  const aggGroup = makeGroup('Aggregate Scores');
  const indGroup = makeGroup('Individual Benchmarks');

  for (const key of metricKeys) {
    const opt = document.createElement('option');
    opt.value = key;
    opt.textContent = metricName(key);
    if (key === currentMetric) opt.selected = true;
    const target = key.startsWith('agg_score') ? aggGroup : indGroup;
    target.appendChild(opt);
  }

  for (const group of [aggGroup, indGroup]) {
    if (group.children.length) sel.appendChild(group);
  }

  sel.addEventListener('change', () => {
    currentMetric = sel.value;
    render();
  });
}
/**
 * Rebuilds the legend items for the runs in `allData`.
 *
 * Items are ordered by each run's value of `defaultMetric` at its largest step,
 * highest first. Each item shows a color swatch (with a stripe overlay for
 * baselines) and the display name; hovering an item highlights that run.
 */
function buildLegend() {
  const items = container.querySelector('.legend .items');
  if (!items) return;
  items.innerHTML = '';

  // Score each run by its final-step value of the default metric.
  const scored = [];
  for (const [raw, rows] of d3.group(allData, d => d[RUN_COL])) {
    const lastStep = d3.max(rows, r => +r[STEP_COL]);
    const lastRow = rows.find(r => +r[STEP_COL] === lastStep);
    scored.push({ raw, score: lastRow ? +lastRow[defaultMetric] : 0 });
  }
  scored.sort((a, b) => b.score - a.score);

  for (const { raw } of scored) {
    const name = displayName(raw);

    const el = document.createElement('span');
    el.className = 'item';
    el.setAttribute('data-name', name);

    const sw = document.createElement('span');
    sw.className = 'swatch';
    sw.style.background = colorMap[raw] || '#999';
    if (isBaseline(raw)) {
      sw.style.backgroundImage = 'repeating-linear-gradient(45deg, transparent, transparent 2px, rgba(255,255,255,0.4) 2px, rgba(255,255,255,0.4) 4px)';
    }

    const txt = document.createElement('span');
    txt.textContent = name;

    el.appendChild(sw);
    el.appendChild(txt);
    items.appendChild(el);

    el.addEventListener('mouseenter', () => { highlight = name; updateHighlight(); });
    el.addEventListener('mouseleave', () => { highlight = null; updateHighlight(); });
  }
}
// Controls and legend shell exist before any data arrives.
buildUI();

// ─── DATA LOADING ───
// Returns the body of the first path that responds OK; paths that fail or throw
// are skipped. Rejects with 'CSV not found' when none succeeds.
const fetchFirstAvailable = async (paths) => {
  for (const candidate of paths) {
    try {
      const response = await fetch(candidate, { cache: 'no-cache' });
      if (response.ok) return await response.text();
    } catch (_) {
      // Try the next candidate.
    }
  }
  throw new Error('CSV not found');
};

// Nearest ancestor (or the container itself) carrying a data-datafiles attribute.
let dataMountEl = container;
while (dataMountEl && !dataMountEl.getAttribute?.('data-datafiles')) {
  dataMountEl = dataMountEl.parentElement;
}

// data-datafiles is either a JSON array of paths or a single path string.
let providedData = null;
try {
  const attr = dataMountEl && dataMountEl.getAttribute ? dataMountEl.getAttribute('data-datafiles') : null;
  if (attr && attr.trim()) {
    const trimmed = attr.trim();
    providedData = trimmed.startsWith('[') ? JSON.parse(attr) : trimmed;
  }
} catch (_) {
  // Unparseable attribute: fall back to the default CSV.
}

// Bare file names (no slash) are resolved under /data/.
const ensurePrefix = (p) => {
  const isBareName = typeof p === 'string' && p && !p.includes('/');
  return isBareName ? `/data/${p}` : p;
};

let csvPaths;
if (!providedData) {
  csvPaths = ['/data/benchmark-results.csv'];
} else if (Array.isArray(providedData)) {
  csvPaths = providedData.map(ensurePrefix);
} else {
  csvPaths = [ensurePrefix(providedData)];
}

// Fetch, parse and draw; any failure is reported inside the container.
const loadAndRender = async () => {
  try {
    const text = await fetchFirstAvailable(csvPaths);
    const parsed = d3.csvParse(text);
    parsedData = parsed;

    // Compute average data for setup mode
    if (SETUPS && setupNames.length >= 2) {
      const avg = computeAverageData(parsed);
      avgDatasetNames = avg.datasetNames;
      parsedData = parsed.concat(avg.data);
      parsedData.columns = parsed.columns;
    }

    // Filter to only datasets with configured display names
    filterData();
    metricKeys = detectMetrics(allData.columns);

    // Ensure defaultMetric is valid; fall back to first available
    if (!metricKeys.includes(currentMetric)) currentMetric = metricKeys[0];

    populateMetricSelect();
    render();
    buildLegend();

    // Redraw whenever the available width changes.
    if (window.ResizeObserver) {
      new ResizeObserver(() => render()).observe(container);
    } else {
      window.addEventListener('resize', () => render());
    }
  } catch (e) {
    const pre = document.createElement('pre');
    pre.textContent = 'Data load error: ' + (e && e.message ? e.message : e);
    pre.style.color = 'var(--danger, #b00020)';
    pre.style.fontSize = '12px';
    container.appendChild(pre);
  }
};
loadAndRender();
| }; | |
// Start once the DOM has been parsed: immediately if parsing is already done,
// otherwise on DOMContentLoaded.
const start = () => ensureD3(bootstrap);
if (document.readyState !== 'loading') {
  start();
} else {
  document.addEventListener('DOMContentLoaded', start, { once: true });
}
| })(); | |
| </script> | |