| <div class="d3-benchmark"></div> |
| <style> |
/* Root: positioning context for the absolutely positioned tooltip. */
.d3-benchmark {
  position: relative;
}

/* Optional control row (label + select) shown above the chart. */
.d3-benchmark .controls {
  display: flex;
  gap: 12px;
  align-items: center;
  margin-bottom: 10px;
}

.d3-benchmark .controls label {
  color: var(--muted-color);
  font-size: 12px;
}

/* Native select with the platform arrow replaced by an inline SVG chevron. */
.d3-benchmark .controls select {
  -webkit-appearance: none;
  -moz-appearance: none;
  appearance: none;
  padding: 6px 28px 6px 10px;
  border: 1px solid var(--border-color);
  border-radius: 8px;
  color: var(--text-color);
  font-size: 13px;
  line-height: 1.2;
  background-color: var(--surface-bg);
  background-image: url("data:image/svg+xml,%3Csvg width='12' height='8' viewBox='0 0 12 8' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M1.41 1.59L6 6.17l4.59-4.58L12 3 6 9 0 3z' fill='%23999'/%3E%3C/svg%3E");
  background-position: right 8px center;
  background-repeat: no-repeat;
}

.d3-benchmark .controls select:focus-visible {
  outline: 2px solid var(--primary-color);
  outline-offset: 2px;
}

/* Legend: title stacked above a wrapping row of swatch + label items. */
.d3-benchmark .legend {
  display: flex;
  flex-direction: column;
  gap: 6px;
  align-items: flex-start;
  margin: 8px 0 0;
}

.d3-benchmark .legend .legend-title {
  color: var(--text-color);
  font-size: 12px;
  font-weight: 700;
}

.d3-benchmark .legend .items {
  display: flex;
  flex-wrap: wrap;
  gap: 8px 14px;
}

.d3-benchmark .legend .item {
  display: inline-flex;
  gap: 8px;
  align-items: center;
  color: var(--muted-color);
  font-size: 12px;
  cursor: pointer;
}

.d3-benchmark .legend .swatch {
  width: 14px;
  height: 14px;
  border: 1px solid var(--border-color);
  border-radius: 3px;
}

/* Applied by the script to every bar, label and legend item that is not highlighted. */
.d3-benchmark .ghost {
  opacity: 0.25;
}

/* Tooltip: parked far off-screen and transparent until the script moves it. */
.d3-benchmark .d3-tooltip {
  position: absolute;
  top: 0;
  left: 0;
  transform: translate(-9999px, -9999px);
  padding: 8px 10px;
  border: 1px solid var(--border-color);
  border-radius: 8px;
  color: var(--text-color);
  font-size: 12px;
  line-height: 1.35;
  text-align: left;
  background: var(--surface-bg);
  box-shadow: 0 4px 24px rgba(0, 0, 0, 0.18);
  opacity: 0;
  transition: opacity 0.12s ease;
  pointer-events: none;
}

/* Card that frames the SVG. */
.d3-benchmark .chart-card {
  padding: 8px;
  border: 1px solid var(--border-color);
  border-radius: 10px;
  background: var(--surface-bg);
}
| </style> |
| <script> |
| (() => { |
/**
 * Invokes `cb` once a usable D3 build is present on `window`.
 *
 * If D3 is already loaded the callback runs synchronously. Otherwise a single
 * shared `<script id="d3-cdn-script">` tag is created (or reused when another
 * embed already injected it) and `cb` runs when that script fires `load`.
 *
 * @param {Function} cb - Called with no arguments once `window.d3.select` exists.
 * @returns {*} The callback's return value when D3 was already available,
 *   otherwise `undefined`.
 */
const ensureD3 = (cb) => {
  const hasD3 = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (hasD3()) return cb();

  let s = document.getElementById('d3-cdn-script');
  if (!s) {
    s = document.createElement('script');
    s.id = 'd3-cdn-script';
    s.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(s);
  }

  s.addEventListener('load', () => { if (hasD3()) cb(); }, { once: true });
  // Without D3 the chart can never mount; report the failure instead of
  // leaving an empty container with no explanation.
  s.addEventListener('error', () => {
    console.error(`d3-benchmark: failed to load D3 from ${s.src}`);
  }, { once: true });
};
| |
| const bootstrap = () => { |
| const scriptEl = document.currentScript; |
| let container = scriptEl ? scriptEl.previousElementSibling : null; |
| if (!(container && container.classList && container.classList.contains('d3-benchmark'))){ |
| const cs = Array.from(document.querySelectorAll('.d3-benchmark')).filter(el => !(el.dataset && el.dataset.mounted==='true')); |
| container = cs[cs.length-1] || null; |
| } |
| if (!container) return; |
| if (container.dataset) { if (container.dataset.mounted==='true') return; container.dataset.mounted='true'; } |
| |
| container.style.position = container.style.position || 'relative'; |
| let tip = container.querySelector('.d3-tooltip'); let tipInner; |
| if (!tip) { |
| tip = document.createElement('div'); tip.className = 'd3-tooltip'; |
| tipInner = document.createElement('div'); tipInner.className = 'd3-tooltip__inner'; tip.appendChild(tipInner); |
| container.appendChild(tip); |
| } else { tipInner = tip.querySelector('.d3-tooltip__inner') || tip; } |
| |
| |
| const header = document.createElement('div'); header.className = 'chart-header'; |
| |
/**
 * Builds or refreshes the legend inside `header`.
 *
 * The `.legend`, `.legend-title` and `.items` wrappers are created on first
 * use and reused afterwards; the item list itself is rebuilt on every call.
 * Hovering an item sets `state.highlightModel` and refreshes the highlight.
 *
 * @param {string[]} series - Series names, in display order.
 * @param {Function} colorBySeries - Maps a series name to a CSS colour.
 */
const makeLegend = (series, colorBySeries) => {
  // Returns the existing child with the given class, or creates and appends it.
  const ensureChild = (parent, className, init) => {
    const existing = parent.querySelector(`.${className}`);
    if (existing) return existing;
    const created = document.createElement('div');
    created.className = className;
    if (init) init(created);
    parent.appendChild(created);
    return created;
  };

  const legend = ensureChild(header, 'legend');
  ensureChild(legend, 'legend-title', (el) => { el.textContent = 'Legend'; });
  const list = ensureChild(legend, 'items');

  list.innerHTML = '';
  for (const name of series) {
    const swatch = document.createElement('span');
    swatch.className = 'swatch';
    swatch.style.background = colorBySeries(name);

    const label = document.createElement('span');
    label.textContent = name;

    const entry = document.createElement('div');
    entry.className = 'item';
    entry.appendChild(swatch);
    entry.appendChild(label);
    entry.addEventListener('mouseenter', () => {
      state.highlightModel = name;
      updateHighlight();
    });
    entry.addEventListener('mouseleave', () => {
      state.highlightModel = null;
      updateHighlight();
    });
    list.appendChild(entry);
  }
};
| |
| |
// DOM skeleton: the chart card first, the legend header underneath it.
const card = document.createElement('div');
card.className = 'chart-card';
container.appendChild(card);
container.appendChild(header);

// The SVG lives inside the card; all chart content is drawn into `gRoot`.
const svg = d3.select(card)
  .append('svg')
  .attr('width', '100%')
  .style('display', 'block');
const gRoot = svg.append('g');
| |
| |
| |
| |
/**
 * Probes `paths` in order and returns the parsed payload of the first one
 * that yields usable data.
 *
 * Each response body is tried as JSON first and, when D3's `csvParse` is
 * available, as CSV second. Missing files and network failures are expected
 * while probing and simply move on to the next candidate.
 *
 * @param {string[]} paths - Candidate URLs, most preferred first.
 * @returns {Promise<*>} The parsed JSON value or the `d3.csvParse` result of
 *   the first usable response, or `null` when no candidate produced data.
 */
const fetchFirstAvailable = async (paths) => {
  for (const p of paths) {
    try {
      const res = await fetch(p, { cache: 'no-cache' });
      if (!res.ok) continue;
      const text = await res.text();

      try { return JSON.parse(text); } catch (e) { /* not JSON; try CSV below */ }

      // Servers with an HTML fallback answer unknown URLs with a 200 page.
      // csvParse would happily turn that markup into bogus rows, so treat
      // anything that looks like markup as "not found".
      if (/^\s*</.test(text)) continue;

      if (window.d3 && typeof window.d3.csvParse === 'function') {
        return window.d3.csvParse(text);
      }
    } catch (e) {
      // Deliberately best-effort: a failed request only means this candidate
      // is unavailable, so fall through to the next path.
    }
  }
  return null;
};
| |
| |
// Built-in fallback scores, used until (and unless) an external data file
// loads. Expanded into one { benchmark, model, score } row per cell,
// benchmark by benchmark, with models in the column order listed below.
const inlineData = (() => {
  const modelColumns = ['GPT-4o', 'Llama 3 70B', 'Mixtral 8x7B', 'Gemma 2 27B'];
  const scoreTable = [
    ['MMLU', [88, 80, 73, 76]],
    ['GSM8K', [94, 83, 79, 81]],
    ['HellaSwag', [95, 89, 86, 87]],
    ['TruthfulQA', [64, 56, 51, 53]],
    ['ARC-C', [79, 72, 68, 70]],
  ];

  const rows = [];
  for (const [benchmark, scores] of scoreTable) {
    scores.forEach((score, column) => {
      rows.push({ benchmark, model: modelColumns[column], score });
    });
  }
  return rows;
})();

// Mutable chart state shared by the renderer, the legend and the data loader.
const state = {
  data: inlineData,
  colorsByModel: null,
  highlightModel: null,
};
| |
// Space reserved around the plot area for axes and labels.
const margin = { top: 12, right: 28, bottom: 24, left: 56 };

// Outer SVG size; recomputed by updateSize() on every render.
let width = 800;
let height = 360;

// x0 places one band per benchmark, x1 places the models inside a band,
// and y maps scores to vertical position.
const x0 = d3.scaleBand()
  .paddingInner(0.2)
  .paddingOuter(0.05);
const x1 = d3.scaleBand()
  .padding(0.12);
const y = d3.scaleLinear();

const xAxis = d3.axisBottom(x0)
  .tickSizeOuter(0);
const yAxis = d3.axisLeft(y)
  .ticks(6)
  .tickSizeOuter(0);

// Added on top of the y maximum before the domain is rounded.
const yTopPadding = 2;
| |
/**
 * Resolves the theme's primary colour.
 *
 * Order of preference: `window.ColorPalettes.getPrimary()` when that helper
 * exists and does not throw, then the `--primary-color` custom property on
 * the document root, then the hard-coded default `#6D4AFF`.
 *
 * @returns {string} A CSS colour value.
 */
function getPrimaryColor() {
  try {
    if (window.ColorPalettes && typeof window.ColorPalettes.getPrimary === 'function') {
      return window.ColorPalettes.getPrimary();
    }
  } catch (e) {
    // A broken palette helper must not break the chart; use the CSS variable.
  }
  // Custom property values can carry surrounding whitespace. Trim so that a
  // blank value falls back to the default instead of yielding an invalid colour.
  const cssValue = getComputedStyle(document.documentElement)
    .getPropertyValue('--primary-color')
    .trim();
  return cssValue || '#6D4AFF';
}
/**
 * Returns a palette for `n` categorical series.
 *
 * Uses `window.ColorPalettes.getColors('categorical', n)` when that helper
 * exists and returns a non-empty array; otherwise generates `n` HSL colours
 * with evenly spaced hues.
 *
 * @param {number} n - Number of colours wanted.
 * @returns {string[]} CSS colour strings (empty when `n` is 0 and no palette
 *   helper is present).
 */
function getCategoricalColors(n) {
  try {
    if (window.ColorPalettes && typeof window.ColorPalettes.getColors === 'function') {
      const palette = window.ColorPalettes.getColors('categorical', n);
      // Callers index into the result, so only accept a usable array.
      if (Array.isArray(palette) && palette.length > 0) return palette;
    }
  } catch (e) {
    // A broken palette helper must not break the chart; use generated hues.
  }

  return Array.from({ length: n }, (_, i) => {
    const hue = Math.round((360 / n) * i);
    return `hsl(${hue}, 60%, 55%)`;
  });
}
| |
/**
 * Builds a colour lookup for the given series names.
 *
 * Colours are taken from the categorical palette by position, wrapping
 * around when there are fewer colours than series.
 *
 * @param {string[]} models - Series names, in display order.
 * @returns {Function} `(model) => colour`; names without an assigned colour
 *   get the primary colour.
 */
function computeSeriesColors(models) {
  const palette = getCategoricalColors(models.length);
  const colorOf = new Map();
  models.forEach((name, position) => {
    colorOf.set(name, palette[position % palette.length]);
  });
  return (model) => colorOf.get(model) || getPrimaryColor();
}
| |
/**
 * Lists the distinct model names in `data`, in order of first appearance.
 *
 * Rows that are missing or have no `model` value are skipped, so a
 * malformed row cannot introduce a phantom "undefined" series.
 *
 * @param {Array<Object>} data - Rows with a `model` property.
 * @returns {Array} Unique model values.
 */
function getModels(data) {
  const seen = new Set();
  for (const row of data || []) {
    if (row && row.model != null) seen.add(row.model);
  }
  return Array.from(seen);
}
/**
 * Lists the distinct benchmark names in `data`, in order of first appearance.
 *
 * Rows that are missing or have no `benchmark` value are skipped, so a
 * malformed row cannot introduce a phantom "undefined" group on the x axis.
 *
 * @param {Array<Object>} data - Rows with a `benchmark` property.
 * @returns {Array} Unique benchmark values.
 */
function getBenchmarks(data) {
  const seen = new Set();
  for (const row of data || []) {
    if (row && row.benchmark != null) seen.add(row.benchmark);
  }
  return Array.from(seen);
}
| |
/**
 * Recomputes the SVG's outer size and applies it.
 *
 * The width is the content-box width of the SVG's parent element (its
 * `clientWidth` minus horizontal padding), so the SVG fits inside a padded
 * parent instead of overflowing it. When that cannot be measured, the
 * container's width is used, then 800. Height follows a 3.4:1 aspect ratio
 * with a 240px floor.
 *
 * @returns {{innerWidth: number, innerHeight: number}} Plot-area size after
 *   subtracting the margins.
 */
function updateSize() {
  const svgNode = svg.node();
  const host = svgNode ? svgNode.parentNode : null;

  let available = 0;
  if (host && host.clientWidth) {
    // clientWidth includes padding but not borders.
    const hostStyle = getComputedStyle(host);
    const padding = (parseFloat(hostStyle.paddingLeft) || 0) + (parseFloat(hostStyle.paddingRight) || 0);
    available = host.clientWidth - padding;
  }

  width = available > 0 ? available : (container.clientWidth || 800);
  height = Math.max(240, Math.round(width / 3.4));
  svg.attr('width', width).attr('height', height);
  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);
  return {
    innerWidth: width - margin.left - margin.right,
    innerHeight: height - margin.top - margin.bottom,
  };
}
| |
/**
 * Shows the tooltip with the given markup, offset 12px right of and below
 * the point (x, y).
 *
 * @param {string} html - Markup assigned to the tooltip via `innerHTML`.
 * @param {number} x - Horizontal position in px.
 * @param {number} y - Vertical position in px.
 */
function showTip(html, x, y) {
  const offset = 12;
  // Write into the inner wrapper when present, otherwise into the tooltip itself.
  const target = tip.querySelector('.d3-tooltip__inner') || tip;

  tip.style.transform = `translate(${x + offset}px, ${y + offset}px)`;
  tip.style.opacity = '1';
  target.innerHTML = html;
}
/**
 * Hides the tooltip: makes it transparent and parks it far off-screen.
 */
function hideTip() {
  const { style } = tip;
  style.opacity = '0';
  style.transform = 'translate(-9999px, -9999px)';
}
| |
/**
 * Applies the current `state.highlightModel` to the chart.
 *
 * While a model is highlighted, every bar, value label and legend item that
 * belongs to a different model gets the `ghost` class; with no highlight the
 * class is removed everywhere.
 */
function updateHighlight() {
  const model = state.highlightModel;
  const active = Boolean(model);
  const isGhost = (d) => active && d.model !== model;

  gRoot.selectAll('rect.bar').classed('ghost', isGhost);
  gRoot.selectAll('text.value').classed('ghost', isGhost);

  // A legend item's text is exactly the series name (the swatch is empty).
  // Compare it untrimmed so names with surrounding whitespace, as CSV data
  // often has, still match their own legend entry.
  container.querySelectorAll('.legend .item').forEach((el) => {
    el.classList.toggle('ghost', active && el.textContent !== String(model));
  });
}
| |
/**
 * Escapes a value for interpolation into the tooltip's HTML string.
 * Model and benchmark names can come from an external data file, so they
 * must not be able to inject markup.
 *
 * @param {*} value - Any value; it is stringified first.
 * @returns {string} The text with `& < > " '` replaced by numeric entities.
 */
function escapeHtml(value) {
  return String(value).replace(/[&<>"']/g, (ch) => `&#${ch.charCodeAt(0)};`);
}

/**
 * Draws or redraws the whole chart from `state.data` at the current size:
 * legend, both axes, horizontal grid lines, one group per benchmark with one
 * bar and one value label per model, and the rotated y-axis caption.
 * It is safe to call repeatedly; existing elements are updated in place.
 */
function render() {
  const { innerWidth, innerHeight } = updateSize();
  const models = getModels(state.data);
  const benchmarks = getBenchmarks(state.data);
  if (!state.colorsByModel) state.colorsByModel = computeSeriesColors(models);
  makeLegend(models, state.colorsByModel);

  x0.domain(benchmarks).range([0, innerWidth]);
  x1.domain(models).range([0, x0.bandwidth()]);

  // The y axis always spans a fixed 0-100 range plus a little head room.
  const yMaxRaw = 100;
  y.domain([0, yMaxRaw + yTopPadding]).range([innerHeight, 0]).nice();

  // Shared theming for both axes.
  const styleAxis = (g) => {
    g.selectAll('path, line').attr('stroke', 'var(--axis-color)');
    g.selectAll('text').attr('fill', 'var(--tick-color)').style('font-size', '12px');
  };

  gRoot
    .selectAll('.axis-x')
    .data([0])
    .join('g')
    .attr('class', 'axis-x')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(xAxis)
    .call(styleAxis);
  gRoot
    .selectAll('.axis-y')
    .data([0])
    .join('g')
    .attr('class', 'axis-y')
    .call(yAxis)
    .call(styleAxis);

  // Grid lines: a label-less left axis whose ticks span the plot width.
  // The top-most line is hidden.
  gRoot
    .selectAll('.grid-y')
    .data([0])
    .join('g')
    .attr('class', 'grid-y')
    .call(d3.axisLeft(y).ticks(6).tickSize(-innerWidth).tickFormat(''))
    .call((g) => g.select('.domain').remove())
    .call((g) => g.selectAll('.tick line').attr('stroke', 'var(--grid-color)').attr('stroke-opacity', 1))
    .call((g) => g.selectAll('.tick').filter((d, i, nodes) => i === nodes.length - 1).select('line').attr('stroke-opacity', 0));

  // One <g class="group"> per benchmark, keyed by name.
  const groups = gRoot.selectAll('.group').data(benchmarks, (d) => d);
  groups.exit().remove();
  const allGroups = groups.enter().append('g').attr('class', 'group')
    .merge(groups)
    .attr('transform', (d) => `translate(${x0(d)},0)`);

  // Pointer handlers read everything from the bound datum, so they only
  // need to be attached once, when a bar is created.
  const onEnter = (event, d) => { state.highlightModel = d.model; updateHighlight(); };
  const onMove = (event, d) => {
    const [mx, my] = d3.pointer(event, container);
    showTip(`<strong>${escapeHtml(d.model)}</strong><br/>${escapeHtml(d.bench)}: <strong>${escapeHtml(d.score)}</strong>`, mx, my);
  };
  const onLeave = () => { hideTip(); state.highlightModel = null; updateHighlight(); };

  const nested = d3.group(state.data, (d) => d.benchmark);
  allGroups.each(function (bench) {
    // First row wins when a model appears more than once in a benchmark;
    // models without a row are drawn with a score of 0.
    const scoreByModel = new Map();
    for (const row of nested.get(bench) || []) {
      if (!scoreByModel.has(row.model)) scoreByModel.set(row.model, row.score);
    }
    const rows = models.map((m) => ({
      bench,
      model: m,
      score: scoreByModel.has(m) ? scoreByModel.get(m) : 0,
    }));

    const group = d3.select(this);

    group.selectAll('rect.bar').data(rows, (d) => d.model).join(
      (enter) => enter.append('rect')
        .attr('class', 'bar')
        .attr('x', (d) => x1(d.model))
        .attr('y', innerHeight)
        .attr('width', x1.bandwidth())
        .attr('height', 0)
        .attr('fill', (d) => state.colorsByModel(d.model))
        .on('mouseenter', onEnter)
        .on('mousemove', onMove)
        .on('mouseleave', onLeave)
        .transition().duration(160)
        .attr('y', (d) => y(d.score))
        .attr('height', (d) => Math.max(0, innerHeight - y(d.score))),
      (update) => update
        .transition().duration(160)
        .attr('x', (d) => x1(d.model))
        .attr('y', (d) => y(d.score))
        .attr('width', x1.bandwidth())
        .attr('height', (d) => Math.max(0, innerHeight - y(d.score)))
        .attr('fill', (d) => state.colorsByModel(d.model)),
      (exit) => exit.transition().duration(120).attr('y', innerHeight).attr('height', 0).remove()
    );

    group.selectAll('text.value').data(rows, (d) => d.model).join(
      (enter) => enter.append('text')
        .attr('class', 'value')
        .attr('x', (d) => x1(d.model) + x1.bandwidth() / 2)
        .attr('y', (d) => y(d.score) - 4)
        .attr('text-anchor', 'middle')
        .attr('fill', 'var(--text-color)')
        .attr('opacity', 0.9)
        .attr('font-size', 10)
        .text((d) => d.score),
      (update) => update
        .transition().duration(160)
        .attr('x', (d) => x1(d.model) + x1.bandwidth() / 2)
        .attr('y', (d) => y(d.score) - 4)
        .text((d) => d.score),
      (exit) => exit.remove()
    );
  });

  // Rotated caption for the y axis.
  gRoot.selectAll('.y-label').data([0]).join('text').attr('class', 'y-label')
    .attr('transform', `rotate(-90)`)
    .attr('x', -innerHeight / 2)
    .attr('y', -margin.left + 24)
    .attr('text-anchor', 'middle')
    .attr('fill', 'var(--text-color)')
    .attr('font-size', 12)
    .attr('font-weight', 700)
    .text('score');
}
| |
| |
// First paint, using whatever data `state` currently holds.
render();

// Redraw when the container is resized; fall back to window resize events
// where ResizeObserver is unavailable.
const rerender = () => render();
if (!window.ResizeObserver) {
  window.addEventListener('resize', rerender);
} else {
  const ro = new ResizeObserver(() => rerender());
  ro.observe(container);
}
| |
| |
// Optional upgrade: replace the inline data with an external file when one
// of the candidate paths serves usable rows. Failures keep the inline data.
(async () => {
  const loaded = await fetchFirstAvailable([
    '/data/llm_benchmarks.json',
    './assets/data/llm_benchmarks.json',
    '../assets/data/llm_benchmarks.json'
  ]);
  // Both a JSON array and a d3.csvParse result are arrays.
  if (!Array.isArray(loaded)) return;

  // Validate every row, not just the first: each needs a benchmark, a model
  // and a finite score. CSV scores arrive as strings and are converted.
  const rows = [];
  for (const r of loaded) {
    if (!r || r.benchmark == null || r.model == null) continue;
    const raw = r.score;
    const score = (typeof raw === 'string' && raw.trim() !== '') ? Number(raw) : raw;
    if (typeof score !== 'number' || !Number.isFinite(score)) continue;
    rows.push({ benchmark: r.benchmark, model: r.model, score });
  }
  if (!rows.length) return;

  state.data = rows;
  state.colorsByModel = null; // series may have changed; rebuild the colour map
  render();
})().catch((err) => {
  console.warn('d3-benchmark: could not apply external benchmark data; keeping current data.', err);
});
| }; |
| |
// Mount right away when the DOM is already parsed; otherwise wait for it.
const mount = () => ensureD3(bootstrap);
if (document.readyState !== 'loading') {
  mount();
} else {
  document.addEventListener('DOMContentLoaded', mount, { once: true });
}
| })(); |
| </script> |
|
|
|
|
|
|