Spaces:
Running
Running
| <div class="d3-overall-performance"></div> | |
| <style> | |
| .d3-overall-performance { | |
| width: 100%; | |
| margin: 10px 0; | |
| position: relative; | |
| font-family: system-ui, -apple-system, sans-serif; | |
| } | |
| .d3-overall-performance svg { | |
| display: block; | |
| width: 100%; | |
| height: auto; | |
| } | |
| .d3-overall-performance .axes path, | |
| .d3-overall-performance .axes line { | |
| stroke: var(--axis-color, var(--text-color)); | |
| } | |
| .d3-overall-performance .axes text { | |
| fill: var(--tick-color, var(--muted-color)); | |
| font-size: 11px; | |
| } | |
| .d3-overall-performance .grid line { | |
| stroke: var(--grid-color, rgba(0,0,0,.08)); | |
| } | |
| .d3-overall-performance .axes text.axis-label { | |
| font-size: 15px; | |
| font-weight: 500; | |
| fill: var(--text-color); | |
| } | |
| .d3-overall-performance .x-axis text { | |
| transform: translateY(4px); | |
| } | |
| .d3-overall-performance .point { | |
| cursor: pointer; | |
| transition: opacity 0.15s ease; | |
| } | |
| .d3-overall-performance .point:hover { | |
| opacity: 0.8; | |
| } | |
| .d3-overall-performance .point-label { | |
| font-size: 11px; | |
| fill: var(--text-color); | |
| pointer-events: none; | |
| } | |
| .d3-overall-performance .d3-tooltip { | |
| position: absolute; | |
| top: 0; | |
| left: 0; | |
| transform: translate(-9999px, -9999px); | |
| pointer-events: none; | |
| padding: 10px 12px; | |
| border-radius: 8px; | |
| font-size: 12px; | |
| line-height: 1.4; | |
| border: 1px solid var(--border-color); | |
| background: var(--surface-bg); | |
| color: var(--text-color); | |
| box-shadow: 0 4px 24px rgba(0,0,0,.18); | |
| opacity: 0; | |
| transition: opacity 0.12s ease; | |
| z-index: 10; | |
| } | |
| .d3-overall-performance .d3-tooltip .model-name { | |
| font-weight: 600; | |
| margin-bottom: 4px; | |
| } | |
| .d3-overall-performance .d3-tooltip .metric { | |
| display: flex; | |
| justify-content: space-between; | |
| gap: 16px; | |
| } | |
| .d3-overall-performance .d3-tooltip .metric-label { | |
| color: var(--muted-color); | |
| } | |
| .d3-overall-performance .d3-tooltip .metric-value { | |
| font-weight: 500; | |
| } | |
| </style> | |
| <script> | |
| (() => { | |
/**
 * Invoke `cb` once the global D3 library is usable, injecting the CDN
 * <script> tag the first time it is needed.
 *
 * The script tag carries a fixed id, so repeated calls reuse the tag that is
 * already in the document instead of adding another one.
 *
 * @param {Function} cb - Called with no arguments once `window.d3.select` exists.
 * @returns {*} The return value of `cb` when D3 was already available,
 *   otherwise `undefined` (the callback then runs later, from the load event).
 */
const ensureD3 = (cb) => {
  const isD3Ready = () => Boolean(window.d3) && typeof window.d3.select === 'function';

  // Fast path: D3 is already on the page, run synchronously.
  if (isD3Ready()) return cb();

  let s = document.getElementById('d3-cdn-script');
  if (!s) {
    s = document.createElement('script');
    s.id = 'd3-cdn-script';
    s.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(s);
  }

  // Re-check readiness inside the handler: "load" only says the file was
  // fetched and executed, not that it defined a usable `d3` global.
  s.addEventListener('load', () => {
    if (isD3Ready()) cb();
  }, { once: true });

  // Without this, a blocked or failed CDN request would leave the chart
  // container empty with no hint as to why.
  s.addEventListener('error', () => {
    console.error(`Failed to load D3 from ${s.src}`);
  }, { once: true });
};
| const bootstrap = () => { | |
| const scriptEl = document.currentScript; | |
| let container = scriptEl ? scriptEl.previousElementSibling : null; | |
| if (!(container && container.classList && container.classList.contains('d3-overall-performance'))) { | |
| const candidates = Array.from(document.querySelectorAll('.d3-overall-performance')) | |
| .filter((el) => !(el.dataset && el.dataset.mounted === 'true')); | |
| container = candidates[candidates.length - 1] || null; | |
| } | |
| if (!container) return; | |
| if (container.dataset) { | |
| if (container.dataset.mounted === 'true') return; | |
| container.dataset.mounted = 'true'; | |
| } | |
| // Tooltip setup | |
| container.style.position = container.style.position || 'relative'; | |
| const tip = document.createElement('div'); | |
| tip.className = 'd3-tooltip'; | |
| container.appendChild(tip); | |
| // SVG setup | |
| const svg = d3.select(container).append('svg'); | |
| const gRoot = svg.append('g'); | |
| // Chart groups | |
| const gGrid = gRoot.append('g').attr('class', 'grid'); | |
| const gAxes = gRoot.append('g').attr('class', 'axes'); | |
| const gPoints = gRoot.append('g').attr('class', 'points'); | |
| const gLabels = gRoot.append('g').attr('class', 'labels'); | |
| // State | |
| let data = null; | |
| let width = 800; | |
| let height = 450; | |
| const margin = { top: 20, right: 120, bottom: 56, left: 72 }; | |
| // Scales | |
| const xScale = d3.scaleLinear(); | |
| const yScale = d3.scaleLinear(); | |
| // Data loading | |
| const JSON_PATHS = [ | |
| '/data/overall_performance.json', | |
| './assets/figures/overall_performance.json', | |
| '../assets/figures/overall_performance.json', | |
| '../../assets/figures/overall_performance.json' | |
| ]; | |
/**
 * Fetch and parse the first JSON document that loads successfully.
 *
 * Paths are tried strictly in order. A path is skipped when the request
 * throws, the response is not OK, or the body is not valid JSON.
 *
 * @param {Iterable<string>} paths - Candidate URLs, most preferred first.
 * @returns {Promise<*>} The parsed JSON of the first usable path.
 * @throws {Error} 'Data not found' when no path could be used. The error has
 *   an `attempts` array with one "<path>: <reason>" string per failed path,
 *   so the cause stays visible in the console.
 */
const fetchFirstAvailable = async (paths) => {
  const attempts = [];
  for (const p of paths) {
    try {
      const r = await fetch(p, { cache: 'no-cache' });
      if (r.ok) return await r.json();
      attempts.push(`${p}: HTTP ${r.status}`);
    } catch (err) {
      // Best effort by design: remember why this path failed, try the next.
      attempts.push(`${p}: ${err && err.message ? err.message : String(err)}`);
    }
  }
  const error = new Error('Data not found');
  error.attempts = attempts;
  throw error;
};
/**
 * Recompute the chart dimensions from the container and apply them to the SVG.
 *
 * Updates the shared `width` / `height` state, sizes the <svg> (including its
 * viewBox) and offsets the root group by the top/left margins.
 *
 * @returns {{innerWidth: number, innerHeight: number}} Size of the plotting
 *   area inside the margins; never negative.
 */
function updateSize() {
  // clientWidth is 0 while the container is not laid out; fall back to 800.
  width = container.clientWidth || 800;
  // Keep a 1.3:1 aspect ratio, but never go below 300px tall.
  height = Math.max(300, Math.round(width / 1.3));
  svg.attr('width', width).attr('height', height).attr('viewBox', `0 0 ${width} ${height}`);
  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);
  return {
    // A container narrower than the margins would otherwise give a negative
    // extent, which flips the scale range and mirrors the chart.
    innerWidth: Math.max(0, width - margin.left - margin.right),
    innerHeight: Math.max(0, height - margin.top - margin.bottom)
  };
}
/**
 * Fill the tooltip with one model's details and place it next to the pointer.
 *
 * Used as the D3 `mouseenter` / `mousemove` handler for chart points.
 *
 * @param {MouseEvent} event - Pointer event; only clientX/clientY are read.
 * @param {Object} d - Model datum. Reads `name`, `color`,
 *   `avg_floored_score`, `avg_output_tokens_per_turn` and `is_open`.
 */
function showTooltip(event, d) {
  // `name` and `color` come from fetched JSON and end up in innerHTML, so
  // they must be escaped to keep markup in the data from being interpreted.
  const HTML_ENTITIES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ENTITIES[ch]);

  // Pointer position relative to the container, which is the tooltip's
  // positioning context.
  const rect = container.getBoundingClientRect();
  const x = event.clientX - rect.left;
  const y = event.clientY - rect.top;

  tip.innerHTML = `
<div class="model-name" style="color: ${escapeHtml(d.color)}">${escapeHtml(d.name)}</div>
<div class="metric">
<span class="metric-label">Score:</span>
<span class="metric-value">${d.avg_floored_score.toFixed(2)}</span>
</div>
<div class="metric">
<span class="metric-label">Tokens/Turn:</span>
<span class="metric-value">${Math.round(d.avg_output_tokens_per_turn).toLocaleString()}</span>
</div>
<div class="metric">
<span class="metric-label">Type:</span>
<span class="metric-value">${d.is_open ? 'Open' : 'Closed'}</span>
</div>
`;

  // Measure after the content is set; the fallbacks cover a tooltip that
  // reports no size.
  const tipWidth = tip.offsetWidth || 150;
  const tipHeight = tip.offsetHeight || 80;

  // Default: right of the pointer, vertically centred on it.
  let tipX = x + 12;
  let tipY = y - tipHeight / 2;

  // Flip to the left when it would overflow the right edge, and never let
  // the flipped tooltip start left of the container.
  if (tipX + tipWidth > width) tipX = x - tipWidth - 12;
  if (tipX < 0) tipX = 0;

  // Keep it inside the chart vertically.
  if (tipY < 0) tipY = 8;
  if (tipY + tipHeight > height) tipY = height - tipHeight - 8;

  tip.style.transform = `translate(${tipX}px, ${tipY}px)`;
  tip.style.opacity = '1';
}
/**
 * Hide the tooltip: fade it out and park it far off-screen.
 *
 * Used as the D3 `mouseleave` handler for chart points.
 */
function hideTooltip() {
  // Property order matters only for parity with the original: opacity
  // first, then the off-screen transform.
  Object.assign(tip.style, {
    opacity: '0',
    transform: 'translate(-9999px, -9999px)',
  });
}
/**
 * Draw (or redraw) the whole scatter plot from the loaded `data`.
 *
 * Safe to call repeatedly: every layer is built with D3 data joins, so a
 * re-render updates the existing elements in place. Does nothing until data
 * with at least one model is available.
 */
function render() {
  // Without models d3.extent() yields undefined bounds and every derived
  // coordinate becomes NaN, so bail out before touching the DOM.
  if (!data || !Array.isArray(data.models) || data.models.length === 0) return;

  const { innerWidth, innerHeight } = updateSize();
  const models = data.models;
  const tokensOf = d => d.avg_output_tokens_per_turn;
  const scoreOf = d => d.avg_floored_score;

  // ---- Scales ----
  const xExtent = d3.extent(models, tokensOf);
  const yExtent = d3.extent(models, scoreOf);
  const xPadding = (xExtent[1] - xExtent[0]) * 0.1;
  const yPadding = (yExtent[1] - yExtent[0]) * 0.1;

  // The x axis normally ends at 10,000 tokens. If a model exceeds that, grow
  // the axis to fit it (plus padding) rather than drawing it off the plot.
  const DEFAULT_X_MAX = 10000;
  const xMax = xExtent[1] > DEFAULT_X_MAX ? xExtent[1] + xPadding : DEFAULT_X_MAX;

  xScale
    // Token counts cannot be negative, so the padded lower bound stops at 0.
    .domain([Math.max(0, xExtent[0] - xPadding), xMax])
    .range([0, innerWidth])
    .nice();
  yScale
    .domain([yExtent[0] - yPadding, yExtent[1] + yPadding])
    // Inverted range: SVG y grows downwards, scores grow upwards.
    .range([innerHeight, 0])
    .nice();

  const px = d => xScale(tokensOf(d));
  const py = d => yScale(scoreOf(d));

  // ---- Grid lines ----
  const xTicks = xScale.ticks(6);
  const yTicks = yScale.ticks(6);
  gGrid.selectAll('.grid-x')
    .data(xTicks)
    .join('line')
    .attr('class', 'grid-x')
    .attr('x1', d => xScale(d))
    .attr('x2', d => xScale(d))
    .attr('y1', 0)
    .attr('y2', innerHeight);
  gGrid.selectAll('.grid-y')
    .data(yTicks)
    .join('line')
    .attr('class', 'grid-y')
    .attr('x1', 0)
    .attr('x2', innerWidth)
    .attr('y1', d => yScale(d))
    .attr('y2', d => yScale(d));

  // ---- Axes ----
  // A negative inner tick size makes the ticks point into the plot area.
  const tickSize = 6;
  gAxes.selectAll('.x-axis')
    .data([0])
    .join('g')
    .attr('class', 'x-axis')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(xScale).ticks(6).tickFormat(d => d.toLocaleString()).tickSizeInner(-tickSize).tickSizeOuter(0));
  gAxes.selectAll('.y-axis')
    .data([0])
    .join('g')
    .attr('class', 'y-axis')
    .call(d3.axisLeft(yScale).ticks(6).tickSizeInner(-tickSize).tickSizeOuter(0));

  // ---- Axis labels ----
  gAxes.selectAll('.x-label')
    .data([0])
    .join('text')
    .attr('class', 'x-label axis-label')
    .attr('x', innerWidth / 2)
    .attr('y', innerHeight + 44)
    .attr('text-anchor', 'middle')
    .text('Average Output Tokens per Turn');
  gAxes.selectAll('.y-label')
    .data([0])
    .join('text')
    .attr('class', 'y-label axis-label')
    // The label is rotated -90deg, so its "x" runs along the vertical axis.
    .attr('x', -innerHeight / 2)
    .attr('y', -52)
    .attr('text-anchor', 'middle')
    .attr('transform', 'rotate(-90)')
    .text('Average Score');

  // ---- Points: circles for closed models, stars for open models ----
  const pointRadius = Math.max(8, Math.min(16, innerWidth / 60));

  // Build a 5-point star path centred on (cx, cy): ten vertices alternating
  // between the outer and inner radius, starting at the top.
  const starPath = (cx, cy, outerR, innerR) => {
    const points = [];
    for (let i = 0; i < 10; i++) {
      const r = i % 2 === 0 ? outerR : innerR;
      const angle = (Math.PI / 2) + (i * Math.PI / 5);
      points.push([cx + r * Math.cos(angle), cy - r * Math.sin(angle)]);
    }
    return 'M' + points.map(p => p.join(',')).join('L') + 'Z';
  };

  const closedModels = models.filter(d => !d.is_open);
  gPoints.selectAll('.point-circle')
    .data(closedModels, d => d.name)
    .join('circle')
    .attr('class', 'point point-circle')
    .attr('cx', px)
    .attr('cy', py)
    .attr('r', pointRadius)
    .attr('fill', d => d.color)
    .attr('stroke', 'none')
    .on('mouseenter', showTooltip)
    .on('mousemove', showTooltip)
    .on('mouseleave', hideTooltip);

  const openModels = models.filter(d => d.is_open);
  gPoints.selectAll('.point-star')
    .data(openModels, d => d.name)
    .join('path')
    .attr('class', 'point point-star')
    .attr('d', d => starPath(px(d), py(d), pointRadius * 1.2, pointRadius * 0.5))
    .attr('fill', d => d.color)
    .attr('stroke', 'none')
    .on('mouseenter', showTooltip)
    .on('mousemove', showTooltip)
    .on('mouseleave', hideTooltip);

  // ---- Point labels, placed just right of each marker ----
  gLabels.selectAll('.point-label')
    .data(models)
    .join('text')
    .attr('class', 'point-label')
    .attr('x', d => px(d) + pointRadius + 6)
    .attr('y', d => py(d) + 4)
    .text(d => d.name);
}
// ---- Initial load ----
// On success, store the payload and draw. On failure, show the error
// message inside the chart container.
const showData = (json) => {
  data = json;
  render();
};
const showLoadError = (err) => {
  const pre = document.createElement('pre');
  pre.style.color = 'red';
  pre.style.padding = '16px';
  pre.textContent = `Error loading data: ${err.message}`;
  container.appendChild(pre);
};
fetchFirstAvailable(JSON_PATHS).then(showData).catch(showLoadError);

// ---- Redraw when the container changes size ----
// Prefer observing the container itself; fall back to the window resize
// event where ResizeObserver is unavailable.
if (!window.ResizeObserver) {
  window.addEventListener('resize', render);
} else {
  new ResizeObserver(() => render()).observe(container);
}

// ---- Redraw when the page theme switches ----
// Watches only the `data-theme` attribute on the root element.
const themeWatcher = new MutationObserver(() => render());
themeWatcher.observe(document.documentElement, {
  attributes: true,
  attributeFilter: ['data-theme']
});
| }; | |
// Mount as soon as the DOM is parsed: immediately when parsing has already
// finished, otherwise once on DOMContentLoaded.
const start = () => ensureD3(bootstrap);
if (document.readyState !== 'loading') {
  start();
} else {
  document.addEventListener('DOMContentLoaded', start, { once: true });
}
| })(); | |
| </script> | |