eleusis-benchmark / app /src /content /embeds /overall-performance.html
dlouapre's picture
dlouapre HF Staff
Improved with 3 more models GPT 5.2 Pro, Deepseek V3.2 and Gemini 3 Pro High
5619318
<div class="d3-overall-performance"></div>
<style>
/* Root container of the chart. position: relative makes it the positioning
   context for the absolutely positioned .d3-tooltip child.
   NOTE(review): the custom properties used below (--text-color, --muted-color,
   --axis-color, --tick-color, --grid-color) are not defined in this file;
   presumably they come from the host page's theme — confirm. */
.d3-overall-performance {
width: 100%;
margin: 10px 0;
position: relative;
font-family: system-ui, -apple-system, sans-serif;
}
/* The SVG fills the container width. */
.d3-overall-performance svg {
display: block;
width: 100%;
height: auto;
}
/* Axis domain path and tick marks: --axis-color, falling back to --text-color. */
.d3-overall-performance .axes path,
.d3-overall-performance .axes line {
stroke: var(--axis-color, var(--text-color));
}
/* Tick labels: --tick-color, falling back to --muted-color. */
.d3-overall-performance .axes text {
fill: var(--tick-color, var(--muted-color));
font-size: 11px;
}
/* Background grid lines; faint black when --grid-color is not set. */
.d3-overall-performance .grid line {
stroke: var(--grid-color, rgba(0,0,0,.08));
}
/* Axis titles (the script gives them the "axis-label" class): larger and
   bolder than the tick labels. */
.d3-overall-performance .axes text.axis-label {
font-size: 15px;
font-weight: 500;
fill: var(--text-color);
}
/* Shift x-axis text down by 4px. */
.d3-overall-performance .x-axis text {
transform: translateY(4px);
}
/* Data markers (the script draws circles and star paths with the "point"
   class): pointer cursor plus a short opacity fade used by the hover state. */
.d3-overall-performance .point {
cursor: pointer;
transition: opacity 0.15s ease;
}
.d3-overall-performance .point:hover {
opacity: 0.8;
}
/* Text labels next to the markers. pointer-events: none keeps them from
   receiving mouse events themselves. */
.d3-overall-performance .point-label {
font-size: 11px;
fill: var(--text-color);
pointer-events: none;
}
/* Hover tooltip. It is anchored at the container's top-left corner and moved
   with `transform`; by default it is parked far off-screen and fully
   transparent. The script overwrites `transform` and `opacity` to show/hide it.
   NOTE(review): --border-color and --surface-bg are not defined in this file;
   presumably provided by the host page's theme — confirm. */
.d3-overall-performance .d3-tooltip {
position: absolute;
top: 0;
left: 0;
transform: translate(-9999px, -9999px);
pointer-events: none;
padding: 10px 12px;
border-radius: 8px;
font-size: 12px;
line-height: 1.4;
border: 1px solid var(--border-color);
background: var(--surface-bg);
color: var(--text-color);
box-shadow: 0 4px 24px rgba(0,0,0,.18);
opacity: 0;
transition: opacity 0.12s ease;
z-index: 10;
}
/* Tooltip heading: the model name. */
.d3-overall-performance .d3-tooltip .model-name {
font-weight: 600;
margin-bottom: 4px;
}
/* One tooltip row: label on the left, value on the right. */
.d3-overall-performance .d3-tooltip .metric {
display: flex;
justify-content: space-between;
gap: 16px;
}
.d3-overall-performance .d3-tooltip .metric-label {
color: var(--muted-color);
}
.d3-overall-performance .d3-tooltip .metric-value {
font-weight: 500;
}
</style>
<script>
(() => {
/**
 * Run `cb` once the global `d3` library is usable, loading it from the CDN if needed.
 *
 * A single `<script id="d3-cdn-script">` tag is reused if one is already in the
 * document, so several callers do not inject the library more than once.
 *
 * @param {Function} cb - Called with no arguments once `window.d3.select` is a function.
 * @returns {*} The return value of `cb()` when d3 is already available;
 *   otherwise `undefined` (`cb` then runs later from the script's `load` event).
 */
const ensureD3 = (cb) => {
  const isReady = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (isReady()) return cb();

  let s = document.getElementById('d3-cdn-script');
  if (!s) {
    s = document.createElement('script');
    s.id = 'd3-cdn-script';
    s.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(s);
  }

  s.addEventListener('load', () => {
    if (isReady()) cb();
  }, { once: true });

  // Without this, a blocked or failed CDN request leaves the chart blank with
  // no indication of why.
  s.addEventListener('error', () => {
    console.error(`Failed to load d3 from ${s.src}; the chart cannot be rendered.`);
  }, { once: true });
};
const bootstrap = () => {
const scriptEl = document.currentScript;
let container = scriptEl ? scriptEl.previousElementSibling : null;
if (!(container && container.classList && container.classList.contains('d3-overall-performance'))) {
const candidates = Array.from(document.querySelectorAll('.d3-overall-performance'))
.filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
container = candidates[candidates.length - 1] || null;
}
if (!container) return;
if (container.dataset) {
if (container.dataset.mounted === 'true') return;
container.dataset.mounted = 'true';
}
// --- Tooltip element -------------------------------------------------------
// Keep an inline position if one is already set; otherwise make the container
// the positioning context for the tooltip.
if (!container.style.position) container.style.position = 'relative';
const tip = Object.assign(document.createElement('div'), { className: 'd3-tooltip' });
container.appendChild(tip);

// --- SVG skeleton ----------------------------------------------------------
// One root group (translated by the margins in updateSize) holding four layer
// groups, appended in paint order: grid, axes, points, labels.
const svg = d3.select(container).append('svg');
const gRoot = svg.append('g');
const [gGrid, gAxes, gPoints, gLabels] = ['grid', 'axes', 'points', 'labels']
  .map((layerClass) => gRoot.append('g').attr('class', layerClass));

// --- Mutable chart state ---------------------------------------------------
let data = null; // parsed JSON payload; stays null until loading succeeds
let width = 800; // outer SVG size, recomputed by updateSize()
let height = 450;
const margin = { top: 20, right: 120, bottom: 56, left: 72 };

// --- Scales (domain and range are assigned in render()) --------------------
const xScale = d3.scaleLinear();
const yScale = d3.scaleLinear();

// --- Data locations, tried in order until one responds ---------------------
const JSON_PATHS = [
  '/data/overall_performance.json',
  './assets/figures/overall_performance.json',
  '../assets/figures/overall_performance.json',
  '../../assets/figures/overall_performance.json'
];
/**
 * Fetch and parse JSON from the first URL in `paths` that works.
 *
 * Candidates are tried sequentially, in order. A candidate is skipped when the
 * request throws, the response is not `ok`, or the body is not valid JSON.
 *
 * @param {Iterable<string>} paths - Candidate URLs, most preferred first.
 * @returns {Promise<*>} The parsed JSON of the first successful candidate.
 * @throws {Error} `'Data not found'` when every candidate fails; the reason the
 *   last candidate failed is attached as `cause` where the runtime supports it.
 */
const fetchFirstAvailable = async (paths) => {
  let lastError = null;
  for (const p of paths) {
    try {
      const r = await fetch(p, { cache: 'no-cache' });
      if (r.ok) return await r.json();
      lastError = new Error(`HTTP ${r.status} for ${p}`);
    } catch (err) {
      // Best-effort lookup: remember why this candidate failed, then try the next.
      lastError = err;
    }
  }
  throw new Error('Data not found', lastError ? { cause: lastError } : undefined);
};
/**
 * Re-measure the container and resize the SVG to match.
 *
 * Updates the shared `width`/`height` state (width falls back to 800 when the
 * container reports no width; height is width / 1.3, rounded, but at least
 * 300), applies them to the SVG together with a matching viewBox, and offsets
 * the root group by the top/left margins.
 *
 * @returns {{innerWidth: number, innerHeight: number}} Size of the plotting
 *   area, i.e. the outer size minus the margins.
 */
function updateSize() {
  const measuredWidth = container.clientWidth;
  width = measuredWidth || 800;
  height = Math.max(300, Math.round(width / 1.3));

  svg.attr('width', width);
  svg.attr('height', height);
  svg.attr('viewBox', `0 0 ${width} ${height}`);
  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);

  const innerWidth = width - margin.left - margin.right;
  const innerHeight = height - margin.top - margin.bottom;
  return { innerWidth, innerHeight };
}
/**
 * Fill the tooltip with one model's metrics and place it next to the pointer.
 *
 * The tooltip is positioned in container coordinates: 12px to the right of the
 * pointer and vertically centred on it, flipped to the left of the pointer when
 * it would extend past the chart width, and clamped vertically to the chart
 * height.
 *
 * @param {{clientX: number, clientY: number}} event - Pointer event.
 * @param {Object} d - Datum bound to the hovered mark; reads `name`, `color`,
 *   `avg_floored_score`, `avg_output_tokens_per_turn` and `is_open`.
 */
function showTooltip(event, d) {
  // Values from the data file are inserted into markup, so neutralise any
  // characters that are meaningful in HTML text or attribute context.
  const HTML_ENTITIES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ENTITIES[ch]);

  const rect = container.getBoundingClientRect();
  const x = event.clientX - rect.left;
  const y = event.clientY - rect.top;
  tip.innerHTML = `
<div class="model-name" style="color: ${escapeHtml(d.color)}">${escapeHtml(d.name)}</div>
<div class="metric">
<span class="metric-label">Score:</span>
<span class="metric-value">${escapeHtml(d.avg_floored_score.toFixed(2))}</span>
</div>
<div class="metric">
<span class="metric-label">Tokens/Turn:</span>
<span class="metric-value">${escapeHtml(Math.round(d.avg_output_tokens_per_turn).toLocaleString())}</span>
</div>
<div class="metric">
<span class="metric-label">Type:</span>
<span class="metric-value">${d.is_open ? 'Open' : 'Closed'}</span>
</div>
`;
  // Measured after the content is set; the fallbacks apply when the element
  // reports a size of 0.
  const tipWidth = tip.offsetWidth || 150;
  const tipHeight = tip.offsetHeight || 80;
  let tipX = x + 12;
  let tipY = y - tipHeight / 2;
  if (tipX + tipWidth > width) tipX = x - tipWidth - 12;
  if (tipY < 0) tipY = 8;
  if (tipY + tipHeight > height) tipY = height - tipHeight - 8;
  tip.style.transform = `translate(${tipX}px, ${tipY}px)`;
  tip.style.opacity = '1';
}
/**
 * Hide the tooltip: make it transparent and park it far off-screen again.
 */
function hideTooltip() {
  Object.assign(tip.style, {
    opacity: '0',
    transform: 'translate(-9999px, -9999px)',
  });
}
/**
 * Redraw the whole chart from the loaded data at the container's current size.
 *
 * Does nothing until `data` has been loaded. Otherwise it resizes the SVG,
 * recomputes both scales, then updates (in this order) the grid lines, the two
 * axes with their titles, the markers (circles for closed models, five-point
 * stars for open ones) and the text labels. Every layer uses a D3 data join,
 * so calling it repeatedly updates the existing elements in place.
 */
function render() {
  if (!data) return;

  const { innerWidth: plotW, innerHeight: plotH } = updateSize();
  const { models } = data;

  const tokensOf = (m) => m.avg_output_tokens_per_turn;
  const scoreOf = (m) => m.avg_floored_score;

  // ---- Scales -------------------------------------------------------------
  // Each domain is padded by 10% of the data range. The x domain is floored at
  // 0 (token counts cannot be negative) and its upper bound is fixed.
  const [tokensMin, tokensMax] = d3.extent(models, tokensOf);
  const [scoreMin, scoreMax] = d3.extent(models, scoreOf);
  const tokensPad = (tokensMax - tokensMin) * 0.1;
  const scorePad = (scoreMax - scoreMin) * 0.1;
  const xMax = 10000;

  xScale
    .domain([Math.max(0, tokensMin - tokensPad), xMax])
    .range([0, plotW])
    .nice();
  yScale
    .domain([scoreMin - scorePad, scoreMax + scorePad])
    .range([plotH, 0])
    .nice();

  // Pixel position of a model on each axis.
  const px = (m) => xScale(tokensOf(m));
  const py = (m) => yScale(scoreOf(m));

  // ---- Grid ---------------------------------------------------------------
  gGrid.selectAll('.grid-x')
    .data(xScale.ticks(6))
    .join('line')
    .attr('class', 'grid-x')
    .attr('x1', (t) => xScale(t))
    .attr('x2', (t) => xScale(t))
    .attr('y1', 0)
    .attr('y2', plotH);

  gGrid.selectAll('.grid-y')
    .data(yScale.ticks(6))
    .join('line')
    .attr('class', 'grid-y')
    .attr('x1', 0)
    .attr('x2', plotW)
    .attr('y1', (t) => yScale(t))
    .attr('y2', (t) => yScale(t));

  // ---- Axes ---------------------------------------------------------------
  // A negative inner tick size makes the tick marks point into the plot area.
  const tickSize = 6;
  const bottomAxis = d3.axisBottom(xScale)
    .ticks(6)
    .tickFormat((t) => t.toLocaleString())
    .tickSizeInner(-tickSize)
    .tickSizeOuter(0);
  const leftAxis = d3.axisLeft(yScale)
    .ticks(6)
    .tickSizeInner(-tickSize)
    .tickSizeOuter(0);

  gAxes.selectAll('.x-axis')
    .data([0])
    .join('g')
    .attr('class', 'x-axis')
    .attr('transform', `translate(0,${plotH})`)
    .call(bottomAxis);

  gAxes.selectAll('.y-axis')
    .data([0])
    .join('g')
    .attr('class', 'y-axis')
    .call(leftAxis);

  // ---- Axis titles --------------------------------------------------------
  gAxes.selectAll('.x-label')
    .data([0])
    .join('text')
    .attr('class', 'x-label axis-label')
    .attr('x', plotW / 2)
    .attr('y', plotH + 44)
    .attr('text-anchor', 'middle')
    .text('Average Output Tokens per Turn');

  gAxes.selectAll('.y-label')
    .data([0])
    .join('text')
    .attr('class', 'y-label axis-label')
    .attr('x', -plotH / 2)
    .attr('y', -52)
    .attr('text-anchor', 'middle')
    .attr('transform', 'rotate(-90)')
    .text('Average Score');

  // ---- Markers ------------------------------------------------------------
  // Marker size follows the plot width, clamped to 8..16 px.
  const pointRadius = Math.max(8, Math.min(16, plotW / 60));

  // SVG path for a five-point star centred on (cx, cy): ten vertices that
  // alternate between the outer and inner radius, starting at the top.
  const starPath = (cx, cy, outerR, innerR) => {
    const vertices = Array.from({ length: 10 }, (_, i) => {
      const r = i % 2 === 0 ? outerR : innerR;
      const angle = (Math.PI / 2) + (i * Math.PI / 5);
      return `${cx + r * Math.cos(angle)},${cy - r * Math.sin(angle)}`;
    });
    return `M${vertices.join('L')}Z`;
  };

  // Shared hover wiring for both marker shapes.
  const withTooltip = (selection) => selection
    .on('mouseenter', showTooltip)
    .on('mousemove', showTooltip)
    .on('mouseleave', hideTooltip);

  // Closed models: circles.
  gPoints.selectAll('.point-circle')
    .data(models.filter((m) => !m.is_open), (m) => m.name)
    .join('circle')
    .attr('class', 'point point-circle')
    .attr('cx', px)
    .attr('cy', py)
    .attr('r', pointRadius)
    .attr('fill', (m) => m.color)
    .attr('stroke', 'none')
    .call(withTooltip);

  // Open models: stars.
  gPoints.selectAll('.point-star')
    .data(models.filter((m) => m.is_open), (m) => m.name)
    .join('path')
    .attr('class', 'point point-star')
    .attr('d', (m) => starPath(px(m), py(m), pointRadius * 1.2, pointRadius * 0.5))
    .attr('fill', (m) => m.color)
    .attr('stroke', 'none')
    .call(withTooltip);

  // ---- Labels -------------------------------------------------------------
  // Model name to the right of each marker.
  gLabels.selectAll('.point-label')
    .data(models)
    .join('text')
    .attr('class', 'point-label')
    .attr('x', (m) => px(m) + pointRadius + 6)
    .attr('y', (m) => py(m) + 4)
    .text((m) => m.name);
}
// Initialize: load the data and draw once; on failure show the error message
// inside the container instead of leaving it blank.
fetchFirstAvailable(JSON_PATHS)
  .then(json => {
    data = json;
    render();
  })
  .catch(err => {
    const pre = document.createElement('pre');
    pre.style.color = 'red';
    pre.style.padding = '16px';
    pre.textContent = `Error loading data: ${err.message}`;
    container.appendChild(pre);
  });
// Resize handling.
// render() resizes the SVG, which can change the container's own size. Doing
// that synchronously inside a ResizeObserver callback can re-trigger the
// observer within the same frame, so resize-driven renders are deferred to the
// next animation frame and coalesced into a single call.
let pendingFrame = 0;
const scheduleRender = () => {
  if (pendingFrame) return;
  pendingFrame = window.requestAnimationFrame(() => {
    pendingFrame = 0;
    render();
  });
};
if (window.ResizeObserver) {
  new ResizeObserver(scheduleRender).observe(container);
} else {
  window.addEventListener('resize', scheduleRender);
}
// Theme change handling: redraw whenever the root element's data-theme
// attribute changes.
const observer = new MutationObserver(() => render());
observer.observe(document.documentElement, {
  attributes: true,
  attributeFilter: ['data-theme']
});
};
// Start as soon as the DOM is available: immediately if parsing has already
// finished, otherwise once on DOMContentLoaded.
const start = () => ensureD3(bootstrap);
if (document.readyState !== 'loading') {
  start();
} else {
  document.addEventListener('DOMContentLoaded', start, { once: true });
}
})();
</script>