eleusis-benchmark / app /src /content /embeds /reckless-guessing.html
dlouapre's picture
dlouapre HF Staff
Adding interactive charts + assessment
aee6411
<div class="d3-reckless-guessing"></div>
<style>
/* Styles for the "reckless guessing" horizontal bar chart embed.
   Colors come from CSS custom properties (--text-color, --muted-color, ...)
   that are expected to be defined by the host page. */

/* Root container; position: relative anchors the absolutely positioned tooltip. */
.d3-reckless-guessing {
width: 100%;
margin: 10px 0;
position: relative;
font-family: system-ui, -apple-system, sans-serif;
}
/* The SVG fills the container width. */
.d3-reckless-guessing svg {
display: block;
width: 100%;
height: auto;
}
/* Axis lines and tick marks. */
.d3-reckless-guessing .axes path,
.d3-reckless-guessing .axes line {
stroke: var(--axis-color, var(--text-color));
}
/* Tick labels. */
.d3-reckless-guessing .axes text {
fill: var(--tick-color, var(--muted-color));
font-size: 12px;
}
/* Vertical grid lines; falls back to a faint black when --grid-color is unset. */
.d3-reckless-guessing .grid line {
stroke: var(--grid-color, rgba(0,0,0,.08));
}
/* Axis title text. */
.d3-reckless-guessing .axes text.axis-label {
font-size: 14px;
font-weight: 500;
fill: var(--text-color);
}
/* Chart title. */
.d3-reckless-guessing .axes text.chart-title {
font-size: 16px;
font-weight: 600;
fill: var(--text-color);
}
/* Explanatory subtitle below the axis title. */
.d3-reckless-guessing .axes text.subtitle {
font-size: 11px;
font-style: italic;
fill: var(--muted-color);
}
/* Model name text; its fill color is set per element by the script. */
.d3-reckless-guessing .model-label {
font-size: 13px;
font-weight: 500;
}
/* Bars are interactive: pointer cursor plus a dim-on-hover effect. */
.d3-reckless-guessing .bar {
cursor: pointer;
transition: opacity 0.15s ease;
}
.d3-reckless-guessing .bar:hover {
opacity: 0.8;
}
/* Percentage text. */
.d3-reckless-guessing .percent-label {
font-size: 12px;
font-weight: 500;
fill: var(--text-color);
}
/* Tooltip: starts invisible (opacity 0) and translated far away; the script
   moves it with `transform` and toggles `opacity`. pointer-events: none keeps
   it from intercepting mouse events. */
.d3-reckless-guessing .d3-tooltip {
position: absolute;
top: 0;
left: 0;
transform: translate(-9999px, -9999px);
pointer-events: none;
padding: 10px 12px;
border-radius: 8px;
font-size: 12px;
line-height: 1.4;
border: 1px solid var(--border-color);
background: var(--surface-bg);
color: var(--text-color);
box-shadow: 0 4px 24px rgba(0,0,0,.18);
opacity: 0;
transition: opacity 0.12s ease;
z-index: 10;
}
/* Tooltip heading (model name). */
.d3-reckless-guessing .d3-tooltip .model-name {
font-weight: 600;
margin-bottom: 4px;
}
/* One label/value row inside the tooltip. */
.d3-reckless-guessing .d3-tooltip .metric {
display: flex;
justify-content: space-between;
gap: 16px;
}
.d3-reckless-guessing .d3-tooltip .metric-label {
color: var(--muted-color);
}
.d3-reckless-guessing .d3-tooltip .metric-value {
font-weight: 500;
}
</style>
<script>
(() => {
/**
 * Runs `cb` once D3 is available on `window`.
 *
 * If D3 is already present the callback runs synchronously. Otherwise a
 * `<script id="d3-cdn-script">` pointing at the jsDelivr D3 v7 bundle is
 * created (or an existing element with that id is reused) and the callback
 * runs from its `load` event.
 *
 * @param {Function} cb - Called with no arguments once `window.d3.select` is a function.
 * @returns {*} The callback's return value when D3 was already loaded, otherwise undefined.
 */
const ensureD3 = (cb) => {
  const hasD3 = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (hasD3()) return cb();

  let s = document.getElementById('d3-cdn-script');
  if (!s) {
    s = document.createElement('script');
    s.id = 'd3-cdn-script';
    s.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(s);
  }

  s.addEventListener('load', () => {
    if (hasD3()) cb();
  }, { once: true });

  // Without this, a blocked or failed CDN request leaves the chart blank with
  // no hint as to why.
  s.addEventListener('error', () => {
    console.error(`Failed to load D3 from ${s.src}`);
  }, { once: true });
};
const bootstrap = () => {
const scriptEl = document.currentScript;
let container = scriptEl ? scriptEl.previousElementSibling : null;
if (!(container && container.classList && container.classList.contains('d3-reckless-guessing'))) {
const candidates = Array.from(document.querySelectorAll('.d3-reckless-guessing'))
.filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
container = candidates[candidates.length - 1] || null;
}
if (!container) return;
if (container.dataset) {
if (container.dataset.mounted === 'true') return;
container.dataset.mounted = 'true';
}
// Tooltip setup
container.style.position = container.style.position || 'relative';
const tip = document.createElement('div');
tip.className = 'd3-tooltip';
container.appendChild(tip);
// SVG setup
const svg = d3.select(container).append('svg');
const gRoot = svg.append('g');
// Chart groups
const gGrid = gRoot.append('g').attr('class', 'grid');
const gAxes = gRoot.append('g').attr('class', 'axes');
const gBars = gRoot.append('g').attr('class', 'bars');
const gLabels = gRoot.append('g').attr('class', 'labels');
// State
let data = null;
let width = 800;
let height = 450;
const margin = { top: 40, right: 50, bottom: 56, left: 20 };
// Scales
const xScale = d3.scaleLinear();
const yScale = d3.scaleBand();
// Data loading
const JSON_PATHS = [
'/data/reckless_guessing.json',
'./assets/data/reckless_guessing.json',
'../assets/data/reckless_guessing.json',
'../../assets/data/reckless_guessing.json'
];
/**
 * Fetches JSON from the first URL in `paths` that responds successfully.
 *
 * Paths are tried sequentially. A network error, a non-OK HTTP status or an
 * unparsable body all cause the next path to be tried.
 *
 * @param {string[]} paths - Candidate URLs, in priority order.
 * @returns {Promise<*>} The parsed JSON body of the first successful response.
 * @throws {Error} 'Data not found' when every path fails; `cause` carries the
 *   last failure (undefined when `paths` is empty) to aid debugging.
 */
const fetchFirstAvailable = async (paths) => {
  let lastError;
  for (const p of paths) {
    try {
      const r = await fetch(p, { cache: 'no-cache' });
      if (r.ok) return await r.json();
      lastError = new Error(`HTTP ${r.status} for ${p}`);
    } catch (err) {
      // Best effort: remember why this path failed and move on to the next.
      lastError = err;
    }
  }
  throw new Error('Data not found', { cause: lastError });
};
/**
 * Recomputes the chart dimensions from the container width and the number of
 * models, applies them to the SVG, and positions the root group at the
 * top-left margin.
 *
 * @returns {{innerWidth: number, innerHeight: number}} Size of the plot area
 *   (the SVG size minus the margins).
 */
function updateSize() {
  const ROW_HEIGHT = 36;

  // Fall back to 800px wide / 10 rows when the container has no width or the
  // data has not been loaded.
  width = container.clientWidth || 800;
  const rowCount = data ? data.models.length : 10;
  height = margin.top + margin.bottom + rowCount * ROW_HEIGHT;

  svg
    .attr('width', width)
    .attr('height', height)
    .attr('viewBox', `0 0 ${width} ${height}`);
  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);

  const innerWidth = width - margin.left - margin.right;
  const innerHeight = height - margin.top - margin.bottom;
  return { innerWidth, innerHeight };
}
/**
 * Fills the tooltip with one model's metrics and positions it next to the
 * pointer, keeping it inside the chart area.
 *
 * @param {MouseEvent} event - Pointer event; `clientX`/`clientY` are read.
 * @param {Object} d - Model datum. Reads `name`, `color`, `double_down_rate`,
 *   `wrong_guesses`, `next_turn_guesses`, `max_streak` and `is_open`.
 */
function showTooltip(event, d) {
  // The values come from a fetched JSON file and are written via innerHTML,
  // so escape them to keep stray markup in the data from being interpreted.
  const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const esc = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

  const rect = container.getBoundingClientRect();
  const x = event.clientX - rect.left;
  const y = event.clientY - rect.top;
  tip.innerHTML = `
<div class="model-name" style="color: ${esc(d.color)}">${esc(d.name)}</div>
<div class="metric">
<span class="metric-label">Double-Down Rate:</span>
<span class="metric-value">${(d.double_down_rate * 100).toFixed(0)}%</span>
</div>
<div class="metric">
<span class="metric-label">Wrong Guesses:</span>
<span class="metric-value">${esc(d.wrong_guesses)}</span>
</div>
<div class="metric">
<span class="metric-label">Next Turn Guesses:</span>
<span class="metric-value">${esc(d.next_turn_guesses)}</span>
</div>
<div class="metric">
<span class="metric-label">Max Streak:</span>
<span class="metric-value">${esc(d.max_streak)}</span>
</div>
<div class="metric">
<span class="metric-label">Type:</span>
<span class="metric-value">${d.is_open ? 'Open' : 'Closed'}</span>
</div>
`;
  // Measure after the content is set; fall back to rough sizes if layout
  // reports zero.
  const tipWidth = tip.offsetWidth || 180;
  const tipHeight = tip.offsetHeight || 120;
  // Default placement: right of the pointer, vertically centered on it.
  let tipX = x + 12;
  let tipY = y - tipHeight / 2;
  // Flip to the left of the pointer when it would overflow the right edge...
  if (tipX + tipWidth > width) tipX = x - tipWidth - 12;
  // ...and never let the flipped tooltip start left of the container, where
  // it would be clipped on narrow layouts.
  if (tipX < 0) tipX = 0;
  if (tipY < 0) tipY = 8;
  if (tipY + tipHeight > height) tipY = height - tipHeight - 8;
  tip.style.transform = `translate(${tipX}px, ${tipY}px)`;
  tip.style.opacity = '1';
}
/** Hides the tooltip: makes it transparent and moves it far outside the chart. */
function hideTooltip() {
  Object.assign(tip.style, {
    opacity: '0',
    transform: 'translate(-9999px, -9999px)',
  });
}
/**
 * Picks black or white text for best legibility on a hex background color.
 *
 * Brightness is estimated with the 0.299/0.587/0.114 luma weights applied
 * directly to the 0-1 channel values (a cheap approximation, not WCAG
 * relative luminance).
 *
 * @param {string} hexColor - `#rgb`, `#rgba`, `#rrggbb` or `#rrggbbaa`; the
 *   leading `#` is optional and any alpha channel is ignored.
 * @returns {string} '#000000' for light backgrounds, otherwise '#ffffff'.
 *   Missing or unparsable input also yields '#ffffff'.
 */
function getContrastColor(hexColor) {
  const FALLBACK = '#ffffff';
  if (typeof hexColor !== 'string') return FALLBACK;

  let hex = hexColor.trim().replace('#', '');
  // Expand shorthand notation (#abc -> aabbcc) so each channel has two digits.
  if (hex.length === 3 || hex.length === 4) {
    hex = [...hex].map((ch) => ch + ch).join('');
  }
  if (!/^[0-9a-f]{6}([0-9a-f]{2})?$/i.test(hex)) return FALLBACK;

  const [r, g, b] = [0, 2, 4].map((i) => Number.parseInt(hex.slice(i, i + 2), 16) / 255);
  const luma = 0.299 * r + 0.587 * g + 0.114 * b;
  return luma > 0.5 ? '#000000' : '#ffffff';
}
/**
 * Draws (or redraws) the whole chart from `data`: grid, title, x-axis, bars
 * and labels. Does nothing until the data has been loaded. Every element is
 * bound with `selectAll().data().join()`, so repeated calls update in place.
 */
function render() {
  if (!data) return;
  const { innerWidth, innerHeight } = updateSize();
  // Sort models by double_down_rate descending
  const models = [...data.models].sort((a, b) => b.double_down_rate - a.double_down_rate);

  // X domain: 0-80% by default, extended in 20% steps when a model exceeds it
  // so that no bar is drawn past the end of the axis.
  const TICKS_PER_UNIT = 5; // one tick every 20%
  const MIN_TICK_INDEX = 4; // 0.8, the default upper bound
  const maxRate = models.reduce((m, d) => Math.max(m, Number(d.double_down_rate) || 0), 0);
  // The small epsilon keeps floating-point noise from adding an extra step
  // when the maximum sits exactly on a tick.
  const lastTickIndex = Math.max(MIN_TICK_INDEX, Math.ceil(maxRate * TICKS_PER_UNIT - 1e-9));
  // Derive ticks by integer division to avoid accumulated rounding error.
  const xTicks = Array.from({ length: lastTickIndex + 1 }, (_, i) => i / TICKS_PER_UNIT);

  // Update scales
  xScale
    .domain([0, lastTickIndex / TICKS_PER_UNIT])
    .range([0, innerWidth]);
  yScale
    .domain(models.map(d => d.name))
    .range([0, innerHeight])
    .padding(0.25);

  // Grid lines (vertical)
  gGrid.selectAll('.grid-x')
    .data(xTicks)
    .join('line')
    .attr('class', 'grid-x')
    .attr('x1', d => xScale(d))
    .attr('x2', d => xScale(d))
    .attr('y1', 0)
    .attr('y2', innerHeight);

  // Title
  gAxes.selectAll('.chart-title')
    .data([0])
    .join('text')
    .attr('class', 'chart-title')
    .attr('x', innerWidth / 2)
    .attr('y', -20)
    .attr('text-anchor', 'middle')
    .text('After Wrong Guess: % Guessing Again Next Turn');

  // X-axis (bottom)
  gAxes.selectAll('.x-axis')
    .data([0])
    .join('g')
    .attr('class', 'x-axis')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(xScale)
      .tickValues(xTicks)
      .tickFormat(d => `${Math.round(d * 100)}%`)
      .tickSizeOuter(0));

  // X-axis label
  gAxes.selectAll('.x-label')
    .data([0])
    .join('text')
    .attr('class', 'x-label axis-label')
    .attr('x', innerWidth / 2)
    .attr('y', innerHeight + 34)
    .attr('text-anchor', 'middle')
    .text('Double-Down Rate');

  // Subtitle
  gAxes.selectAll('.subtitle')
    .data([0])
    .join('text')
    .attr('class', 'subtitle')
    .attr('x', innerWidth / 2)
    .attr('y', innerHeight + 48)
    .attr('text-anchor', 'middle')
    .text('Higher = more reckless (keeps guessing after failures)');

  // Bars
  const barHeight = yScale.bandwidth();
  gBars.selectAll('.bar')
    .data(models, d => d.name)
    .join('rect')
    .attr('class', 'bar')
    .attr('x', 0)
    .attr('y', d => yScale(d.name))
    .attr('width', d => xScale(d.double_down_rate))
    .attr('height', barHeight)
    .attr('fill', d => d.color)
    .attr('rx', 3)
    .attr('ry', 3)
    .on('mouseenter', showTooltip)
    .on('mousemove', showTooltip)
    .on('mouseleave', hideTooltip);

  // Model labels (inside bars). The label group is painted above the bars, so
  // the text must ignore pointer events; otherwise moving over a label fires
  // `mouseleave` on the bar underneath and the tooltip disappears.
  gLabels.selectAll('.model-label')
    .data(models, d => d.name)
    .join('text')
    .attr('class', 'model-label')
    .attr('x', 8)
    .attr('y', d => yScale(d.name) + barHeight / 2)
    .attr('dy', '0.35em')
    .attr('text-anchor', 'start')
    .style('pointer-events', 'none')
    .style('fill', d => getContrastColor(d.color))
    .text(d => d.name);

  // Percentage labels (end of bars)
  gLabels.selectAll('.percent-label')
    .data(models, d => d.name)
    .join('text')
    .attr('class', 'percent-label')
    .attr('x', d => xScale(d.double_down_rate) + 6)
    .attr('y', d => yScale(d.name) + barHeight / 2)
    .attr('dy', '0.35em')
    .attr('text-anchor', 'start')
    .style('pointer-events', 'none')
    .text(d => `${Math.round(d.double_down_rate * 100)}%`);
}
// Initialize
fetchFirstAvailable(JSON_PATHS)
.then(json => {
data = json;
render();
})
.catch(err => {
const pre = document.createElement('pre');
pre.style.color = 'red';
pre.style.padding = '16px';
pre.textContent = `Error loading data: ${err.message}`;
container.appendChild(pre);
});
// Resize handling
if (window.ResizeObserver) {
new ResizeObserver(() => render()).observe(container);
} else {
window.addEventListener('resize', render);
}
// Theme change handling
const observer = new MutationObserver(() => render());
observer.observe(document.documentElement, {
attributes: true,
attributeFilter: ['data-theme']
});
};
// Mount immediately when the DOM is already parsed; otherwise wait for
// DOMContentLoaded. Either way D3 is loaded first, then `bootstrap` runs.
const start = () => ensureD3(bootstrap);
if (document.readyState !== 'loading') {
  start();
} else {
  document.addEventListener('DOMContentLoaded', start, { once: true });
}
})();
</script>