eiffel-tower-llama / app /src /content /embeds /d3-evaluation-grid.html
dlouapre's picture
dlouapre HF Staff
Updating metrics charts in d3
a7035df
<div class="d3-eval-grid"></div>
<style>
/*
 * Styles for the evaluation-grid embed. Every rule is scoped under
 * .d3-eval-grid, the class of the mount element the script looks up.
 * Colours come from CSS custom properties (--surface-bg, --border-color,
 * --text-color, --axis-color, --tick-color, --grid-color) that are not
 * defined in this file; presumably the host page provides them.
 */
.d3-eval-grid {
padding: 8px;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
}
/* Outer card the script appends into the mount element; holds grid, tooltip and legend. */
.d3-eval-grid .chart-card {
background: var(--surface-bg);
border: 1px solid var(--border-color);
border-radius: 10px;
padding: 16px;
}
/* Two equal-width columns of subplots. */
.d3-eval-grid .grid-container {
display: grid;
grid-template-columns: repeat(2, 1fr);
gap: 24px;
margin-bottom: 16px;
}
/* Collapse to a single column on viewports up to 768px wide. */
@media (max-width: 768px) {
.d3-eval-grid .grid-container {
grid-template-columns: 1fr;
}
}
/* One panel per metric; the script creates one .subplot per entry of its metrics list. */
.d3-eval-grid .subplot {
background: var(--surface-bg);
border: 1px solid var(--border-color);
border-radius: 8px;
padding: 12px;
}
/* Metric label shown above each chart. */
.d3-eval-grid .subplot-title {
font-size: 13px;
font-weight: 600;
color: var(--text-color);
margin-bottom: 8px;
text-align: center;
}
/* Shared legend rendered once below the grid. */
.d3-eval-grid .legend {
display: flex;
flex-wrap: wrap;
gap: 8px 16px;
padding-top: 12px;
border-top: 1px solid var(--border-color);
font-size: 12px;
justify-content: center;
}
/* One entry per experiment; hovering an entry highlights that experiment. */
.d3-eval-grid .legend-item {
display: flex;
align-items: center;
gap: 6px;
cursor: pointer;
transition: opacity 0.2s;
}
/* Added by the script to every legend entry except the hovered experiment. */
.d3-eval-grid .legend-item.dimmed {
opacity: 0.3;
}
/* Colour chip; its background is set inline by the script. */
.d3-eval-grid .legend-swatch {
width: 14px;
height: 14px;
border-radius: 3px;
border: 1px solid var(--border-color);
}
/* Axis domain path and tick marks inside the <g class="axes"> group. */
.d3-eval-grid .axes path,
.d3-eval-grid .axes line {
stroke: var(--axis-color);
}
/* Tick labels. */
.d3-eval-grid .axes text {
fill: var(--tick-color);
font-size: 10px;
}
/* Horizontal guide lines drawn inside the <g class="grid"> group. */
.d3-eval-grid .grid line {
stroke: var(--grid-color);
stroke-dasharray: 2,2;
opacity: 0.5;
}
/* NOTE(review): no element with class "axis-label" is created by the script in this file. */
.d3-eval-grid .axis-label {
fill: var(--text-color);
font-size: 11px;
font-weight: 600;
}
/*
 * Hover tooltip. Starts invisible (opacity 0) and ignores pointer events;
 * the script toggles its opacity and moves it with a CSS transform.
 * No top/left offsets are declared in this rule.
 */
.d3-eval-grid .d3-tooltip {
position: absolute;
pointer-events: none;
padding: 8px 10px;
background: var(--surface-bg);
border: 1px solid var(--border-color);
border-radius: 8px;
font-size: 11px;
line-height: 1.5;
box-shadow: 0 4px 24px rgba(0,0,0,.18);
opacity: 0;
transition: opacity 0.2s;
z-index: 1000;
}
/* Bars fade rather than snap when the highlight changes. */
.d3-eval-grid .bar {
transition: opacity 0.2s;
}
/* Added by the script to bars of every experiment except the hovered one. */
.d3-eval-grid .bar.dimmed {
opacity: 0.2;
}
</style>
<script>
(() => {
/**
 * Invoke `cb` once D3 is usable on `window`.
 *
 * If `window.d3.select` already exists, `cb` runs synchronously and its
 * return value is returned. Otherwise a single shared `<script>` tag
 * (id `d3-cdn-script`) is reused or injected, and `cb` runs after that tag
 * fires `load`. Nothing is returned on the asynchronous path.
 *
 * A failed download, or a load that does not expose a usable `d3`, is
 * reported on the console instead of leaving the embed silently blank.
 *
 * @param {Function} cb - Called with no arguments once D3 is available.
 * @returns {*} Whatever `cb` returns when D3 is already present, else undefined.
 */
const ensureD3 = (cb) => {
  const isD3Ready = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (isD3Ready()) return cb();

  // Reuse the tag another embed may already have injected so D3 is fetched once.
  let script = document.getElementById('d3-cdn-script');
  if (!script) {
    script = document.createElement('script');
    script.id = 'd3-cdn-script';
    script.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(script);
  }

  script.addEventListener('load', () => {
    if (isD3Ready()) {
      cb();
    } else {
      console.error('[d3-eval-grid] d3 script loaded but window.d3 is not usable');
    }
  }, { once: true });

  script.addEventListener('error', () => {
    console.error(`[d3-eval-grid] failed to load d3 from ${script.src}`);
  }, { once: true });
};
/** Experiments in display order; used when no valid `data-experiments` filter is given. */
const ALL_EXPERIMENTS = [
  'Prompt',
  'Basic steering',
  'Clamping',
  'Clamping + Penalty',
  '2D optimized',
  '8D optimized',
];

/** Bar and legend colour per experiment, shared by every subplot. */
const EXPERIMENT_COLORS = {
  'Prompt': '#4c4c4c',
  'Basic steering': '#b2b2b2',
  'Clamping': '#b2b2cc',
  'Clamping + Penalty': '#b2b2e6',
  '2D optimized': '#b2ffb2',
  '8D optimized': '#ffb2ff',
};

/** Colour for an experiment name that has no palette entry. */
const FALLBACK_COLOR = '#999999';

/** Locations tried in order when no explicit data file is configured. */
const DEFAULT_JSON_PATHS = [
  '/data/evaluation_summary.json',
  './assets/data/evaluation_summary.json',
  '../assets/data/evaluation_summary.json',
  '../../assets/data/evaluation_summary.json',
];

/** Prefix a bare file name (no slash) with `/data/`; anything else is returned as is. */
const ensureDataPrefix = (p) => (typeof p === 'string' && p && !p.includes('/')) ? `/data/${p}` : p;

/**
 * Locate the mount element: the `.d3-eval-grid` element directly before the
 * running script if there is one, otherwise the last not-yet-mounted
 * `.d3-eval-grid` in the document.
 *
 * `document.currentScript` is only set while the script element is being
 * executed, so when this runs from an event listener the fallback is used.
 */
const resolveContainer = () => {
  const scriptEl = document.currentScript;
  const sibling = scriptEl ? scriptEl.previousElementSibling : null;
  if (sibling && sibling.classList && sibling.classList.contains('d3-eval-grid')) {
    return sibling;
  }
  const unmounted = Array.from(document.querySelectorAll('.d3-eval-grid'))
    .filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
  return unmounted[unmounted.length - 1] || null;
};

/**
 * Read the single data-file path from the nearest `data-datafiles` attribute
 * on the container or one of its ancestors.
 *
 * Returns null when the attribute is absent, blank, or holds a JSON array
 * (values starting with `[` are not used as a path by this embed, so the
 * default locations apply).
 */
const readDataPath = (container) => {
  let el = container;
  while (el && !(el.getAttribute?.('data-datafiles'))) {
    el = el.parentElement;
  }
  const raw = el && el.getAttribute ? el.getAttribute('data-datafiles') : null;
  const trimmed = raw ? raw.trim() : '';
  if (!trimmed || trimmed.startsWith('[')) return null;
  return trimmed;
};

/**
 * Parse the optional `data-experiments` attribute (a JSON array of names).
 * Returns null, meaning "show every experiment", when the attribute is
 * missing, is not valid JSON, or does not parse to an array.
 */
const readExperimentsFilter = (container) => {
  const raw = container.getAttribute('data-experiments');
  if (!raw) return null;
  try {
    const parsed = JSON.parse(raw);
    if (Array.isArray(parsed)) return parsed;
    if (parsed) console.warn('[d3-eval-grid] data-experiments must be a JSON array; showing all experiments');
    return null;
  } catch (_) {
    console.warn('[d3-eval-grid] data-experiments is not valid JSON; showing all experiments');
    return null;
  }
};

/**
 * Fetch the first path that answers with an OK response and return its
 * parsed JSON. Failures on individual paths are expected (the list is a set
 * of guesses), so they are skipped on purpose.
 *
 * @throws {Error} 'JSON not found' when no path yields parseable JSON.
 */
const fetchFirstAvailable = async (paths) => {
  for (const path of paths) {
    try {
      const response = await fetch(path, { cache: 'no-cache' });
      if (response.ok) return await response.json();
    } catch (_) {
      // Best effort: try the next candidate location.
    }
  }
  throw new Error('JSON not found');
};

/**
 * Y-axis domain for one metric: from 95% of the lowest (mean - std, floored
 * at 0) to 105% of the highest (mean + std). Falls back to a unit-height
 * domain when there are no points or the range collapses, so the scale never
 * receives NaN or a zero-width domain.
 */
const computeYDomain = (points) => {
  if (points.length === 0) return [0, 1];
  const spread = (p) => (p.std === null ? 0 : p.std);
  const upper = Math.max(...points.map((p) => p.mean + spread(p)));
  const lower = Math.min(...points.map((p) => Math.max(0, p.mean - spread(p))));
  const lo = Math.max(0, lower * 0.95);
  const hi = upper * 1.05;
  return hi > lo ? [lo, hi] : [lo, lo + 1];
};

/** Replace the container's content with a red error notice (inserted as text, not HTML). */
const showError = (container, err) => {
  const notice = document.createElement('div');
  notice.setAttribute('style', 'color: red; padding: 20px;');
  notice.textContent = `Error: ${err && err.message ? err.message : String(err)}`;
  container.innerHTML = '';
  container.appendChild(notice);
};

/**
 * Build the card, one bar chart per metric, the shared legend and the hover
 * behaviour inside `container`, then keep the charts sized to the container.
 *
 * @param {Element} container - Mount element.
 * @param {Array} rawData - Records with `metric`, `experiment`, `mean`, `std`.
 * @param {Array|null} experimentsFilter - Experiments to show, or null for all.
 */
const renderGrid = (container, rawData, experimentsFilter) => {
  if (!Array.isArray(rawData)) {
    throw new Error('Unexpected data format: expected an array of records');
  }

  const experiments = experimentsFilter || ALL_EXPERIMENTS;
  const colorOf = (experiment) => EXPERIMENT_COLORS[experiment] ?? FALLBACK_COLOR;

  const twoDecimals = d3.format('.2f');
  const metrics = [
    { key: 'llm_score_concept', label: 'LLM Concept Score', format: twoDecimals },
    { key: 'llm_score_instruction', label: 'LLM Instruction Score', format: twoDecimals },
    { key: 'llm_score_fluency', label: 'LLM Fluency Score', format: twoDecimals },
    { key: 'rep3', label: '3-gram Repetition Fraction', format: twoDecimals },
    { key: 'mean_llm_score', label: 'Mean LLM Score', format: twoDecimals },
    { key: 'harmonic_llm_score', label: 'Harmonic Mean LLM Score', format: twoDecimals },
  ];

  // metric key -> experiment name -> { mean, std }
  const byMetric = new Map();
  rawData.forEach((row) => {
    if (!row) return;
    if (!byMetric.has(row.metric)) byMetric.set(row.metric, new Map());
    byMetric.get(row.metric).set(row.experiment, { mean: row.mean, std: row.std });
  });

  const card = document.createElement('div');
  card.className = 'chart-card';
  // The tooltip is absolutely positioned and moved with card-relative pointer
  // coordinates, so the card must be its containing block.
  card.style.position = 'relative';
  container.appendChild(card);

  const gridContainer = document.createElement('div');
  gridContainer.className = 'grid-container';
  card.appendChild(gridContainer);

  const tooltip = d3.select(card).append('div')
    .attr('class', 'd3-tooltip')
    // Anchor at the card's top-left corner so the translate below is the
    // only offset applied.
    .style('top', '0')
    .style('left', '0')
    .style('transform', 'translate(-9999px, -9999px)');

  let hoveredExperiment = null;
  const legendItems = [];
  const renderers = [];

  const isDimmed = (name) => hoveredExperiment !== null && name !== hoveredExperiment;

  // Hover only changes which elements carry the `dimmed` class, so toggle the
  // class in place instead of redrawing every chart.
  const applyHighlight = () => {
    d3.select(gridContainer).selectAll('rect.bar')
      .classed('dimmed', (d) => isDimmed(d.experiment));
    legendItems.forEach((item) => {
      item.classList.toggle('dimmed', isDimmed(item.dataset.experiment));
    });
  };

  const setHovered = (experiment) => {
    hoveredExperiment = experiment;
    applyHighlight();
  };

  // Tooltip content is built from DOM nodes so names are inserted as text.
  const showTooltip = (metric, d) => {
    tooltip.selectAll('*').remove();
    tooltip.append('div').append('strong').text(d.experiment);
    tooltip.append('div').style('margin-top', '4px').text(metric.label);
    const meanRow = tooltip.append('div').style('margin-top', '4px');
    meanRow.append('strong').text('Mean:');
    meanRow.append('span').text(` ${metric.format(d.mean)}`);
    const stdRow = tooltip.append('div');
    stdRow.append('strong').text('Std:');
    stdRow.append('span').text(` ${d.std === null ? 'n/a' : metric.format(d.std)}`);
    tooltip.style('opacity', 1);
  };

  const hideTooltip = () => {
    tooltip.style('opacity', 0).style('transform', 'translate(-9999px, -9999px)');
  };

  const styleErrorLine = (selection) => selection
    .attr('stroke', '#666')
    .attr('stroke-width', 1.5)
    .attr('opacity', 0.6);

  metrics.forEach((metric) => {
    const subplot = document.createElement('div');
    subplot.className = 'subplot';
    subplot.dataset.metric = metric.key;
    gridContainer.appendChild(subplot);

    const title = document.createElement('div');
    title.className = 'subplot-title';
    title.textContent = metric.label;
    subplot.appendChild(title);

    const svg = d3.select(subplot).append('svg')
      .attr('width', '100%')
      .style('display', 'block');
    const g = svg.append('g');
    const gGrid = g.append('g').attr('class', 'grid');
    const gBars = g.append('g').attr('class', 'bars');
    const gErrorBars = g.append('g').attr('class', 'error-bars');
    const gAxes = g.append('g').attr('class', 'axes');

    // Mean and std are read from the same record, so they cannot get out of
    // step; rows without a finite mean are skipped, a missing std becomes null.
    const metricData = byMetric.get(metric.key);
    const points = [];
    experiments.forEach((experiment) => {
      const entry = metricData ? metricData.get(experiment) : undefined;
      if (!entry || !Number.isFinite(entry.mean)) return;
      points.push({
        experiment,
        mean: entry.mean,
        std: Number.isFinite(entry.std) ? entry.std : null,
      });
    });
    const yDomain = computeYDomain(points);

    const render = () => {
      // Measure the SVG itself: the subplot's clientWidth includes its
      // padding, which would make the drawing wider than the SVG.
      const measured = svg.node().getBoundingClientRect().width;
      const width = measured || subplot.clientWidth || 300;
      const height = Math.max(200, Math.round(width * 0.6));
      const margin = { top: 10, right: 10, bottom: 60, left: 50 };
      const innerWidth = Math.max(0, width - margin.left - margin.right);
      const innerHeight = height - margin.top - margin.bottom;

      svg.attr('height', height);
      g.attr('transform', `translate(${margin.left},${margin.top})`);

      const x = d3.scaleBand()
        .domain(experiments)
        .range([0, innerWidth])
        .padding(0.2);
      const y = d3.scaleLinear()
        .domain(yDomain)
        .range([innerHeight, 0])
        .nice();

      gGrid.selectAll('line')
        .data(y.ticks(4))
        .join('line')
        .attr('x1', 0)
        .attr('x2', innerWidth)
        .attr('y1', (tick) => y(tick))
        .attr('y2', (tick) => y(tick));

      gAxes.selectAll('*').remove();
      const xAxis = gAxes.append('g')
        .attr('transform', `translate(0,${innerHeight})`)
        .call(d3.axisBottom(x).tickSize(3));
      // Slant the experiment names so they fit under narrow bars.
      xAxis.selectAll('text')
        .attr('transform', 'rotate(-45)')
        .style('text-anchor', 'end')
        .attr('dx', '-0.5em')
        .attr('dy', '0.15em');
      gAxes.append('g')
        .call(d3.axisLeft(y).ticks(4).tickFormat(metric.format).tickSize(3));

      const bars = points.map((point) => ({
        ...point,
        color: colorOf(point.experiment),
        x: x(point.experiment),
        y: y(point.mean),
        width: x.bandwidth(),
        // A negative height is invalid for <rect>; clamp defensively.
        height: Math.max(0, innerHeight - y(point.mean)),
      }));

      gBars.selectAll('rect')
        .data(bars)
        .join('rect')
        .attr('class', 'bar')
        .attr('x', (d) => d.x)
        .attr('y', (d) => d.y)
        .attr('width', (d) => d.width)
        .attr('height', (d) => d.height)
        .attr('fill', (d) => d.color)
        .attr('rx', 2)
        .on('mouseenter', (event, d) => {
          setHovered(d.experiment);
          showTooltip(metric, d);
        })
        .on('mousemove', (event) => {
          const [mx, my] = d3.pointer(event, card);
          tooltip.style('transform', `translate(${mx + 10}px, ${my + 10}px)`);
        })
        .on('mouseleave', () => {
          setHovered(null);
          hideTooltip();
        });

      // Error bars are drawn only where a std is available. Each part has its
      // own class so the three joins do not select each other's lines.
      const withStd = bars.filter((d) => d.std !== null);
      const centre = (d) => d.x + d.width / 2;
      const top = (d) => y(d.mean + d.std);
      const bottom = (d) => y(Math.max(0, d.mean - d.std));

      gErrorBars.selectAll('line.stem')
        .data(withStd)
        .join('line')
        .attr('class', 'stem')
        .attr('x1', centre)
        .attr('x2', centre)
        .attr('y1', top)
        .attr('y2', bottom)
        .call(styleErrorLine);

      gErrorBars.selectAll('line.cap-top')
        .data(withStd)
        .join('line')
        .attr('class', 'cap-top')
        .attr('x1', (d) => centre(d) - 3)
        .attr('x2', (d) => centre(d) + 3)
        .attr('y1', top)
        .attr('y2', top)
        .call(styleErrorLine);

      gErrorBars.selectAll('line.cap-bottom')
        .data(withStd)
        .join('line')
        .attr('class', 'cap-bottom')
        .attr('x1', (d) => centre(d) - 3)
        .attr('x2', (d) => centre(d) + 3)
        .attr('y1', bottom)
        .attr('y2', bottom)
        .call(styleErrorLine);
    };

    // Kept on the element as before, in case other code calls it.
    subplot._render = render;
    renderers.push(render);
  });

  // Legend: one entry per experiment, built from DOM nodes (no HTML strings).
  const legend = document.createElement('div');
  legend.className = 'legend';
  experiments.forEach((experiment) => {
    const item = document.createElement('div');
    item.className = 'legend-item';
    item.dataset.experiment = experiment;

    const swatch = document.createElement('div');
    swatch.className = 'legend-swatch';
    swatch.style.background = colorOf(experiment);
    item.appendChild(swatch);

    const label = document.createElement('span');
    label.textContent = experiment;
    item.appendChild(label);

    item.addEventListener('mouseenter', () => setHovered(item.dataset.experiment));
    item.addEventListener('mouseleave', () => setHovered(null));

    legend.appendChild(item);
    legendItems.push(item);
  });
  card.appendChild(legend);

  // Full redraw; needed on first paint and whenever the available width changes.
  const renderAll = () => {
    renderers.forEach((render) => render());
    applyHighlight();
  };

  renderAll();
  if (window.ResizeObserver) {
    const observer = new ResizeObserver(() => renderAll());
    observer.observe(container);
  } else {
    window.addEventListener('resize', renderAll);
  }
};

/**
 * Mount the evaluation grid: find the container, mark it as mounted so it is
 * never initialised twice, load the summary JSON and render the charts. Any
 * failure while loading or rendering is shown inside the container.
 */
const bootstrap = () => {
  const container = resolveContainer();
  if (!container) return;
  if (container.dataset) {
    if (container.dataset.mounted === 'true') return;
    container.dataset.mounted = 'true';
  }

  const dataPath = readDataPath(container);
  const experimentsFilter = readExperimentsFilter(container);
  const jsonPaths = dataPath !== null ? [ensureDataPrefix(dataPath)] : DEFAULT_JSON_PATHS;

  fetchFirstAvailable(jsonPaths)
    .then((rawData) => renderGrid(container, rawData, experimentsFilter))
    .catch((err) => showError(container, err));
};
// Kick-off: mount immediately when the document has already been parsed,
// otherwise wait (once) for DOMContentLoaded. Either way D3 is ensured first.
const mountWhenD3Ready = () => ensureD3(bootstrap);
if (document.readyState !== 'loading') {
  mountWhenD3Ready();
} else {
  document.addEventListener('DOMContentLoaded', mountWhenD3Ready, { once: true });
}
})();
</script>