eiffel-tower-llama / app /src /content /embeds /d3-evaluation3-multi.html
dlouapre's picture
dlouapre HF Staff
Updating metrics charts in d3
a7035df
<div class="d3-eval-grid d3-eval-grid-3"></div>
<style>
/* Root container of the embed; the script appends the grid and the tooltip inside it. */
.d3-eval-grid {
padding: 2px;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
}
/* Subplots are laid out in two equal-width columns. */
.d3-eval-grid .grid-container {
display: grid;
grid-template-columns: repeat(2, 1fr);
gap: 8px;
}
/* At viewport widths of 768px or less the grid collapses to a single column. */
@media (max-width: 768px) {
.d3-eval-grid .grid-container {
grid-template-columns: 1fr;
}
}
/* One cell of the grid: a title followed by an SVG chart. */
.d3-eval-grid .subplot {
padding: 4px;
}
.d3-eval-grid .subplot-title {
font-size: 12px;
font-weight: 600;
color: var(--text-color);
margin-bottom: 4px;
text-align: center;
}
/* Axis, tick and grid colors come from CSS custom properties that are not
   defined in this file (presumably supplied by the host page theme). */
.d3-eval-grid .axes path,
.d3-eval-grid .axes line {
stroke: var(--axis-color);
}
.d3-eval-grid .axes text {
fill: var(--tick-color);
font-size: 9px;
}
/* Horizontal guide lines drawn behind the bars: dashed and half-transparent. */
.d3-eval-grid .grid line {
stroke: var(--grid-color);
stroke-dasharray: 2,2;
opacity: 0.5;
}
.d3-eval-grid .axis-label {
fill: var(--text-color);
font-size: 11px;
font-weight: 600;
}
/* Hover tooltip. It starts invisible (opacity 0); the script raises opacity to 1
   while a bar is hovered and moves it with a CSS transform. pointer-events is
   disabled so the tooltip never intercepts mouse events aimed at the bars. */
.d3-eval-grid .d3-tooltip {
position: absolute;
pointer-events: none;
padding: 8px 10px;
background: var(--surface-bg);
border: 1px solid var(--border-color);
border-radius: 8px;
font-size: 11px;
line-height: 1.5;
box-shadow: 0 4px 24px rgba(0,0,0,.18);
opacity: 0;
transition: opacity 0.2s;
z-index: 1000;
}
/* Bars fade smoothly when the script toggles the `dimmed` class. */
.d3-eval-grid .bar {
transition: opacity 0.2s;
}
/* Applied by the script to bars of experiments other than the hovered one. */
.d3-eval-grid .bar.dimmed {
opacity: 0.2;
}
</style>
<script>
(() => {
/**
 * Runs `cb` once the global D3 library is available.
 *
 * When `window.d3` is already present the callback runs synchronously.
 * Otherwise a single shared `<script id="d3-cdn-script">` tag is reused (or
 * created and appended to `<head>`), and `cb` is deferred until that tag
 * fires its `load` event.
 *
 * @param {Function} cb - Called with no arguments once `window.d3.select` exists.
 * @returns {*} The return value of `cb` on the synchronous path, otherwise `undefined`.
 */
const ensureD3 = (cb) => {
  const hasD3 = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (hasD3()) return cb();

  // Reuse the tag if another embed on the page already injected it, so D3 is
  // only downloaded once.
  let s = document.getElementById('d3-cdn-script');
  if (!s) {
    s = document.createElement('script');
    s.id = 'd3-cdn-script';
    s.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(s);
  }

  s.addEventListener('load', () => {
    if (hasD3()) cb();
  }, { once: true });

  // Without this, a blocked or failed CDN request leaves the chart blank with
  // no hint as to why.
  s.addEventListener('error', () => {
    console.error(`[d3-eval-grid] Failed to load D3 from ${s.src}`);
  }, { once: true });
};
/**
 * Mounts the evaluation bar-chart grid into its `.d3-eval-grid-3` container.
 *
 * Flow:
 *  1. Locate the container (the element right before this script, else the
 *     last `.d3-eval-grid-3` not yet marked `data-mounted="true"`).
 *  2. Resolve the JSON source: an optional `data-datafiles` attribute on the
 *     container or one of its ancestors, otherwise a list of default paths.
 *  3. Fetch the first path that responds OK. The payload is read as an array
 *     of `{ metric, experiment, mean, std }` records.
 *  4. Draw one bar chart per metric, with error bars, hover dimming across
 *     all subplots, and a tooltip.
 *
 * An optional `data-experiments` attribute on the container (a JSON array of
 * experiment names) restricts which experiments are drawn. Bar positions stay
 * the same either way because the x scale always spans all experiments.
 *
 * Takes no arguments and returns `undefined`. Load or render failures are
 * shown as an error message inside the container. Requires `window.d3`.
 */
const bootstrap = () => {
  // `document.currentScript` is null when this runs from an event callback,
  // hence the fallback to the last not-yet-mounted grid on the page.
  const scriptEl = document.currentScript;
  let container = scriptEl ? scriptEl.previousElementSibling : null;
  if (!(container && container.classList && container.classList.contains('d3-eval-grid-3'))) {
    const candidates = Array.from(document.querySelectorAll('.d3-eval-grid-3'))
      .filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
    container = candidates[candidates.length - 1] || null;
  }
  if (!container) return;
  if (container.dataset) {
    if (container.dataset.mounted === 'true') return;
    container.dataset.mounted = 'true';
  }

  // The tooltip is absolutely positioned and moved with pointer coordinates
  // measured from the container, so the container must be its containing block.
  if (window.getComputedStyle(container).position === 'static') {
    container.style.position = 'relative';
  }

  // Walk up the tree to the nearest element carrying `data-datafiles`.
  let mountEl = container;
  while (mountEl && !mountEl.getAttribute?.('data-datafiles')) {
    mountEl = mountEl.parentElement;
  }
  let providedData = null;
  try {
    const attr = mountEl && mountEl.getAttribute ? mountEl.getAttribute('data-datafiles') : null;
    if (attr && attr.trim()) {
      providedData = attr.trim().startsWith('[') ? JSON.parse(attr) : attr.trim();
    }
  } catch (err) {
    // Best effort: a malformed attribute falls back to the default paths.
    console.warn('[d3-eval-grid] Ignoring malformed data-datafiles attribute:', err);
  }

  // Optional experiments filter: a JSON array of experiment names.
  let experimentsFilter = null;
  try {
    const expAttr = container.getAttribute('data-experiments');
    if (expAttr) {
      experimentsFilter = JSON.parse(expAttr);
    }
  } catch (err) {
    console.warn('[d3-eval-grid] Ignoring malformed data-experiments attribute:', err);
  }

  const DEFAULT_JSON = '/data/evaluation_summary.json';
  // A bare file name (no slash) is resolved under /data/.
  const ensureDataPrefix = (p) => (typeof p === 'string' && p && !p.includes('/')) ? `/data/${p}` : p;
  const JSON_PATHS = typeof providedData === 'string'
    ? [ensureDataPrefix(providedData)]
    : [
      DEFAULT_JSON,
      './assets/data/evaluation_summary.json',
      '../assets/data/evaluation_summary.json',
      '../../assets/data/evaluation_summary.json'
    ];

  // Tries each path in order and resolves with the first successfully parsed
  // JSON body. Per-path failures are expected while probing, so they are
  // skipped; only total failure is reported.
  const fetchFirstAvailable = async (paths) => {
    for (const p of paths) {
      try {
        const r = await fetch(p, { cache: 'no-cache' });
        if (r.ok) return await r.json();
      } catch (_) { /* try the next candidate path */ }
    }
    throw new Error('JSON not found');
  };

  // Fixed y-axis range per metric key; unknown keys fall back to [0, 1].
  const Y_DOMAINS = {
    'llm_score_concept': [0, 2],
    'llm_score_instruction': [0, 2],
    'llm_score_fluency': [0, 2],
    'mean_llm_score': [0, 2],
    'harmonic_llm_score': [0, 2],
    'eiffel': [0, 1],
    'minus_log_prob': [0, 2],
    'rep3': [0, 0.5]
  };

  fetchFirstAvailable(JSON_PATHS)
    .then((rawData) => {
      // Chart 3: all experiments, including multi-layer optimization.
      const allExperiments = ['Prompt', 'Basic steering', 'Clamping', 'Clamping + Penalty', '2D optimized', '8D optimized'];
      // Honor `data-experiments` when it names at least one known experiment;
      // otherwise show everything.
      const requested = Array.isArray(experimentsFilter)
        ? allExperiments.filter((name) => experimentsFilter.includes(name))
        : [];
      const visibleExperiments = requested.length > 0 ? requested : allExperiments;

      // One subplot per metric, in display order.
      const metrics = [
        { key: 'llm_score_concept', label: 'LLM Concept Score', format: d3.format('.2f') },
        { key: 'eiffel', label: 'Explicit Concept Presence', format: d3.format('.2f') },
        { key: 'llm_score_instruction', label: 'LLM Instruction Score', format: d3.format('.2f') },
        { key: 'minus_log_prob', label: 'Surprise in Original Model', format: d3.format('.2f') },
        { key: 'llm_score_fluency', label: 'LLM Fluency Score', format: d3.format('.2f') },
        { key: 'rep3', label: '3-gram Repetition Fraction', format: d3.format('.2f') },
        { key: 'mean_llm_score', label: 'Mean LLM Score', format: d3.format('.2f') },
        { key: 'harmonic_llm_score', label: 'Harmonic Mean LLM Score', format: d3.format('.2f') }
      ];

      // Index records as data[metric][experiment] = { mean, std }.
      const data = {};
      rawData.forEach((d) => {
        if (!data[d.metric]) data[d.metric] = {};
        data[d.metric][d.experiment] = { mean: d.mean, std: d.std };
      });

      // Color palette, keyed by experiment name.
      const allColors = {
        'Prompt': '#4c4c4c',
        'Basic steering': '#b2b2b2',
        'Clamping': '#b2b2cc',
        'Clamping + Penalty': '#b2b2e6',
        '2D optimized': '#b2ffb2',
        '8D optimized': '#ffb2ff'
      };

      const gridContainer = document.createElement('div');
      gridContainer.className = 'grid-container';
      container.appendChild(gridContainer);

      // Pin the tooltip to the container's top-left corner so that the
      // pointer-relative translate() below lands next to the cursor.
      const tooltip = d3.select(container).append('div')
        .attr('class', 'd3-tooltip')
        .style('top', '0px')
        .style('left', '0px')
        .style('transform', 'translate(-9999px, -9999px)');

      // Experiment currently under the pointer, shared by all subplots.
      let hoveredExperiment = null;
      const isDimmed = (d) => hoveredExperiment !== null && d.experiment !== hoveredExperiment;
      // Hover only changes the `dimmed` class, so toggle it directly instead
      // of rebuilding every subplot.
      const applyDimming = () => {
        d3.select(gridContainer).selectAll('rect.bar').classed('dimmed', isDimmed);
      };

      metrics.forEach((metric) => {
        const subplot = document.createElement('div');
        subplot.className = 'subplot';
        subplot.dataset.metric = metric.key;
        gridContainer.appendChild(subplot);

        const title = document.createElement('div');
        title.className = 'subplot-title';
        title.textContent = metric.label;
        subplot.appendChild(title);

        const svg = d3.select(subplot).append('svg')
          .attr('width', '100%')
          .style('display', 'block');
        // Layer order matters: later groups paint on top of earlier ones.
        const g = svg.append('g');
        const gGrid = g.append('g').attr('class', 'grid');
        const gBars = g.append('g').attr('class', 'bars');
        const gErrorBars = g.append('g').attr('class', 'error-bars');
        const gAxes = g.append('g').attr('class', 'axes');
        const gLabels = g.append('g').attr('class', 'value-labels');

        // (Re)draws this subplot at its current width; called on mount and on resize.
        subplot._render = () => {
          const width = subplot.clientWidth || 300;
          const height = Math.max(200, Math.round(width * 0.6));
          const margin = { top: 10, right: 20, bottom: 70, left: 42 };
          const innerWidth = width - margin.left - margin.right;
          const innerHeight = height - margin.top - margin.bottom;
          svg.attr('height', height);
          g.attr('transform', `translate(${margin.left},${margin.top})`);

          // The x scale spans all experiments so bars keep the same position
          // whichever subset is visible.
          const x = d3.scaleBand()
            .domain(allExperiments)
            .range([0, innerWidth])
            .padding(0.2);
          const y = d3.scaleLinear()
            .domain(Y_DOMAINS[metric.key] || [0, 1])
            .range([innerHeight, 0]);

          // Grid
          gGrid.selectAll('*').remove();
          gGrid.selectAll('line')
            .data(y.ticks(4))
            .join('line')
            .attr('x1', 0)
            .attr('x2', innerWidth)
            .attr('y1', (d) => y(d))
            .attr('y2', (d) => y(d));

          // Axes
          gAxes.selectAll('*').remove();
          const xAxis = gAxes.append('g')
            .attr('transform', `translate(0,${innerHeight})`)
            .call(d3.axisBottom(x).tickSize(3));
          // Tick labels of hidden experiments are made transparent.
          xAxis.selectAll('text')
            .attr('transform', 'rotate(-45)')
            .style('text-anchor', 'end')
            .attr('dx', '-0.5em')
            .attr('dy', '0.15em')
            .style('opacity', function () {
              const text = d3.select(this).text();
              return visibleExperiments.includes(text) ? 1 : 0;
            });
          gAxes.append('g')
            .call(d3.axisLeft(y).ticks(4).tickFormat(metric.format).tickSize(3));

          // Bar geometry, only for visible experiments that have data.
          const bars = [];
          visibleExperiments.forEach((exp) => {
            const d = data[metric.key]?.[exp];
            if (d) {
              bars.push({
                experiment: exp,
                mean: d.mean,
                std: d.std,
                color: allColors[exp],
                x: x(exp),
                y: y(d.mean),
                width: x.bandwidth(),
                // A negative height is invalid SVG; clamp for out-of-range values.
                height: Math.max(0, innerHeight - y(d.mean))
              });
            }
          });

          gBars.selectAll('rect')
            .data(bars)
            .join('rect')
            .attr('class', 'bar')
            .attr('x', (d) => d.x)
            .attr('y', (d) => d.y)
            .attr('width', (d) => d.width)
            .attr('height', (d) => d.height)
            .attr('fill', (d) => d.color)
            .attr('rx', 2)
            .classed('dimmed', isDimmed)
            .on('mouseenter', (event, d) => {
              hoveredExperiment = d.experiment;
              // Show the value label just above the hovered bar.
              gLabels.selectAll('text').remove();
              gLabels.append('text')
                .attr('x', d.x + d.width / 2)
                .attr('y', d.y - 5)
                .attr('text-anchor', 'middle')
                .attr('fill', 'var(--text-color)')
                .attr('font-size', '11px')
                .attr('font-weight', '600')
                .text(metric.format(d.mean));
              applyDimming();
              tooltip
                .style('opacity', 1)
                .html(`
<div><strong>${d.experiment}</strong></div>
<div style="margin-top: 4px;">${metric.label}</div>
<div style="margin-top: 4px;"><strong>Mean:</strong> ${metric.format(d.mean)}</div>
<div><strong>Std:</strong> ${metric.format(d.std)}</div>
`);
            })
            .on('mousemove', (event) => {
              const [mx, my] = d3.pointer(event, container);
              tooltip.style('transform', `translate(${mx + 10}px, ${my + 10}px)`);
            })
            .on('mouseleave', () => {
              hoveredExperiment = null;
              gLabels.selectAll('text').remove();
              applyDimming();
              tooltip.style('opacity', 0).style('transform', 'translate(-9999px, -9999px)');
            });

          // Error bars: a vertical stem from mean - std (floored at 0) to
          // mean + std, plus a 6px-wide cap at each end. Each part has its own
          // class so re-joining one part never removes the others.
          const centerX = (d) => d.x + d.width / 2;
          const yHigh = (d) => y(d.mean + d.std);
          const yLow = (d) => y(Math.max(0, d.mean - d.std));
          const drawErrorLines = (className, halfWidth, y1, y2) => {
            gErrorBars.selectAll(`line.${className}`)
              .data(bars)
              .join('line')
              .attr('class', className)
              .attr('x1', (d) => centerX(d) - halfWidth)
              .attr('x2', (d) => centerX(d) + halfWidth)
              .attr('y1', y1)
              .attr('y2', y2)
              .attr('stroke', '#666')
              .attr('stroke-width', 1.5)
              .attr('opacity', 0.6);
          };
          drawErrorLines('stem', 0, yHigh, yLow);
          drawErrorLines('cap-top', 3, yHigh, yHigh);
          drawErrorLines('cap-bottom', 3, yLow, yLow);
        };
      });

      // Redraws every subplot; used for the initial paint and on resize.
      const updateAll = () => {
        gridContainer.querySelectorAll('.subplot').forEach((subplot) => {
          if (subplot._render) subplot._render();
        });
      };
      updateAll();

      if (window.ResizeObserver) {
        const ro = new ResizeObserver(() => updateAll());
        ro.observe(container);
      } else {
        window.addEventListener('resize', updateAll);
      }
    })
    .catch((err) => {
      // Use textContent so the message is never interpreted as markup.
      const errorBox = document.createElement('div');
      errorBox.style.color = 'red';
      errorBox.style.padding = '20px';
      errorBox.textContent = `Error: ${err.message}`;
      container.textContent = '';
      container.appendChild(errorBox);
    });
};
// Entry point: load D3 and mount the chart, waiting for the DOM to finish
// parsing first if the document is still loading.
const start = () => ensureD3(bootstrap);
if (document.readyState !== 'loading') {
  start();
} else {
  document.addEventListener('DOMContentLoaded', start, { once: true });
}
})();
</script>