eiffel-tower-llama / app /src /content /embeds /d3-evaluation-configurable.html
tfrere's picture
tfrere HF Staff
embed and style improvements
ef40dcd
<div class="d3-eval-grid d3-eval-grid-configurable"></div>
<style>
/* Root of the embed. It is positioned so that the absolutely positioned
   tooltip below is laid out relative to this box: the script moves the
   tooltip with translate() using pointer coordinates measured from this
   element's top-left corner. */
.d3-eval-grid {
  position: relative;
  padding: 2px;
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
}

/* Subplots are laid out two per row... */
.d3-eval-grid .grid-container {
  display: grid;
  grid-template-columns: repeat(2, 1fr);
  gap: 8px;
}

/* ...and stacked in a single column on narrow viewports. */
@media (max-width: 768px) {
  .d3-eval-grid .grid-container {
    grid-template-columns: 1fr;
  }
}

.d3-eval-grid .subplot {
  padding: 4px;
}

/* The colour custom properties used below (--text-color, --axis-color,
   --tick-color, --grid-color, --surface-bg, --border-color) are not defined
   in this file; they are expected to come from the embedding page. */
.d3-eval-grid .subplot-title {
  font-size: 12px;
  font-weight: 600;
  color: var(--text-color);
  margin-bottom: 4px;
  text-align: center;
}

/* Axis domain path and tick marks. */
.d3-eval-grid .axes path,
.d3-eval-grid .axes line {
  stroke: var(--axis-color);
}

.d3-eval-grid .axes text {
  fill: var(--tick-color);
  font-size: 9px;
}

/* Dashed horizontal guide lines. */
.d3-eval-grid .grid line {
  stroke: var(--grid-color);
  stroke-dasharray: 2,2;
  opacity: 0.5;
}

.d3-eval-grid .axis-label {
  fill: var(--text-color);
  font-size: 11px;
  font-weight: 600;
}

/* Hover tooltip. Anchored at the container's top-left corner (top/left 0) and
   moved purely through `transform`, which the script sets on mousemove.
   Invisible until the script raises its opacity on hover. */
.d3-eval-grid .d3-tooltip {
  position: absolute;
  top: 0;
  left: 0;
  pointer-events: none;
  padding: 8px 10px;
  background: var(--surface-bg);
  border: 1px solid var(--border-color);
  border-radius: 8px;
  font-size: 11px;
  line-height: 1.5;
  box-shadow: 0 4px 24px rgba(0,0,0,.18);
  opacity: 0;
  transition: opacity 0.2s;
  z-index: 1000;
}

.d3-eval-grid .bar {
  transition: opacity 0.2s;
}

/* Applied by the script to every bar except the hovered experiment. */
.d3-eval-grid .bar.dimmed {
  opacity: 0.2;
}
</style>
<script>
(() => {
/**
 * Invoke `cb` once a usable D3 build is available on `window`.
 *
 * When D3 is already present the callback runs synchronously and its return
 * value is passed through. Otherwise a single shared `<script>` tag (looked
 * up by id, created on first use) loads D3 from the CDN and the callback runs
 * from that tag's `load` event, provided D3 is actually usable by then.
 *
 * @param {Function} cb - Called with no arguments once D3 is ready.
 * @returns {*} `cb()`'s result when D3 was already loaded, otherwise undefined.
 */
const ensureD3 = (cb) => {
  const SCRIPT_ID = 'd3-cdn-script';
  const isD3Ready = () => Boolean(window.d3) && typeof window.d3.select === 'function';

  if (isD3Ready()) {
    return cb();
  }

  // Reuse the loader tag another embed may already have injected.
  const loader = document.getElementById(SCRIPT_ID) || (() => {
    const tag = document.createElement('script');
    tag.id = SCRIPT_ID;
    tag.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(tag);
    return tag;
  })();

  const onLoaded = () => {
    if (isD3Ready()) {
      cb();
    }
  };
  loader.addEventListener('load', onLoaded, { once: true });
};
// Define experiment states.
// The states are cumulative: each one shows every bar of the previous state
// plus the next two experiments, in the fixed display order below.
const EXPERIMENT_STATES = (() => {
  const ordered = [
    'Prompt',
    'Basic steering',
    'Clamping',
    'Clamping + Penalty',
    '2D optimized',
    '8D optimized',
  ];
  const firstN = (n) => ordered.slice(0, n);
  return {
    naive: firstN(2),
    clamp: firstN(4),
    multi: firstN(6),
  };
})();
const bootstrap = () => {
const scriptEl = document.currentScript;
let container = scriptEl ? scriptEl.previousElementSibling : null;
if (!(container && container.classList && container.classList.contains('d3-eval-grid-configurable'))) {
const candidates = Array.from(document.querySelectorAll('.d3-eval-grid-configurable'))
.filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
container = candidates[candidates.length - 1] || null;
}
if (!container) return;
if (container.dataset) {
if (container.dataset.mounted === 'true') return;
container.dataset.mounted = 'true';
}
// Nearest element, starting at `start` and walking up through its ancestors,
// that carries a non-empty `attrName` attribute; null when there is none.
const closestWithAttr = (start, attrName) => {
  let node = start;
  while (node && !node.getAttribute?.(attrName)) {
    node = node.parentElement;
  }
  return node;
};
// Find config attribute
let mountEl = closestWithAttr(container, 'data-config');
let configValue = null;
try {
  const configAttr = mountEl && mountEl.getAttribute ? mountEl.getAttribute('data-config') : null;
  if (configAttr && configAttr.trim()) {
    // Try to parse as JSON first, otherwise treat as a bare state name
    try {
      configValue = JSON.parse(configAttr);
    } catch (_) {
      configValue = configAttr.trim();
    }
  }
} catch (_) {
  // Best effort: an unreadable attribute means "no config".
  configValue = null;
}
// Determine visible experiments based on config.
// Accepted shapes: "clamp" or {"state": "clamp"}. Anything else (missing
// config, non-string state, unknown name) falls back to 'naive'.
let requestedState = 'naive';
if (typeof configValue === 'string') {
  requestedState = configValue;
} else if (configValue && typeof configValue.state === 'string') {
  requestedState = configValue.state;
}
const stateName = requestedState.toLowerCase();
// Own-property check so names such as "constructor" or "toString" cannot
// resolve to something inherited from Object.prototype.
const visibleExperiments = Object.prototype.hasOwnProperty.call(EXPERIMENT_STATES, stateName)
  ? EXPERIMENT_STATES[stateName]
  : EXPERIMENT_STATES['naive'];
// Find data attribute
mountEl = closestWithAttr(container, 'data-datafiles');
const DEFAULT_JSON = '/data/evaluation_summary.json';
// Locations probed, in order, when the embed does not name its data file(s).
const FALLBACK_JSON_PATHS = [
  DEFAULT_JSON,
  './assets/data/evaluation_summary.json',
  '../assets/data/evaluation_summary.json',
  '../../assets/data/evaluation_summary.json'
];
// A bare file name (no slash) is taken to live under /data/.
const ensureDataPrefix = (p) => (typeof p === 'string' && p && !p.includes('/')) ? `/data/${p}` : p;
/**
 * Interpret the raw `data-datafiles` attribute.
 * @param {?string} attr - Raw attribute value.
 * @returns {?(string|Array)} A single path, the parsed JSON list when the
 *   value starts with "[", or null when empty or the list is malformed.
 */
const parseDatafilesAttr = (attr) => {
  const text = typeof attr === 'string' ? attr.trim() : '';
  if (!text) return null;
  if (!text.startsWith('[')) return text;
  try {
    return JSON.parse(text);
  } catch (_) {
    // Malformed list: behave as if nothing was provided.
    return null;
  }
};
/**
 * Turn the parsed attribute into the ordered list of URLs to try.
 * A string yields one path; an array yields its non-empty string entries
 * (previously a parsed array was silently ignored); anything else yields the
 * default locations.
 * @param {?(string|Array)} provided
 * @returns {string[]}
 */
const resolveJsonPaths = (provided) => {
  if (typeof provided === 'string') return [ensureDataPrefix(provided)];
  if (Array.isArray(provided)) {
    const listed = provided
      .filter((p) => typeof p === 'string' && p.trim())
      .map((p) => ensureDataPrefix(p.trim()));
    if (listed.length > 0) return listed;
  }
  return [...FALLBACK_JSON_PATHS];
};
let providedData = null;
try {
  providedData = parseDatafilesAttr(
    mountEl && mountEl.getAttribute ? mountEl.getAttribute('data-datafiles') : null
  );
} catch (_) {
  // Best effort: if the attribute cannot be read, use the default locations.
  providedData = null;
}
const JSON_PATHS = resolveJsonPaths(providedData);
/**
 * Try each URL in order and resolve with the parsed JSON body of the first
 * one that answers with an OK status and parses successfully. Network
 * errors, non-OK responses and unparsable bodies all just move on to the
 * next candidate.
 *
 * @param {Iterable<string>} paths - Candidate URLs, in priority order.
 * @returns {Promise<*>} Parsed JSON of the first usable response.
 * @throws {Error} 'JSON not found' when no candidate yields JSON.
 */
const fetchFirstAvailable = async (paths) => {
  const MISS = { found: false };
  const attempt = async (url) => {
    const response = await fetch(url, { cache: 'no-cache' });
    if (!response.ok) {
      return MISS;
    }
    return { found: true, payload: await response.json() };
  };

  for (const url of paths) {
    const outcome = await attempt(url).catch(() => MISS);
    if (outcome.found) {
      return outcome.payload;
    }
  }
  throw new Error('JSON not found');
};
fetchFirstAvailable(JSON_PATHS)
.then(rawData => {
// All experiments for consistent positioning: the x scale always reserves a
// slot for every experiment, even those hidden by the current config.
const allExperiments = ['Prompt', 'Basic steering', 'Clamping', 'Clamping + Penalty', '2D optimized', '8D optimized'];
// Every metric is displayed with two decimals; build the formatter once.
const twoDecimals = d3.format('.2f');
// Metrics, in display order (8 subplots in the CSS grid)
const metrics = [
  { key: 'llm_score_concept', label: 'LLM Concept Score', format: twoDecimals },
  { key: 'eiffel', label: 'Explicit Concept Presence', format: twoDecimals },
  { key: 'llm_score_instruction', label: 'LLM Instruction Score', format: twoDecimals },
  { key: 'minus_log_prob', label: 'Surprise in Original Model', format: twoDecimals },
  { key: 'llm_score_fluency', label: 'LLM Fluency Score', format: twoDecimals },
  { key: 'rep3', label: '3-gram Repetition Fraction', format: twoDecimals },
  { key: 'mean_llm_score', label: 'Mean LLM Score', format: twoDecimals },
  { key: 'harmonic_llm_score', label: 'Harmonic Mean LLM Score', format: twoDecimals }
];
// Restructure the flat rows into data[metric][experiment] = { mean, std }.
if (!Array.isArray(rawData)) {
  throw new TypeError('Evaluation data must be a JSON array of rows');
}
// Null-prototype maps: the keys come from the fetched file, so a metric or
// experiment called "__proto__" must stay an ordinary key instead of
// reaching Object.prototype.
const data = Object.create(null);
rawData.forEach((row) => {
  if (!row || typeof row !== 'object') return; // skip malformed rows
  const byExperiment = data[row.metric] || (data[row.metric] = Object.create(null));
  byExperiment[row.experiment] = { mean: row.mean, std: row.std };
});
// Color palette - categorical colors, one per experiment group.
/**
 * Return `count` categorical colors.
 *
 * Delegates to `window.ColorPalettes.getColors('categorical', count)` when
 * that helper exists (any failure there is ignored). Otherwise cycles
 * through a pool made of the page's `--primary-color` custom property
 * (default '#E889AB') followed by D3's Tableau10 scheme (or a hard-coded
 * copy of it when D3 does not expose one).
 *
 * @param {number} count - Number of colors wanted.
 * @returns {string[]} Color strings (or whatever ColorPalettes returns).
 */
const getCategoricalColors = (count) => {
  try {
    if (typeof window.ColorPalettes?.getColors === 'function') {
      return window.ColorPalettes.getColors('categorical', count);
    }
  } catch (_) {}

  const rootStyle = getComputedStyle(document.documentElement);
  const primary = rootStyle.getPropertyValue('--primary-color').trim() || '#E889AB';

  let tableau = ['#4e79a7', '#f28e2b', '#e15759', '#76b7b2', '#59a14f', '#edc948', '#b07aa1', '#ff9da7', '#9c755f', '#bab0ab'];
  if (window.d3 && window.d3.schemeTableau10) {
    tableau = window.d3.schemeTableau10;
  }

  const pool = [primary].concat(Array.from(tableau));
  const picked = [];
  let slot = 0;
  while (slot < count) {
    picked.push(pool[slot % pool.length]);
    slot += 1;
  }
  return picked;
};
// Get base colors for groups - start with primary color for Basic steering
const baseColors = getCategoricalColors(3);
// Create variations of one base color that share its hue - more harmonious
/**
 * Build `count` related shades of `baseColor`.
 *
 * The base color is resolved through the browser (a throwaway <span> and
 * getComputedStyle), reduced to its HSL hue, and re-emitted as uppercase
 * `#RRGGBB` strings whose saturation/lightness ramp gently from the first to
 * the last shade. Two ramps exist:
 *  - when `window.ColorPalettes.getPrimaryOKLCH()` returns a value, a ramp
 *    with lightness 0.59..0.71 and saturation clamped to 0.35..0.55 (or, if
 *    `ColorPalettes.getColors` is missing, the base color repeated);
 *  - otherwise lightness 0.525..0.675 and saturation 0.45..0.60.
 * Any failure (including an unparsable color) yields `baseColor` repeated.
 *
 * @param {string} baseColor - Any CSS color string.
 * @param {number} count - Number of shades to produce.
 * @returns {string[]} `count` color strings.
 */
const getSequentialVariations = (baseColor, count) => {
  // Resolve a CSS color to channels in 0..1 by letting the browser compute it.
  const parseColor = (color) => {
    const probe = document.createElement('span');
    probe.style.color = color;
    document.body.appendChild(probe);
    try {
      const channels = getComputedStyle(probe).color.match(/\d+/g);
      if (!channels || channels.length < 3) return null;
      return {
        r: Number(channels[0]) / 255,
        g: Number(channels[1]) / 255,
        b: Number(channels[2]) / 255
      };
    } finally {
      // Always detach the probe, even when style resolution throws.
      document.body.removeChild(probe);
    }
  };

  // HSL hue of an RGB triple, in degrees within [0, 360).
  const hueOf = ({ r, g, b }) => {
    const max = Math.max(r, g, b);
    const min = Math.min(r, g, b);
    const delta = max - min;
    let h = 0;
    if (delta !== 0) {
      if (max === r) h = ((g - b) / delta) % 6;
      else if (max === g) h = (b - r) / delta + 2;
      else h = (r - g) / delta + 4;
    }
    h = h * 60;
    if (h < 0) h += 360;
    return h;
  };

  // Standard HSL -> RGB conversion, formatted as uppercase #RRGGBB.
  const hslToHex = (h, sat, light) => {
    const hue = h / 360;
    const c = sat * (1 - Math.abs(2 * light - 1));
    const x = c * (1 - Math.abs((hue * 6) % 2 - 1));
    const m = light - c / 2;
    let r, g, b;
    if (hue < 1/6) { r = c; g = x; b = 0; }
    else if (hue < 2/6) { r = x; g = c; b = 0; }
    else if (hue < 3/6) { r = 0; g = c; b = x; }
    else if (hue < 4/6) { r = 0; g = x; b = c; }
    else if (hue < 5/6) { r = x; g = 0; b = c; }
    else { r = c; g = 0; b = x; }
    const toHex = (n) => Math.round(Math.max(0, Math.min(255, (n + m) * 255))).toString(16).padStart(2, '0').toUpperCase();
    return `#${toHex(r)}${toHex(g)}${toHex(b)}`;
  };

  // Shared driver: `toneAt(t)` maps t in [0, 1] to { sat, light }, or to
  // null to keep the base color for that slot.
  const build = (toneAt) => {
    const rgb = parseColor(baseColor);
    if (!rgb) return Array(count).fill(baseColor);
    const h = hueOf(rgb);
    const variations = [];
    for (let i = 0; i < count; i++) {
      const t = count === 1 ? 0.5 : i / (count - 1);
      const tone = toneAt(t);
      variations.push(tone ? hslToHex(h, tone.sat, tone.light) : baseColor);
    }
    return variations;
  };

  try {
    const palettes = window.ColorPalettes;
    if (palettes && typeof palettes.getPrimaryOKLCH === 'function' && palettes.getPrimaryOKLCH()) {
      const baseL = 0.65; // Medium lightness
      const baseC = 0.2;  // Medium chroma
      const canConvert = Boolean(palettes.getColors);
      return build((t) => {
        if (!canConvert) return null;
        const LVar = baseL + (t - 0.5) * 0.12; // 0.59 .. 0.71
        const CVar = baseC * (0.95 + t * 0.1); // slight saturation drift
        return {
          light: Math.max(0.5, Math.min(0.75, LVar)),
          sat: Math.max(0.35, Math.min(0.55, CVar * 2.5))
        };
      });
    }
  } catch (_) {
    // Fall through to the plain ramp below.
  }

  // Fallback: create subtle variations manually
  try {
    return build((t) => ({
      light: 0.6 + (t - 0.5) * 0.15, // 0.525 .. 0.675
      sat: 0.45 + t * 0.15           // 0.45 .. 0.60
    }));
  } catch (_) {
    return Array(count).fill(baseColor);
  }
};
// Create color groups.
// baseColors[0] colors "Basic steering"; baseColors[1] seeds the two clamping
// shades; baseColors[2] seeds the two optimized shades. Every slot has a
// hard-coded fallback in case the palette lookup produced nothing usable.
const variationsFor = (seed, fallbackSeed) => getSequentialVariations(seed || fallbackSeed, 2);
const clampVariations = variationsFor(baseColors[1], '#4e79a7');
const optimizedVariations = variationsFor(baseColors[2], '#59a14f');
const colorChoices = [
  // [experiment, chosen color, fallback]
  ['Prompt', '#4c4c4c', '#4c4c4c'], // Keep gray for baseline/reference
  ['Basic steering', baseColors[0], '#E889AB'],
  ['Clamping', clampVariations[0], '#4e79a7'],
  ['Clamping + Penalty', clampVariations[1], '#5a8ab8'],
  ['2D optimized', optimizedVariations[0], '#59a14f'],
  ['8D optimized', optimizedVariations[1], '#6bb26b']
];
const allColors = Object.fromEntries(
  colorChoices.map(([experiment, chosen, fallback]) => [experiment, chosen || fallback])
);
// Grid wrapper that holds one subplot per metric.
const gridContainer = Object.assign(document.createElement('div'), {
  className: 'grid-container'
});
container.appendChild(gridContainer);
// Tooltip: a single element shared by every subplot, parked off-screen
// until a bar is hovered.
const tooltip = d3.select(container).append('div');
tooltip.attr('class', 'd3-tooltip');
tooltip.style('transform', 'translate(-9999px, -9999px)');
// Experiment currently under the pointer (null when none); shared so that
// the matching bar stays highlighted in every subplot.
let hoveredExperiment = null;
// Create each subplot: one titled bar chart per metric. Each subplot element
// gets a `_render` method that (re)draws it for its current width; it is
// called on first paint, on resize and on every hover change.
metrics.forEach((metric) => {
  const subplot = document.createElement('div');
  subplot.className = 'subplot';
  subplot.dataset.metric = metric.key;
  gridContainer.appendChild(subplot);

  const title = document.createElement('div');
  title.className = 'subplot-title';
  title.textContent = metric.label;
  subplot.appendChild(title);

  const svg = d3.select(subplot).append('svg')
    .attr('width', '100%')
    .style('display', 'block');
  // Layer order matters: later groups paint on top of earlier ones.
  const g = svg.append('g');
  const gGrid = g.append('g').attr('class', 'grid');
  const gBars = g.append('g').attr('class', 'bars');
  const gErrorBars = g.append('g').attr('class', 'error-bars');
  const gAxes = g.append('g').attr('class', 'axes');
  const gLabels = g.append('g').attr('class', 'value-labels');

  // Fixed y-axis ranges based on metric type
  const yDomains = {
    'llm_score_concept': [0, 2],
    'llm_score_instruction': [0, 2],
    'llm_score_fluency': [0, 2],
    'mean_llm_score': [0, 2],
    'harmonic_llm_score': [0, 2],
    'eiffel': [0, 1],
    'minus_log_prob': [0, 2],
    'rep3': [0, 0.5]
  };
  const yDomain = yDomains[metric.key] || [0, 1];
  const margin = { top: 10, right: 20, bottom: 70, left: 42 };

  // Shared look of error-bar stems and caps.
  const styleWhisker = (selection) => selection
    .attr('stroke', '#666')
    .attr('stroke-width', 1.5)
    .attr('opacity', 0.6);

  subplot._render = () => {
    const width = subplot.clientWidth || 300;
    const height = Math.max(200, Math.round(width * 0.6));
    const innerWidth = width - margin.left - margin.right;
    const innerHeight = height - margin.top - margin.bottom;
    svg.attr('height', height);
    g.attr('transform', `translate(${margin.left},${margin.top})`);

    // Scales - use all experiments for consistent positioning
    const x = d3.scaleBand()
      .domain(allExperiments)
      .range([0, innerWidth])
      .padding(0.2);
    // The domain is fixed per metric, so clamp the output: a mean or a
    // mean+std beyond the domain is drawn at the plot edge instead of
    // spilling outside the plot area (and heights can never go negative).
    const y = d3.scaleLinear()
      .domain(yDomain)
      .range([innerHeight, 0])
      .clamp(true);

    // Grid
    gGrid.selectAll('*').remove();
    gGrid.selectAll('line')
      .data(y.ticks(4))
      .join('line')
      .attr('x1', 0)
      .attr('x2', innerWidth)
      .attr('y1', (tick) => y(tick))
      .attr('y2', (tick) => y(tick));

    // Axes
    gAxes.selectAll('*').remove();
    const xAxis = gAxes.append('g')
      .attr('transform', `translate(0,${innerHeight})`)
      .call(d3.axisBottom(x).tickSize(3));
    // Only show labels for visible experiments; hidden ones keep their slot.
    xAxis.selectAll('text')
      .attr('transform', 'rotate(-45)')
      .style('text-anchor', 'end')
      .attr('dx', '-0.5em')
      .attr('dy', '0.15em')
      .style('opacity', function() {
        const text = d3.select(this).text();
        return visibleExperiments.includes(text) ? 1 : 0;
      });
    gAxes.append('g')
      .call(d3.axisLeft(y).ticks(4).tickFormat(metric.format).tickSize(3));

    // Draw bars (only for visible experiments that have data for this metric)
    const bars = [];
    visibleExperiments.forEach((exp) => {
      const d = data[metric.key]?.[exp];
      if (!d) return;
      const top = y(d.mean);
      bars.push({
        experiment: exp,
        mean: d.mean,
        std: d.std,
        color: allColors[exp],
        x: x(exp),
        y: top,
        width: x.bandwidth(),
        height: innerHeight - top
      });
    });

    gBars.selectAll('rect')
      .data(bars)
      .join('rect')
      .attr('class', 'bar')
      .attr('x', (d) => d.x)
      .attr('y', (d) => d.y)
      .attr('width', (d) => d.width)
      .attr('height', (d) => d.height)
      .attr('fill', (d) => d.color)
      .attr('rx', 2)
      .classed('dimmed', (d) => Boolean(hoveredExperiment) && d.experiment !== hoveredExperiment)
      .on('mouseenter', (event, d) => {
        hoveredExperiment = d.experiment;
        // Show value label on bar
        gLabels.selectAll('text').remove();
        gLabels.append('text')
          .attr('x', d.x + d.width / 2)
          .attr('y', d.y - 5)
          .attr('text-anchor', 'middle')
          .attr('fill', 'var(--text-color)')
          .attr('font-size', '11px')
          .attr('font-weight', '600')
          .text(metric.format(d.mean));
        // Re-render every subplot so the same experiment is highlighted everywhere.
        updateAll();
        tooltip
          .style('opacity', 1)
          .html(`
<div><strong>${d.experiment}</strong></div>
<div style="margin-top: 4px;">${metric.label}</div>
<div style="margin-top: 4px;"><strong>Mean:</strong> ${metric.format(d.mean)}</div>
<div><strong>Std:</strong> ${metric.format(d.std)}</div>
`);
      })
      .on('mousemove', (event) => {
        const [mx, my] = d3.pointer(event, container);
        tooltip.style('transform', `translate(${mx + 10}px, ${my + 10}px)`);
      })
      .on('mouseleave', () => {
        hoveredExperiment = null;
        gLabels.selectAll('text').remove();
        updateAll();
        tooltip.style('opacity', 0).style('transform', 'translate(-9999px, -9999px)');
      });

    // Error bars: mean ± std, clamped to the axis range by the y scale.
    const centerX = (d) => d.x + d.width / 2;
    const whiskerTop = (d) => y(d.mean + d.std);
    const whiskerBottom = (d) => y(d.mean - d.std);
    // Stems carry their own class so this selection does not also match the
    // cap lines (which would be removed and recreated on every re-render).
    gErrorBars.selectAll('line.stem')
      .data(bars)
      .join('line')
      .attr('class', 'stem')
      .attr('x1', centerX)
      .attr('x2', centerX)
      .attr('y1', whiskerTop)
      .attr('y2', whiskerBottom)
      .call(styleWhisker);
    // Error bar caps
    gErrorBars.selectAll('line.cap-top')
      .data(bars)
      .join('line')
      .attr('class', 'cap-top')
      .attr('x1', (d) => centerX(d) - 3)
      .attr('x2', (d) => centerX(d) + 3)
      .attr('y1', whiskerTop)
      .attr('y2', whiskerTop)
      .call(styleWhisker);
    gErrorBars.selectAll('line.cap-bottom')
      .data(bars)
      .join('line')
      .attr('class', 'cap-bottom')
      .attr('x1', (d) => centerX(d) - 3)
      .attr('x2', (d) => centerX(d) + 3)
      .attr('y1', whiskerBottom)
      .attr('y2', whiskerBottom)
      .call(styleWhisker);
  };
});
// Redraw every subplot that has a renderer attached.
const updateAll = () => {
  for (const subplot of gridContainer.querySelectorAll('.subplot')) {
    if (subplot._render) {
      subplot._render();
    }
  }
};
// First paint.
updateAll();
// Keep the charts in step with the container's size; fall back to the
// window resize event where ResizeObserver is unavailable.
if (!window.ResizeObserver) {
  window.addEventListener('resize', updateAll);
} else {
  const sizeWatcher = new ResizeObserver(() => updateAll());
  sizeWatcher.observe(container);
}
})
.catch(err => {
container.innerHTML = `<div style="color: red; padding: 20px;">Error: ${err.message}</div>`;
});
};
// Kick off: wait for the DOM if it is still being parsed, then make sure D3
// is available before mounting the chart.
const start = () => ensureD3(bootstrap);
if (document.readyState !== 'loading') {
  start();
} else {
  document.addEventListener('DOMContentLoaded', () => start(), { once: true });
}
})();
</script>