Spaces:
Running
Running
| <div class="d3-eval-grid d3-eval-grid-3"></div> | |
| <style> | |
| .d3-eval-grid { | |
| padding: 2px; | |
| font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; | |
| } | |
| .d3-eval-grid .grid-container { | |
| display: grid; | |
| grid-template-columns: repeat(2, 1fr); | |
| gap: 8px; | |
| } | |
| @media (max-width: 768px) { | |
| .d3-eval-grid .grid-container { | |
| grid-template-columns: 1fr; | |
| } | |
| } | |
| .d3-eval-grid .subplot { | |
| padding: 4px; | |
| } | |
| .d3-eval-grid .subplot-title { | |
| font-size: 12px; | |
| font-weight: 600; | |
| color: var(--text-color); | |
| margin-bottom: 4px; | |
| text-align: center; | |
| } | |
| .d3-eval-grid .axes path, | |
| .d3-eval-grid .axes line { | |
| stroke: var(--axis-color); | |
| } | |
| .d3-eval-grid .axes text { | |
| fill: var(--tick-color); | |
| font-size: 9px; | |
| } | |
| .d3-eval-grid .grid line { | |
| stroke: var(--grid-color); | |
| stroke-dasharray: 2,2; | |
| opacity: 0.5; | |
| } | |
| .d3-eval-grid .axis-label { | |
| fill: var(--text-color); | |
| font-size: 11px; | |
| font-weight: 600; | |
| } | |
| .d3-eval-grid .d3-tooltip { | |
| position: absolute; | |
| pointer-events: none; | |
| padding: 8px 10px; | |
| background: var(--surface-bg); | |
| border: 1px solid var(--border-color); | |
| border-radius: 8px; | |
| font-size: 11px; | |
| line-height: 1.5; | |
| box-shadow: 0 4px 24px rgba(0,0,0,.18); | |
| opacity: 0; | |
| transition: opacity 0.2s; | |
| z-index: 1000; | |
| } | |
| .d3-eval-grid .bar { | |
| transition: opacity 0.2s; | |
| } | |
| .d3-eval-grid .bar.dimmed { | |
| opacity: 0.2; | |
| } | |
| </style> | |
| <script> | |
| (() => { | |
/**
 * Invokes `cb` once the global D3 library is usable.
 *
 * If `window.d3` is already present the callback runs synchronously and its
 * return value is passed through. Otherwise a single shared <script> tag
 * (id `d3-cdn-script`) is created on first use - or reused when another embed
 * already added it - and `cb` runs after that tag fires `load`.
 *
 * @param {Function} cb - Called with no arguments when D3 is available.
 * @returns {*} The result of `cb()` on the synchronous path, otherwise undefined.
 */
const ensureD3 = (cb) => {
  const isD3Ready = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (isD3Ready()) return cb();

  const SCRIPT_ID = 'd3-cdn-script';
  let script = document.getElementById(SCRIPT_ID);
  if (!script) {
    script = document.createElement('script');
    script.id = SCRIPT_ID;
    script.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(script);
  }

  script.addEventListener('load', () => {
    // Re-check: a load event alone does not prove the global was defined.
    if (isD3Ready()) cb();
  }, { once: true });

  // Without this, a blocked or unreachable CDN leaves the chart blank with no hint why.
  script.addEventListener('error', () => {
    console.error(`[d3-eval-grid] Failed to load D3 from ${script.src}; chart not rendered.`);
  }, { once: true });
};
/**
 * Finds the chart container, loads the evaluation summary JSON and renders one
 * bar chart (mean with +/- std error bars, one bar per experiment) per metric
 * in a responsive grid.
 *
 * Container lookup: the element immediately before the executing <script> is
 * used when it has the `d3-eval-grid-3` class; otherwise the last
 * `.d3-eval-grid-3` element that is not yet mounted is taken. A container is
 * mounted at most once, tracked through its `data-mounted` attribute.
 *
 * Attributes read:
 *  - `data-datafiles` (container or nearest ancestor that has it): a single
 *    file name or path to fetch. A bare file name gets a `/data/` prefix.
 *    A value starting with `[` is parsed as JSON but is not used as a path
 *    list; the built-in default paths are tried in that case.
 *  - `data-experiments` (container): JSON array of experiment names to draw.
 *    Unknown names are ignored; when absent, invalid or matching nothing,
 *    every experiment is drawn.
 *
 * The fetched JSON must be an array of records with `metric`, `experiment`,
 * `mean` and `std` fields. Requires the global `d3` to be loaded.
 */
const bootstrap = () => {
  // currentScript is only set during synchronous script execution; when this
  // runs from an event callback it is null and the class lookup below applies.
  const scriptEl = document.currentScript;
  let container = scriptEl ? scriptEl.previousElementSibling : null;
  if (!container?.classList?.contains('d3-eval-grid-3')) {
    const candidates = Array.from(document.querySelectorAll('.d3-eval-grid-3'))
      .filter((el) => el.dataset?.mounted !== 'true');
    container = candidates[candidates.length - 1] || null;
  }
  if (!container) return;
  if (container.dataset) {
    if (container.dataset.mounted === 'true') return;
    container.dataset.mounted = 'true';
  }

  // Walk up to the closest element (container included) carrying data-datafiles.
  let mountEl = container;
  while (mountEl && !mountEl.getAttribute?.('data-datafiles')) {
    mountEl = mountEl.parentElement;
  }
  let providedData = null;
  try {
    const attr = mountEl?.getAttribute?.('data-datafiles') ?? null;
    if (attr && attr.trim()) {
      providedData = attr.trim().startsWith('[') ? JSON.parse(attr) : attr.trim();
    }
  } catch (err) {
    // A malformed attribute must not prevent rendering from the default paths.
    console.warn('[d3-eval-grid] Ignoring invalid data-datafiles attribute:', err);
  }

  // Optional subset of experiments to draw.
  let experimentsFilter = null;
  try {
    const expAttr = container.getAttribute('data-experiments');
    if (expAttr) {
      experimentsFilter = JSON.parse(expAttr);
    }
  } catch (err) {
    console.warn('[d3-eval-grid] Ignoring invalid data-experiments attribute:', err);
  }

  const DEFAULT_JSON = '/data/evaluation_summary.json';
  const ensureDataPrefix = (p) => (typeof p === 'string' && p && !p.includes('/')) ? `/data/${p}` : p;
  const JSON_PATHS = typeof providedData === 'string'
    ? [ensureDataPrefix(providedData)]
    : [
      DEFAULT_JSON,
      './assets/data/evaluation_summary.json',
      '../assets/data/evaluation_summary.json',
      '../../assets/data/evaluation_summary.json',
    ];

  // Tries each path in order and resolves with the first one that loads and parses.
  const fetchFirstAvailable = async (paths) => {
    for (const p of paths) {
      try {
        const r = await fetch(p, { cache: 'no-cache' });
        if (r.ok) return await r.json();
      } catch (_) {
        // Deliberate best effort: a failing candidate just means "try the next one".
      }
    }
    throw new Error('JSON not found');
  };

  fetchFirstAvailable(JSON_PATHS)
    .then((rawData) => {
      if (!Array.isArray(rawData)) {
        throw new Error('Unexpected data format (expected an array of records)');
      }

      // Chart 3: All experiments including multi-layer optimization
      const allExperiments = ['Prompt', 'Basic steering', 'Clamping', 'Clamping + Penalty', '2D optimized', '8D optimized'];
      const requestedExperiments = Array.isArray(experimentsFilter)
        ? allExperiments.filter((exp) => experimentsFilter.includes(exp))
        : [];
      const visibleExperiments = requestedExperiments.length > 0 ? requestedExperiments : allExperiments;

      // Metrics shown in the grid, in display order.
      const twoDecimals = d3.format('.2f');
      const metrics = [
        { key: 'llm_score_concept', label: 'LLM Concept Score', format: twoDecimals },
        { key: 'eiffel', label: 'Explicit Concept Presence', format: twoDecimals },
        { key: 'llm_score_instruction', label: 'LLM Instruction Score', format: twoDecimals },
        { key: 'minus_log_prob', label: 'Surprise in Original Model', format: twoDecimals },
        { key: 'llm_score_fluency', label: 'LLM Fluency Score', format: twoDecimals },
        { key: 'rep3', label: '3-gram Repetition Fraction', format: twoDecimals },
        { key: 'mean_llm_score', label: 'Mean LLM Score', format: twoDecimals },
        { key: 'harmonic_llm_score', label: 'Harmonic Mean LLM Score', format: twoDecimals },
      ];

      // Fixed y-axis ranges per metric; metrics not listed fall back to [0, 1].
      const yDomains = {
        llm_score_concept: [0, 2],
        llm_score_instruction: [0, 2],
        llm_score_fluency: [0, 2],
        mean_llm_score: [0, 2],
        harmonic_llm_score: [0, 2],
        eiffel: [0, 1],
        minus_log_prob: [0, 2],
        rep3: [0, 0.5],
      };
      const MARGIN = { top: 10, right: 20, bottom: 70, left: 42 };

      // Index records as metric -> experiment -> { mean, std }. Maps are used
      // because the keys come from fetched JSON and must not touch prototypes.
      const statsByMetric = new Map();
      rawData.forEach((d) => {
        if (!statsByMetric.has(d.metric)) statsByMetric.set(d.metric, new Map());
        statsByMetric.get(d.metric).set(d.experiment, { mean: d.mean, std: d.std });
      });

      // Color palette - consistent across all charts
      const allColors = {
        'Prompt': '#4c4c4c',
        'Basic steering': '#b2b2b2',
        'Clamping': '#b2b2cc',
        'Clamping + Penalty': '#b2b2e6',
        '2D optimized': '#b2ffb2',
        '8D optimized': '#ffb2ff',
      };

      const gridContainer = document.createElement('div');
      gridContainer.className = 'grid-container';
      container.appendChild(gridContainer);

      // Tooltip, parked off-screen until a bar is hovered.
      const tooltip = d3.select(container).append('div')
        .attr('class', 'd3-tooltip')
        .style('transform', 'translate(-9999px, -9999px)');

      let hoveredExperiment = null;

      // Fades every bar (in all subplots) that is not the hovered experiment.
      // Only the class is toggled, so hovering never triggers a full re-render.
      const applyDimming = () => {
        d3.select(gridContainer).selectAll('rect.bar')
          .classed('dimmed', (d) => hoveredExperiment !== null && d.experiment !== hoveredExperiment);
      };

      // Create each subplot
      metrics.forEach((metric) => {
        const subplot = document.createElement('div');
        subplot.className = 'subplot';
        subplot.dataset.metric = metric.key;
        gridContainer.appendChild(subplot);

        const title = document.createElement('div');
        title.className = 'subplot-title';
        title.textContent = metric.label;
        subplot.appendChild(title);

        const svg = d3.select(subplot).append('svg')
          .attr('width', '100%')
          .style('display', 'block');
        const g = svg.append('g');
        const gGrid = g.append('g').attr('class', 'grid');
        const gBars = g.append('g').attr('class', 'bars');
        const gErrorBars = g.append('g').attr('class', 'error-bars');
        const gAxes = g.append('g').attr('class', 'axes');
        const gLabels = g.append('g').attr('class', 'value-labels');

        // (Re)draws this subplot for its current width.
        subplot._render = () => {
          const width = subplot.clientWidth || 300;
          const height = Math.max(200, Math.round(width * 0.6));
          // Clamp so a very narrow subplot cannot produce a negative range.
          const innerWidth = Math.max(0, width - MARGIN.left - MARGIN.right);
          const innerHeight = height - MARGIN.top - MARGIN.bottom;

          svg.attr('height', height);
          g.attr('transform', `translate(${MARGIN.left},${MARGIN.top})`);

          // Scales - use all experiments for consistent positioning
          const x = d3.scaleBand()
            .domain(allExperiments)
            .range([0, innerWidth])
            .padding(0.2);
          const y = d3.scaleLinear()
            .domain(yDomains[metric.key] || [0, 1])
            .range([innerHeight, 0]);

          // Grid
          gGrid.selectAll('*').remove();
          gGrid.selectAll('line')
            .data(y.ticks(4))
            .join('line')
            .attr('x1', 0)
            .attr('x2', innerWidth)
            .attr('y1', (d) => y(d))
            .attr('y2', (d) => y(d));

          // Axes
          gAxes.selectAll('*').remove();
          const xAxis = gAxes.append('g')
            .attr('transform', `translate(0,${innerHeight})`)
            .call(d3.axisBottom(x).tickSize(3));

          // Only show labels for visible experiments
          xAxis.selectAll('text')
            .attr('transform', 'rotate(-45)')
            .style('text-anchor', 'end')
            .attr('dx', '-0.5em')
            .attr('dy', '0.15em')
            .style('opacity', function () {
              const text = d3.select(this).text();
              return visibleExperiments.includes(text) ? 1 : 0;
            });

          gAxes.append('g')
            .call(d3.axisLeft(y).ticks(4).tickFormat(metric.format).tickSize(3));

          // Bar geometry, only for visible experiments that have data for this metric.
          const bars = visibleExperiments.flatMap((exp) => {
            const stats = statsByMetric.get(metric.key)?.get(exp);
            if (!stats) return [];
            const top = y(stats.mean);
            return [{
              experiment: exp,
              mean: stats.mean,
              std: stats.std,
              color: allColors[exp],
              x: x(exp),
              y: top,
              width: x.bandwidth(),
              height: innerHeight - top,
            }];
          });

          gBars.selectAll('rect')
            .data(bars)
            .join('rect')
            .attr('class', 'bar')
            .attr('x', (d) => d.x)
            .attr('y', (d) => d.y)
            .attr('width', (d) => d.width)
            .attr('height', (d) => d.height)
            .attr('fill', (d) => d.color)
            .attr('rx', 2)
            .classed('dimmed', (d) => hoveredExperiment !== null && d.experiment !== hoveredExperiment)
            .on('mouseenter', (event, d) => {
              hoveredExperiment = d.experiment;

              // Show value label on bar
              gLabels.selectAll('text').remove();
              gLabels.append('text')
                .attr('x', d.x + d.width / 2)
                .attr('y', d.y - 5)
                .attr('text-anchor', 'middle')
                .attr('fill', 'var(--text-color)')
                .attr('font-size', '11px')
                .attr('font-weight', '600')
                .text(metric.format(d.mean));

              applyDimming();

              // Interpolated values are fixed labels and formatted numbers, not raw input.
              tooltip
                .style('opacity', 1)
                .html([
                  `<div><strong>${d.experiment}</strong></div>`,
                  `<div style="margin-top: 4px;">${metric.label}</div>`,
                  `<div style="margin-top: 4px;"><strong>Mean:</strong> ${metric.format(d.mean)}</div>`,
                  `<div><strong>Std:</strong> ${metric.format(d.std)}</div>`,
                ].join(''));
            })
            .on('mousemove', (event) => {
              const [mx, my] = d3.pointer(event, container);
              tooltip.style('transform', `translate(${mx + 10}px, ${my + 10}px)`);
            })
            .on('mouseleave', () => {
              hoveredExperiment = null;
              gLabels.selectAll('text').remove();
              applyDimming();
              tooltip.style('opacity', 0).style('transform', 'translate(-9999px, -9999px)');
            });

          // Error bars: a vertical stem plus a cap at each end. Each part is
          // selected by its own class so one join never removes another's lines.
          const upperY = (d) => y(d.mean + d.std);
          const lowerY = (d) => y(Math.max(0, d.mean - d.std));
          const centerX = (d) => d.x + d.width / 2;
          const drawErrorLines = (cls, x1, x2, y1, y2) => {
            gErrorBars.selectAll(`line.${cls}`)
              .data(bars)
              .join('line')
              .attr('class', cls)
              .attr('x1', x1)
              .attr('x2', x2)
              .attr('y1', y1)
              .attr('y2', y2)
              .attr('stroke', '#666')
              .attr('stroke-width', 1.5)
              .attr('opacity', 0.6);
          };
          drawErrorLines('stem', centerX, centerX, upperY, lowerY);
          drawErrorLines('cap-top', (d) => centerX(d) - 3, (d) => centerX(d) + 3, upperY, upperY);
          drawErrorLines('cap-bottom', (d) => centerX(d) - 3, (d) => centerX(d) + 3, lowerY, lowerY);
        };
      });

      // Redraws every subplot; used for the first paint and on size changes.
      const updateAll = () => {
        gridContainer.querySelectorAll('.subplot').forEach((subplot) => {
          if (subplot._render) subplot._render();
        });
      };

      updateAll();

      if (window.ResizeObserver) {
        const ro = new ResizeObserver(() => updateAll());
        ro.observe(container);
      } else {
        window.addEventListener('resize', updateAll);
      }
    })
    .catch((err) => {
      // Build the message with textContent so error text is never parsed as HTML.
      const errorBox = document.createElement('div');
      errorBox.style.color = 'red';
      errorBox.style.padding = '20px';
      errorBox.textContent = `Error: ${err.message}`;
      container.textContent = '';
      container.appendChild(errorBox);
    });
};
// Mount as soon as the DOM is parsed: immediately when parsing has already
// finished, otherwise on the one-shot DOMContentLoaded event.
const mountWhenD3Ready = () => ensureD3(bootstrap);
if (document.readyState !== 'loading') {
  mountWhenD3Ready();
} else {
  document.addEventListener('DOMContentLoaded', mountWhenD3Ready, { once: true });
}
| })(); | |
| </script> | |