// DDR-Bench Interactive Charts with Smooth Animations // Using Plotly.js with animate for smooth transitions // Common Plotly layout settings for DDR-Bench design system const darkLayout = { paper_bgcolor: 'rgba(0,0,0,0)', plot_bgcolor: 'rgba(0,0,0,0)', font: { family: "-apple-system, BlinkMacSystemFont, 'SF Pro Display', 'Helvetica Neue', sans-serif", color: '#1d1d1f', size: 15 }, xaxis: { gridcolor: '#d2d2d7', linecolor: '#d2d2d7', tickfont: { color: '#424245', size: 14 }, title: { font: { color: '#1d1d1f', size: 15, weight: 600 } }, zerolinecolor: '#d2d2d7' }, yaxis: { gridcolor: '#d2d2d7', linecolor: '#d2d2d7', tickfont: { color: '#424245', size: 14 }, title: { font: { color: '#1d1d1f', size: 15, weight: 600 } }, zerolinecolor: '#d2d2d7' }, legend: { bgcolor: 'rgba(0,0,0,0)', bordercolor: 'rgba(0,0,0,0)', borderwidth: 0, font: { color: '#1d1d1f', size: 14 }, orientation: 'h', y: 0.99, x: 0.5, xanchor: 'center', yanchor: 'top' }, hoverlabel: { bgcolor: '#ffffff', bordercolor: '#d2d2d7', font: { color: '#1d1d1f', size: 14 }, namelength: -1 }, hovermode: 'closest', margin: { t: 20, r: 10, b: 40, l: 50 }, // Reduced margins specifically for compact cards }; const plotlyConfig = { displayModeBar: false, // Hide modebar completely responsive: true, displaylogo: false }; // Animation settings for smooth transitions const animationSettings = { transition: { duration: 750, easing: 'cubic-in-out' }, frame: { duration: 750, redraw: true } }; // Current state let currentScalingDim = 'turn'; let currentProbingMode = 'byProgress'; let currentRankingMode = 'novelty'; // ============================================================================ // PERFORMANCE OPTIMIZATION UTILITIES // ============================================================================ // Track which charts have been initialized const initializedCharts = new Set(); // Lazy loading observer - only render charts when they enter viewport const lazyLoadObserver = new IntersectionObserver((entries) => { entries.forEach(entry => { if (entry.isIntersecting) { const section = entry.target; const sectionId = section.id; if (!initializedCharts.has(sectionId)) { initializedCharts.add(sectionId); // Use requestIdleCallback for non-blocking initialization const initFn = () => { switch (sectionId) { case 'scaling': initScalingCharts(); break; case 'ranking': initRankingCharts(); break; case 'turn': initTurnCharts(); break; case 'entropy': initEntropyCharts(); break; case 'error': initErrorChart(); break; case 'probing': initProbingCharts(); break; } }; if ('requestIdleCallback' in window) { requestIdleCallback(initFn, { timeout: 100 }); } else { setTimeout(initFn, 0); } } } }); }, { rootMargin: '0px 0px', // Start exactly when entering viewport threshold: 0.15 // Trigger when 15% visible }); // Debounce utility for hover effects function debounce(fn, delay) { let timeoutId; return function (...args) { clearTimeout(timeoutId); timeoutId = setTimeout(() => fn.apply(this, args), delay); }; } // Throttle utility for frequent events function throttle(fn, limit) { let inThrottle = false; return function (...args) { if (!inThrottle) { fn.apply(this, args); inThrottle = true; setTimeout(() => inThrottle = false, limit); } }; } // Batch DOM updates using requestAnimationFrame function batchUpdate(updateFn) { return new Promise(resolve => { requestAnimationFrame(() => { updateFn(); resolve(); }); }); } // ============================================================================ // SCALING ANALYSIS - 3 Charts with animated dimension switching // ============================================================================ // Helper to normalize values to [0, 1] function normalizeData(values, type) { if (values.length === 0) return { normalized: [], min: 0, max: 1 }; let min, max; let normalized; if (type === 'log') { // Filter positive values for log const positiveValues = values.filter(v => v > 0); min = Math.min(...positiveValues); max = Math.max(...positiveValues); const logMin = Math.log10(min); const logMax = Math.log10(max); const range = logMax - logMin || 1; normalized = values.map(v => v > 0 ? (Math.log10(v) - logMin) / range : 0); } else { min = 0; // Always start linear scales at 0 for this use case max = Math.max(...values); const range = max - min || 1; normalized = values.map(v => (v - min) / range); } return { normalized, min, max }; } // Helper to generate pretty ticks for normalized scale [0, 1] function generateTicks(min, max, type) { const tickVals = [0, 0.2, 0.4, 0.6, 0.8, 1.0]; let tickText; if (type === 'log') { const logMin = Math.log10(min); const logMax = Math.log10(max); const range = logMax - logMin; tickText = tickVals.map(v => { const val = Math.pow(10, logMin + (v * range)); if (val >= 1) return val.toFixed(1); return val.toFixed(3); // More precision for small costs }); // Format as currency tickText = tickText.map(t => '$' + t); } else { const range = max - min; tickText = tickVals.map(v => { const val = min + (v * range); if (val >= 1000) return (val / 1000).toFixed(0) + 'k'; return val.toFixed(0); }); } return { tickVals, tickText }; } // Exact axis ranges from Python scripts const SCALING_Y_RANGES = { 'mimic': [5, 40], // Python: y_min=5, y_max=40 '10k': [0, 85], // Python: y_min=0, y_max=85 'globem': [0, 50] // Python: y_min=0, y_max=50 }; // Populate shared legend for a section function populateSharedLegend(containerId, models, colorMap) { const container = document.getElementById(containerId); if (!container) return; container.innerHTML = models.map(model => { const color = (colorMap && colorMap[model]) || '#888'; return `
${model}
`; }).join(''); } function initScalingCharts() { // Check if data is loaded if (typeof DDR_DATA === 'undefined' || !DDR_DATA.scaling) { console.warn('DDR_DATA not loaded yet, retrying...'); setTimeout(initScalingCharts, 100); return; } const scenarios = ['mimic', '10k', 'globem']; scenarios.forEach(scenario => { const data = DDR_DATA.scaling[scenario]; if (!data) return; const models = Object.keys(data); const traces = []; // Initial dimension is 'turn' const allTurns = models.flatMap(m => data[m].turns); const { normalized: normTurns, min: minTurn, max: maxTurn } = normalizeData(allTurns, 'linear'); const { tickVals, tickText } = generateTicks(minTurn, maxTurn, 'linear'); // We need to slice the normalized array back to per-model arrays let offset = 0; models.forEach(model => { const len = data[model].turns.length; const modelNormX = normTurns.slice(offset, offset + len); offset += len; // Start with markers only (lines will be animated in) traces.push({ x: modelNormX, y: data[model].accuracy, mode: 'markers', // Start with markers only name: model, line: { color: DDR_DATA.modelColors[model] || '#888', width: 2 }, marker: { size: 6, color: DDR_DATA.modelColors[model] || '#888' }, hovertemplate: `${model}
Turn: %{customdata}
Accuracy: %{y:.2f}%`, customdata: data[model].turns }); }); const yRange = SCALING_Y_RANGES[scenario] || [0, 100]; // Sparse ticks for 10k scenario const dtickVal = scenario === '10k' ? 10 : 5; const layout = { ...darkLayout, xaxis: { ...darkLayout.xaxis, title: { text: 'Number of Interaction Turns', font: { size: 15, color: '#1d1d1f' } }, type: 'linear', range: [-0.05, 1.05], tickmode: 'array', tickvals: tickVals, ticktext: tickText, zeroline: false }, yaxis: { ...darkLayout.yaxis, title: { text: 'Accuracy (%)', font: { size: 15, color: '#1d1d1f' } }, dtick: dtickVal, range: yRange }, showlegend: false }; // Create chart with markers only first Plotly.newPlot(`scaling-${scenario}`, traces, layout, plotlyConfig).then(() => { // After a short delay, animate in the lines setTimeout(() => { animateScalingLinesIn(`scaling-${scenario}`, models, data, normTurns); }, 300); }); }); // Populate shared legend with models from first scenario const firstScenario = scenarios.find(s => DDR_DATA.scaling[s]); if (firstScenario) { const models = Object.keys(DDR_DATA.scaling[firstScenario]); populateSharedLegend('scaling-legend', models, DDR_DATA.modelColors); } // Apply hover effects after charts are rendered setTimeout(() => applyHoverEffectsForSection('scaling'), 500); } // Animate lines drawing in for scaling charts function animateScalingLinesIn(containerId, models, data, normTurns) { const graphDiv = document.getElementById(containerId); if (!graphDiv) return; // Update to show lines+markers let offset = 0; const tracesWithLines = models.map(model => { const len = data[model].turns.length; const modelNormX = normTurns.slice(offset, offset + len); offset += len; return { x: modelNormX, y: data[model].accuracy, mode: 'lines+markers', name: model, line: { color: DDR_DATA.modelColors[model] || '#888', width: 2 }, marker: { size: 6, color: DDR_DATA.modelColors[model] || '#888' }, hovertemplate: `${model}
Turn: %{customdata}
Accuracy: %{y:.2f}%`, customdata: data[model].turns }; }); // First, add lines with opacity 0 Plotly.react(containerId, tracesWithLines, graphDiv.layout, plotlyConfig).then(() => { // Get all line paths const paths = graphDiv.querySelectorAll('.scatterlayer .trace .lines path'); // Set initial state: lines hidden via stroke-dashoffset paths.forEach((path) => { const len = path.getTotalLength(); if (len > 0) { path.style.transition = 'none'; path.style.strokeDasharray = len + ' ' + len; path.style.strokeDashoffset = len; } }); // Force reflow graphDiv.getBoundingClientRect(); // Animate the lines drawing in with staggered delay requestAnimationFrame(() => { paths.forEach((path, index) => { const len = path.getTotalLength(); if (len > 0) { // Stagger the animation for each line const delay = index * 80; // 80ms delay between each line path.style.transition = `stroke-dashoffset 0.8s ease-out ${delay}ms`; path.style.strokeDashoffset = '0'; } }); }); }); } function updateScalingCharts(dimension) { const scenarios = ['mimic', '10k', 'globem']; const xLabels = { 'turn': 'Number of Interaction Turns', 'token': 'Total Costed Tokens', 'cost': 'Inference Cost ($)' }; scenarios.forEach(scenario => { const data = DDR_DATA.scaling[scenario]; if (!data) return; const models = Object.keys(data); // 1. Collect all raw X values for normalization const allRawX = []; models.forEach(model => { switch (dimension) { case 'turn': allRawX.push(...data[model].turns); break; case 'token': allRawX.push(...data[model].tokens); break; case 'cost': allRawX.push(...data[model].costs); break; } }); // 2. Normalize data const type = dimension === 'cost' ? 'log' : 'linear'; const { normalized: allNormX, min: minX, max: maxX } = normalizeData(allRawX, type); const { tickVals, tickText } = generateTicks(minX, maxX, type); // 3. Prepare update data const newTraces = []; let offset = 0; const hoverLabels = { 'turn': 'Turns', 'token': 'Tokens', 'cost': 'Cost' }; models.forEach((model, i) => { const len = data[model].turns.length; const modelNormX = allNormX.slice(offset, offset + len); // Get raw values for customdata (hover) let rawValues; switch (dimension) { case 'turn': rawValues = data[model].turns; break; case 'token': rawValues = data[model].tokens; break; case 'cost': rawValues = data[model].costs; break; } offset += len; newTraces.push({ x: modelNormX, y: data[model].accuracy, customdata: rawValues, name: model, // CRITICAL: Preserve model name mode: 'lines+markers', hovertemplate: `${model}
${hoverLabels[dimension]}: %{customdata}
Accuracy: %{y:.2f}%` }); }); // Two-Phase Animation: Points Only -> Add Lines with Drawing Effect const graphDiv = document.getElementById(`scaling-${scenario}`); // Phase 1: Update to markers-only mode and animate points const markersOnlyTraces = newTraces.map(trace => ({ ...trace, mode: 'markers' // Remove lines completely })); // Update ticks Plotly.relayout(`scaling-${scenario}`, { 'xaxis.title.text': xLabels[dimension], 'xaxis.tickvals': tickVals, 'xaxis.ticktext': tickText }); // Animate points to new positions (no lines) Plotly.animate(`scaling-${scenario}`, { data: markersOnlyTraces, traces: models.map((_, i) => i) }, { transition: { duration: 500, easing: 'cubic-in-out' }, frame: { duration: 500, redraw: true } }).then(() => { // Phase 2: Add lines back with drawing animation // CRITICAL: Pre-hide lines BEFORE react renders them const linesAndMarkersTraces = newTraces.map(trace => ({ ...trace, mode: 'lines+markers', line: { ...trace.line, // Start with invisible line (will be animated in) width: 0 } })); // First, add the lines with width 0 (invisible) Plotly.react(`scaling-${scenario}`, linesAndMarkersTraces, { ...graphDiv.layout }, plotlyConfig).then(() => { // Now set line width back and prepare for stroke animation const visibleTraces = newTraces.map(trace => ({ ...trace, mode: 'lines+markers' })); // Immediately query paths and set them to hidden state BEFORE making visible const paths = graphDiv.querySelectorAll('.scatterlayer .trace .lines path'); // Pre-set all paths to invisible using stroke-dashoffset paths.forEach((path) => { const len = path.getTotalLength(); if (len > 0) { path.style.transition = 'none'; path.style.strokeDasharray = len + ' ' + len; path.style.strokeDashoffset = len; } }); // Now make lines visible (they're hidden by dashoffset) Plotly.restyle(`scaling-${scenario}`, { 'line.width': models.map(() => 2) }).then(() => { // Force reflow graphDiv.getBoundingClientRect(); // Start the stroke animation after a short delay requestAnimationFrame(() => { paths.forEach((path) => { const len = path.getTotalLength(); if (len > 0) { path.style.transition = 'stroke-dashoffset 0.8s ease-out'; path.style.strokeDashoffset = '0'; } }); }); }); }); }); }); } // Dimension toggle event listeners for SCALING only document.addEventListener('DOMContentLoaded', () => { const scalingButtons = document.querySelectorAll('#scaling .dim-btn'); scalingButtons.forEach(btn => { btn.addEventListener('click', () => { // Only update scaling buttons scalingButtons.forEach(b => b.classList.remove('active')); btn.classList.add('active'); const dimension = btn.dataset.dim; currentScalingDim = dimension; updateScalingCharts(dimension); }); }); }); // ============================================================================ // RANKING COMPARISON - With animated mode switching // ============================================================================ const RANKING_DISPLAY_NAMES = { 'run_api_deepseek_deepseek-chat': 'DeepSeek-V3.2', 'qwen3-next-80b-a3b-instruct': 'Qwen3-Next-80BA3B', 'qwen2.5-14B-Instruct-1M': 'Qwen2.5-14B-1M', 'qwen2.5-7B-Instruct-1M': 'Qwen2.5-7B-1M', 'qwen2.5-14B-Instruct': 'Qwen2.5-14B', 'qwen2.5-7B-Instruct': 'Qwen2.5-7B', 'qwen2.5-72B-Instruct': 'Qwen2.5-72B', 'qwen2.5-32b-instruct': 'Qwen2.5-32B', 'qwen3-4B-Instruct-2507': 'Qwen3-4B', 'gemini2.5-flash-lite': 'Gemini2.5-Flash-Lite', 'gemini2.5-flash': 'Gemini2.5-Flash', 'gemini2.5-pro': 'Gemini2.5-Pro', 'claude4.5-sonnet': 'Claude4.5-Sonnet', 'llama3.3-70B': 'Llama3.3-70B', 'minimax-m2': 'MiniMax-M2', 'gpt5mini': 'GPT-5-mini', 'gpt5-mini': 'GPT-5-mini', 'gpt5.1': 'GPT-5.1', 'gpt5.2': 'GPT-5.2', 'kimi-k2': 'Kimi-K2', 'glm4.6': 'GLM-4.6', 'qwen3': 'Qwen3-30B-A3B', 'gemini3-flash': 'Gemini3-Flash', }; const PROPRIETARY_COLOR = '#6A0DAD'; // Vivid purple const OPENSOURCE_COLOR = '#228B22'; // Forest green function getDisplayName(model) { return RANKING_DISPLAY_NAMES[model] || model; } function renderRankingCharts(mode, animate = false) { const scenarios = [ { key: 'MIMIC', id: 'mimic' }, { key: '10K', id: '10k' }, { key: 'GLOBEM', id: 'globem' } ]; scenarios.forEach(({ key, id }) => { const rawData = DDR_DATA.ranking[key]; if (!rawData) return; // 1. Establish Base Order (Always sorted by Novelty/BT Rank initially) // This ensures traces maintain object identity for animation const baseModels = [...rawData].sort((a, b) => a.bt_rank - b.bt_rank); const topN = baseModels.length; // 2. Calculate Target Y-Positions based on current mode // We need to know where each model *should* be let sortedIndices; if (mode === 'novelty') { // In novelty mode, order matches baseModels (0, 1, 2...) sortedIndices = baseModels.map((_, i) => i); } else { // In accuracy mode, we need to find the rank index of each baseModel // Sort a copy to find the target order const accSorted = [...baseModels].map((m, i) => ({ model: m.model, acc_rank: m.acc_rank, originalIdx: i })) .sort((a, b) => a.acc_rank - b.acc_rank); // Map: originalIdx -> targetY const indexMap = new Array(topN); accSorted.forEach((item, targetY) => { indexMap[item.originalIdx] = targetY; }); sortedIndices = indexMap; } // 3. Prepare Data Arrays using Base Order // Invert Y-values so Rank 1 (Best) is at the TOP const yValues = sortedIndices.map(idx => topN - 1 - idx); const xBt = baseModels.map(m => m.bt_rank); const xAcc = baseModels.map(m => m.acc_rank); const names = baseModels.map(m => getDisplayName(m.model)); const colors = baseModels.map(m => m.is_proprietary ? PROPRIETARY_COLOR : OPENSOURCE_COLOR); const traces = []; // Trace 0: Connection Lines (Consolidated) const lineX = []; const lineY = []; baseModels.forEach((_, i) => { lineX.push(xBt[i], xAcc[i], null); lineY.push(yValues[i], yValues[i], null); }); traces.push({ x: lineX, y: lineY, mode: 'lines', line: { color: 'rgba(148, 163, 184, 0.4)', width: 1.5, dash: 'dash' }, showlegend: false, hoverinfo: 'skip' }); // Trace 1: Novelty Rank Points traces.push({ x: xBt, y: yValues, mode: 'markers', name: 'Novelty Rank', marker: { size: mode === 'novelty' ? 12 : 10, symbol: 'circle', color: colors, line: { color: '#fff', width: 1.5 } }, text: baseModels.map(m => `${getDisplayName(m.model)}
Novelty: #${m.bt_rank}
Win Rate: ${m.win_rate}%`), hovertemplate: '%{text}' }); // Trace 2: Accuracy Rank Points traces.push({ x: xAcc, y: yValues, mode: 'markers', name: 'Accuracy Rank', marker: { size: mode === 'accuracy' ? 12 : 10, symbol: 'diamond-open', color: colors, line: { width: 2 } }, text: baseModels.map(m => `${getDisplayName(m.model)}
Accuracy: #${m.acc_rank}
${m.accuracy}%`), hovertemplate: '%{text}' }); // Trace 3: Animated Y-Axis Labels (Model Names) // Place them to the left of the max rank. // X-axis is inverted (Max -> 1), so we place labels at Max + padding // We want labels on the LEFT side. // If range is [topN + 8, 0.5], then topN + 8 is on the LEFT. // So we place labels at topN + 1. const labelX = new Array(topN).fill(topN + 1); traces.push({ x: labelX, y: yValues, mode: 'text', text: names, textposition: 'middle left', textfont: { size: 10, color: '#515154', family: '-apple-system, BlinkMacSystemFont, "SF Pro Text", sans-serif' }, hoverinfo: 'skip', showlegend: false }); // Calculate correlation (same as before) const btRanks = baseModels.map(m => m.bt_rank); const accRanks = baseModels.map(m => m.acc_rank); const n = btRanks.length; const meanBt = btRanks.reduce((a, b) => a + b, 0) / n; const meanAcc = accRanks.reduce((a, b) => a + b, 0) / n; let num = 0, denBt = 0, denAcc = 0; for (let i = 0; i < n; i++) { num += (btRanks[i] - meanBt) * (accRanks[i] - meanAcc); denBt += (btRanks[i] - meanBt) ** 2; denAcc += (accRanks[i] - meanAcc) ** 2; } const rho = num / Math.sqrt(denBt * denAcc); const sortLabel = mode === 'novelty' ? 'Sorted by Novelty' : 'Sorted by Accuracy'; const layout = { ...darkLayout, xaxis: { ...darkLayout.xaxis, title: { text: 'Rank', font: { size: 10, color: '#1d1d1f' } }, range: [topN + 8, 0.5], // Revert padding tickmode: 'array', // Explicitly set ticks tickvals: Array.from({ length: topN }, (_, i) => i + 1), // Only show ticks 1 to N zeroline: false }, yaxis: { ...darkLayout.yaxis, showticklabels: false, // Hide native ticks automargin: false, // We handle margin manually range: [-1, topN + 2], // Add vertical padding zeroline: false }, showlegend: false, annotations: [ { x: 0.02, y: 0.98, xref: 'paper', yref: 'paper', text: `ρ = ${rho.toFixed(2)}`, showarrow: false, font: { size: 11, color: '#515154', family: '-apple-system, BlinkMacSystemFont, "SF Pro Text", sans-serif' }, bgcolor: 'rgba(255, 255, 255, 0.9)', borderpad: 4 }, { x: 0.98, y: 0.98, xref: 'paper', yref: 'paper', text: sortLabel, showarrow: false, font: { size: 10, color: mode === 'novelty' ? PROPRIETARY_COLOR : OPENSOURCE_COLOR, family: '-apple-system, BlinkMacSystemFont, "SF Pro Text", sans-serif' }, bgcolor: 'rgba(255, 255, 255, 0.9)', borderpad: 4 } ], // Adjust margins: Left needs to be smaller since labels are now inside the plot area (but visually left) // Actually, since we extended X-range, we can keep normal margins or reduce left margin: { t: 15, r: 15, b: 40, l: 20 } }; if (animate) { Plotly.animate(`ranking-${id}`, { data: traces, layout: layout }, animationSettings); } else { Plotly.newPlot(`ranking-${id}`, traces, layout, plotlyConfig); } }); } function initRankingCharts() { // Check if data is loaded if (typeof DDR_DATA === 'undefined' || !DDR_DATA.ranking) { setTimeout(initRankingCharts, 100); return; } renderRankingCharts('novelty', false); // Add fade-in animation for ranking charts setTimeout(() => { ['mimic', '10k', 'globem'].forEach((id, index) => { const chart = document.getElementById(`ranking-${id}`); if (chart) { chart.style.opacity = '0'; chart.style.transition = `opacity 0.6s ease-out ${index * 150}ms`; requestAnimationFrame(() => { chart.style.opacity = '1'; }); } }); }, 100); } // Ranking mode toggle event listener document.addEventListener('DOMContentLoaded', () => { const rankingButtons = document.querySelectorAll('#ranking .dim-btn'); rankingButtons.forEach(btn => { btn.addEventListener('click', () => { const mode = btn.dataset.mode; if (mode === currentRankingMode) return; // Only update ranking buttons rankingButtons.forEach(b => b.classList.remove('active')); btn.classList.add('active'); currentRankingMode = mode; renderRankingCharts(mode, true); }); }); }); // ============================================================================ // TURN DISTRIBUTION - 3 Charts (Ridgeline style) // ============================================================================ const TURN_DISPLAY_NAMES = { 'run_api_deepseek_deepseek-chat': 'DeepSeek-V3.2', 'qwen3-next-80b-a3b-instruct': 'Qwen3-Next-80A3B', 'qwen3-next-80b-a3b-instruct-note': 'Qwen3-Next-80A3B-Note', 'qwen3-next-80b-a3b-instruct-noreasoning': 'Qwen3-Next-80A3B-NoR', 'qwen3-next-80b-a3b-instruct-longreasoning': 'Qwen3-Next-80A3B-LR', 'qwen3-next-80b-a3b-instruct-shortreasoning': 'Qwen3-Next-80A3B-SR', 'qwen2.5-14B-Instruct-1M': 'Qwen2.5-14B-1M', 'qwen2.5-7B-Instruct-1M': 'Qwen2.5-7B-1M', 'qwen2.5-14B-Instruct': 'Qwen2.5-14B', 'qwen2.5-7B-Instruct': 'Qwen2.5-7B', 'qwen2.5-72B-Instruct': 'Qwen2.5-72B', 'qwen2.5-32b-instruct': 'Qwen2.5-32B', 'qwen3-4B-Instruct-2507': 'Qwen3-4B', 'gemini2.5-flash-lite': 'Gemini2.5-Flash-Lite', 'gemini2.5-flash': 'Gemini2.5-Flash', 'gemini2.5-pro': 'Gemini2.5-Pro', 'claude4.5-sonnet': 'Claude4.5-Sonnet', 'llama3.3-70B': 'Llama3.3-70B', 'llama-3.3-70B': 'Llama3.3-70B', 'minimax-m2': 'MiniMax-M2', 'gpt5mini': 'GPT-5-mini', 'gpt5-mini': 'GPT-5-mini', 'gpt5.1': 'GPT-5.1', 'gpt5.2': 'GPT-5.2', 'kimi-k2': 'Kimi-K2', 'glm4.6': 'GLM-4.6', 'qwen3': 'Qwen3-30B-A3B', 'gemini3-flash': 'Gemini3-Flash', }; function getTurnDisplayName(model) { return TURN_DISPLAY_NAMES[model] || model; } function initTurnCharts() { // Check if data is loaded if (typeof DDR_DATA === 'undefined' || !DDR_DATA.turn) { setTimeout(initTurnCharts, 100); return; } const scenarios = ['mimic', '10k', 'globem']; // Family colors matching the Python script const familyColors = { 'claude': '#D97706', 'gpt': '#10A37F', 'gemini': '#4285F4', 'deepseek': '#1E3A8A', 'glm': '#7C3AED', 'kimi': '#DC2626', 'minimax': '#EC4899', 'qwen': '#0EA5E9', 'llama': '#F59E0B' }; function getModelColor(modelName) { const lower = modelName.toLowerCase(); for (const [family, color] of Object.entries(familyColors)) { if (lower.includes(family)) return color; } return '#666666'; } scenarios.forEach(scenario => { const data = DDR_DATA.turn[scenario]; if (!data) return; // Sort by median descending to get top 15 const sortedData = [...data].sort((a, b) => b.median - a.median); // Limit to top 15 models, then reverse so highest median is at top of chart const displayData = sortedData.slice(0, 15).reverse(); const traces = []; const binCenters = [5, 15, 25, 35, 45, 55, 65, 75, 85, 95]; displayData.forEach((model, idx) => { const color = getModelColor(model.model); const yOffset = idx; const displayName = getTurnDisplayName(model.model); const maxDist = Math.max(...model.distribution) || 1; // Original bin centers and values const binCenters = [5, 15, 25, 35, 45, 55, 65, 75, 85, 95]; const binValues = model.distribution.map(d => d / maxDist * 0.75); // Interpolate more points for smoother curve (similar to KDE) const xSmooth = []; const ySmooth = []; // Add start point at baseline xSmooth.push(0); ySmooth.push(yOffset); // Interpolate between bin centers for smoothness for (let i = 0; i < binCenters.length; i++) { xSmooth.push(binCenters[i]); ySmooth.push(yOffset + binValues[i]); } // Add end point at baseline xSmooth.push(100); ySmooth.push(yOffset); // Create the curve trace with spline smoothing traces.push({ x: xSmooth, y: ySmooth, mode: 'lines', line: { color: color, width: 2, shape: 'spline', // Smooth spline interpolation smoothing: 1.3 // Smoothing factor }, fill: 'toself', fillcolor: color + '60', name: displayName, hovertemplate: `${displayName}
Median: ${model.median}`, showlegend: false }); }); const layout = { ...darkLayout, xaxis: { ...darkLayout.xaxis, title: { text: 'Number of Turns', font: { size: 14, color: '#1d1d1f' } }, // Larger axis title range: scenario === 'globem' ? [0, 40] : [0, 80], dtick: 20 }, yaxis: { ...darkLayout.yaxis, tickmode: 'array', tickvals: displayData.map((_, i) => i + 0.35), ticktext: displayData.map(m => getTurnDisplayName(m.model)), tickfont: { size: 10, color: '#424245' }, // Small font for model names as requested automargin: true, range: [-0.5, displayData.length], showgrid: false, zeroline: false }, margin: { ...darkLayout.margin, l: 85 }, // Reduced left margin for turn chart (was 140) showlegend: false }; Plotly.newPlot(`turn-${scenario}`, traces, layout, plotlyConfig).then(() => { // Animate fill areas growing from baseline const graphDiv = document.getElementById(`turn-${scenario}`); if (!graphDiv) return; // Get all fill paths and animate them const paths = graphDiv.querySelectorAll('.scatterlayer .trace path'); paths.forEach((path, index) => { const len = path.getTotalLength(); if (len > 0) { path.style.transition = 'none'; path.style.strokeDasharray = len + ' ' + len; path.style.strokeDashoffset = len; path.style.opacity = '0'; // Staggered animation const delay = index * 50; requestAnimationFrame(() => { path.style.transition = `stroke-dashoffset 0.8s ease-out ${delay}ms, opacity 0.4s ease-out ${delay}ms`; path.style.strokeDashoffset = '0'; path.style.opacity = '1'; }); } }); }); }); } // ============================================================================ // PROBING RESULTS - 3 Charts with animated mode switching // ============================================================================ let probingChartsInitialized = false; function initProbingCharts() { // Check if data is loaded if (typeof DDR_DATA === 'undefined' || !DDR_DATA.probing) { setTimeout(initProbingCharts, 100); return; } renderProbingCharts('byProgress'); // Add line drawing animation for initial render if (!probingChartsInitialized) { probingChartsInitialized = true; setTimeout(() => { ['mimic', 'globem', '10k'].forEach((scenario, scenarioIndex) => { const graphDiv = document.getElementById(`probing-${scenario}`); if (!graphDiv) return; const paths = graphDiv.querySelectorAll('.scatterlayer .trace .lines path'); paths.forEach((path, index) => { const len = path.getTotalLength(); if (len > 0) { path.style.transition = 'none'; path.style.strokeDasharray = len + ' ' + len; path.style.strokeDashoffset = len; const delay = scenarioIndex * 100 + index * 60; requestAnimationFrame(() => { path.style.transition = `stroke-dashoffset 0.8s ease-out ${delay}ms`; path.style.strokeDashoffset = '0'; }); } }); }); }, 200); } } function renderProbingCharts(mode) { const scenarios = ['mimic', 'globem', '10k']; const scenarioIds = { 'mimic': 'mimic', 'globem': 'globem', '10k': '10k' }; scenarios.forEach(scenario => { const modeKey = mode === 'byTurn' ? 'byTurn' : 'byProgress'; const data = DDR_DATA.probing[modeKey]?.[scenario]; if (!data) return; const traces = []; const allModels = Object.keys(data); // Filter out 7B and 14B models const models = allModels.filter(m => !m.includes('7B') && !m.includes('14B')); models.forEach(model => { const modelData = data[model]; const xKey = mode === 'byTurn' ? 'turns' : 'progress'; const xLabel = mode === 'byTurn' ? 'Turn' : 'Progress (%)'; // Main line - CONSISTENT STYLE traces.push({ x: modelData[xKey], y: modelData.logprob, mode: 'lines+markers', // Show both lines and data points name: model, line: { color: (DDR_DATA.modelColors && DDR_DATA.modelColors[model]) || '#888', width: 2 }, marker: { size: 6, color: (DDR_DATA.modelColors && DDR_DATA.modelColors[model]) || '#888' }, hovertemplate: `${model}
${xLabel}: %{x}
Log Prob: %{y:.2f}` }); // Error band if (modelData.sem) { const upper = modelData.logprob.map((v, i) => v + modelData.sem[i]); const lower = modelData.logprob.map((v, i) => v - modelData.sem[i]); traces.push({ x: [...modelData[xKey], ...modelData[xKey].slice().reverse()], y: [...upper, ...lower.slice().reverse()], fill: 'toself', fillcolor: ((DDR_DATA.modelColors && DDR_DATA.modelColors[model]) || '#888') + '25', line: { width: 0 }, showlegend: false, hoverinfo: 'skip' }); } }); // Set different x-axis ranges based on mode const xaxisConfig = mode === 'byTurn' ? { title: { text: 'Turn', font: { size: 11, color: '#1d1d1f' } }, range: [0.5, 10.5], // Turns from 1-10 dtick: 1 } : { title: { text: 'Interaction Progress (%)', font: { size: 11, color: '#1d1d1f' } }, range: [0, 100], // Progress from 0-100% dtick: 10 }; const layout = { ...darkLayout, xaxis: { ...darkLayout.xaxis, ...xaxisConfig }, yaxis: { ...darkLayout.yaxis, title: { text: 'Avg Log Probability', font: { size: 11, color: '#1d1d1f' } } }, showlegend: false // Use shared legend instead }; const chartId = `probing-${scenarioIds[scenario]}`; // Check if chart exists const chartDiv = document.getElementById(chartId); if (chartDiv && chartDiv.data) { // Use animate for smooth transition with layout update Plotly.animate(chartId, { data: traces, layout: layout }, animationSettings); } else { // Initial plot Plotly.newPlot(chartId, traces, layout, plotlyConfig); } }); // Populate shared legend with filtered models from first available scenario const firstScenario = scenarios.find(s => DDR_DATA.probing[mode === 'byTurn' ? 'byTurn' : 'byProgress']?.[s]); if (firstScenario) { const allModels = Object.keys(DDR_DATA.probing[mode === 'byTurn' ? 'byTurn' : 'byProgress'][firstScenario]); const filteredModels = allModels.filter(m => !m.includes('7B') && !m.includes('14B')); populateSharedLegend('probing-legend', filteredModels, DDR_DATA.modelColors); } // Apply hover effects after charts are rendered setTimeout(() => applyHoverEffectsForSection('probing'), 100); } // ============================================================================ // ERROR ANALYSIS - Hierarchical Bar Chart // ============================================================================ function initErrorChart() { // Check if data is loaded if (typeof DDR_DATA === 'undefined') { setTimeout(initErrorChart, 100); return; } const data = DDR_DATA.error; if (!data || data.length === 0) return; // Group by main category for bracket annotations const categoryGroups = {}; data.forEach((item, idx) => { if (!categoryGroups[item.main_category]) { categoryGroups[item.main_category] = { start: idx, end: idx, items: [] }; } categoryGroups[item.main_category].end = idx; categoryGroups[item.main_category].items.push(item); }); const traces = [{ x: data.map(d => d.subcategory), y: data.map(d => d.percentage), type: 'bar', marker: { color: data.map(d => d.color), line: { color: '#fff', width: 0.5 } }, text: data.map(d => `${d.percentage}%`), textposition: 'outside', textfont: { size: 14, color: '#1d1d1f' }, // Larger bar text hovertemplate: '%{x}
%{y:.1f}%
Count: %{customdata}', customdata: data.map(d => d.count), showlegend: false }]; const maxPct = Math.max(...data.map(d => d.percentage)); // Create annotations for main category labels const annotations = []; Object.entries(categoryGroups).forEach(([catName, group]) => { const midIdx = (group.start + group.end) / 2; annotations.push({ x: midIdx, y: maxPct * 1.15, text: `${catName}`, showarrow: false, font: { size: 13, color: '#1d1d1f' }, // Larger category labels xanchor: 'center', yanchor: 'bottom' }); }); const layout = { ...darkLayout, xaxis: { ...darkLayout.xaxis, tickangle: 0, tickfont: { size: 14, color: '#515154' } // Larger ticks }, yaxis: { ...darkLayout.yaxis, title: { text: 'Percentage (%)', font: { size: 15, color: '#1d1d1f' } }, // Larger axis title range: [0, maxPct * 1.25] }, annotations: annotations, margin: { t: 50, r: 20, b: 100, l: 50 } }; // Start with zero-height bars for animation const initialTraces = [{ ...traces[0], y: data.map(() => 0), // Start at 0 text: data.map(() => '') // Hide text initially }]; Plotly.newPlot('error-chart', initialTraces, layout, plotlyConfig).then(() => { // Animate bars growing from 0 to target values setTimeout(() => { Plotly.animate('error-chart', { data: traces, traces: [0] }, { transition: { duration: 800, easing: 'cubic-out' }, frame: { duration: 800, redraw: true } }); }, 200); }); } // ============================================================================ // ENTROPY ANALYSIS - Scatter plots by model (Entropy vs Coverage, Opacity = Accuracy) // ============================================================================ const ENTROPY_MODELS = [ 'GPT-5.2', 'Claude-4.5-Sonnet', 'Gemini-3-Flash', 'GLM-4.6', 'Qwen3-Next-80B-A3B', 'DeepSeek-V3.2' ]; let currentEntropyScenario = '10k'; let entropyChartsInitialized = false; function initEntropyCharts() { if (typeof ENTROPY_DATA === 'undefined') { // Retry if data not loaded yet setTimeout(initEntropyCharts, 100); return; } // Setup toggle buttons document.querySelectorAll('[data-entropy-scenario]').forEach(btn => { btn.addEventListener('click', () => { document.querySelectorAll('[data-entropy-scenario]').forEach(b => b.classList.remove('active')); btn.classList.add('active'); currentEntropyScenario = btn.dataset.entropyScenario; renderEntropyCharts(currentEntropyScenario); }); }); // Initial render renderEntropyCharts('10k'); // Add scatter point animation for initial render if (!entropyChartsInitialized) { entropyChartsInitialized = true; setTimeout(() => { for (let i = 0; i < 6; i++) { const chart = document.getElementById(`entropy-model-${i}`); if (chart) { chart.style.opacity = '0'; chart.style.transform = 'scale(0.95)'; chart.style.transition = `opacity 0.5s ease-out ${i * 100}ms, transform 0.5s ease-out ${i * 100}ms`; requestAnimationFrame(() => { chart.style.opacity = '1'; chart.style.transform = 'scale(1)'; }); } } }, 100); } } function renderEntropyCharts(scenario) { const entropyData = ENTROPY_DATA; const datasetInfo = entropyData.datasets[scenario]; if (!datasetInfo) { console.error(`No entropy data for scenario: ${scenario}`); return; } const points = datasetInfo.points; const yMax = datasetInfo.y_max || 1; const accMin = datasetInfo.acc_min || 0; const accMax = datasetInfo.acc_max || 100; const hasAccRange = accMax > accMin; const colors = entropyData.modelColors; // Group points by model const modelGroups = {}; points.forEach(p => { if (!modelGroups[p.model]) { modelGroups[p.model] = []; } modelGroups[p.model].push(p); }); // Render each model's subplot ENTROPY_MODELS.forEach((model, idx) => { const chartId = `entropy-model-${idx}`; const titleId = `entropy-model-${idx}-title`; const color = colors[model] || '#888888'; const pts = modelGroups[model] || []; // Update title with sample count const titleEl = document.getElementById(titleId); if (titleEl) { titleEl.textContent = `${model} (n=${pts.length})`; } if (pts.length === 0) { // Show empty chart with message const layout = { ...darkLayout, xaxis: { ...darkLayout.xaxis, range: [0.6, 1.05], title: { text: 'Entropy', font: { size: 10, color: '#1d1d1f' } } }, yaxis: { ...darkLayout.yaxis, range: [-0.05, yMax], title: { text: 'Coverage', font: { size: 10, color: '#1d1d1f' } } }, annotations: [{ text: 'No data', xref: 'paper', yref: 'paper', x: 0.5, y: 0.5, showarrow: false, font: { size: 14, color: '#888' } }] }; Plotly.newPlot(chartId, [], layout, plotlyConfig); return; } // Calculate alphas based on accuracy const alphas = pts.map(p => { if (hasAccRange) { return 0.15 + (p.accuracy - accMin) / (accMax - accMin) * 0.85; } return 0.7; }); const trace = { x: pts.map(p => p.entropy), y: pts.map(p => p.coverage), mode: 'markers', type: 'scatter', marker: { color: color, size: 7, opacity: alphas, line: { color: '#333', width: 0.5 } }, name: model, text: pts.map(p => `Entropy: ${p.entropy.toFixed(3)}
Coverage: ${(p.coverage * 100).toFixed(1)}%
Accuracy: ${p.accuracy.toFixed(1)}%`), hovertemplate: '' + model + '
%{text}', showlegend: false }; const layout = { ...darkLayout, xaxis: { ...darkLayout.xaxis, title: { text: 'Entropy', font: { size: 16, color: '#1d1d1f' } }, // Larger range: [0.6, 1.05], dtick: 0.1 }, yaxis: { ...darkLayout.yaxis, title: { text: 'Coverage', font: { size: 16, color: '#1d1d1f' } }, // Larger range: [-0.05, yMax] }, margin: { t: 20, r: 20, b: 50, l: 50 } }; const chartDiv = document.getElementById(chartId); if (chartDiv) { // Apply CSS fade-out chartDiv.style.transition = 'opacity 0.3s ease'; chartDiv.style.opacity = '0.3'; setTimeout(() => { // Update chart with react (faster than newPlot) Plotly.react(chartId, [trace], layout, plotlyConfig); // Fade back in chartDiv.style.opacity = '1'; // Re-apply hover effects after chart update addHoverHighlight(chartId); }, 150); } else { Plotly.newPlot(chartId, [trace], layout, plotlyConfig); // Apply hover effects for new chart setTimeout(() => addHoverHighlight(chartId), 50); } }); } // ============================================================================ // INITIALIZE ALL CHARTS - Using Lazy Loading for Performance // ============================================================================ document.addEventListener('DOMContentLoaded', () => { // Register all sections for lazy loading // Charts will only be initialized when they become visible const sections = document.querySelectorAll('section.section'); sections.forEach(section => { lazyLoadObserver.observe(section); }); }); // Handle window resize with longer debounce for better performance let resizeTimeout; const resizeHandler = throttle(() => { // Only resize charts that have been initialized if (initializedCharts.has('scaling')) { ['mimic', '10k', 'globem'].forEach(s => { const el = document.getElementById(`scaling-${s}`); if (el && el.data) Plotly.Plots.resize(el); }); } if (initializedCharts.has('ranking')) { ['mimic', '10k', 'globem'].forEach(s => { const el = document.getElementById(`ranking-${s}`); if (el && el.data) Plotly.Plots.resize(el); }); } if (initializedCharts.has('turn')) { ['mimic', '10k', 'globem'].forEach(s => { const el = document.getElementById(`turn-${s}`); if (el && el.data) Plotly.Plots.resize(el); }); } if (initializedCharts.has('probing')) { ['mimic', '10k', 'globem'].forEach(s => { const el = document.getElementById(`probing-${s}`); if (el && el.data) Plotly.Plots.resize(el); }); } if (initializedCharts.has('entropy')) { for (let i = 0; i < 6; i++) { const el = document.getElementById(`entropy-model-${i}`); if (el && el.data) Plotly.Plots.resize(el); } } if (initializedCharts.has('error')) { const el = document.getElementById('error-chart'); if (el && el.data) Plotly.Plots.resize(el); } }, 250); window.addEventListener('resize', () => { clearTimeout(resizeTimeout); resizeTimeout = setTimeout(resizeHandler, 250); }); // ============================================================================ // HOVER HIGHLIGHT EFFECTS - Optimized with batched updates // ============================================================================ function addHoverHighlight(chartId) { const chart = document.getElementById(chartId); if (!chart || !chart.on) return; let lastHoveredTrace = null; let lastHoveredPoint = null; let isAnimating = false; // Throttled hover handler to prevent excessive updates const handleHover = throttle(function (data) { if (!data || !data.points || !data.points[0]) return; const point = data.points[0]; const traceIndex = point.curveNumber; const pointIndex = point.pointNumber; // Skip if same point or currently animating if ((traceIndex === lastHoveredTrace && pointIndex === lastHoveredPoint) || isAnimating) return; lastHoveredTrace = traceIndex; lastHoveredPoint = pointIndex; isAnimating = true; // Build batch update arrays const opacities = []; const markerSizes = []; const lineWidths = []; const traceIndices = []; const numTraces = chart.data?.length || 0; for (let i = 0; i < numTraces; i++) { const trace = chart.data[i]; if (!trace) continue; // Skip fill traces (error bands) if (trace.fill === 'toself') continue; traceIndices.push(i); if (i === traceIndex) { opacities.push(1); lineWidths.push(4); const numPoints = trace.x?.length || 0; const sizes = Array(numPoints).fill(6); if (pointIndex < numPoints) sizes[pointIndex] = 12; markerSizes.push(sizes); } else { opacities.push(0.4); lineWidths.push(2); const numPoints = trace.x?.length || 0; markerSizes.push(Array(numPoints).fill(6)); } } // Single batched restyle call requestAnimationFrame(() => { if (traceIndices.length > 0) { Plotly.restyle(chartId, { 'opacity': opacities, 'marker.size': markerSizes, 'line.width': lineWidths }, traceIndices).then(() => { isAnimating = false; }).catch(() => { isAnimating = false; }); } else { isAnimating = false; } }); }, 50); // Throttle to max 20 updates per second chart.on('plotly_hover', handleHover); chart.on('plotly_unhover', function () { lastHoveredTrace = null; lastHoveredPoint = null; const numTraces = chart.data?.length || 0; if (numTraces === 0) return; // Build reset arrays const opacities = []; const markerSizes = []; const lineWidths = []; const traceIndices = []; for (let i = 0; i < numTraces; i++) { const trace = chart.data[i]; if (!trace) continue; // Skip fill traces if (trace.fill === 'toself') continue; traceIndices.push(i); opacities.push(1); lineWidths.push(2); const numPoints = trace.x?.length || 0; markerSizes.push(Array(numPoints).fill(6)); } // Single batched reset call if (traceIndices.length > 0) { requestAnimationFrame(() => { Plotly.restyle(chartId, { 'opacity': opacities, 'marker.size': markerSizes, 'line.width': lineWidths }, traceIndices); }); } }); } // Apply hover effects when charts are initialized (called from init functions) function applyHoverEffectsForSection(sectionId) { requestAnimationFrame(() => { switch (sectionId) { case 'scaling': ['mimic', '10k', 'globem'].forEach(s => addHoverHighlight(`scaling-${s}`)); break; case 'probing': ['mimic', '10k', 'globem'].forEach(s => addHoverHighlight(`probing-${s}`)); break; case 'entropy': for (let i = 0; i < 6; i++) addHoverHighlight(`entropy-model-${i}`); break; } }); }