// DDR-Bench Interactive Charts with Smooth Animations
// Using Plotly.js with animate for smooth transitions
// Common Plotly layout settings for DDR-Bench design system
// Base Plotly layout shared by the charts in this file: each chart spreads it
// into its own layout object and overrides axes, margins or annotations.
// NOTE(review): despite the "dark" name, the values below are dark text
// (#1d1d1f) on a transparent background with a white hover label.
const darkLayout = {
// Fully transparent paper/plot backgrounds
paper_bgcolor: 'rgba(0,0,0,0)',
plot_bgcolor: 'rgba(0,0,0,0)',
// Default font applied to chart text
font: {
family: "-apple-system, BlinkMacSystemFont, 'SF Pro Display', 'Helvetica Neue', sans-serif",
color: '#1d1d1f',
size: 15
},
// Default x-axis styling; charts spread this and add title text, range, ticks
xaxis: {
gridcolor: '#d2d2d7',
linecolor: '#d2d2d7',
tickfont: { color: '#424245', size: 14 },
title: { font: { color: '#1d1d1f', size: 15, weight: 600 } },
zerolinecolor: '#d2d2d7'
},
// Default y-axis styling (same values as the x-axis)
yaxis: {
gridcolor: '#d2d2d7',
linecolor: '#d2d2d7',
tickfont: { color: '#424245', size: 14 },
title: { font: { color: '#1d1d1f', size: 15, weight: 600 } },
zerolinecolor: '#d2d2d7'
},
// Borderless horizontal legend centred near the top of the plot.
// The charts visible in this file set showlegend: false in their own layout.
legend: {
bgcolor: 'rgba(0,0,0,0)',
bordercolor: 'rgba(0,0,0,0)',
borderwidth: 0,
font: { color: '#1d1d1f', size: 14 },
orientation: 'h',
y: 0.99,
x: 0.5,
xanchor: 'center',
yanchor: 'top'
},
// White hover label; namelength -1 asks Plotly not to truncate trace names
hoverlabel: {
bgcolor: '#ffffff',
bordercolor: '#d2d2d7',
font: { color: '#1d1d1f', size: 14 },
namelength: -1
},
hovermode: 'closest',
margin: { t: 20, r: 10, b: 40, l: 50 }, // Reduced margins specifically for compact cards
};
// Plotly config object passed as the last argument to Plotly.newPlot / Plotly.react
// calls in this file.
const plotlyConfig = {
displayModeBar: false, // Hide modebar completely
responsive: true,
displaylogo: false
};
// Default options passed to Plotly.animate by the ranking and probing charts:
// a 750 ms cubic-in-out transition per frame, with a redraw after the frame.
const animationSettings = {
transition: {
duration: 750,
easing: 'cubic-in-out'
},
frame: {
duration: 750,
redraw: true
}
};
// Current toggle state for the interactive sections.
// currentScalingDim is written by the scaling dimension buttons and
// currentRankingMode is read/written by the ranking mode buttons.
// NOTE(review): currentProbingMode is not referenced in the part of the file
// shown here — presumably used by a probing toggle handler elsewhere; verify.
let currentScalingDim = 'turn';
let currentProbingMode = 'byProgress';
let currentRankingMode = 'novelty';
// ============================================================================
// PERFORMANCE OPTIMIZATION UTILITIES
// ============================================================================
// Ids of the sections whose charts have already been scheduled for rendering.
const initializedCharts = new Set();

// Lazy loading: a section's charts are only built the first time the section
// scrolls into view. The actual work is deferred to idle time (or a zero-delay
// timer where requestIdleCallback is unavailable) so scrolling is not blocked.
const lazyLoadObserver = new IntersectionObserver(
  (entries) => {
    for (const { isIntersecting, target } of entries) {
      if (!isIntersecting) continue;

      const sectionId = target.id;
      if (initializedCharts.has(sectionId)) continue;
      // Mark first so repeated intersection callbacks cannot schedule twice
      initializedCharts.add(sectionId);

      // Dispatch on the section id; unknown ids are ignored
      const runSectionInit = () => {
        if (sectionId === 'scaling') {
          initScalingCharts();
        } else if (sectionId === 'ranking') {
          initRankingCharts();
        } else if (sectionId === 'turn') {
          initTurnCharts();
        } else if (sectionId === 'entropy') {
          initEntropyCharts();
        } else if (sectionId === 'error') {
          initErrorChart();
        } else if (sectionId === 'probing') {
          initProbingCharts();
        }
      };

      if ('requestIdleCallback' in window) {
        requestIdleCallback(runSectionInit, { timeout: 100 });
      } else {
        setTimeout(runSectionInit, 0);
      }
    }
  },
  {
    rootMargin: '0px 0px', // Start exactly when entering viewport
    threshold: 0.15 // Trigger when 15% visible
  }
);
// Debounce utility for hover effects.
// Returns a wrapper that postpones `fn` until `delay` ms have passed without
// another call; only the last call's `this` and arguments reach `fn`.
function debounce(fn, delay) {
  let pendingTimer;
  return function (...callArgs) {
    const context = this;
    // Cancel the previously scheduled invocation (no-op when none is pending)
    clearTimeout(pendingTimer);
    pendingTimer = setTimeout(() => {
      fn.apply(context, callArgs);
    }, delay);
  };
}
// Throttle utility for frequent events.
// The wrapper runs `fn` immediately, then ignores further calls until `limit`
// ms have elapsed. Ignored calls are dropped, not queued.
function throttle(fn, limit) {
  let isCoolingDown = false;
  return function (...callArgs) {
    if (isCoolingDown) return;
    // Invoke first: the cooldown only starts once fn has returned
    fn.apply(this, callArgs);
    isCoolingDown = true;
    setTimeout(() => {
      isCoolingDown = false;
    }, limit);
  };
}
// Batch DOM updates using requestAnimationFrame.
// Runs `updateFn` in the next animation frame and returns a promise that
// resolves (with no value) once it has run. If `updateFn` throws, the promise
// rejects with that error instead of staying pending forever.
function batchUpdate(updateFn) {
  return new Promise((resolve, reject) => {
    requestAnimationFrame(() => {
      try {
        updateFn();
        resolve();
      } catch (err) {
        reject(err);
      }
    });
  });
}
// ============================================================================
// SCALING ANALYSIS - 3 Charts with animated dimension switching
// ============================================================================
// Helper to normalize values to [0, 1].
//
// values: array of numbers.
// type:   'log' normalizes log10(value) between the smallest and largest
//         positive value (non-positive values map to 0); anything else is
//         linear from 0 to the maximum value.
// Returns { normalized, min, max } where min/max are the raw-value bounds used,
// suitable for passing to generateTicks.
//
// Edge cases:
// - empty input returns { normalized: [], min: 0, max: 1 };
// - a zero span (e.g. a single distinct value) divides by 1 instead of 0;
// - 'log' with no positive value returns all zeros with min = max = 1 so that
//   callers receive finite bounds rather than Infinity / -Infinity.
function normalizeData(values, type) {
  if (values.length === 0) return { normalized: [], min: 0, max: 1 };

  // Pairwise reduction gives the same result as Math.min/max(...array) but is
  // not limited by the engine's maximum argument count on large arrays.
  const smallest = (list) => list.reduce((acc, v) => Math.min(acc, v), Infinity);
  const largest = (list) => list.reduce((acc, v) => Math.max(acc, v), -Infinity);

  if (type === 'log') {
    // log10 is only defined for positive values
    const positiveValues = values.filter((v) => v > 0);
    if (positiveValues.length === 0) {
      return { normalized: values.map(() => 0), min: 1, max: 1 };
    }
    const min = smallest(positiveValues);
    const max = largest(positiveValues);
    const logMin = Math.log10(min);
    const logMax = Math.log10(max);
    const range = logMax - logMin || 1;
    const normalized = values.map((v) => (v > 0 ? (Math.log10(v) - logMin) / range : 0));
    return { normalized, min, max };
  }

  const min = 0; // Always start linear scales at 0 for this use case
  const max = largest(values);
  const range = max - min || 1;
  const normalized = values.map((v) => (v - min) / range);
  return { normalized, min, max };
}
// Helper to generate pretty ticks for normalized scale [0, 1].
// Places six ticks at 0, 0.2, ..., 1 and labels each with the raw value it
// corresponds to between `min` and `max`:
// - 'log': interpolated in log10 space and shown as dollars
//   (1 decimal at or above 1, 3 decimals below);
// - otherwise: linear, rounded to an integer, with thousands shown as "<n>k".
// Returns { tickVals, tickText }.
function generateTicks(min, max, type) {
  const tickVals = [0, 0.2, 0.4, 0.6, 0.8, 1.0];
  const isLog = type === 'log';

  // Work in log10 space for log scales, raw space otherwise
  const lower = isLog ? Math.log10(min) : min;
  const upper = isLog ? Math.log10(max) : max;
  const span = upper - lower;

  const costLabel = (fraction) => {
    const dollars = Math.pow(10, lower + (fraction * span));
    // More precision for small costs
    const digits = dollars >= 1 ? 1 : 3;
    return '$' + dollars.toFixed(digits);
  };

  const countLabel = (fraction) => {
    const amount = lower + (fraction * span);
    return amount >= 1000 ? (amount / 1000).toFixed(0) + 'k' : amount.toFixed(0);
  };

  const labelFor = isLog ? costLabel : countLabel;
  const tickText = tickVals.map((fraction) => labelFor(fraction));
  return { tickVals, tickText };
}
// Fixed y-axis [min, max] per scaling scenario, keyed by scenario id
// (values mirror the Python plotting scripts, per the notes below).
// initScalingCharts falls back to [0, 100] for a scenario missing from this map.
const SCALING_Y_RANGES = {
'mimic': [5, 40], // Python: y_min=5, y_max=40
'10k': [0, 85], // Python: y_min=0, y_max=85
'globem': [0, 50] // Python: y_min=0, y_max=50
};
// Populate shared legend for a section.
// Replaces the innerHTML of the element with id `containerId` with one entry
// per name in `models`; does nothing when that element is not in the document.
// `colorMap` maps model name -> color; '#888' is the fallback.
// NOTE(review): `color` is computed but not referenced by the template below as
// it appears here — the legend markup may have been lost; confirm against the
// intended HTML. Model names are inserted into innerHTML unescaped.
function populateSharedLegend(containerId, models, colorMap) {
const container = document.getElementById(containerId);
if (!container) return;
container.innerHTML = models.map(model => {
const color = (colorMap && colorMap[model]) || '#888';
return `
${model}
`;
}).join('');
}
// Build the three scaling charts (containers `scaling-mimic`, `scaling-10k`,
// `scaling-globem`) in their initial 'turn' dimension.
// Each chart is first plotted with markers only; 300 ms after Plotly finishes,
// animateScalingLinesIn() adds the lines with a drawing animation.
// Also fills the shared `scaling-legend` and, after 500 ms, applies the
// section's hover effects. Retries itself every 100 ms until DDR_DATA.scaling
// is available.
function initScalingCharts() {
// Check if data is loaded
if (typeof DDR_DATA === 'undefined' || !DDR_DATA.scaling) {
console.warn('DDR_DATA not loaded yet, retrying...');
setTimeout(initScalingCharts, 100);
return;
}
const scenarios = ['mimic', '10k', 'globem'];
scenarios.forEach(scenario => {
const data = DDR_DATA.scaling[scenario];
if (!data) return;
const models = Object.keys(data);
const traces = [];
// Initial dimension is 'turn'
// Normalize all models' turn counts together so they share one [0, 1] x scale
const allTurns = models.flatMap(m => data[m].turns);
const { normalized: normTurns, min: minTurn, max: maxTurn } = normalizeData(allTurns, 'linear');
const { tickVals, tickText } = generateTicks(minTurn, maxTurn, 'linear');
// We need to slice the normalized array back to per-model arrays
let offset = 0;
models.forEach(model => {
const len = data[model].turns.length;
const modelNormX = normTurns.slice(offset, offset + len);
offset += len;
// Start with markers only (lines will be animated in)
traces.push({
x: modelNormX,
y: data[model].accuracy,
mode: 'markers', // Start with markers only
name: model,
line: { color: DDR_DATA.modelColors[model] || '#888', width: 2 },
marker: { size: 6, color: DDR_DATA.modelColors[model] || '#888' },
hovertemplate: `${model}
Turn: %{customdata}
Accuracy: %{y:.2f}%`,
// Raw (un-normalized) turn counts, shown in the hover text
customdata: data[model].turns
});
});
const yRange = SCALING_Y_RANGES[scenario] || [0, 100];
// Sparse ticks for 10k scenario
const dtickVal = scenario === '10k' ? 10 : 5;
const layout = {
...darkLayout,
xaxis: {
...darkLayout.xaxis,
title: { text: 'Number of Interaction Turns', font: { size: 15, color: '#1d1d1f' } },
type: 'linear',
// x data is normalized to [0, 1]; pad slightly and label ticks with raw values
range: [-0.05, 1.05],
tickmode: 'array',
tickvals: tickVals,
ticktext: tickText,
zeroline: false
},
yaxis: {
...darkLayout.yaxis,
title: { text: 'Accuracy (%)', font: { size: 15, color: '#1d1d1f' } },
dtick: dtickVal,
range: yRange
},
showlegend: false
};
// Create chart with markers only first
Plotly.newPlot(`scaling-${scenario}`, traces, layout, plotlyConfig).then(() => {
// After a short delay, animate in the lines
setTimeout(() => {
animateScalingLinesIn(`scaling-${scenario}`, models, data, normTurns);
}, 300);
});
});
// Populate shared legend with models from first scenario
const firstScenario = scenarios.find(s => DDR_DATA.scaling[s]);
if (firstScenario) {
const models = Object.keys(DDR_DATA.scaling[firstScenario]);
populateSharedLegend('scaling-legend', models, DDR_DATA.modelColors);
}
// Apply hover effects after charts are rendered
setTimeout(() => applyHoverEffectsForSection('scaling'), 500);
}
// Animate lines drawing in for scaling charts.
// containerId: id of the chart element; models: model names in trace order;
// data: the scenario's per-model data; normTurns: the normalized turn values of
// all models concatenated in `models` order (sliced back per model below).
// Does nothing when the container element does not exist.
function animateScalingLinesIn(containerId, models, data, normTurns) {
const graphDiv = document.getElementById(containerId);
if (!graphDiv) return;
// Update to show lines+markers
let offset = 0;
const tracesWithLines = models.map(model => {
const len = data[model].turns.length;
const modelNormX = normTurns.slice(offset, offset + len);
offset += len;
return {
x: modelNormX,
y: data[model].accuracy,
mode: 'lines+markers',
name: model,
line: { color: DDR_DATA.modelColors[model] || '#888', width: 2 },
marker: { size: 6, color: DDR_DATA.modelColors[model] || '#888' },
hovertemplate: `${model}
Turn: %{customdata}
Accuracy: %{y:.2f}%`,
customdata: data[model].turns
};
});
// Re-render with lines+markers, then hide the new lines via stroke-dashoffset
// before animating them in
Plotly.react(containerId, tracesWithLines, graphDiv.layout, plotlyConfig).then(() => {
// Get all line paths
const paths = graphDiv.querySelectorAll('.scatterlayer .trace .lines path');
// Set initial state: lines hidden via stroke-dashoffset
paths.forEach((path) => {
const len = path.getTotalLength();
if (len > 0) {
path.style.transition = 'none';
// One dash as long as the path, shifted fully out of view
path.style.strokeDasharray = len + ' ' + len;
path.style.strokeDashoffset = len;
}
});
// Force reflow
graphDiv.getBoundingClientRect();
// Animate the lines drawing in with staggered delay
requestAnimationFrame(() => {
paths.forEach((path, index) => {
const len = path.getTotalLength();
if (len > 0) {
// Stagger the animation for each line
const delay = index * 80; // 80ms delay between each line
path.style.transition = `stroke-dashoffset 0.8s ease-out ${delay}ms`;
path.style.strokeDashoffset = '0';
}
});
});
});
}
// Switch the three scaling charts to another x dimension with a two-phase
// animation.
//
// dimension: 'turn' | 'token' | 'cost'. Any other value is ignored.
//
// Phase 1 animates the markers to their new normalized x positions (lines
// removed). Phase 2 re-renders with zero-width lines, hides the line paths via
// stroke-dashoffset, restores the width and lets CSS draw the lines in.
//
// The function is a no-op when DDR_DATA.scaling is not loaded or when a chart
// has not been plotted yet (a toggle click can arrive before lazy init ran).
function updateScalingCharts(dimension) {
  if (typeof DDR_DATA === 'undefined' || !DDR_DATA.scaling) return;

  const scenarios = ['mimic', '10k', 'globem'];
  const xLabels = {
    'turn': 'Number of Interaction Turns',
    'token': 'Total Costed Tokens',
    'cost': 'Inference Cost ($)'
  };
  const hoverLabels = { 'turn': 'Turns', 'token': 'Tokens', 'cost': 'Cost' };
  // Dimension -> name of the per-model array holding the raw x values
  const rawKeys = new Map([['turn', 'turns'], ['token', 'tokens'], ['cost', 'costs']]);
  const rawKey = rawKeys.get(dimension);
  if (!rawKey) return;

  const modelColors = DDR_DATA.modelColors || {};
  // Only cost is shown on a log scale
  const type = dimension === 'cost' ? 'log' : 'linear';

  scenarios.forEach((scenario) => {
    const data = DDR_DATA.scaling[scenario];
    if (!data) return;

    const chartId = `scaling-${scenario}`;
    const graphDiv = document.getElementById(chartId);
    // Plotly sets `.data` on the element once a chart exists there
    if (!graphDiv || !graphDiv.data) return;

    const models = Object.keys(data);

    // 1. Normalize all models' raw x values together so they share one scale
    const allRawX = models.flatMap((model) => data[model][rawKey]);
    const { normalized: allNormX, min: minX, max: maxX } = normalizeData(allRawX, type);
    const { tickVals, tickText } = generateTicks(minX, maxX, type);

    // 2. Slice the normalized values back into per-model traces. The slice
    // length comes from the same array that fed the normalization, so the
    // offsets stay aligned whatever the dimension.
    let offset = 0;
    const newTraces = models.map((model) => {
      const rawValues = data[model][rawKey];
      const modelNormX = allNormX.slice(offset, offset + rawValues.length);
      offset += rawValues.length;
      const color = modelColors[model] || '#888';
      return {
        x: modelNormX,
        y: data[model].accuracy,
        customdata: rawValues, // raw values for the hover text
        name: model, // CRITICAL: Preserve model name
        mode: 'lines+markers',
        // Plotly.react below replaces the traces wholesale, so colors must be
        // restated here or they fall back to Plotly's default colorway.
        line: { color, width: 2 },
        marker: { size: 6, color },
        hovertemplate: `${model}\n${hoverLabels[dimension]}: %{customdata}\nAccuracy: %{y:.2f}%`
      };
    });

    // Phase 1: markers only, animated to their new positions
    const markersOnlyTraces = newTraces.map((trace) => ({
      ...trace,
      mode: 'markers' // Remove lines completely
    }));

    // Update axis title and tick labels for the new dimension
    Plotly.relayout(chartId, {
      'xaxis.title.text': xLabels[dimension],
      'xaxis.tickvals': tickVals,
      'xaxis.ticktext': tickText
    });

    Plotly.animate(chartId, {
      data: markersOnlyTraces,
      traces: models.map((_, index) => index)
    }, {
      transition: {
        duration: 500,
        easing: 'cubic-in-out'
      },
      frame: {
        duration: 500,
        redraw: true
      }
    }).then(() => {
      // Phase 2: add the lines back with width 0 so they render invisibly
      const hiddenLineTraces = newTraces.map((trace) => ({
        ...trace,
        mode: 'lines+markers',
        line: { ...trace.line, width: 0 }
      }));
      return Plotly.react(chartId, hiddenLineTraces, { ...graphDiv.layout }, plotlyConfig);
    }).then(() => {
      // Hide every line path via stroke-dashoffset BEFORE restoring its width
      const paths = graphDiv.querySelectorAll('.scatterlayer .trace .lines path');
      paths.forEach((path) => {
        const len = path.getTotalLength();
        if (len > 0) {
          path.style.transition = 'none';
          path.style.strokeDasharray = len + ' ' + len;
          path.style.strokeDashoffset = len;
        }
      });

      // Now make lines visible (they're hidden by dashoffset)
      return Plotly.restyle(chartId, {
        'line.width': models.map(() => 2)
      }).then(() => {
        // Force reflow so the hidden state is committed before transitioning
        graphDiv.getBoundingClientRect();
        requestAnimationFrame(() => {
          paths.forEach((path) => {
            const len = path.getTotalLength();
            if (len > 0) {
              path.style.transition = 'stroke-dashoffset 0.8s ease-out';
              path.style.strokeDashoffset = '0';
            }
          });
        });
      });
    }).catch((err) => {
      // e.g. an animation interrupted by another toggle click
      console.warn(`Scaling chart update for "${scenario}" did not complete:`, err);
    });
  });
}
// Dimension toggle event listeners for SCALING only.
// Clicking a `.dim-btn` inside #scaling marks it as the only active button,
// records its data-dim value and re-plots the scaling charts in that dimension.
document.addEventListener('DOMContentLoaded', () => {
  const dimensionToggles = document.querySelectorAll('#scaling .dim-btn');

  for (const toggle of dimensionToggles) {
    toggle.addEventListener('click', () => {
      // Only the scaling section's buttons are touched
      for (const other of dimensionToggles) {
        other.classList.remove('active');
      }
      toggle.classList.add('active');

      currentScalingDim = toggle.dataset.dim;
      updateScalingCharts(currentScalingDim);
    });
  }
});
// ============================================================================
// RANKING COMPARISON - With animated mode switching
// ============================================================================
// Maps raw model identifiers found in the ranking data to the names shown on
// the ranking charts; getDisplayName() falls back to the raw id when a model
// is missing here.
// NOTE(review): TURN_DISPLAY_NAMES further down duplicates most of these keys
// but labels 'qwen3-next-80b-a3b-instruct' as 'Qwen3-Next-80A3B' rather than
// 'Qwen3-Next-80BA3B' — confirm which spelling is intended.
const RANKING_DISPLAY_NAMES = {
'run_api_deepseek_deepseek-chat': 'DeepSeek-V3.2',
'qwen3-next-80b-a3b-instruct': 'Qwen3-Next-80BA3B',
'qwen2.5-14B-Instruct-1M': 'Qwen2.5-14B-1M',
'qwen2.5-7B-Instruct-1M': 'Qwen2.5-7B-1M',
'qwen2.5-14B-Instruct': 'Qwen2.5-14B',
'qwen2.5-7B-Instruct': 'Qwen2.5-7B',
'qwen2.5-72B-Instruct': 'Qwen2.5-72B',
'qwen2.5-32b-instruct': 'Qwen2.5-32B',
'qwen3-4B-Instruct-2507': 'Qwen3-4B',
'gemini2.5-flash-lite': 'Gemini2.5-Flash-Lite',
'gemini2.5-flash': 'Gemini2.5-Flash',
'gemini2.5-pro': 'Gemini2.5-Pro',
'claude4.5-sonnet': 'Claude4.5-Sonnet',
'llama3.3-70B': 'Llama3.3-70B',
'minimax-m2': 'MiniMax-M2',
// Both spellings of the GPT-5-mini id map to the same label
'gpt5mini': 'GPT-5-mini',
'gpt5-mini': 'GPT-5-mini',
'gpt5.1': 'GPT-5.1',
'gpt5.2': 'GPT-5.2',
'kimi-k2': 'Kimi-K2',
'glm4.6': 'GLM-4.6',
'qwen3': 'Qwen3-30B-A3B',
'gemini3-flash': 'Gemini3-Flash',
};
// Marker colors used by renderRankingCharts, chosen per model from its
// is_proprietary flag
const PROPRIETARY_COLOR = '#6A0DAD'; // Vivid purple
const OPENSOURCE_COLOR = '#228B22'; // Forest green
// Returns the ranking-chart label for a raw model id, or the id itself when
// RANKING_DISPLAY_NAMES has no (non-empty) entry for it.
function getDisplayName(model) {
  const friendlyName = RANKING_DISPLAY_NAMES[model];
  if (friendlyName) {
    return friendlyName;
  }
  return model;
}
// Render (or animate) the three ranking charts `ranking-mimic`, `ranking-10k`
// and `ranking-globem` from DDR_DATA.ranking.
// mode:    'novelty' orders the rows by bt_rank; any other value orders them by
//          acc_rank.
// animate: when true, transitions the existing chart with Plotly.animate;
//          otherwise creates it with Plotly.newPlot.
// Each chart has four traces, always built in bt_rank order so that a mode
// switch only changes the y positions: connector lines, novelty-rank markers,
// accuracy-rank markers, and the model-name text labels.
function renderRankingCharts(mode, animate = false) {
// key: property name in DDR_DATA.ranking; id: suffix of the chart element id
const scenarios = [
{ key: 'MIMIC', id: 'mimic' },
{ key: '10K', id: '10k' },
{ key: 'GLOBEM', id: 'globem' }
];
scenarios.forEach(({ key, id }) => {
const rawData = DDR_DATA.ranking[key];
if (!rawData) return;
// 1. Establish Base Order (Always sorted by Novelty/BT Rank initially)
// This ensures traces maintain object identity for animation
const baseModels = [...rawData].sort((a, b) => a.bt_rank - b.bt_rank);
const topN = baseModels.length;
// 2. Calculate Target Y-Positions based on current mode
// We need to know where each model *should* be
let sortedIndices;
if (mode === 'novelty') {
// In novelty mode, order matches baseModels (0, 1, 2...)
sortedIndices = baseModels.map((_, i) => i);
} else {
// In accuracy mode, we need to find the rank index of each baseModel
// Sort a copy to find the target order
const accSorted = [...baseModels].map((m, i) => ({ model: m.model, acc_rank: m.acc_rank, originalIdx: i }))
.sort((a, b) => a.acc_rank - b.acc_rank);
// Map: originalIdx -> targetY
const indexMap = new Array(topN);
accSorted.forEach((item, targetY) => {
indexMap[item.originalIdx] = targetY;
});
sortedIndices = indexMap;
}
// 3. Prepare Data Arrays using Base Order
// Invert Y-values so Rank 1 (Best) is at the TOP
const yValues = sortedIndices.map(idx => topN - 1 - idx);
const xBt = baseModels.map(m => m.bt_rank);
const xAcc = baseModels.map(m => m.acc_rank);
const names = baseModels.map(m => getDisplayName(m.model));
const colors = baseModels.map(m => m.is_proprietary ? PROPRIETARY_COLOR : OPENSOURCE_COLOR);
const traces = [];
// Trace 0: Connection Lines (Consolidated)
// One horizontal segment per model from its novelty rank to its accuracy
// rank; the null entries break the single trace into separate segments.
const lineX = [];
const lineY = [];
baseModels.forEach((_, i) => {
lineX.push(xBt[i], xAcc[i], null);
lineY.push(yValues[i], yValues[i], null);
});
traces.push({
x: lineX,
y: lineY,
mode: 'lines',
line: {
color: 'rgba(148, 163, 184, 0.4)',
width: 1.5,
dash: 'dash'
},
showlegend: false,
hoverinfo: 'skip'
});
// Trace 1: Novelty Rank Points
traces.push({
x: xBt,
y: yValues,
mode: 'markers',
name: 'Novelty Rank',
marker: {
// Markers of the active mode are drawn slightly larger
size: mode === 'novelty' ? 12 : 10,
symbol: 'circle',
color: colors,
line: { color: '#fff', width: 1.5 }
},
text: baseModels.map(m => `${getDisplayName(m.model)}
Novelty: #${m.bt_rank}
Win Rate: ${m.win_rate}%`),
hovertemplate: '%{text}'
});
// Trace 2: Accuracy Rank Points
traces.push({
x: xAcc,
y: yValues,
mode: 'markers',
name: 'Accuracy Rank',
marker: {
size: mode === 'accuracy' ? 12 : 10,
symbol: 'diamond-open',
color: colors,
line: { width: 2 }
},
text: baseModels.map(m => `${getDisplayName(m.model)}
Accuracy: #${m.acc_rank}
${m.accuracy}%`),
hovertemplate: '%{text}'
});
// Trace 3: Animated Y-Axis Labels (Model Names)
// Place them to the left of the max rank.
// X-axis is inverted (Max -> 1), so we place labels at Max + padding
// We want labels on the LEFT side.
// If range is [topN + 8, 0.5], then topN + 8 is on the LEFT.
// So we place labels at topN + 1.
const labelX = new Array(topN).fill(topN + 1);
traces.push({
x: labelX,
y: yValues,
mode: 'text',
text: names,
textposition: 'middle left',
textfont: { size: 10, color: '#515154', family: '-apple-system, BlinkMacSystemFont, "SF Pro Text", sans-serif' },
hoverinfo: 'skip',
showlegend: false
});
// Pearson correlation coefficient between the bt_rank and acc_rank values.
// NOTE(review): rho is NaN when there are no models or either rank column is
// constant (zero denominator); the annotation would then read "ρ = NaN".
const btRanks = baseModels.map(m => m.bt_rank);
const accRanks = baseModels.map(m => m.acc_rank);
const n = btRanks.length;
const meanBt = btRanks.reduce((a, b) => a + b, 0) / n;
const meanAcc = accRanks.reduce((a, b) => a + b, 0) / n;
let num = 0, denBt = 0, denAcc = 0;
for (let i = 0; i < n; i++) {
num += (btRanks[i] - meanBt) * (accRanks[i] - meanAcc);
denBt += (btRanks[i] - meanBt) ** 2;
denAcc += (accRanks[i] - meanAcc) ** 2;
}
const rho = num / Math.sqrt(denBt * denAcc);
const sortLabel = mode === 'novelty' ? 'Sorted by Novelty' : 'Sorted by Accuracy';
const layout = {
...darkLayout,
xaxis: {
...darkLayout.xaxis,
title: { text: 'Rank', font: { size: 10, color: '#1d1d1f' } },
// Reversed range (high -> low) with extra room on the left for the name labels
range: [topN + 8, 0.5], // Revert padding
tickmode: 'array', // Explicitly set ticks
tickvals: Array.from({ length: topN }, (_, i) => i + 1), // Only show ticks 1 to N
zeroline: false
},
yaxis: {
...darkLayout.yaxis,
showticklabels: false, // Hide native ticks
automargin: false, // We handle margin manually
range: [-1, topN + 2], // Add vertical padding
zeroline: false
},
showlegend: false,
annotations: [
// Top-left: correlation coefficient
{
x: 0.02,
y: 0.98,
xref: 'paper',
yref: 'paper',
text: `ρ = ${rho.toFixed(2)}`,
showarrow: false,
font: { size: 11, color: '#515154', family: '-apple-system, BlinkMacSystemFont, "SF Pro Text", sans-serif' },
bgcolor: 'rgba(255, 255, 255, 0.9)',
borderpad: 4
},
// Top-right: current sort mode
{
x: 0.98,
y: 0.98,
xref: 'paper',
yref: 'paper',
text: sortLabel,
showarrow: false,
font: { size: 10, color: mode === 'novelty' ? PROPRIETARY_COLOR : OPENSOURCE_COLOR, family: '-apple-system, BlinkMacSystemFont, "SF Pro Text", sans-serif' },
bgcolor: 'rgba(255, 255, 255, 0.9)',
borderpad: 4
}
],
// Adjust margins: Left needs to be smaller since labels are now inside the plot area (but visually left)
// Actually, since we extended X-range, we can keep normal margins or reduce left
margin: { t: 15, r: 15, b: 40, l: 20 }
};
if (animate) {
Plotly.animate(`ranking-${id}`, {
data: traces,
layout: layout
}, animationSettings);
} else {
Plotly.newPlot(`ranking-${id}`, traces, layout, plotlyConfig);
}
});
}
// First render of the ranking charts (novelty order, no animation), followed
// 100 ms later by a staggered opacity fade-in of the three chart containers.
// Polls every 100 ms until DDR_DATA.ranking is available.
function initRankingCharts() {
  const isDataReady = typeof DDR_DATA !== 'undefined' && Boolean(DDR_DATA.ranking);
  if (!isDataReady) {
    setTimeout(initRankingCharts, 100);
    return;
  }

  renderRankingCharts('novelty', false);

  const fadeChartsIn = () => {
    const chartSuffixes = ['mimic', '10k', 'globem'];
    for (let position = 0; position < chartSuffixes.length; position += 1) {
      const chartEl = document.getElementById(`ranking-${chartSuffixes[position]}`);
      if (!chartEl) continue;

      // Each chart starts 150 ms after the previous one
      const staggerMs = position * 150;
      chartEl.style.opacity = '0';
      chartEl.style.transition = `opacity 0.6s ease-out ${staggerMs}ms`;
      requestAnimationFrame(() => {
        chartEl.style.opacity = '1';
      });
    }
  };
  setTimeout(fadeChartsIn, 100);
}
// Ranking mode toggle event listener.
// Clicking a `.dim-btn` inside #ranking whose data-mode differs from the
// current mode makes it the only active button and animates the ranking charts
// into that mode; clicking the already-selected mode does nothing.
document.addEventListener('DOMContentLoaded', () => {
  const modeToggles = document.querySelectorAll('#ranking .dim-btn');

  for (const toggle of modeToggles) {
    toggle.addEventListener('click', () => {
      const requestedMode = toggle.dataset.mode;
      if (requestedMode === currentRankingMode) {
        return;
      }

      // Only the ranking section's buttons are touched
      for (const other of modeToggles) {
        other.classList.remove('active');
      }
      toggle.classList.add('active');

      currentRankingMode = requestedMode;
      renderRankingCharts(requestedMode, true);
    });
  }
});
// ============================================================================
// TURN DISTRIBUTION - 3 Charts (Ridgeline style)
// ============================================================================
// Maps raw model identifiers found in the turn-distribution data to the labels
// shown on the turn charts; getTurnDisplayName() falls back to the raw id when
// a model is missing here.
// NOTE(review): largely duplicates RANKING_DISPLAY_NAMES, adding the
// qwen3-next variants and a second llama spelling, but labels
// 'qwen3-next-80b-a3b-instruct' as 'Qwen3-Next-80A3B' where the ranking table
// uses 'Qwen3-Next-80BA3B' — confirm which spelling is intended.
const TURN_DISPLAY_NAMES = {
'run_api_deepseek_deepseek-chat': 'DeepSeek-V3.2',
'qwen3-next-80b-a3b-instruct': 'Qwen3-Next-80A3B',
'qwen3-next-80b-a3b-instruct-note': 'Qwen3-Next-80A3B-Note',
'qwen3-next-80b-a3b-instruct-noreasoning': 'Qwen3-Next-80A3B-NoR',
'qwen3-next-80b-a3b-instruct-longreasoning': 'Qwen3-Next-80A3B-LR',
'qwen3-next-80b-a3b-instruct-shortreasoning': 'Qwen3-Next-80A3B-SR',
'qwen2.5-14B-Instruct-1M': 'Qwen2.5-14B-1M',
'qwen2.5-7B-Instruct-1M': 'Qwen2.5-7B-1M',
'qwen2.5-14B-Instruct': 'Qwen2.5-14B',
'qwen2.5-7B-Instruct': 'Qwen2.5-7B',
'qwen2.5-72B-Instruct': 'Qwen2.5-72B',
'qwen2.5-32b-instruct': 'Qwen2.5-32B',
'qwen3-4B-Instruct-2507': 'Qwen3-4B',
'gemini2.5-flash-lite': 'Gemini2.5-Flash-Lite',
'gemini2.5-flash': 'Gemini2.5-Flash',
'gemini2.5-pro': 'Gemini2.5-Pro',
'claude4.5-sonnet': 'Claude4.5-Sonnet',
// Both spellings of the Llama id map to the same label
'llama3.3-70B': 'Llama3.3-70B',
'llama-3.3-70B': 'Llama3.3-70B',
'minimax-m2': 'MiniMax-M2',
// Both spellings of the GPT-5-mini id map to the same label
'gpt5mini': 'GPT-5-mini',
'gpt5-mini': 'GPT-5-mini',
'gpt5.1': 'GPT-5.1',
'gpt5.2': 'GPT-5.2',
'kimi-k2': 'Kimi-K2',
'glm4.6': 'GLM-4.6',
'qwen3': 'Qwen3-30B-A3B',
'gemini3-flash': 'Gemini3-Flash',
};
// Returns the turn-chart label for a raw model id, or the id itself when
// TURN_DISPLAY_NAMES has no (non-empty) entry for it.
function getTurnDisplayName(model) {
  const label = TURN_DISPLAY_NAMES[model];
  if (label) {
    return label;
  }
  return model;
}
// Build the three turn-distribution charts (`turn-mimic`, `turn-10k`,
// `turn-globem`) as ridgeline-style plots: one filled, spline-smoothed curve
// per model, each offset vertically by its row index.
// Shows at most the 15 models with the highest median, highest at the top.
// Polls every 100 ms until DDR_DATA.turn is available.
function initTurnCharts() {
// Check if data is loaded
if (typeof DDR_DATA === 'undefined' || !DDR_DATA.turn) {
setTimeout(initTurnCharts, 100);
return;
}
const scenarios = ['mimic', '10k', 'globem'];
// Family colors matching the Python script
const familyColors = {
'claude': '#D97706',
'gpt': '#10A37F',
'gemini': '#4285F4',
'deepseek': '#1E3A8A',
'glm': '#7C3AED',
'kimi': '#DC2626',
'minimax': '#EC4899',
'qwen': '#0EA5E9',
'llama': '#F59E0B'
};
// Color of the first family (in the order above) whose name occurs in the
// lower-cased model id; grey when none matches.
function getModelColor(modelName) {
const lower = modelName.toLowerCase();
for (const [family, color] of Object.entries(familyColors)) {
if (lower.includes(family)) return color;
}
return '#666666';
}
scenarios.forEach(scenario => {
const data = DDR_DATA.turn[scenario];
if (!data) return;
// Sort by median descending to get top 15
const sortedData = [...data].sort((a, b) => b.median - a.median);
// Limit to top 15 models, then reverse so highest median is at top of chart
const displayData = sortedData.slice(0, 15).reverse();
const traces = [];
// NOTE(review): this outer binCenters is never read — the loop below declares
// its own identical array that shadows it.
const binCenters = [5, 15, 25, 35, 45, 55, 65, 75, 85, 95];
displayData.forEach((model, idx) => {
const color = getModelColor(model.model);
// Baseline of this model's ridge: row idx sits at y = idx
const yOffset = idx;
const displayName = getTurnDisplayName(model.model);
// Tallest bin, used to scale the ridge; falls back to 1 when it is 0
const maxDist = Math.max(...model.distribution) || 1;
// Original bin centers and values
// (assumes model.distribution has one entry per bin center, i.e. 10 — TODO confirm)
const binCenters = [5, 15, 25, 35, 45, 55, 65, 75, 85, 95];
// Scale so the tallest bin reaches 0.75 of a row height
const binValues = model.distribution.map(d => d / maxDist * 0.75);
// Curve points: baseline at x=0, one point per bin center, baseline at x=100.
// No extra points are interpolated here; the smoothing comes from the
// spline line shape set on the trace.
const xSmooth = [];
const ySmooth = [];
// Add start point at baseline
xSmooth.push(0);
ySmooth.push(yOffset);
// One point per bin center, lifted above the baseline by the scaled bin value
for (let i = 0; i < binCenters.length; i++) {
xSmooth.push(binCenters[i]);
ySmooth.push(yOffset + binValues[i]);
}
// Add end point at baseline
xSmooth.push(100);
ySmooth.push(yOffset);
// Create the curve trace with spline smoothing
traces.push({
x: xSmooth,
y: ySmooth,
mode: 'lines',
line: {
color: color,
width: 2,
shape: 'spline', // Smooth spline interpolation
smoothing: 1.3 // Smoothing factor
},
fill: 'toself',
// Family color with the hex alpha suffix '60' appended
fillcolor: color + '60',
name: displayName,
hovertemplate: `${displayName}
Median: ${model.median}`,
showlegend: false
});
});
const layout = {
...darkLayout,
xaxis: {
...darkLayout.xaxis,
title: { text: 'Number of Turns', font: { size: 14, color: '#1d1d1f' } }, // Larger axis title
// Visible x range is narrower than the 0-100 span of the curve points
range: scenario === 'globem' ? [0, 40] : [0, 80],
dtick: 20
},
yaxis: {
...darkLayout.yaxis,
tickmode: 'array',
// One tick per row, placed 0.35 above the row's baseline, labelled with the model name
tickvals: displayData.map((_, i) => i + 0.35),
ticktext: displayData.map(m => getTurnDisplayName(m.model)),
tickfont: { size: 10, color: '#424245' }, // Small font for model names as requested
automargin: true,
range: [-0.5, displayData.length],
showgrid: false,
zeroline: false
},
margin: { ...darkLayout.margin, l: 85 }, // Reduced left margin for turn chart (was 140)
showlegend: false
};
Plotly.newPlot(`turn-${scenario}`, traces, layout, plotlyConfig).then(() => {
// Entrance animation: every trace path is stroke-drawn and faded in
const graphDiv = document.getElementById(`turn-${scenario}`);
if (!graphDiv) return;
// Get all paths under the scatter traces and animate them
const paths = graphDiv.querySelectorAll('.scatterlayer .trace path');
paths.forEach((path, index) => {
const len = path.getTotalLength();
if (len > 0) {
// Start fully hidden: stroke shifted out of view and opacity 0
path.style.transition = 'none';
path.style.strokeDasharray = len + ' ' + len;
path.style.strokeDashoffset = len;
path.style.opacity = '0';
// Staggered animation
const delay = index * 50;
requestAnimationFrame(() => {
path.style.transition = `stroke-dashoffset 0.8s ease-out ${delay}ms, opacity 0.4s ease-out ${delay}ms`;
path.style.strokeDashoffset = '0';
path.style.opacity = '1';
});
}
});
});
});
}
// ============================================================================
// PROBING RESULTS - 3 Charts with animated mode switching
// ============================================================================
// Set once the entrance line-drawing animation has been scheduled, so that it
// only plays for the first call of initProbingCharts.
let probingChartsInitialized = false;
// First render of the probing charts in 'byProgress' mode, followed (first call
// only) by a staggered line-drawing animation 200 ms later.
// Polls every 100 ms until DDR_DATA.probing is available.
function initProbingCharts() {
// Check if data is loaded
if (typeof DDR_DATA === 'undefined' || !DDR_DATA.probing) {
setTimeout(initProbingCharts, 100);
return;
}
renderProbingCharts('byProgress');
// Add line drawing animation for initial render
if (!probingChartsInitialized) {
probingChartsInitialized = true;
setTimeout(() => {
['mimic', 'globem', '10k'].forEach((scenario, scenarioIndex) => {
const graphDiv = document.getElementById(`probing-${scenario}`);
if (!graphDiv) return;
const paths = graphDiv.querySelectorAll('.scatterlayer .trace .lines path');
paths.forEach((path, index) => {
const len = path.getTotalLength();
if (len > 0) {
// Hide the line: one dash as long as the path, shifted fully out of view
path.style.transition = 'none';
path.style.strokeDasharray = len + ' ' + len;
path.style.strokeDashoffset = len;
// Stagger by chart (100 ms) and by line within the chart (60 ms)
const delay = scenarioIndex * 100 + index * 60;
requestAnimationFrame(() => {
path.style.transition = `stroke-dashoffset 0.8s ease-out ${delay}ms`;
path.style.strokeDashoffset = '0';
});
}
});
});
}, 200);
}
}
// Render the three probing charts (`probing-mimic`, `probing-globem`,
// `probing-10k`) from DDR_DATA.probing.
// mode: 'byTurn' plots log-probability against turn number; any other value is
//       treated as 'byProgress' and plots it against progress (%).
// A chart that already exists is transitioned with Plotly.animate; otherwise it
// is created with Plotly.newPlot. Also refreshes the shared `probing-legend`
// and re-applies the section's hover effects 100 ms later.
function renderProbingCharts(mode) {
const scenarios = ['mimic', 'globem', '10k'];
// Scenario -> chart id suffix (currently an identity mapping)
const scenarioIds = { 'mimic': 'mimic', 'globem': 'globem', '10k': '10k' };
scenarios.forEach(scenario => {
const modeKey = mode === 'byTurn' ? 'byTurn' : 'byProgress';
const data = DDR_DATA.probing[modeKey]?.[scenario];
if (!data) return;
const traces = [];
const allModels = Object.keys(data);
// Filter out 7B and 14B models
const models = allModels.filter(m => !m.includes('7B') && !m.includes('14B'));
models.forEach(model => {
const modelData = data[model];
// Name of the x-value array in the model's data, and its hover label
const xKey = mode === 'byTurn' ? 'turns' : 'progress';
const xLabel = mode === 'byTurn' ? 'Turn' : 'Progress (%)';
// Main line - CONSISTENT STYLE
traces.push({
x: modelData[xKey],
y: modelData.logprob,
mode: 'lines+markers', // Show both lines and data points
name: model,
line: {
color: (DDR_DATA.modelColors && DDR_DATA.modelColors[model]) || '#888',
width: 2
},
marker: { size: 6, color: (DDR_DATA.modelColors && DDR_DATA.modelColors[model]) || '#888' },
hovertemplate: `${model}
${xLabel}: %{x}
Log Prob: %{y:.2f}`
});
// Error band: a closed polygon running along logprob + sem and back along
// logprob - sem, filled with the model color plus hex alpha suffix '25'.
// Only added for models that provide `sem`, so the number of traces per
// chart depends on the data.
if (modelData.sem) {
const upper = modelData.logprob.map((v, i) => v + modelData.sem[i]);
const lower = modelData.logprob.map((v, i) => v - modelData.sem[i]);
traces.push({
x: [...modelData[xKey], ...modelData[xKey].slice().reverse()],
y: [...upper, ...lower.slice().reverse()],
fill: 'toself',
fillcolor: ((DDR_DATA.modelColors && DDR_DATA.modelColors[model]) || '#888') + '25',
line: { width: 0 },
showlegend: false,
hoverinfo: 'skip'
});
}
});
// Set different x-axis ranges based on mode
const xaxisConfig = mode === 'byTurn' ? {
title: { text: 'Turn', font: { size: 11, color: '#1d1d1f' } },
range: [0.5, 10.5], // Turns from 1-10
dtick: 1
} : {
title: { text: 'Interaction Progress (%)', font: { size: 11, color: '#1d1d1f' } },
range: [0, 100], // Progress from 0-100%
dtick: 10
};
const layout = {
...darkLayout,
xaxis: {
...darkLayout.xaxis,
...xaxisConfig
},
yaxis: {
...darkLayout.yaxis,
title: { text: 'Avg Log Probability', font: { size: 11, color: '#1d1d1f' } }
},
showlegend: false // Use shared legend instead
};
const chartId = `probing-${scenarioIds[scenario]}`;
// Check if chart exists (an element with a `data` property is treated as
// already plotted)
const chartDiv = document.getElementById(chartId);
if (chartDiv && chartDiv.data) {
// Use animate for smooth transition with layout update
Plotly.animate(chartId, {
data: traces,
layout: layout
}, animationSettings);
} else {
// Initial plot
Plotly.newPlot(chartId, traces, layout, plotlyConfig);
}
});
// Populate shared legend with filtered models from first available scenario
const firstScenario = scenarios.find(s => DDR_DATA.probing[mode === 'byTurn' ? 'byTurn' : 'byProgress']?.[s]);
if (firstScenario) {
const allModels = Object.keys(DDR_DATA.probing[mode === 'byTurn' ? 'byTurn' : 'byProgress'][firstScenario]);
const filteredModels = allModels.filter(m => !m.includes('7B') && !m.includes('14B'));
populateSharedLegend('probing-legend', filteredModels, DDR_DATA.modelColors);
}
// Apply hover effects after charts are rendered
setTimeout(() => applyHoverEffectsForSection('probing'), 100);
}
// ============================================================================
// ERROR ANALYSIS - Hierarchical Bar Chart
// ============================================================================
// Build the error-analysis bar chart in the `error-chart` container.
//
// Reads DDR_DATA.error, an array of items with main_category, subcategory,
// percentage, count and color. One bar is drawn per item in array order, and
// one label per main category is centred above the items of that category.
// The chart is first plotted with zero-height bars, then animated to the real
// values 200 ms after the initial plot completes.
//
// Polls every 100 ms until DDR_DATA exists; returns without plotting when
// DDR_DATA.error is missing or empty.
function initErrorChart() {
  // Check if data is loaded
  if (typeof DDR_DATA === 'undefined') {
    setTimeout(initErrorChart, 100);
    return;
  }
  const data = DDR_DATA.error;
  if (!data || data.length === 0) return;

  // First and last item index of each main category, in order of appearance
  const categoryGroups = {};
  data.forEach((item, idx) => {
    if (!categoryGroups[item.main_category]) {
      categoryGroups[item.main_category] = { start: idx, end: idx };
    }
    categoryGroups[item.main_category].end = idx;
  });

  const traces = [{
    x: data.map((d) => d.subcategory),
    y: data.map((d) => d.percentage),
    type: 'bar',
    marker: {
      color: data.map((d) => d.color),
      line: { color: '#fff', width: 0.5 }
    },
    text: data.map((d) => `${d.percentage}%`),
    textposition: 'outside',
    textfont: { size: 14, color: '#1d1d1f' }, // Larger bar text
    // Must be a single-line string literal; Plotly breaks hover lines on <br>
    hovertemplate: '%{x}<br>%{y:.1f}%<br>Count: %{customdata}',
    customdata: data.map((d) => d.count),
    showlegend: false
  }];

  const maxPct = Math.max(...data.map((d) => d.percentage));

  // One label per main category, centred over its bars and placed above the
  // tallest bar
  const annotations = Object.entries(categoryGroups).map(([catName, group]) => ({
    x: (group.start + group.end) / 2,
    y: maxPct * 1.15,
    text: `${catName}`,
    showarrow: false,
    font: { size: 13, color: '#1d1d1f' }, // Larger category labels
    xanchor: 'center',
    yanchor: 'bottom'
  }));

  const layout = {
    ...darkLayout,
    xaxis: {
      ...darkLayout.xaxis,
      tickangle: 0,
      tickfont: { size: 14, color: '#515154' } // Larger ticks
    },
    yaxis: {
      ...darkLayout.yaxis,
      title: { text: 'Percentage (%)', font: { size: 15, color: '#1d1d1f' } }, // Larger axis title
      // Headroom above the tallest bar for the bar text and category labels
      range: [0, maxPct * 1.25]
    },
    annotations: annotations,
    margin: { t: 50, r: 20, b: 100, l: 50 }
  };

  // Start with zero-height bars for animation
  const initialTraces = [{
    ...traces[0],
    y: data.map(() => 0), // Start at 0
    text: data.map(() => '') // Hide text initially
  }];

  Plotly.newPlot('error-chart', initialTraces, layout, plotlyConfig).then(() => {
    // Animate bars growing from 0 to target values
    setTimeout(() => {
      Plotly.animate('error-chart', {
        data: traces,
        traces: [0]
      }, {
        transition: {
          duration: 800,
          easing: 'cubic-out'
        },
        frame: {
          duration: 800,
          redraw: true
        }
      });
    }, 200);
  });
}
// ============================================================================
// ENTROPY ANALYSIS - Scatter plots by model (Entropy vs Coverage, Opacity = Accuracy)
// ============================================================================
// Models shown in the entropy grid. The array position of a model selects its
// `entropy-model-<index>` chart element.
const ENTROPY_MODELS = [
    'GPT-5.2',
    'Claude-4.5-Sonnet',
    'Gemini-3-Flash',
    'GLM-4.6',
    'Qwen3-Next-80B-A3B',
    'DeepSeek-V3.2'
];

// Scenario currently selected through the toggle buttons.
let currentEntropyScenario = '10k';
// Ensures the staggered entrance animation is scheduled only once.
let entropyChartsInitialized = false;

/**
 * Wire up the entropy scenario toggle buttons and draw the initial ('10k') charts.
 *
 * Re-schedules itself every 100 ms while `ENTROPY_DATA` is still undefined.
 * On the first successful run it also schedules a staggered fade/scale-in of
 * the six `entropy-model-<i>` elements.
 */
function initEntropyCharts() {
    if (typeof ENTROPY_DATA === 'undefined') {
        // Data not loaded yet; try again shortly.
        setTimeout(initEntropyCharts, 100);
        return;
    }

    const TOGGLE_SELECTOR = '[data-entropy-scenario]';

    // Clicking a toggle makes it the only active one and re-renders its scenario.
    const selectScenario = (clicked) => {
        for (const other of document.querySelectorAll(TOGGLE_SELECTOR)) {
            other.classList.remove('active');
        }
        clicked.classList.add('active');
        currentEntropyScenario = clicked.dataset.entropyScenario;
        renderEntropyCharts(currentEntropyScenario);
    };
    for (const toggle of document.querySelectorAll(TOGGLE_SELECTOR)) {
        toggle.addEventListener('click', () => selectScenario(toggle));
    }

    // Initial render
    renderEntropyCharts('10k');

    if (entropyChartsInitialized) return;
    entropyChartsInitialized = true;

    // Entrance animation: each card starts transparent and slightly shrunk,
    // then is restored on the next animation frame with a per-card delay.
    setTimeout(() => {
        Array.from({ length: 6 }, (_, slot) => slot).forEach((slot) => {
            const card = document.getElementById(`entropy-model-${slot}`);
            if (!card) return;
            const delay = `${slot * 100}ms`;
            card.style.opacity = '0';
            card.style.transform = 'scale(0.95)';
            card.style.transition = `opacity 0.5s ease-out ${delay}, transform 0.5s ease-out ${delay}`;
            requestAnimationFrame(() => {
                card.style.opacity = '1';
                card.style.transform = 'scale(1)';
            });
        });
    }, 100);
}
/**
 * Draw one entropy-vs-coverage scatter plot per entry of `ENTROPY_MODELS`.
 *
 * Reads `ENTROPY_DATA.datasets[scenario]` (`points`, optional `y_max`,
 * `acc_min`, `acc_max`) and `ENTROPY_DATA.modelColors`. Each point supplies
 * `model`, `entropy`, `coverage` and `accuracy`; accuracy is mapped to marker
 * opacity. Model `i` is drawn into the element `entropy-model-<i>` and its
 * heading `entropy-model-<i>-title` is updated with the sample count.
 *
 * Models without points get an empty chart with a "No data" annotation.
 * Models whose chart element is missing are skipped with a warning so the
 * remaining charts still render.
 *
 * @param {string} scenario - Key into `ENTROPY_DATA.datasets`.
 */
function renderEntropyCharts(scenario) {
    const entropyData = ENTROPY_DATA;
    const datasetInfo = entropyData.datasets[scenario];
    if (!datasetInfo) {
        console.error(`No entropy data for scenario: ${scenario}`);
        return;
    }
    const points = datasetInfo.points;
    const yMax = datasetInfo.y_max || 1;
    const accMin = datasetInfo.acc_min || 0;
    const accMax = datasetInfo.acc_max || 100;
    // Without a real accuracy spread the opacity mapping would divide by zero.
    const hasAccRange = accMax > accMin;
    const colors = entropyData.modelColors;

    // Group points by model
    const modelGroups = {};
    points.forEach(p => {
        if (!modelGroups[p.model]) {
            modelGroups[p.model] = [];
        }
        modelGroups[p.model].push(p);
    });

    // Render each model's subplot
    ENTROPY_MODELS.forEach((model, idx) => {
        const chartId = `entropy-model-${idx}`;
        const titleId = `entropy-model-${idx}-title`;
        const color = colors[model] || '#888888';
        const pts = modelGroups[model] || [];

        // Update title with sample count
        const titleEl = document.getElementById(titleId);
        if (titleEl) {
            titleEl.textContent = `${model} (n=${pts.length})`;
        }

        // Plotly throws when asked to plot into an id that does not exist, which
        // would abort the remaining models; skip this chart instead.
        const chartDiv = document.getElementById(chartId);
        if (!chartDiv) {
            console.warn(`Entropy chart container not found: ${chartId}`);
            return;
        }

        if (pts.length === 0) {
            // Show empty chart with message
            const emptyLayout = {
                ...darkLayout,
                xaxis: { ...darkLayout.xaxis, range: [0.6, 1.05], title: { text: 'Entropy', font: { size: 10, color: '#1d1d1f' } } },
                yaxis: { ...darkLayout.yaxis, range: [-0.05, yMax], title: { text: 'Coverage', font: { size: 10, color: '#1d1d1f' } } },
                annotations: [{
                    text: 'No data',
                    xref: 'paper', yref: 'paper',
                    x: 0.5, y: 0.5,
                    showarrow: false,
                    font: { size: 14, color: '#888' }
                }]
            };
            Plotly.newPlot(chartId, [], emptyLayout, plotlyConfig);
            return;
        }

        // Map accuracy linearly onto opacity 0.15..1; fixed 0.7 when there is no range.
        const alphas = pts.map(p => {
            if (hasAccRange) {
                return 0.15 + (p.accuracy - accMin) / (accMax - accMin) * 0.85;
            }
            return 0.7;
        });

        const trace = {
            x: pts.map(p => p.entropy),
            y: pts.map(p => p.coverage),
            mode: 'markers',
            type: 'scatter',
            marker: {
                color: color,
                size: 7,
                opacity: alphas,
                line: { color: '#333', width: 0.5 }
            },
            name: model,
            // Plotly hover text breaks lines with <br>, not with newline characters.
            text: pts.map(p => [
                `Entropy: ${p.entropy.toFixed(3)}`,
                `Coverage: ${(p.coverage * 100).toFixed(1)}%`,
                `Accuracy: ${p.accuracy.toFixed(1)}%`
            ].join('<br>')),
            // <extra></extra> suppresses the secondary trace-name box.
            hovertemplate: `<b>${model}</b><br>%{text}<extra></extra>`,
            showlegend: false
        };

        const layout = {
            ...darkLayout,
            xaxis: {
                ...darkLayout.xaxis,
                title: { text: 'Entropy', font: { size: 16, color: '#1d1d1f' } },
                range: [0.6, 1.05],
                dtick: 0.1
            },
            yaxis: {
                ...darkLayout.yaxis,
                title: { text: 'Coverage', font: { size: 16, color: '#1d1d1f' } },
                range: [-0.05, yMax]
            },
            margin: { t: 20, r: 20, b: 50, l: 50 }
        };

        // Dim the card, swap the data shortly afterwards, then fade back in.
        chartDiv.style.transition = 'opacity 0.3s ease';
        chartDiv.style.opacity = '0.3';
        setTimeout(() => {
            // Update chart with react (faster than newPlot)
            Plotly.react(chartId, [trace], layout, plotlyConfig);
            chartDiv.style.opacity = '1';
            // Re-apply hover effects after chart update
            addHoverHighlight(chartId);
        }, 150);
    });
}
// ============================================================================
// INITIALIZE ALL CHARTS - Using Lazy Loading for Performance
// ============================================================================
// Once the DOM is parsed, hand every `section.section` element to the lazy-load
// observer so that charts are only initialized when their section becomes visible.
document.addEventListener('DOMContentLoaded', function registerLazySections() {
    for (const sectionEl of document.querySelectorAll('section.section')) {
        lazyLoadObserver.observe(sectionEl);
    }
});
// Window resize handling: the listener restarts a 250 ms timer on every resize
// event, and the (additionally throttled) handler re-fits only those charts
// whose section has already been initialized.
let resizeTimeout;
const resizeHandler = throttle(() => {
    // Resize a chart element only if it exists and has been plotted.
    const resizeIfPlotted = (elementId) => {
        const el = document.getElementById(elementId);
        if (el && el.data) Plotly.Plots.resize(el);
    };

    // Sections that hold one chart per scenario, named `<section>-<scenario>`.
    const scenarioKeys = ['mimic', '10k', 'globem'];
    for (const section of ['scaling', 'ranking', 'turn', 'probing']) {
        if (!initializedCharts.has(section)) continue;
        for (const key of scenarioKeys) {
            resizeIfPlotted(`${section}-${key}`);
        }
    }

    // Entropy grid: six per-model charts.
    if (initializedCharts.has('entropy')) {
        Array.from({ length: 6 }, (_, slot) => `entropy-model-${slot}`).forEach(resizeIfPlotted);
    }

    // Single error-analysis chart.
    if (initializedCharts.has('error')) {
        resizeIfPlotted('error-chart');
    }
}, 250);

window.addEventListener('resize', () => {
    clearTimeout(resizeTimeout);
    resizeTimeout = setTimeout(resizeHandler, 250);
});
// ============================================================================
// HOVER HIGHLIGHT EFFECTS - Optimized with batched updates
// ============================================================================
/**
 * Build the batched `Plotly.restyle` payload for the hover highlight.
 *
 * Traces with `fill === 'toself'` (error bands) and empty slots are left out.
 * With `activeTrace === null` every remaining trace gets the resting style
 * (opacity 1, line width 2, marker size 6). Otherwise the active trace is
 * emphasized (line width 4, hovered marker size 12) and the others are dimmed
 * to opacity 0.4.
 *
 * @param {{data?: Array<object>}} chart - Plotly graph element.
 * @param {?number} [activeTrace=null] - Index of the hovered trace, or null to reset.
 * @param {?number} [activePoint=null] - Index of the hovered point within that trace.
 * @returns {{update: object, traceIndices: number[]}} Restyle arguments.
 */
function buildHoverRestyle(chart, activeTrace = null, activePoint = null) {
    const opacities = [];
    const markerSizes = [];
    const lineWidths = [];
    const traceIndices = [];
    const isReset = activeTrace === null;
    const numTraces = chart.data?.length || 0;
    for (let i = 0; i < numTraces; i++) {
        const trace = chart.data[i];
        if (!trace || trace.fill === 'toself') continue;
        const isActive = i === activeTrace;
        const sizes = Array(trace.x?.length || 0).fill(6);
        if (isActive && activePoint < sizes.length) sizes[activePoint] = 12;
        traceIndices.push(i);
        opacities.push(isReset || isActive ? 1 : 0.4);
        lineWidths.push(isActive ? 4 : 2);
        markerSizes.push(sizes);
    }
    return {
        update: {
            'opacity': opacities,
            'marker.size': markerSizes,
            'line.width': lineWidths
        },
        traceIndices
    };
}

/**
 * Attach hover-highlight behaviour to a Plotly chart.
 *
 * Hovering a point emphasizes its trace and dims the others; leaving restores
 * the resting style. Does nothing when the element is missing or exposes no
 * `on` method (i.e. has not been plotted).
 *
 * The function is idempotent: callers invoke it again after every re-render,
 * and re-renders via `Plotly.react` / `Plotly.animate` keep existing listeners,
 * so handlers bound by an earlier call are detached before new ones are added.
 *
 * @param {string} chartId - Element id of the chart.
 */
function addHoverHighlight(chartId) {
    const chart = document.getElementById(chartId);
    if (!chart || !chart.on) return;

    // Handlers bound per chart element. Kept on the function object so the
    // registry exists no matter when the first call happens.
    if (!addHoverHighlight.boundHandlers) {
        addHoverHighlight.boundHandlers = new WeakMap();
    }
    const registry = addHoverHighlight.boundHandlers;
    const previous = registry.get(chart);
    if (previous && typeof chart.removeListener === 'function') {
        chart.removeListener('plotly_hover', previous.hover);
        chart.removeListener('plotly_unhover', previous.unhover);
    }

    let lastHoveredTrace = null;
    let lastHoveredPoint = null;
    let isAnimating = false;
    const release = () => {
        isAnimating = false;
    };

    // Throttled to at most 20 updates per second.
    const handleHover = throttle((data) => {
        const point = data?.points?.[0];
        if (!point) return;
        const traceIndex = point.curveNumber;
        const pointIndex = point.pointNumber;
        // Skip if same point or a restyle is still in flight.
        if ((traceIndex === lastHoveredTrace && pointIndex === lastHoveredPoint) || isAnimating) return;
        lastHoveredTrace = traceIndex;
        lastHoveredPoint = pointIndex;
        isAnimating = true;

        const { update, traceIndices } = buildHoverRestyle(chart, traceIndex, pointIndex);
        // Single batched restyle call
        requestAnimationFrame(() => {
            if (traceIndices.length === 0) {
                release();
                return;
            }
            try {
                Plotly.restyle(chartId, update, traceIndices).then(release, release);
            } catch (err) {
                // A synchronous failure must not leave the in-flight guard stuck.
                release();
                throw err;
            }
        });
    }, 50);

    const handleUnhover = () => {
        lastHoveredTrace = null;
        lastHoveredPoint = null;
        const { update, traceIndices } = buildHoverRestyle(chart);
        if (traceIndices.length === 0) return;
        // Single batched reset call
        requestAnimationFrame(() => {
            Plotly.restyle(chartId, update, traceIndices);
        });
    };

    chart.on('plotly_hover', handleHover);
    chart.on('plotly_unhover', handleUnhover);
    registry.set(chart, { hover: handleHover, unhover: handleUnhover });
}
/**
 * Attach hover highlighting to every chart of a section on the next animation
 * frame (called from the section init/render functions).
 *
 * 'scaling' and 'probing' each have one chart per scenario
 * (`<section>-mimic`, `<section>-10k`, `<section>-globem`); 'entropy' has six
 * charts (`entropy-model-0` .. `entropy-model-5`). Any other section id still
 * schedules a frame but attaches nothing.
 *
 * @param {string} sectionId - Section whose charts should get hover effects.
 */
function applyHoverEffectsForSection(sectionId) {
    requestAnimationFrame(() => {
        let chartIds = [];
        if (sectionId === 'scaling' || sectionId === 'probing') {
            chartIds = ['mimic', '10k', 'globem'].map((scenario) => `${sectionId}-${scenario}`);
        } else if (sectionId === 'entropy') {
            chartIds = Array.from({ length: 6 }, (_, slot) => `entropy-model-${slot}`);
        }
        for (const chartId of chartIds) {
            addHoverHighlight(chartId);
        }
    });
}