thinkwee committed on
Commit
41d056a
·
1 Parent(s): 5026fae

fix display

Browse files
Files changed (4) hide show
  1. charts.js +239 -81
  2. data.js +59 -1
  3. index.html +24 -4
  4. styles.css +16 -0
charts.js CHANGED
@@ -377,9 +377,11 @@ document.querySelectorAll('.dim-btn:not(.probing-dim)').forEach(btn => {
377
  });
378
 
379
  // ============================================================================
380
- // RANKING COMPARISON - 3 Charts
381
  // ============================================================================
382
- function initRankingCharts() {
 
 
383
  const scenarios = [
384
  { key: 'MIMIC', id: 'mimic' },
385
  { key: '10K', id: '10k' },
@@ -390,106 +392,162 @@ function initRankingCharts() {
390
  const data = DDR_DATA.ranking[key];
391
  if (!data) return;
392
 
393
- const models = data.slice(0, 15); // Top 15 models
394
  const traces = [];
395
 
396
- // Connection lines
397
- models.forEach((m, i) => {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
398
  traces.push({
399
- x: [m.bt_rank, m.acc_rank],
400
- y: [i, i],
401
- mode: 'lines',
402
- line: {
403
- color: 'rgba(148, 163, 184, 0.25)',
404
- width: 1,
405
- dash: 'dash'
 
 
406
  },
407
- showlegend: false,
408
- hoverinfo: 'skip'
409
  });
410
- });
411
 
412
- // Novelty rank points
413
- traces.push({
414
- x: models.map(m => m.bt_rank),
415
- y: models.map((_, i) => i),
416
- mode: 'markers',
417
- name: 'Novelty',
418
- marker: {
419
- size: 10,
420
- symbol: 'circle',
421
- color: models.map(m => m.is_proprietary ? '#8B5CF6' : '#22C55E'),
422
- line: { color: '#000', width: 0.5 }
423
- },
424
- text: models.map(m => `${m.model}<br>Novelty: #${m.bt_rank}<br>Win: ${m.win_rate}%`),
425
- hovertemplate: '%{text}<extra></extra>'
426
- });
 
 
 
 
 
427
 
428
- // Accuracy rank points
429
- traces.push({
430
- x: models.map(m => m.acc_rank),
431
- y: models.map((_, i) => i),
432
- mode: 'markers',
433
- name: 'Accuracy',
434
- marker: {
435
- size: 12,
436
- symbol: 'diamond-open',
437
- color: models.map(m => m.is_proprietary ? '#8B5CF6' : '#22C55E'),
438
- line: { width: 2 }
439
- },
440
- text: models.map(m => `${m.model}<br>Accuracy: #${m.acc_rank}<br>${m.accuracy}%`),
441
- hovertemplate: '%{text}<extra></extra>'
442
- });
 
 
 
 
 
 
 
 
443
 
444
  const layout = {
445
  ...darkLayout,
446
  xaxis: {
447
  ...darkLayout.xaxis,
448
  title: { text: 'Rank', font: { size: 11, color: '#e2e8f0' } },
449
- range: [Math.max(...models.map(m => Math.max(m.bt_rank, m.acc_rank))) + 1, 0],
450
  dtick: 2
451
  },
452
  yaxis: {
453
  ...darkLayout.yaxis,
454
  tickmode: 'array',
455
  tickvals: models.map((_, i) => i),
456
- ticktext: models.map(m => m.model.substring(0, 15)),
457
  automargin: true
458
  },
459
  showlegend: true,
460
  legend: {
461
  ...darkLayout.legend,
462
- y: -0.12
463
  },
464
- margin: { ...darkLayout.margin, l: 120 }
465
  };
466
 
467
- Plotly.newPlot(`ranking-${id}`, traces, layout, plotlyConfig);
468
  });
469
  }
470
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471
  // ============================================================================
472
- // TURN DISTRIBUTION - 3 Charts (Box plots)
473
  // ============================================================================
474
  function initTurnCharts() {
475
  const scenarios = ['mimic', '10k', 'globem'];
476
 
477
  // Family colors
478
  const familyColors = {
479
- 'Claude': '#FF6D00',
480
- 'GPT': '#00C853',
481
- 'Gemini': '#2196F3',
482
- 'DeepSeek': '#E91E63',
483
- 'GLM': '#9C27B0',
484
- 'Kimi': '#FFA500',
485
- 'MiniMax': '#20B2AA',
486
- 'Qwen': '#0EA5E9',
487
- 'Llama': '#F59E0B'
488
  };
489
 
490
  function getModelColor(modelName) {
 
491
  for (const [family, color] of Object.entries(familyColors)) {
492
- if (modelName.includes(family)) return color;
493
  }
494
  return '#888';
495
  }
@@ -498,43 +556,70 @@ function initTurnCharts() {
498
  const data = DDR_DATA.turn[scenario];
499
  if (!data) return;
500
 
501
- const sortedData = [...data].sort((a, b) => a.median - b.median);
 
502
 
503
- const traces = sortedData.map((model, i) => {
 
 
 
 
 
 
 
 
504
  const color = getModelColor(model.model);
 
 
 
 
 
505
 
506
- return {
507
- y: [model.model],
508
- x: [model.median],
509
- type: 'bar',
510
- orientation: 'h',
 
 
 
511
  name: model.model,
512
- marker: {
513
- color: color,
514
- opacity: 0.8
515
- },
516
- text: [`${model.median}`],
517
- textposition: 'outside',
518
- textfont: { size: 9, color: '#94a3b8' },
519
- hovertemplate: `<b>${model.model}</b><br>Median: ${model.median} turns<extra></extra>`,
 
 
 
 
 
 
520
  showlegend: false
521
- };
522
  });
523
 
524
  const layout = {
525
  ...darkLayout,
526
- barmode: 'group',
527
  xaxis: {
528
  ...darkLayout.xaxis,
529
  title: { text: 'Number of Turns', font: { size: 11, color: '#e2e8f0' } },
530
- range: [0, Math.max(...sortedData.map(d => d.median)) * 1.15]
 
531
  },
532
  yaxis: {
533
  ...darkLayout.yaxis,
 
 
 
534
  automargin: true,
535
- tickfont: { size: 9 }
536
  },
537
- margin: { ...darkLayout.margin, l: 130 }
 
538
  };
539
 
540
  Plotly.newPlot(`turn-${scenario}`, traces, layout, plotlyConfig);
@@ -638,6 +723,75 @@ document.querySelectorAll('.probing-dim').forEach(btn => {
638
  });
639
  });
640
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
641
  // ============================================================================
642
  // INITIALIZE ALL CHARTS
643
  // ============================================================================
@@ -645,6 +799,7 @@ document.addEventListener('DOMContentLoaded', () => {
645
  initScalingCharts();
646
  initRankingCharts();
647
  initTurnCharts();
 
648
  initProbingCharts();
649
  });
650
 
@@ -659,5 +814,8 @@ window.addEventListener('resize', () => {
659
  Plotly.Plots.resize(`turn-${s}`);
660
  Plotly.Plots.resize(`probing-${s}`);
661
  });
 
 
 
662
  }, 100);
663
  });
 
377
  });
378
 
379
  // ============================================================================
380
+ // RANKING COMPARISON - 3 Charts with animated mode switching
381
  // ============================================================================
382
+ let currentRankingMode = 'comparison';
383
+
384
+ function renderRankingCharts(mode) {
385
  const scenarios = [
386
  { key: 'MIMIC', id: 'mimic' },
387
  { key: '10K', id: '10k' },
 
392
  const data = DDR_DATA.ranking[key];
393
  if (!data) return;
394
 
395
+ const models = data.slice(0, 12); // Top 12 models for better fit
396
  const traces = [];
397
 
398
+ // Get x-axis values based on mode
399
+ const getXValue = (m) => {
400
+ switch (mode) {
401
+ case 'novelty': return m.bt_rank;
402
+ case 'accuracy': return m.acc_rank;
403
+ default: return m.bt_rank; // For comparison, use bt_rank as base
404
+ }
405
+ };
406
+
407
+ if (mode === 'comparison') {
408
+ // Connection lines
409
+ models.forEach((m, i) => {
410
+ traces.push({
411
+ x: [m.bt_rank, m.acc_rank],
412
+ y: [i, i],
413
+ mode: 'lines',
414
+ line: {
415
+ color: 'rgba(148, 163, 184, 0.3)',
416
+ width: 1.5,
417
+ dash: 'dot'
418
+ },
419
+ showlegend: false,
420
+ hoverinfo: 'skip'
421
+ });
422
+ });
423
+
424
+ // Novelty rank points
425
  traces.push({
426
+ x: models.map(m => m.bt_rank),
427
+ y: models.map((_, i) => i),
428
+ mode: 'markers',
429
+ name: 'Novelty Rank',
430
+ marker: {
431
+ size: 10,
432
+ symbol: 'circle',
433
+ color: '#8B5CF6',
434
+ line: { color: '#fff', width: 1 }
435
  },
436
+ text: models.map(m => `${m.model}<br>Novelty: #${m.bt_rank}<br>Win Rate: ${m.win_rate}%`),
437
+ hovertemplate: '%{text}<extra></extra>'
438
  });
 
439
 
440
+ // Accuracy rank points
441
+ traces.push({
442
+ x: models.map(m => m.acc_rank),
443
+ y: models.map((_, i) => i),
444
+ mode: 'markers',
445
+ name: 'Accuracy Rank',
446
+ marker: {
447
+ size: 10,
448
+ symbol: 'diamond',
449
+ color: '#22C55E',
450
+ line: { color: '#fff', width: 1 }
451
+ },
452
+ text: models.map(m => `${m.model}<br>Accuracy: #${m.acc_rank}<br>${m.accuracy}%`),
453
+ hovertemplate: '%{text}<extra></extra>'
454
+ });
455
+ } else {
456
+ // Single mode - just points
457
+ const xVals = models.map(m => mode === 'novelty' ? m.bt_rank : m.acc_rank);
458
+ const color = mode === 'novelty' ? '#8B5CF6' : '#22C55E';
459
+ const label = mode === 'novelty' ? 'Novelty' : 'Accuracy';
460
 
461
+ traces.push({
462
+ x: xVals,
463
+ y: models.map((_, i) => i),
464
+ mode: 'markers',
465
+ name: label,
466
+ marker: {
467
+ size: 12,
468
+ symbol: 'circle',
469
+ color: color,
470
+ line: { color: '#fff', width: 1 }
471
+ },
472
+ text: models.map(m => {
473
+ if (mode === 'novelty') {
474
+ return `${m.model}<br>Novelty: #${m.bt_rank}<br>Win Rate: ${m.win_rate}%`;
475
+ } else {
476
+ return `${m.model}<br>Accuracy: #${m.acc_rank}<br>${m.accuracy}%`;
477
+ }
478
+ }),
479
+ hovertemplate: '%{text}<extra></extra>'
480
+ });
481
+ }
482
+
483
+ const maxRank = Math.max(...models.map(m => Math.max(m.bt_rank, m.acc_rank)));
484
 
485
  const layout = {
486
  ...darkLayout,
487
  xaxis: {
488
  ...darkLayout.xaxis,
489
  title: { text: 'Rank', font: { size: 11, color: '#e2e8f0' } },
490
+ range: [maxRank + 1, 0],
491
  dtick: 2
492
  },
493
  yaxis: {
494
  ...darkLayout.yaxis,
495
  tickmode: 'array',
496
  tickvals: models.map((_, i) => i),
497
+ ticktext: models.map(m => m.model.length > 18 ? m.model.substring(0, 16) + '...' : m.model),
498
  automargin: true
499
  },
500
  showlegend: true,
501
  legend: {
502
  ...darkLayout.legend,
503
+ y: -0.15
504
  },
505
+ margin: { ...darkLayout.margin, l: 130, b: 70 }
506
  };
507
 
508
+ Plotly.react(`ranking-${id}`, traces, layout, plotlyConfig);
509
  });
510
  }
511
 
512
+ function initRankingCharts() {
513
+ renderRankingCharts('comparison');
514
+ }
515
+
516
+ // Ranking mode toggle event listener
517
+ document.querySelectorAll('.ranking-dim').forEach(btn => {
518
+ btn.addEventListener('click', () => {
519
+ document.querySelectorAll('.ranking-dim').forEach(b => b.classList.remove('active'));
520
+ btn.classList.add('active');
521
+
522
+ const mode = btn.dataset.mode;
523
+ currentRankingMode = mode;
524
+ renderRankingCharts(mode);
525
+ });
526
+ });
527
+
528
  // ============================================================================
529
+ // TURN DISTRIBUTION - 3 Charts (Ridgeline style)
530
  // ============================================================================
531
  function initTurnCharts() {
532
  const scenarios = ['mimic', '10k', 'globem'];
533
 
534
  // Family colors
535
  const familyColors = {
536
+ 'claude': '#FF6D00',
537
+ 'gpt': '#00C853',
538
+ 'gemini': '#2196F3',
539
+ 'deepseek': '#E91E63',
540
+ 'glm': '#9C27B0',
541
+ 'kimi': '#FFA500',
542
+ 'minimax': '#20B2AA',
543
+ 'qwen': '#0EA5E9',
544
+ 'llama': '#F59E0B'
545
  };
546
 
547
  function getModelColor(modelName) {
548
+ const lower = modelName.toLowerCase();
549
  for (const [family, color] of Object.entries(familyColors)) {
550
+ if (lower.includes(family)) return color;
551
  }
552
  return '#888';
553
  }
 
556
  const data = DDR_DATA.turn[scenario];
557
  if (!data) return;
558
 
559
+ // Sort by median descending (highest median at top)
560
+ const sortedData = [...data].sort((a, b) => b.median - a.median);
561
 
562
+ // Limit to top 15 models for readability
563
+ const displayData = sortedData.slice(0, 15);
564
+
565
+ const traces = [];
566
+ const binLabels = ['0-10', '10-20', '20-30', '30-40', '40-50', '50-60', '60-70', '70-80', '80-90', '90-100'];
567
+ const binCenters = [5, 15, 25, 35, 45, 55, 65, 75, 85, 95];
568
+
569
+ // Create ridgeline traces (area charts stacked vertically)
570
+ displayData.forEach((model, idx) => {
571
  const color = getModelColor(model.model);
572
+ const yOffset = idx;
573
+
574
+ // Scale distribution to fit in the row (max height ~0.8)
575
+ const maxDist = Math.max(...model.distribution) || 1;
576
+ const scaledDist = model.distribution.map(d => d / maxDist * 0.7);
577
 
578
+ // Create filled area trace
579
+ traces.push({
580
+ x: binCenters,
581
+ y: scaledDist.map(d => yOffset + d),
582
+ mode: 'lines',
583
+ fill: 'toself',
584
+ fillcolor: color + '40', // 25% opacity
585
+ line: { color: color, width: 1.5 },
586
  name: model.model,
587
+ text: model.distribution.map((d, i) =>
588
+ `${model.model}<br>${binLabels[i]} turns: ${d.toFixed(1)}%<br>Median: ${model.median}`
589
+ ),
590
+ hovertemplate: '%{text}<extra></extra>',
591
+ showlegend: false
592
+ });
593
+
594
+ // Add baseline
595
+ traces.push({
596
+ x: [0, 100],
597
+ y: [yOffset, yOffset],
598
+ mode: 'lines',
599
+ line: { color: 'rgba(148, 163, 184, 0.2)', width: 0.5 },
600
+ hoverinfo: 'skip',
601
  showlegend: false
602
+ });
603
  });
604
 
605
  const layout = {
606
  ...darkLayout,
 
607
  xaxis: {
608
  ...darkLayout.xaxis,
609
  title: { text: 'Number of Turns', font: { size: 11, color: '#e2e8f0' } },
610
+ range: [0, 100],
611
+ dtick: 20
612
  },
613
  yaxis: {
614
  ...darkLayout.yaxis,
615
+ tickmode: 'array',
616
+ tickvals: displayData.map((_, i) => i),
617
+ ticktext: displayData.map(m => m.model.length > 20 ? m.model.substring(0, 18) + '...' : m.model),
618
  automargin: true,
619
+ range: [-0.5, displayData.length]
620
  },
621
+ margin: { ...darkLayout.margin, l: 140 },
622
+ showlegend: false
623
  };
624
 
625
  Plotly.newPlot(`turn-${scenario}`, traces, layout, plotlyConfig);
 
723
  });
724
  });
725
 
726
+ // ============================================================================
727
+ // ERROR ANALYSIS - Hierarchical Bar Chart
728
+ // ============================================================================
729
+ function initErrorChart() {
730
+ const data = DDR_DATA.error;
731
+ if (!data || data.length === 0) return;
732
+
733
+ // Group by main category for bracket annotations
734
+ const categoryGroups = {};
735
+ data.forEach((item, idx) => {
736
+ if (!categoryGroups[item.main_category]) {
737
+ categoryGroups[item.main_category] = { start: idx, end: idx, items: [] };
738
+ }
739
+ categoryGroups[item.main_category].end = idx;
740
+ categoryGroups[item.main_category].items.push(item);
741
+ });
742
+
743
+ const traces = [{
744
+ x: data.map(d => d.subcategory),
745
+ y: data.map(d => d.percentage),
746
+ type: 'bar',
747
+ marker: {
748
+ color: data.map(d => d.color),
749
+ line: { color: '#fff', width: 0.5 }
750
+ },
751
+ text: data.map(d => `${d.percentage}%`),
752
+ textposition: 'outside',
753
+ textfont: { size: 11, color: '#e2e8f0' },
754
+ hovertemplate: '<b>%{x}</b><br>%{y:.1f}%<br>Count: %{customdata}<extra></extra>',
755
+ customdata: data.map(d => d.count),
756
+ showlegend: false
757
+ }];
758
+
759
+ const maxPct = Math.max(...data.map(d => d.percentage));
760
+
761
+ // Create annotations for main category labels
762
+ const annotations = [];
763
+ Object.entries(categoryGroups).forEach(([catName, group]) => {
764
+ const midIdx = (group.start + group.end) / 2;
765
+ annotations.push({
766
+ x: midIdx,
767
+ y: maxPct * 1.15,
768
+ text: `<b>${catName}</b>`,
769
+ showarrow: false,
770
+ font: { size: 10, color: '#e2e8f0' },
771
+ xanchor: 'center',
772
+ yanchor: 'bottom'
773
+ });
774
+ });
775
+
776
+ const layout = {
777
+ ...darkLayout,
778
+ xaxis: {
779
+ ...darkLayout.xaxis,
780
+ tickangle: -30,
781
+ tickfont: { size: 10, color: '#94a3b8' }
782
+ },
783
+ yaxis: {
784
+ ...darkLayout.yaxis,
785
+ title: { text: 'Percentage (%)', font: { size: 11, color: '#e2e8f0' } },
786
+ range: [0, maxPct * 1.25]
787
+ },
788
+ annotations: annotations,
789
+ margin: { t: 50, r: 20, b: 100, l: 50 }
790
+ };
791
+
792
+ Plotly.newPlot('error-chart', traces, layout, plotlyConfig);
793
+ }
794
+
795
  // ============================================================================
796
  // INITIALIZE ALL CHARTS
797
  // ============================================================================
 
799
  initScalingCharts();
800
  initRankingCharts();
801
  initTurnCharts();
802
+ initErrorChart();
803
  initProbingCharts();
804
  });
805
 
 
814
  Plotly.Plots.resize(`turn-${s}`);
815
  Plotly.Plots.resize(`probing-${s}`);
816
  });
817
+ if (document.getElementById('error-chart')) {
818
+ Plotly.Plots.resize('error-chart');
819
+ }
820
  }, 100);
821
  });
data.js CHANGED
@@ -3756,5 +3756,63 @@ const DDR_DATA = {
3756
  "Qwen3-4B": "#57E389",
3757
  "Qwen3-30B-A3B": "#26A269",
3758
  "Qwen3-Next-80B-A3B": "#9141AC"
3759
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3760
  };
 
3756
  "Qwen3-4B": "#57E389",
3757
  "Qwen3-30B-A3B": "#26A269",
3758
  "Qwen3-Next-80B-A3B": "#9141AC"
3759
+ },
3760
+ error: [
3761
+ {
3762
+ "main_category": "Fail in Exploration",
3763
+ "subcategory": "Insufficient Breadth",
3764
+ "count": 64,
3765
+ "percentage": 31.1,
3766
+ "color": "#1565C0"
3767
+ },
3768
+ {
3769
+ "main_category": "Fail in Exploration",
3770
+ "subcategory": "Insufficient Depth",
3771
+ "count": 56,
3772
+ "percentage": 27.2,
3773
+ "color": "#42A5F5"
3774
+ },
3775
+ {
3776
+ "main_category": "Poor Data-to-Insight",
3777
+ "subcategory": "Insight Misinterpretation",
3778
+ "count": 19,
3779
+ "percentage": 9.2,
3780
+ "color": "#2E7D32"
3781
+ },
3782
+ {
3783
+ "main_category": "Poor Data-to-Insight",
3784
+ "subcategory": "Superficial Analysis",
3785
+ "count": 16,
3786
+ "percentage": 7.8,
3787
+ "color": "#43A047"
3788
+ },
3789
+ {
3790
+ "main_category": "Poor Data-to-Insight",
3791
+ "subcategory": "Over Reasoning",
3792
+ "count": 15,
3793
+ "percentage": 7.3,
3794
+ "color": "#81C784"
3795
+ },
3796
+ {
3797
+ "main_category": "Lost in Context",
3798
+ "subcategory": "Lost in Debugging",
3799
+ "count": 18,
3800
+ "percentage": 8.7,
3801
+ "color": "#C62828"
3802
+ },
3803
+ {
3804
+ "main_category": "Lost in Context",
3805
+ "subcategory": "Fail in Summarization",
3806
+ "count": 10,
3807
+ "percentage": 4.9,
3808
+ "color": "#E53935"
3809
+ },
3810
+ {
3811
+ "main_category": "Lost in Context",
3812
+ "subcategory": "Poor Instruction Following",
3813
+ "count": 8,
3814
+ "percentage": 3.9,
3815
+ "color": "#EF9A9A"
3816
+ }
3817
+ ]
3818
  };
index.html CHANGED
@@ -46,6 +46,7 @@
46
  <button class="nav-tab active" data-section="scaling">📈 Scaling Analysis</button>
47
  <button class="nav-tab" data-section="ranking">🏆 Ranking Comparison</button>
48
  <button class="nav-tab" data-section="turn">🔄 Turn Distribution</button>
 
49
  <button class="nav-tab" data-section="probing">🔍 Probing Results</button>
50
  </nav>
51
 
@@ -79,29 +80,35 @@
79
  </div>
80
  </section>
81
 
82
- <!-- Ranking Comparison Section - 3 charts -->
83
  <section id="ranking" class="section">
84
  <div class="section-header">
85
  <h2>Novelty vs Accuracy Ranking</h2>
86
  <p>Compare model rankings based on Bradley-Terry pairwise ranking against traditional accuracy ranking.
87
  </p>
88
  </div>
 
 
 
 
 
89
  <div class="charts-grid three-col">
90
  <div class="chart-card">
91
  <h3>MIMIC</h3>
92
- <div id="ranking-mimic" class="chart-container-tall"></div>
93
  </div>
94
  <div class="chart-card">
95
  <h3>10-K</h3>
96
- <div id="ranking-10k" class="chart-container-tall"></div>
97
  </div>
98
  <div class="chart-card">
99
  <h3>GLOBEM</h3>
100
- <div id="ranking-globem" class="chart-container-tall"></div>
101
  </div>
102
  </div>
103
  </section>
104
 
 
105
  <!-- Turn Distribution Section - 3 charts -->
106
  <section id="turn" class="section">
107
  <div class="section-header">
@@ -124,6 +131,19 @@
124
  </div>
125
  </section>
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  <!-- Probing Results Section -->
128
  <section id="probing" class="section">
129
  <div class="section-header">
 
46
  <button class="nav-tab active" data-section="scaling">📈 Scaling Analysis</button>
47
  <button class="nav-tab" data-section="ranking">🏆 Ranking Comparison</button>
48
  <button class="nav-tab" data-section="turn">🔄 Turn Distribution</button>
49
+ <button class="nav-tab" data-section="error">⚠️ Error Analysis</button>
50
  <button class="nav-tab" data-section="probing">🔍 Probing Results</button>
51
  </nav>
52
 
 
80
  </div>
81
  </section>
82
 
83
+ <!-- Ranking Comparison Section - 3 charts with toggle -->
84
  <section id="ranking" class="section">
85
  <div class="section-header">
86
  <h2>Novelty vs Accuracy Ranking</h2>
87
  <p>Compare model rankings based on Bradley-Terry pairwise ranking against traditional accuracy ranking.
88
  </p>
89
  </div>
90
+ <div class="dimension-toggle">
91
+ <button class="dim-btn ranking-dim active" data-mode="comparison">🔀 Comparison View</button>
92
+ <button class="dim-btn ranking-dim" data-mode="novelty">🎯 Novelty Rank</button>
93
+ <button class="dim-btn ranking-dim" data-mode="accuracy">📊 Accuracy Rank</button>
94
+ </div>
95
  <div class="charts-grid three-col">
96
  <div class="chart-card">
97
  <h3>MIMIC</h3>
98
+ <div id="ranking-mimic" class="chart-container-md"></div>
99
  </div>
100
  <div class="chart-card">
101
  <h3>10-K</h3>
102
+ <div id="ranking-10k" class="chart-container-md"></div>
103
  </div>
104
  <div class="chart-card">
105
  <h3>GLOBEM</h3>
106
+ <div id="ranking-globem" class="chart-container-md"></div>
107
  </div>
108
  </div>
109
  </section>
110
 
111
+
112
  <!-- Turn Distribution Section - 3 charts -->
113
  <section id="turn" class="section">
114
  <div class="section-header">
 
131
  </div>
132
  </section>
133
 
134
+ <!-- Error Analysis Section -->
135
+ <section id="error" class="section">
136
+ <div class="section-header">
137
+ <h2>Error Type Analysis</h2>
138
+ <p>Breakdown of error types encountered during agent interactions, grouped by main categories.</p>
139
+ </div>
140
+ <div class="charts-grid single">
141
+ <div class="chart-card wide">
142
+ <div id="error-chart" class="chart-container-md"></div>
143
+ </div>
144
+ </div>
145
+ </section>
146
+
147
  <!-- Probing Results Section -->
148
  <section id="probing" class="section">
149
  <div class="section-header">
styles.css CHANGED
@@ -294,11 +294,27 @@ body {
294
  min-height: 300px;
295
  }
296
 
 
 
 
 
 
297
  .chart-container-tall {
298
  height: 550px;
299
  min-height: 500px;
300
  }
301
 
 
 
 
 
 
 
 
 
 
 
 
302
  /* Footer */
303
  .footer {
304
  text-align: center;
 
294
  min-height: 300px;
295
  }
296
 
297
+ .chart-container-md {
298
+ height: 450px;
299
+ min-height: 400px;
300
+ }
301
+
302
  .chart-container-tall {
303
  height: 550px;
304
  min-height: 500px;
305
  }
306
 
307
+ /* Single chart grid */
308
+ .charts-grid.single {
309
+ grid-template-columns: 1fr;
310
+ max-width: 1000px;
311
+ margin: 0 auto;
312
+ }
313
+
314
+ .chart-card.wide {
315
+ padding: 1.5rem;
316
+ }
317
+
318
  /* Footer */
319
  .footer {
320
  text-align: center;