thinkwee committed on
Commit
655137d
·
1 Parent(s): 2dcac27

wider layout, ranking toggle animation

Browse files
Files changed (3) hide show
  1. charts.js +89 -52
  2. index.html +7 -2
  3. styles.css +3 -3
charts.js CHANGED
@@ -362,7 +362,7 @@ document.querySelectorAll('.dim-btn:not(.probing-dim)').forEach(btn => {
362
  });
363
 
364
  // ============================================================================
365
- // RANKING COMPARISON - Matches Python create_rank_figure.py exactly
366
  // ============================================================================
367
  const RANKING_DISPLAY_NAMES = {
368
  'run_api_deepseek_deepseek-chat': 'DeepSeek-V3.2',
@@ -390,45 +390,47 @@ const RANKING_DISPLAY_NAMES = {
390
  'gemini3-flash': 'Gemini3-Flash',
391
  };
392
 
 
 
 
393
  function getDisplayName(model) {
394
  return RANKING_DISPLAY_NAMES[model] || model;
395
  }
396
 
397
- function initRankingCharts() {
 
 
398
  const scenarios = [
399
  { key: 'MIMIC', id: 'mimic' },
400
  { key: '10K', id: '10k' },
401
  { key: 'GLOBEM', id: 'globem' }
402
  ];
403
 
404
- // Colors matching Python script
405
- const PROPRIETARY_COLOR = '#6A0DAD'; // Vivid purple
406
- const OPENSOURCE_COLOR = '#228B22'; // Forest green
407
-
408
  scenarios.forEach(({ key, id }) => {
409
  const rawData = DDR_DATA.ranking[key];
410
  if (!rawData) return;
411
 
412
- // Sort by acc_rank (like Python: df.sort_values(['acc_rank', 'bt_rank']))
413
- const sortedModels = [...rawData].sort((a, b) => {
414
- if (a.acc_rank !== b.acc_rank) return a.acc_rank - b.acc_rank;
415
- return a.bt_rank - b.bt_rank;
416
- });
 
 
417
 
418
- const models = sortedModels; // Use all models (up to 22)
419
- const traces = [];
420
  const topN = models.length;
 
421
 
422
- // Connection lines (dashed black)
423
  models.forEach((m, i) => {
424
- const accRankClipped = Math.min(m.acc_rank, topN + 1);
425
  traces.push({
426
- x: [m.bt_rank, accRankClipped],
427
  y: [i, i],
428
  mode: 'lines',
429
  line: {
430
- color: 'rgba(0, 0, 0, 0.3)',
431
- width: 1,
432
  dash: 'dash'
433
  },
434
  showlegend: false,
@@ -436,41 +438,39 @@ function initRankingCharts() {
436
  });
437
  });
438
 
439
- // Accuracy rank points (hollow diamonds) - drawn first (lower z)
440
- const accColors = models.map(m => m.is_proprietary ? PROPRIETARY_COLOR : OPENSOURCE_COLOR);
441
  traces.push({
442
- x: models.map(m => m.acc_rank),
443
  y: models.map((_, i) => i),
444
  mode: 'markers',
445
- name: 'Accuracy Rank',
446
  marker: {
447
- size: 12,
448
- symbol: 'diamond-open',
449
- color: accColors,
450
- line: { width: 2 }
451
  },
452
- text: models.map(m => `<b>${getDisplayName(m.model)}</b><br>Accuracy Rank: #${m.acc_rank}<br>Accuracy: ${m.accuracy}%`),
453
  hovertemplate: '%{text}<extra></extra>'
454
  });
455
 
456
- // Novelty rank points (filled circles) - drawn on top
457
- const noveltyColors = models.map(m => m.is_proprietary ? PROPRIETARY_COLOR : OPENSOURCE_COLOR);
458
  traces.push({
459
- x: models.map(m => m.bt_rank),
460
  y: models.map((_, i) => i),
461
  mode: 'markers',
462
- name: 'Novelty Rank',
463
  marker: {
464
- size: 10,
465
- symbol: 'circle',
466
- color: noveltyColors,
467
- line: { color: '#000', width: 1 }
468
  },
469
- text: models.map(m => `<b>${getDisplayName(m.model)}</b><br>Novelty Rank: #${m.bt_rank}<br>Win Rate: ${m.win_rate}%`),
470
  hovertemplate: '%{text}<extra></extra>'
471
  });
472
 
473
- // Calculate Spearman correlation
474
  const btRanks = models.map(m => m.bt_rank);
475
  const accRanks = models.map(m => m.acc_rank);
476
  const n = btRanks.length;
@@ -484,12 +484,14 @@ function initRankingCharts() {
484
  }
485
  const rho = num / Math.sqrt(denBt * denAcc);
486
 
 
 
487
  const layout = {
488
  ...darkLayout,
489
  xaxis: {
490
  ...darkLayout.xaxis,
491
  title: { text: 'Rank', font: { size: 10, color: '#e2e8f0' } },
492
- range: [topN + 0.5, 0.5], // Inverted: high ranks left, 1 on right
493
  dtick: 2,
494
  tick0: 2
495
  },
@@ -498,29 +500,64 @@ function initRankingCharts() {
498
  tickmode: 'array',
499
  tickvals: models.map((_, i) => i),
500
  ticktext: models.map(m => getDisplayName(m.model)),
501
- tickfont: { size: 8, color: '#94a3b8' },
502
  automargin: true,
503
  range: [-0.5, models.length - 0.5]
504
  },
505
  showlegend: false,
506
- annotations: [{
507
- x: 0.02,
508
- y: 0.98,
509
- xref: 'paper',
510
- yref: 'paper',
511
- text: `ρ = ${rho.toFixed(2)}`,
512
- showarrow: false,
513
- font: { size: 11, color: '#94a3b8', family: 'Inter' },
514
- bgcolor: 'rgba(30, 41, 59, 0.8)',
515
- borderpad: 4
516
- }],
517
- margin: { t: 15, r: 10, b: 40, l: 110 }
 
 
 
 
 
 
 
 
 
 
 
 
 
518
  };
519
 
520
- Plotly.newPlot(`ranking-${id}`, traces, layout, plotlyConfig);
 
 
 
 
521
  });
522
  }
523
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
524
  // ============================================================================
525
  // TURN DISTRIBUTION - 3 Charts (Ridgeline style)
526
  // ============================================================================
 
362
  });
363
 
364
  // ============================================================================
365
+ // RANKING COMPARISON - With animated mode switching
366
  // ============================================================================
367
  const RANKING_DISPLAY_NAMES = {
368
  'run_api_deepseek_deepseek-chat': 'DeepSeek-V3.2',
 
390
  'gemini3-flash': 'Gemini3-Flash',
391
  };
392
 
393
+ const PROPRIETARY_COLOR = '#6A0DAD'; // Vivid purple
394
+ const OPENSOURCE_COLOR = '#228B22'; // Forest green
395
+
396
  function getDisplayName(model) {
397
  return RANKING_DISPLAY_NAMES[model] || model;
398
  }
399
 
400
+ let currentRankingMode = 'novelty';
401
+
402
+ function renderRankingCharts(mode, animate = false) {
403
  const scenarios = [
404
  { key: 'MIMIC', id: 'mimic' },
405
  { key: '10K', id: '10k' },
406
  { key: 'GLOBEM', id: 'globem' }
407
  ];
408
 
 
 
 
 
409
  scenarios.forEach(({ key, id }) => {
410
  const rawData = DDR_DATA.ranking[key];
411
  if (!rawData) return;
412
 
413
+ // Sort by primary ranking based on mode
414
+ let sortedModels;
415
+ if (mode === 'novelty') {
416
+ sortedModels = [...rawData].sort((a, b) => a.bt_rank - b.bt_rank);
417
+ } else {
418
+ sortedModels = [...rawData].sort((a, b) => a.acc_rank - b.acc_rank);
419
+ }
420
 
421
+ const models = sortedModels;
 
422
  const topN = models.length;
423
+ const traces = [];
424
 
425
+ // Connection lines (dashed)
426
  models.forEach((m, i) => {
 
427
  traces.push({
428
+ x: [m.bt_rank, m.acc_rank],
429
  y: [i, i],
430
  mode: 'lines',
431
  line: {
432
+ color: 'rgba(148, 163, 184, 0.4)',
433
+ width: 1.5,
434
  dash: 'dash'
435
  },
436
  showlegend: false,
 
438
  });
439
  });
440
 
441
+ // Novelty rank points (filled circles)
 
442
  traces.push({
443
+ x: models.map(m => m.bt_rank),
444
  y: models.map((_, i) => i),
445
  mode: 'markers',
446
+ name: 'Novelty Rank',
447
  marker: {
448
+ size: mode === 'novelty' ? 12 : 10,
449
+ symbol: 'circle',
450
+ color: models.map(m => m.is_proprietary ? PROPRIETARY_COLOR : OPENSOURCE_COLOR),
451
+ line: { color: '#fff', width: 1.5 }
452
  },
453
+ text: models.map(m => `<b>${getDisplayName(m.model)}</b><br>Novelty: #${m.bt_rank}<br>Win Rate: ${m.win_rate}%`),
454
  hovertemplate: '%{text}<extra></extra>'
455
  });
456
 
457
+ // Accuracy rank points (hollow diamonds)
 
458
  traces.push({
459
+ x: models.map(m => m.acc_rank),
460
  y: models.map((_, i) => i),
461
  mode: 'markers',
462
+ name: 'Accuracy Rank',
463
  marker: {
464
+ size: mode === 'accuracy' ? 12 : 10,
465
+ symbol: 'diamond-open',
466
+ color: models.map(m => m.is_proprietary ? PROPRIETARY_COLOR : OPENSOURCE_COLOR),
467
+ line: { width: 2 }
468
  },
469
+ text: models.map(m => `<b>${getDisplayName(m.model)}</b><br>Accuracy: #${m.acc_rank}<br>${m.accuracy}%`),
470
  hovertemplate: '%{text}<extra></extra>'
471
  });
472
 
473
+ // Calculate correlation
474
  const btRanks = models.map(m => m.bt_rank);
475
  const accRanks = models.map(m => m.acc_rank);
476
  const n = btRanks.length;
 
484
  }
485
  const rho = num / Math.sqrt(denBt * denAcc);
486
 
487
+ const sortLabel = mode === 'novelty' ? 'Sorted by Novelty' : 'Sorted by Accuracy';
488
+
489
  const layout = {
490
  ...darkLayout,
491
  xaxis: {
492
  ...darkLayout.xaxis,
493
  title: { text: 'Rank', font: { size: 10, color: '#e2e8f0' } },
494
+ range: [topN + 0.5, 0.5],
495
  dtick: 2,
496
  tick0: 2
497
  },
 
500
  tickmode: 'array',
501
  tickvals: models.map((_, i) => i),
502
  ticktext: models.map(m => getDisplayName(m.model)),
503
+ tickfont: { size: 9, color: '#94a3b8' },
504
  automargin: true,
505
  range: [-0.5, models.length - 0.5]
506
  },
507
  showlegend: false,
508
+ annotations: [
509
+ {
510
+ x: 0.02,
511
+ y: 0.98,
512
+ xref: 'paper',
513
+ yref: 'paper',
514
+ text: `ρ = ${rho.toFixed(2)}`,
515
+ showarrow: false,
516
+ font: { size: 11, color: '#94a3b8', family: 'Inter' },
517
+ bgcolor: 'rgba(30, 41, 59, 0.8)',
518
+ borderpad: 4
519
+ },
520
+ {
521
+ x: 0.98,
522
+ y: 0.98,
523
+ xref: 'paper',
524
+ yref: 'paper',
525
+ text: sortLabel,
526
+ showarrow: false,
527
+ font: { size: 10, color: mode === 'novelty' ? PROPRIETARY_COLOR : OPENSOURCE_COLOR, family: 'Inter' },
528
+ bgcolor: 'rgba(30, 41, 59, 0.8)',
529
+ borderpad: 4
530
+ }
531
+ ],
532
+ margin: { t: 15, r: 15, b: 40, l: 120 }
533
  };
534
 
535
+ if (animate) {
536
+ Plotly.react(`ranking-${id}`, traces, layout, plotlyConfig);
537
+ } else {
538
+ Plotly.newPlot(`ranking-${id}`, traces, layout, plotlyConfig);
539
+ }
540
  });
541
  }
542
 
543
+ function initRankingCharts() {
544
+ renderRankingCharts('novelty', false);
545
+ }
546
+
547
+ // Ranking mode toggle event listener
548
+ document.querySelectorAll('.ranking-dim').forEach(btn => {
549
+ btn.addEventListener('click', () => {
550
+ const mode = btn.dataset.mode;
551
+ if (mode === currentRankingMode) return;
552
+
553
+ document.querySelectorAll('.ranking-dim').forEach(b => b.classList.remove('active'));
554
+ btn.classList.add('active');
555
+
556
+ currentRankingMode = mode;
557
+ renderRankingCharts(mode, true);
558
+ });
559
+ });
560
+
561
  // ============================================================================
562
  // TURN DISTRIBUTION - 3 Charts (Ridgeline style)
563
  // ============================================================================
index.html CHANGED
@@ -75,10 +75,15 @@
75
  <section id="ranking" class="section visible">
76
  <div class="section-header">
77
  <h2>🏆 Ranking Comparison</h2>
78
- <p>Novelty (Bradley-Terry pairwise) vs Accuracy ranking. ● = Novelty Rank, ◇ = Accuracy Rank. Purple =
79
- Proprietary, Green = Open-source.</p>
 
 
 
 
80
  </div>
81
  <div class="charts-grid three-col">
 
82
  <div class="chart-card">
83
  <h3>MIMIC</h3>
84
  <div id="ranking-mimic" class="chart-container-tall"></div>
 
75
  <section id="ranking" class="section visible">
76
  <div class="section-header">
77
  <h2>🏆 Ranking Comparison</h2>
78
+ <p>Novelty (Bradley-Terry) vs Accuracy ranking. ● = Novelty, ◇ = Accuracy. Purple = Proprietary, Green =
79
+ Open-source.</p>
80
+ </div>
81
+ <div class="dimension-toggle">
82
+ <button class="dim-btn ranking-dim active" data-mode="novelty">🎯 Sort by Novelty</button>
83
+ <button class="dim-btn ranking-dim" data-mode="accuracy">📊 Sort by Accuracy</button>
84
  </div>
85
  <div class="charts-grid three-col">
86
+
87
  <div class="chart-card">
88
  <h3>MIMIC</h3>
89
  <div id="ranking-mimic" class="chart-container-tall"></div>
styles.css CHANGED
@@ -131,9 +131,9 @@ body {
131
 
132
  /* Main Content */
133
  .content {
134
- max-width: 1400px;
135
  margin: 0 auto;
136
- padding: 1rem 1.5rem;
137
  }
138
 
139
  /* Sections - all visible */
@@ -202,7 +202,7 @@ body {
202
  /* Charts Grid */
203
  .charts-grid {
204
  display: grid;
205
- gap: 0.75rem;
206
  }
207
 
208
  .charts-grid.three-col {
 
131
 
132
  /* Main Content */
133
  .content {
134
+ max-width: 1800px;
135
  margin: 0 auto;
136
+ padding: 1rem 2rem;
137
  }
138
 
139
  /* Sections - all visible */
 
202
  /* Charts Grid */
203
  .charts-grid {
204
  display: grid;
205
+ gap: 1.25rem;
206
  }
207
 
208
  .charts-grid.three-col {