thinkwee committed on
Commit
9200a73
·
1 Parent(s): 9c2d624

fix display

Browse files
Files changed (2) hide show
  1. charts.js +100 -85
  2. index.html +6 -8
charts.js CHANGED
@@ -362,150 +362,165 @@ document.querySelectorAll('.dim-btn:not(.probing-dim)').forEach(btn => {
362
  });
363
 
364
  // ============================================================================
365
- // RANKING COMPARISON - 3 Charts with mode switching (novelty vs accuracy)
366
  // ============================================================================
367
- let currentRankingMode = 'novelty';
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
 
369
- function renderRankingCharts(mode) {
370
  const scenarios = [
371
  { key: 'MIMIC', id: 'mimic' },
372
  { key: '10K', id: '10k' },
373
  { key: 'GLOBEM', id: 'globem' }
374
  ];
375
 
 
 
 
 
376
  scenarios.forEach(({ key, id }) => {
377
  const rawData = DDR_DATA.ranking[key];
378
  if (!rawData) return;
379
 
380
- // Sort models by the primary ranking
381
- let sortedModels;
382
- if (mode === 'novelty') {
383
- sortedModels = [...rawData].sort((a, b) => a.bt_rank - b.bt_rank);
384
- } else {
385
- sortedModels = [...rawData].sort((a, b) => a.acc_rank - b.acc_rank);
386
- }
387
 
388
- // Take top 12 for display
389
- const models = sortedModels.slice(0, 12);
390
  const traces = [];
 
391
 
392
- // Define colors
393
- const primaryColor = mode === 'novelty' ? '#8B5CF6' : '#22C55E';
394
- const secondaryColor = mode === 'novelty' ? '#22C55E' : '#8B5CF6';
395
- const primaryLabel = mode === 'novelty' ? 'Novelty Rank' : 'Accuracy Rank';
396
- const secondaryLabel = mode === 'novelty' ? 'Accuracy Rank' : 'Novelty Rank';
397
-
398
- // Connection lines (dashed) from primary to secondary
399
  models.forEach((m, i) => {
400
- const primaryX = mode === 'novelty' ? m.bt_rank : m.acc_rank;
401
- const secondaryX = mode === 'novelty' ? m.acc_rank : m.bt_rank;
402
-
403
  traces.push({
404
- x: [primaryX, secondaryX],
405
  y: [i, i],
406
  mode: 'lines',
407
  line: {
408
- color: 'rgba(148, 163, 184, 0.4)',
409
- width: 1.5,
410
- dash: 'dot'
411
  },
412
  showlegend: false,
413
  hoverinfo: 'skip'
414
  });
415
  });
416
 
417
- // Primary rank points (filled circles)
 
418
  traces.push({
419
- x: models.map(m => mode === 'novelty' ? m.bt_rank : m.acc_rank),
420
  y: models.map((_, i) => i),
421
  mode: 'markers',
422
- name: primaryLabel,
423
  marker: {
424
- size: 11,
425
- symbol: 'circle',
426
- color: primaryColor,
427
- line: { color: '#fff', width: 1.5 }
428
  },
429
- text: models.map(m => {
430
- if (mode === 'novelty') {
431
- return `<b>${m.model}</b><br>Novelty: #${m.bt_rank}<br>Win Rate: ${m.win_rate}%`;
432
- } else {
433
- return `<b>${m.model}</b><br>Accuracy: #${m.acc_rank}<br>${m.accuracy}%`;
434
- }
435
- }),
436
  hovertemplate: '%{text}<extra></extra>'
437
  });
438
 
439
- // Secondary rank points (diamond outline)
 
440
  traces.push({
441
- x: models.map(m => mode === 'novelty' ? m.acc_rank : m.bt_rank),
442
  y: models.map((_, i) => i),
443
  mode: 'markers',
444
- name: secondaryLabel,
445
  marker: {
446
- size: 9,
447
- symbol: 'diamond-open',
448
- color: secondaryColor,
449
- line: { width: 2 }
450
  },
451
- text: models.map(m => {
452
- if (mode === 'novelty') {
453
- return `<b>${m.model}</b><br>Accuracy: #${m.acc_rank}<br>${m.accuracy}%`;
454
- } else {
455
- return `<b>${m.model}</b><br>Novelty: #${m.bt_rank}<br>Win Rate: ${m.win_rate}%`;
456
- }
457
- }),
458
  hovertemplate: '%{text}<extra></extra>'
459
  });
460
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
461
  const layout = {
462
  ...darkLayout,
463
  xaxis: {
464
  ...darkLayout.xaxis,
465
- title: { text: 'Rank', font: { size: 11, color: '#e2e8f0' } },
466
- range: [23, 0], // Fixed range for all charts
467
- dtick: 5,
468
- tick0: 0
469
  },
470
  yaxis: {
471
  ...darkLayout.yaxis,
472
  tickmode: 'array',
473
  tickvals: models.map((_, i) => i),
474
- ticktext: models.map(m => m.model.length > 16 ? m.model.substring(0, 14) + '...' : m.model),
 
475
  automargin: true,
476
  range: [-0.5, models.length - 0.5]
477
  },
478
- showlegend: true,
479
- legend: {
480
- ...darkLayout.legend,
481
- y: -0.18,
482
- orientation: 'h',
483
- x: 0.5,
484
- xanchor: 'center'
485
- },
486
- margin: { t: 20, r: 15, b: 65, l: 120 }
 
 
 
 
487
  };
488
 
489
- Plotly.react(`ranking-${id}`, traces, layout, plotlyConfig);
490
  });
491
  }
492
 
493
- function initRankingCharts() {
494
- renderRankingCharts('novelty');
495
- }
496
-
497
- // Ranking mode toggle event listener
498
- document.querySelectorAll('.ranking-dim').forEach(btn => {
499
- btn.addEventListener('click', () => {
500
- document.querySelectorAll('.ranking-dim').forEach(b => b.classList.remove('active'));
501
- btn.classList.add('active');
502
-
503
- const mode = btn.dataset.mode;
504
- currentRankingMode = mode;
505
- renderRankingCharts(mode);
506
- });
507
- });
508
-
509
  // ============================================================================
510
  // TURN DISTRIBUTION - 3 Charts (Ridgeline style)
511
  // ============================================================================
 
362
  });
363
 
364
  // ============================================================================
365
+ // RANKING COMPARISON - Matches Python create_rank_figure.py exactly
366
  // ============================================================================
367
+ const RANKING_DISPLAY_NAMES = {
368
+ 'run_api_deepseek_deepseek-chat': 'DeepSeek-V3.2',
369
+ 'qwen3-next-80b-a3b-instruct': 'Qwen3-Next-80BA3B',
370
+ 'qwen2.5-14B-Instruct-1M': 'Qwen2.5-14B-1M',
371
+ 'qwen2.5-7B-Instruct-1M': 'Qwen2.5-7B-1M',
372
+ 'qwen2.5-14B-Instruct': 'Qwen2.5-14B',
373
+ 'qwen2.5-7B-Instruct': 'Qwen2.5-7B',
374
+ 'qwen2.5-72B-Instruct': 'Qwen2.5-72B',
375
+ 'qwen2.5-32b-instruct': 'Qwen2.5-32B',
376
+ 'qwen3-4B-Instruct-2507': 'Qwen3-4B',
377
+ 'gemini2.5-flash-lite': 'Gemini2.5-Flash-Lite',
378
+ 'gemini2.5-flash': 'Gemini2.5-Flash',
379
+ 'gemini2.5-pro': 'Gemini2.5-Pro',
380
+ 'claude4.5-sonnet': 'Claude4.5-Sonnet',
381
+ 'llama3.3-70B': 'Llama3.3-70B',
382
+ 'minimax-m2': 'MiniMax-M2',
383
+ 'gpt5mini': 'GPT-5-mini',
384
+ 'gpt5-mini': 'GPT-5-mini',
385
+ 'gpt5.1': 'GPT-5.1',
386
+ 'gpt5.2': 'GPT-5.2',
387
+ 'kimi-k2': 'Kimi-K2',
388
+ 'glm4.6': 'GLM-4.6',
389
+ 'qwen3': 'Qwen3-30B-A3B',
390
+ 'gemini3-flash': 'Gemini3-Flash',
391
+ };
392
+
393
+ function getDisplayName(model) {
394
+ return RANKING_DISPLAY_NAMES[model] || model;
395
+ }
396
 
397
+ function initRankingCharts() {
398
  const scenarios = [
399
  { key: 'MIMIC', id: 'mimic' },
400
  { key: '10K', id: '10k' },
401
  { key: 'GLOBEM', id: 'globem' }
402
  ];
403
 
404
+ // Colors matching Python script
405
+ const PROPRIETARY_COLOR = '#6A0DAD'; // Vivid purple
406
+ const OPENSOURCE_COLOR = '#228B22'; // Forest green
407
+
408
  scenarios.forEach(({ key, id }) => {
409
  const rawData = DDR_DATA.ranking[key];
410
  if (!rawData) return;
411
 
412
+ // Sort by acc_rank (like Python: df.sort_values(['acc_rank', 'bt_rank']))
413
+ const sortedModels = [...rawData].sort((a, b) => {
414
+ if (a.acc_rank !== b.acc_rank) return a.acc_rank - b.acc_rank;
415
+ return a.bt_rank - b.bt_rank;
416
+ });
 
 
417
 
418
+ const models = sortedModels; // Use all models (up to 22)
 
419
  const traces = [];
420
+ const topN = models.length;
421
 
422
+ // Connection lines (dashed black)
 
 
 
 
 
 
423
  models.forEach((m, i) => {
424
+ const accRankClipped = Math.min(m.acc_rank, topN + 1);
 
 
425
  traces.push({
426
+ x: [m.bt_rank, accRankClipped],
427
  y: [i, i],
428
  mode: 'lines',
429
  line: {
430
+ color: 'rgba(0, 0, 0, 0.3)',
431
+ width: 1,
432
+ dash: 'dash'
433
  },
434
  showlegend: false,
435
  hoverinfo: 'skip'
436
  });
437
  });
438
 
439
+ // Accuracy rank points (hollow diamonds) - drawn first (lower z)
440
+ const accColors = models.map(m => m.is_proprietary ? PROPRIETARY_COLOR : OPENSOURCE_COLOR);
441
  traces.push({
442
+ x: models.map(m => m.acc_rank),
443
  y: models.map((_, i) => i),
444
  mode: 'markers',
445
+ name: 'Accuracy Rank',
446
  marker: {
447
+ size: 12,
448
+ symbol: 'diamond-open',
449
+ color: accColors,
450
+ line: { width: 2 }
451
  },
452
+ text: models.map(m => `<b>${getDisplayName(m.model)}</b><br>Accuracy Rank: #${m.acc_rank}<br>Accuracy: ${m.accuracy}%`),
 
 
 
 
 
 
453
  hovertemplate: '%{text}<extra></extra>'
454
  });
455
 
456
+ // Novelty rank points (filled circles) - drawn on top
457
+ const noveltyColors = models.map(m => m.is_proprietary ? PROPRIETARY_COLOR : OPENSOURCE_COLOR);
458
  traces.push({
459
+ x: models.map(m => m.bt_rank),
460
  y: models.map((_, i) => i),
461
  mode: 'markers',
462
+ name: 'Novelty Rank',
463
  marker: {
464
+ size: 10,
465
+ symbol: 'circle',
466
+ color: noveltyColors,
467
+ line: { color: '#000', width: 1 }
468
  },
469
+ text: models.map(m => `<b>${getDisplayName(m.model)}</b><br>Novelty Rank: #${m.bt_rank}<br>Win Rate: ${m.win_rate}%`),
 
 
 
 
 
 
470
  hovertemplate: '%{text}<extra></extra>'
471
  });
472
 
473
+ // Calculate Spearman correlation
474
+ const btRanks = models.map(m => m.bt_rank);
475
+ const accRanks = models.map(m => m.acc_rank);
476
+ const n = btRanks.length;
477
+ const meanBt = btRanks.reduce((a, b) => a + b, 0) / n;
478
+ const meanAcc = accRanks.reduce((a, b) => a + b, 0) / n;
479
+ let num = 0, denBt = 0, denAcc = 0;
480
+ for (let i = 0; i < n; i++) {
481
+ num += (btRanks[i] - meanBt) * (accRanks[i] - meanAcc);
482
+ denBt += (btRanks[i] - meanBt) ** 2;
483
+ denAcc += (accRanks[i] - meanAcc) ** 2;
484
+ }
485
+ const rho = num / Math.sqrt(denBt * denAcc);
486
+
487
  const layout = {
488
  ...darkLayout,
489
  xaxis: {
490
  ...darkLayout.xaxis,
491
+ title: { text: 'Rank', font: { size: 10, color: '#e2e8f0' } },
492
+ range: [topN + 0.5, 0.5], // Inverted: high ranks left, 1 on right
493
+ dtick: 2,
494
+ tick0: 2
495
  },
496
  yaxis: {
497
  ...darkLayout.yaxis,
498
  tickmode: 'array',
499
  tickvals: models.map((_, i) => i),
500
+ ticktext: models.map(m => getDisplayName(m.model)),
501
+ tickfont: { size: 8, color: '#94a3b8' },
502
  automargin: true,
503
  range: [-0.5, models.length - 0.5]
504
  },
505
+ showlegend: false,
506
+ annotations: [{
507
+ x: 0.02,
508
+ y: 0.98,
509
+ xref: 'paper',
510
+ yref: 'paper',
511
+ text: `ρ = ${rho.toFixed(2)}`,
512
+ showarrow: false,
513
+ font: { size: 11, color: '#94a3b8', family: 'Inter' },
514
+ bgcolor: 'rgba(30, 41, 59, 0.8)',
515
+ borderpad: 4
516
+ }],
517
+ margin: { t: 15, r: 10, b: 40, l: 110 }
518
  };
519
 
520
+ Plotly.newPlot(`ranking-${id}`, traces, layout, plotlyConfig);
521
  });
522
  }
523
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
524
  // ============================================================================
525
  // TURN DISTRIBUTION - 3 Charts (Ridgeline style)
526
  // ============================================================================
index.html CHANGED
@@ -75,24 +75,22 @@
75
  <section id="ranking" class="section visible">
76
  <div class="section-header">
77
  <h2>🏆 Ranking Comparison</h2>
78
- <p>Compare model rankings based on Bradley-Terry pairwise ranking against accuracy ranking.</p>
79
- </div>
80
- <div class="dimension-toggle">
81
- <button class="dim-btn ranking-dim active" data-mode="novelty">🎯 Novelty Rank</button>
82
- <button class="dim-btn ranking-dim" data-mode="accuracy">📊 Accuracy Rank</button>
83
  </div>
84
  <div class="charts-grid three-col">
85
  <div class="chart-card">
86
  <h3>MIMIC</h3>
87
- <div id="ranking-mimic" class="chart-container"></div>
88
  </div>
89
  <div class="chart-card">
90
  <h3>10-K</h3>
91
- <div id="ranking-10k" class="chart-container"></div>
92
  </div>
93
  <div class="chart-card">
94
  <h3>GLOBEM</h3>
95
- <div id="ranking-globem" class="chart-container"></div>
 
96
  </div>
97
  </div>
98
  </section>
 
75
  <section id="ranking" class="section visible">
76
  <div class="section-header">
77
  <h2>🏆 Ranking Comparison</h2>
78
+ <p>Novelty (Bradley-Terry pairwise) vs Accuracy ranking. ● = Novelty Rank, ◇ = Accuracy Rank. Purple =
79
+ Proprietary, Green = Open-source.</p>
 
 
 
80
  </div>
81
  <div class="charts-grid three-col">
82
  <div class="chart-card">
83
  <h3>MIMIC</h3>
84
+ <div id="ranking-mimic" class="chart-container-tall"></div>
85
  </div>
86
  <div class="chart-card">
87
  <h3>10-K</h3>
88
+ <div id="ranking-10k" class="chart-container-tall"></div>
89
  </div>
90
  <div class="chart-card">
91
  <h3>GLOBEM</h3>
92
+ <div id="ranking-globem" class="chart-container-tall"></div>
93
+
94
  </div>
95
  </div>
96
  </section>