thinkwee committed on
Commit ·
655137d
1
Parent(s): 2dcac27
wider layout, ranking toggle animation
Browse files- charts.js +89 -52
- index.html +7 -2
- styles.css +3 -3
charts.js
CHANGED
|
@@ -362,7 +362,7 @@ document.querySelectorAll('.dim-btn:not(.probing-dim)').forEach(btn => {
|
|
| 362 |
});
|
| 363 |
|
| 364 |
// ============================================================================
|
| 365 |
-
// RANKING COMPARISON -
|
| 366 |
// ============================================================================
|
| 367 |
const RANKING_DISPLAY_NAMES = {
|
| 368 |
'run_api_deepseek_deepseek-chat': 'DeepSeek-V3.2',
|
|
@@ -390,45 +390,47 @@ const RANKING_DISPLAY_NAMES = {
|
|
| 390 |
'gemini3-flash': 'Gemini3-Flash',
|
| 391 |
};
|
| 392 |
|
|
|
|
|
|
|
|
|
|
| 393 |
function getDisplayName(model) {
|
| 394 |
return RANKING_DISPLAY_NAMES[model] || model;
|
| 395 |
}
|
| 396 |
|
| 397 |
-
|
|
|
|
|
|
|
| 398 |
const scenarios = [
|
| 399 |
{ key: 'MIMIC', id: 'mimic' },
|
| 400 |
{ key: '10K', id: '10k' },
|
| 401 |
{ key: 'GLOBEM', id: 'globem' }
|
| 402 |
];
|
| 403 |
|
| 404 |
-
// Colors matching Python script
|
| 405 |
-
const PROPRIETARY_COLOR = '#6A0DAD'; // Vivid purple
|
| 406 |
-
const OPENSOURCE_COLOR = '#228B22'; // Forest green
|
| 407 |
-
|
| 408 |
scenarios.forEach(({ key, id }) => {
|
| 409 |
const rawData = DDR_DATA.ranking[key];
|
| 410 |
if (!rawData) return;
|
| 411 |
|
| 412 |
-
// Sort by
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
}
|
|
|
|
|
|
|
| 417 |
|
| 418 |
-
const models = sortedModels;
|
| 419 |
-
const traces = [];
|
| 420 |
const topN = models.length;
|
|
|
|
| 421 |
|
| 422 |
-
// Connection lines (dashed
|
| 423 |
models.forEach((m, i) => {
|
| 424 |
-
const accRankClipped = Math.min(m.acc_rank, topN + 1);
|
| 425 |
traces.push({
|
| 426 |
-
x: [m.bt_rank,
|
| 427 |
y: [i, i],
|
| 428 |
mode: 'lines',
|
| 429 |
line: {
|
| 430 |
-
color: 'rgba(
|
| 431 |
-
width: 1,
|
| 432 |
dash: 'dash'
|
| 433 |
},
|
| 434 |
showlegend: false,
|
|
@@ -436,41 +438,39 @@ function initRankingCharts() {
|
|
| 436 |
});
|
| 437 |
});
|
| 438 |
|
| 439 |
-
//
|
| 440 |
-
const accColors = models.map(m => m.is_proprietary ? PROPRIETARY_COLOR : OPENSOURCE_COLOR);
|
| 441 |
traces.push({
|
| 442 |
-
x: models.map(m => m.
|
| 443 |
y: models.map((_, i) => i),
|
| 444 |
mode: 'markers',
|
| 445 |
-
name: '
|
| 446 |
marker: {
|
| 447 |
-
size: 12,
|
| 448 |
-
symbol: '
|
| 449 |
-
color:
|
| 450 |
-
line: { width:
|
| 451 |
},
|
| 452 |
-
text: models.map(m => `<b>${getDisplayName(m.model)}</b><br>
|
| 453 |
hovertemplate: '%{text}<extra></extra>'
|
| 454 |
});
|
| 455 |
|
| 456 |
-
//
|
| 457 |
-
const noveltyColors = models.map(m => m.is_proprietary ? PROPRIETARY_COLOR : OPENSOURCE_COLOR);
|
| 458 |
traces.push({
|
| 459 |
-
x: models.map(m => m.
|
| 460 |
y: models.map((_, i) => i),
|
| 461 |
mode: 'markers',
|
| 462 |
-
name: '
|
| 463 |
marker: {
|
| 464 |
-
size: 10,
|
| 465 |
-
symbol: '
|
| 466 |
-
color:
|
| 467 |
-
line: {
|
| 468 |
},
|
| 469 |
-
text: models.map(m => `<b>${getDisplayName(m.model)}</b><br>
|
| 470 |
hovertemplate: '%{text}<extra></extra>'
|
| 471 |
});
|
| 472 |
|
| 473 |
-
// Calculate
|
| 474 |
const btRanks = models.map(m => m.bt_rank);
|
| 475 |
const accRanks = models.map(m => m.acc_rank);
|
| 476 |
const n = btRanks.length;
|
|
@@ -484,12 +484,14 @@ function initRankingCharts() {
|
|
| 484 |
}
|
| 485 |
const rho = num / Math.sqrt(denBt * denAcc);
|
| 486 |
|
|
|
|
|
|
|
| 487 |
const layout = {
|
| 488 |
...darkLayout,
|
| 489 |
xaxis: {
|
| 490 |
...darkLayout.xaxis,
|
| 491 |
title: { text: 'Rank', font: { size: 10, color: '#e2e8f0' } },
|
| 492 |
-
range: [topN + 0.5, 0.5],
|
| 493 |
dtick: 2,
|
| 494 |
tick0: 2
|
| 495 |
},
|
|
@@ -498,29 +500,64 @@ function initRankingCharts() {
|
|
| 498 |
tickmode: 'array',
|
| 499 |
tickvals: models.map((_, i) => i),
|
| 500 |
ticktext: models.map(m => getDisplayName(m.model)),
|
| 501 |
-
tickfont: { size:
|
| 502 |
automargin: true,
|
| 503 |
range: [-0.5, models.length - 0.5]
|
| 504 |
},
|
| 505 |
showlegend: false,
|
| 506 |
-
annotations: [
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 518 |
};
|
| 519 |
|
| 520 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 521 |
});
|
| 522 |
}
|
| 523 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 524 |
// ============================================================================
|
| 525 |
// TURN DISTRIBUTION - 3 Charts (Ridgeline style)
|
| 526 |
// ============================================================================
|
|
|
|
| 362 |
});
|
| 363 |
|
| 364 |
// ============================================================================
|
| 365 |
+
// RANKING COMPARISON - With animated mode switching
|
| 366 |
// ============================================================================
|
| 367 |
const RANKING_DISPLAY_NAMES = {
|
| 368 |
'run_api_deepseek_deepseek-chat': 'DeepSeek-V3.2',
|
|
|
|
| 390 |
'gemini3-flash': 'Gemini3-Flash',
|
| 391 |
};
|
| 392 |
|
| 393 |
+
const PROPRIETARY_COLOR = '#6A0DAD'; // Vivid purple
|
| 394 |
+
const OPENSOURCE_COLOR = '#228B22'; // Forest green
|
| 395 |
+
|
| 396 |
function getDisplayName(model) {
|
| 397 |
return RANKING_DISPLAY_NAMES[model] || model;
|
| 398 |
}
|
| 399 |
|
| 400 |
+
let currentRankingMode = 'novelty';
|
| 401 |
+
|
| 402 |
+
function renderRankingCharts(mode, animate = false) {
|
| 403 |
const scenarios = [
|
| 404 |
{ key: 'MIMIC', id: 'mimic' },
|
| 405 |
{ key: '10K', id: '10k' },
|
| 406 |
{ key: 'GLOBEM', id: 'globem' }
|
| 407 |
];
|
| 408 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
scenarios.forEach(({ key, id }) => {
|
| 410 |
const rawData = DDR_DATA.ranking[key];
|
| 411 |
if (!rawData) return;
|
| 412 |
|
| 413 |
+
// Sort by primary ranking based on mode
|
| 414 |
+
let sortedModels;
|
| 415 |
+
if (mode === 'novelty') {
|
| 416 |
+
sortedModels = [...rawData].sort((a, b) => a.bt_rank - b.bt_rank);
|
| 417 |
+
} else {
|
| 418 |
+
sortedModels = [...rawData].sort((a, b) => a.acc_rank - b.acc_rank);
|
| 419 |
+
}
|
| 420 |
|
| 421 |
+
const models = sortedModels;
|
|
|
|
| 422 |
const topN = models.length;
|
| 423 |
+
const traces = [];
|
| 424 |
|
| 425 |
+
// Connection lines (dashed)
|
| 426 |
models.forEach((m, i) => {
|
|
|
|
| 427 |
traces.push({
|
| 428 |
+
x: [m.bt_rank, m.acc_rank],
|
| 429 |
y: [i, i],
|
| 430 |
mode: 'lines',
|
| 431 |
line: {
|
| 432 |
+
color: 'rgba(148, 163, 184, 0.4)',
|
| 433 |
+
width: 1.5,
|
| 434 |
dash: 'dash'
|
| 435 |
},
|
| 436 |
showlegend: false,
|
|
|
|
| 438 |
});
|
| 439 |
});
|
| 440 |
|
| 441 |
+
// Novelty rank points (filled circles)
|
|
|
|
| 442 |
traces.push({
|
| 443 |
+
x: models.map(m => m.bt_rank),
|
| 444 |
y: models.map((_, i) => i),
|
| 445 |
mode: 'markers',
|
| 446 |
+
name: 'Novelty Rank',
|
| 447 |
marker: {
|
| 448 |
+
size: mode === 'novelty' ? 12 : 10,
|
| 449 |
+
symbol: 'circle',
|
| 450 |
+
color: models.map(m => m.is_proprietary ? PROPRIETARY_COLOR : OPENSOURCE_COLOR),
|
| 451 |
+
line: { color: '#fff', width: 1.5 }
|
| 452 |
},
|
| 453 |
+
text: models.map(m => `<b>${getDisplayName(m.model)}</b><br>Novelty: #${m.bt_rank}<br>Win Rate: ${m.win_rate}%`),
|
| 454 |
hovertemplate: '%{text}<extra></extra>'
|
| 455 |
});
|
| 456 |
|
| 457 |
+
// Accuracy rank points (hollow diamonds)
|
|
|
|
| 458 |
traces.push({
|
| 459 |
+
x: models.map(m => m.acc_rank),
|
| 460 |
y: models.map((_, i) => i),
|
| 461 |
mode: 'markers',
|
| 462 |
+
name: 'Accuracy Rank',
|
| 463 |
marker: {
|
| 464 |
+
size: mode === 'accuracy' ? 12 : 10,
|
| 465 |
+
symbol: 'diamond-open',
|
| 466 |
+
color: models.map(m => m.is_proprietary ? PROPRIETARY_COLOR : OPENSOURCE_COLOR),
|
| 467 |
+
line: { width: 2 }
|
| 468 |
},
|
| 469 |
+
text: models.map(m => `<b>${getDisplayName(m.model)}</b><br>Accuracy: #${m.acc_rank}<br>${m.accuracy}%`),
|
| 470 |
hovertemplate: '%{text}<extra></extra>'
|
| 471 |
});
|
| 472 |
|
| 473 |
+
// Calculate correlation
|
| 474 |
const btRanks = models.map(m => m.bt_rank);
|
| 475 |
const accRanks = models.map(m => m.acc_rank);
|
| 476 |
const n = btRanks.length;
|
|
|
|
| 484 |
}
|
| 485 |
const rho = num / Math.sqrt(denBt * denAcc);
|
| 486 |
|
| 487 |
+
const sortLabel = mode === 'novelty' ? 'Sorted by Novelty' : 'Sorted by Accuracy';
|
| 488 |
+
|
| 489 |
const layout = {
|
| 490 |
...darkLayout,
|
| 491 |
xaxis: {
|
| 492 |
...darkLayout.xaxis,
|
| 493 |
title: { text: 'Rank', font: { size: 10, color: '#e2e8f0' } },
|
| 494 |
+
range: [topN + 0.5, 0.5],
|
| 495 |
dtick: 2,
|
| 496 |
tick0: 2
|
| 497 |
},
|
|
|
|
| 500 |
tickmode: 'array',
|
| 501 |
tickvals: models.map((_, i) => i),
|
| 502 |
ticktext: models.map(m => getDisplayName(m.model)),
|
| 503 |
+
tickfont: { size: 9, color: '#94a3b8' },
|
| 504 |
automargin: true,
|
| 505 |
range: [-0.5, models.length - 0.5]
|
| 506 |
},
|
| 507 |
showlegend: false,
|
| 508 |
+
annotations: [
|
| 509 |
+
{
|
| 510 |
+
x: 0.02,
|
| 511 |
+
y: 0.98,
|
| 512 |
+
xref: 'paper',
|
| 513 |
+
yref: 'paper',
|
| 514 |
+
text: `ρ = ${rho.toFixed(2)}`,
|
| 515 |
+
showarrow: false,
|
| 516 |
+
font: { size: 11, color: '#94a3b8', family: 'Inter' },
|
| 517 |
+
bgcolor: 'rgba(30, 41, 59, 0.8)',
|
| 518 |
+
borderpad: 4
|
| 519 |
+
},
|
| 520 |
+
{
|
| 521 |
+
x: 0.98,
|
| 522 |
+
y: 0.98,
|
| 523 |
+
xref: 'paper',
|
| 524 |
+
yref: 'paper',
|
| 525 |
+
text: sortLabel,
|
| 526 |
+
showarrow: false,
|
| 527 |
+
font: { size: 10, color: mode === 'novelty' ? PROPRIETARY_COLOR : OPENSOURCE_COLOR, family: 'Inter' },
|
| 528 |
+
bgcolor: 'rgba(30, 41, 59, 0.8)',
|
| 529 |
+
borderpad: 4
|
| 530 |
+
}
|
| 531 |
+
],
|
| 532 |
+
margin: { t: 15, r: 15, b: 40, l: 120 }
|
| 533 |
};
|
| 534 |
|
| 535 |
+
if (animate) {
|
| 536 |
+
Plotly.react(`ranking-${id}`, traces, layout, plotlyConfig);
|
| 537 |
+
} else {
|
| 538 |
+
Plotly.newPlot(`ranking-${id}`, traces, layout, plotlyConfig);
|
| 539 |
+
}
|
| 540 |
});
|
| 541 |
}
|
| 542 |
|
| 543 |
+
function initRankingCharts() {
|
| 544 |
+
renderRankingCharts('novelty', false);
|
| 545 |
+
}
|
| 546 |
+
|
| 547 |
+
// Ranking mode toggle event listener
|
| 548 |
+
document.querySelectorAll('.ranking-dim').forEach(btn => {
|
| 549 |
+
btn.addEventListener('click', () => {
|
| 550 |
+
const mode = btn.dataset.mode;
|
| 551 |
+
if (mode === currentRankingMode) return;
|
| 552 |
+
|
| 553 |
+
document.querySelectorAll('.ranking-dim').forEach(b => b.classList.remove('active'));
|
| 554 |
+
btn.classList.add('active');
|
| 555 |
+
|
| 556 |
+
currentRankingMode = mode;
|
| 557 |
+
renderRankingCharts(mode, true);
|
| 558 |
+
});
|
| 559 |
+
});
|
| 560 |
+
|
| 561 |
// ============================================================================
|
| 562 |
// TURN DISTRIBUTION - 3 Charts (Ridgeline style)
|
| 563 |
// ============================================================================
|
index.html
CHANGED
|
@@ -75,10 +75,15 @@
|
|
| 75 |
<section id="ranking" class="section visible">
|
| 76 |
<div class="section-header">
|
| 77 |
<h2>π Ranking Comparison</h2>
|
| 78 |
-
<p>Novelty (Bradley-Terry
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
</div>
|
| 81 |
<div class="charts-grid three-col">
|
|
|
|
| 82 |
<div class="chart-card">
|
| 83 |
<h3>MIMIC</h3>
|
| 84 |
<div id="ranking-mimic" class="chart-container-tall"></div>
|
|
|
|
| 75 |
<section id="ranking" class="section visible">
|
| 76 |
<div class="section-header">
|
| 77 |
<h2>π Ranking Comparison</h2>
|
| 78 |
+
<p>Novelty (Bradley-Terry) vs Accuracy ranking. ● = Novelty, ◇ = Accuracy. Purple = Proprietary, Green =
|
| 79 |
+
Open-source.</p>
|
| 80 |
+
</div>
|
| 81 |
+
<div class="dimension-toggle">
|
| 82 |
+
<button class="dim-btn ranking-dim active" data-mode="novelty">🎯 Sort by Novelty</button>
|
| 83 |
+
<button class="dim-btn ranking-dim" data-mode="accuracy">π Sort by Accuracy</button>
|
| 84 |
</div>
|
| 85 |
<div class="charts-grid three-col">
|
| 86 |
+
|
| 87 |
<div class="chart-card">
|
| 88 |
<h3>MIMIC</h3>
|
| 89 |
<div id="ranking-mimic" class="chart-container-tall"></div>
|
styles.css
CHANGED
|
@@ -131,9 +131,9 @@ body {
|
|
| 131 |
|
| 132 |
/* Main Content */
|
| 133 |
.content {
|
| 134 |
-
max-width:
|
| 135 |
margin: 0 auto;
|
| 136 |
-
padding: 1rem
|
| 137 |
}
|
| 138 |
|
| 139 |
/* Sections - all visible */
|
|
@@ -202,7 +202,7 @@ body {
|
|
| 202 |
/* Charts Grid */
|
| 203 |
.charts-grid {
|
| 204 |
display: grid;
|
| 205 |
-
gap:
|
| 206 |
}
|
| 207 |
|
| 208 |
.charts-grid.three-col {
|
|
|
|
| 131 |
|
| 132 |
/* Main Content */
|
| 133 |
.content {
|
| 134 |
+
max-width: 1800px;
|
| 135 |
margin: 0 auto;
|
| 136 |
+
padding: 1rem 2rem;
|
| 137 |
}
|
| 138 |
|
| 139 |
/* Sections - all visible */
|
|
|
|
| 202 |
/* Charts Grid */
|
| 203 |
.charts-grid {
|
| 204 |
display: grid;
|
| 205 |
+
gap: 1.25rem;
|
| 206 |
}
|
| 207 |
|
| 208 |
.charts-grid.three-col {
|