Spaces:

thinkwee
/

DDR_Bench

Running

App Files Files Community

thinkwee committed on Jan 8

Commit

9200a73

1 Parent(s): 9c2d624

fix display

Browse files

Files changed (2) hide show

charts.js +100 -85
index.html +6 -8

charts.js CHANGED Viewed

@@ -362,150 +362,165 @@ document.querySelectorAll('.dim-btn:not(.probing-dim)').forEach(btn => {
 });
 // ============================================================================
-// RANKING COMPARISON - 3 Charts with mode switching (novelty vs accuracy)
 // ============================================================================
-let currentRankingMode = 'novelty';
-function renderRankingCharts(mode) {
     const scenarios = [
         { key: 'MIMIC', id: 'mimic' },
         { key: '10K', id: '10k' },
         { key: 'GLOBEM', id: 'globem' }
     ];
     scenarios.forEach(({ key, id }) => {
         const rawData = DDR_DATA.ranking[key];
         if (!rawData) return;
-        // Sort models by the primary ranking
-        let sortedModels;
-        if (mode === 'novelty') {
-            sortedModels = [...rawData].sort((a, b) => a.bt_rank - b.bt_rank);
-        } else {
-            sortedModels = [...rawData].sort((a, b) => a.acc_rank - b.acc_rank);
-        }
-        // Take top 12 for display
-        const models = sortedModels.slice(0, 12);
         const traces = [];
-        // Define colors
-        const primaryColor = mode === 'novelty' ? '#8B5CF6' : '#22C55E';
-        const secondaryColor = mode === 'novelty' ? '#22C55E' : '#8B5CF6';
-        const primaryLabel = mode === 'novelty' ? 'Novelty Rank' : 'Accuracy Rank';
-        const secondaryLabel = mode === 'novelty' ? 'Accuracy Rank' : 'Novelty Rank';
-        // Connection lines (dashed) from primary to secondary
         models.forEach((m, i) => {
-            const primaryX = mode === 'novelty' ? m.bt_rank : m.acc_rank;
-            const secondaryX = mode === 'novelty' ? m.acc_rank : m.bt_rank;
             traces.push({
-                x: [primaryX, secondaryX],
                 y: [i, i],
                 mode: 'lines',
                 line: {
-                    color: 'rgba(148, 163, 184, 0.4)',
-                    width: 1.5,
-                    dash: 'dot'
                 },
                 showlegend: false,
                 hoverinfo: 'skip'
             });
         });
-        // Primary rank points (filled circles)
         traces.push({
-            x: models.map(m => mode === 'novelty' ? m.bt_rank : m.acc_rank),
             y: models.map((_, i) => i),
             mode: 'markers',
-            name: primaryLabel,
             marker: {
-                size: 11,
-                symbol: 'circle',
-                color: primaryColor,
-                line: { color: '#fff', width: 1.5 }
             },
-            text: models.map(m => {
-                if (mode === 'novelty') {
-                    return `<b>${m.model}</b><br>Novelty: #${m.bt_rank}<br>Win Rate: ${m.win_rate}%`;
-                } else {
-                    return `<b>${m.model}</b><br>Accuracy: #${m.acc_rank}<br>${m.accuracy}%`;
-                }
-            }),
             hovertemplate: '%{text}<extra></extra>'
         });
-        // Secondary rank points (diamond outline)
         traces.push({
-            x: models.map(m => mode === 'novelty' ? m.acc_rank : m.bt_rank),
             y: models.map((_, i) => i),
             mode: 'markers',
-            name: secondaryLabel,
             marker: {
-                size: 9,
-                symbol: 'diamond-open',
-                color: secondaryColor,
-                line: { width: 2 }
             },
-            text: models.map(m => {
-                if (mode === 'novelty') {
-                    return `<b>${m.model}</b><br>Accuracy: #${m.acc_rank}<br>${m.accuracy}%`;
-                } else {
-                    return `<b>${m.model}</b><br>Novelty: #${m.bt_rank}<br>Win Rate: ${m.win_rate}%`;
-                }
-            }),
             hovertemplate: '%{text}<extra></extra>'
         });
         const layout = {
             ...darkLayout,
             xaxis: {
                 ...darkLayout.xaxis,
-                title: { text: 'Rank', font: { size: 11, color: '#e2e8f0' } },
-                range: [23, 0], // Fixed range for all charts
-                dtick: 5,
-                tick0: 0
             },
             yaxis: {
                 ...darkLayout.yaxis,
                 tickmode: 'array',
                 tickvals: models.map((_, i) => i),
-                ticktext: models.map(m => m.model.length > 16 ? m.model.substring(0, 14) + '...' : m.model),
                 automargin: true,
                 range: [-0.5, models.length - 0.5]
             },
-            showlegend: true,
-            legend: {
-                ...darkLayout.legend,
-                y: -0.18,
-                orientation: 'h',
-                x: 0.5,
-                xanchor: 'center'
-            },
-            margin: { t: 20, r: 15, b: 65, l: 120 }
         };
-        Plotly.react(`ranking-${id}`, traces, layout, plotlyConfig);
     });
 }
-function initRankingCharts() {
-    renderRankingCharts('novelty');
-}
-// Ranking mode toggle event listener
-document.querySelectorAll('.ranking-dim').forEach(btn => {
-    btn.addEventListener('click', () => {
-        document.querySelectorAll('.ranking-dim').forEach(b => b.classList.remove('active'));
-        btn.classList.add('active');
-        const mode = btn.dataset.mode;
-        currentRankingMode = mode;
-        renderRankingCharts(mode);
-    });
-});
 // ============================================================================
 // TURN DISTRIBUTION - 3 Charts (Ridgeline style)
 // ============================================================================

 });
 // ============================================================================
+// RANKING COMPARISON - Matches Python create_rank_figure.py exactly
 // ============================================================================
+const RANKING_DISPLAY_NAMES = {  // Raw model id -> label shown on the ranking charts; read by getDisplayName().
+    'run_api_deepseek_deepseek-chat': 'DeepSeek-V3.2',
+    'qwen3-next-80b-a3b-instruct': 'Qwen3-Next-80BA3B',
+    'qwen2.5-14B-Instruct-1M': 'Qwen2.5-14B-1M',
+    'qwen2.5-7B-Instruct-1M': 'Qwen2.5-7B-1M',
+    'qwen2.5-14B-Instruct': 'Qwen2.5-14B',
+    'qwen2.5-7B-Instruct': 'Qwen2.5-7B',
+    'qwen2.5-72B-Instruct': 'Qwen2.5-72B',
+    'qwen2.5-32b-instruct': 'Qwen2.5-32B',
+    'qwen3-4B-Instruct-2507': 'Qwen3-4B',
+    'gemini2.5-flash-lite': 'Gemini2.5-Flash-Lite',
+    'gemini2.5-flash': 'Gemini2.5-Flash',
+    'gemini2.5-pro': 'Gemini2.5-Pro',
+    'claude4.5-sonnet': 'Claude4.5-Sonnet',
+    'llama3.3-70B': 'Llama3.3-70B',
+    'minimax-m2': 'MiniMax-M2',
+    'gpt5mini': 'GPT-5-mini',  // 'gpt5mini' and 'gpt5-mini' are two id spellings that share one label
+    'gpt5-mini': 'GPT-5-mini',
+    'gpt5.1': 'GPT-5.1',
+    'gpt5.2': 'GPT-5.2',
+    'kimi-k2': 'Kimi-K2',
+    'glm4.6': 'GLM-4.6',
+    'qwen3': 'Qwen3-30B-A3B',
+    'gemini3-flash': 'Gemini3-Flash',
+};  // Keys are matched exactly (case-sensitive); ids missing from this table are displayed unchanged by getDisplayName().
+function getDisplayName(model) {
+    return RANKING_DISPLAY_NAMES[model] || model;
+}
+function initRankingCharts() {
     const scenarios = [
         { key: 'MIMIC', id: 'mimic' },
         { key: '10K', id: '10k' },
         { key: 'GLOBEM', id: 'globem' }
     ];
+    // Colors matching Python script
+    const PROPRIETARY_COLOR = '#6A0DAD';  // Vivid purple
+    const OPENSOURCE_COLOR = '#228B22';   // Forest green
     scenarios.forEach(({ key, id }) => {
         const rawData = DDR_DATA.ranking[key];
         if (!rawData) return;
+        // Sort by acc_rank (like Python: df.sort_values(['acc_rank', 'bt_rank']))
+        const sortedModels = [...rawData].sort((a, b) => {
+            if (a.acc_rank !== b.acc_rank) return a.acc_rank - b.acc_rank;
+            return a.bt_rank - b.bt_rank;
+        });
+        const models = sortedModels;  // Use all models (up to 22)
         const traces = [];
+        const topN = models.length;
+        // Connection lines (dashed black)
         models.forEach((m, i) => {
+            const accRankClipped = Math.min(m.acc_rank, topN + 1);
             traces.push({
+                x: [m.bt_rank, accRankClipped],
                 y: [i, i],
                 mode: 'lines',
                 line: {
+                    color: 'rgba(0, 0, 0, 0.3)',
+                    width: 1,
+                    dash: 'dash'
                 },
                 showlegend: false,
                 hoverinfo: 'skip'
             });
         });
+        // Accuracy rank points (hollow diamonds) - drawn first (lower z)
+        const accColors = models.map(m => m.is_proprietary ? PROPRIETARY_COLOR : OPENSOURCE_COLOR);
         traces.push({
+            x: models.map(m => m.acc_rank),
             y: models.map((_, i) => i),
             mode: 'markers',
+            name: 'Accuracy Rank',
             marker: {
+                size: 12,
+                symbol: 'diamond-open',
+                color: accColors,
+                line: { width: 2 }
             },
+            text: models.map(m => `<b>${getDisplayName(m.model)}</b><br>Accuracy Rank: #${m.acc_rank}<br>Accuracy: ${m.accuracy}%`),
             hovertemplate: '%{text}<extra></extra>'
         });
+        // Novelty rank points (filled circles) - drawn on top
+        const noveltyColors = models.map(m => m.is_proprietary ? PROPRIETARY_COLOR : OPENSOURCE_COLOR);
         traces.push({
+            x: models.map(m => m.bt_rank),
             y: models.map((_, i) => i),
             mode: 'markers',
+            name: 'Novelty Rank',
             marker: {
+                size: 10,
+                symbol: 'circle',
+                color: noveltyColors,
+                line: { color: '#000', width: 1 }
             },
+            text: models.map(m => `<b>${getDisplayName(m.model)}</b><br>Novelty Rank: #${m.bt_rank}<br>Win Rate: ${m.win_rate}%`),
             hovertemplate: '%{text}<extra></extra>'
         });
+        // Calculate Spearman correlation
+        const btRanks = models.map(m => m.bt_rank);
+        const accRanks = models.map(m => m.acc_rank);
+        const n = btRanks.length;
+        const meanBt = btRanks.reduce((a, b) => a + b, 0) / n;
+        const meanAcc = accRanks.reduce((a, b) => a + b, 0) / n;
+        let num = 0, denBt = 0, denAcc = 0;
+        for (let i = 0; i < n; i++) {
+            num += (btRanks[i] - meanBt) * (accRanks[i] - meanAcc);
+            denBt += (btRanks[i] - meanBt) ** 2;
+            denAcc += (accRanks[i] - meanAcc) ** 2;
+        }
+        const rho = num / Math.sqrt(denBt * denAcc);
         const layout = {
             ...darkLayout,
             xaxis: {
                 ...darkLayout.xaxis,
+                title: { text: 'Rank', font: { size: 10, color: '#e2e8f0' } },
+                range: [topN + 0.5, 0.5],  // Inverted: high ranks left, 1 on right
+                dtick: 2,
+                tick0: 2
             },
             yaxis: {
                 ...darkLayout.yaxis,
                 tickmode: 'array',
                 tickvals: models.map((_, i) => i),
+                ticktext: models.map(m => getDisplayName(m.model)),
+                tickfont: { size: 8, color: '#94a3b8' },
                 automargin: true,
                 range: [-0.5, models.length - 0.5]
             },
+            showlegend: false,
+            annotations: [{
+                x: 0.02,
+                y: 0.98,
+                xref: 'paper',
+                yref: 'paper',
+                text: `ρ = ${rho.toFixed(2)}`,
+                showarrow: false,
+                font: { size: 11, color: '#94a3b8', family: 'Inter' },
+                bgcolor: 'rgba(30, 41, 59, 0.8)',
+                borderpad: 4
+            }],
+            margin: { t: 15, r: 10, b: 40, l: 110 }
         };
+        Plotly.newPlot(`ranking-${id}`, traces, layout, plotlyConfig);
     });
 }
 // ============================================================================
 // TURN DISTRIBUTION - 3 Charts (Ridgeline style)
 // ============================================================================

index.html CHANGED Viewed

@@ -75,24 +75,22 @@
         <section id="ranking" class="section visible">
             <div class="section-header">
                 <h2>🏆 Ranking Comparison</h2>
-                <p>Compare model rankings based on Bradley-Terry pairwise ranking against accuracy ranking.</p>
-            </div>
-            <div class="dimension-toggle">
-                <button class="dim-btn ranking-dim active" data-mode="novelty">🎯 Novelty Rank</button>
-                <button class="dim-btn ranking-dim" data-mode="accuracy">📊 Accuracy Rank</button>
             </div>
             <div class="charts-grid three-col">
                 <div class="chart-card">
                     <h3>MIMIC</h3>
-                    <div id="ranking-mimic" class="chart-container"></div>
                 </div>
                 <div class="chart-card">
                     <h3>10-K</h3>
-                    <div id="ranking-10k" class="chart-container"></div>
                 </div>
                 <div class="chart-card">
                     <h3>GLOBEM</h3>
-                    <div id="ranking-globem" class="chart-container"></div>
                 </div>
             </div>
         </section>

         <section id="ranking" class="section visible">
             <div class="section-header">
                 <h2>🏆 Ranking Comparison</h2>
+                <p>Novelty (Bradley-Terry pairwise) vs Accuracy ranking. ● = Novelty Rank, ◇ = Accuracy Rank. Purple =
+                    Proprietary, Green = Open-source.</p>
             </div>
             <div class="charts-grid three-col">
                 <div class="chart-card">
                     <h3>MIMIC</h3>
+                    <div id="ranking-mimic" class="chart-container-tall"></div>
                 </div>
                 <div class="chart-card">
                     <h3>10-K</h3>
+                    <div id="ranking-10k" class="chart-container-tall"></div>
                 </div>
                 <div class="chart-card">
                     <h3>GLOBEM</h3>
+                    <div id="ranking-globem" class="chart-container-tall"></div>
                 </div>
             </div>
         </section>