/**
 * Leaderboard dashboard for LLM writing benchmarks.
 *
 * Loads one "general" and one "complicated" score file per judge, renders each
 * as a sortable table, and wires up the global search box, the judge toggle
 * buttons and the tab buttons.
 *
 * Each score file is read as an object keyed by model name whose values are
 * objects keyed by metric name (the metric columns are taken from the first
 * model's keys).
 *
 * NOTE(review): the HTML inside the template literals was reconstructed. Only
 * these hooks are established by the JavaScript itself: `th.sortable`,
 * `data-column`, `sort-asc` / `sort-desc`, `highlight-row` and `baseline-row`.
 * The `title` tooltips, the heatmap cell backgrounds, the link attributes and
 * the `error-message` class are assumptions - confirm against the page's
 * stylesheet and markup.
 */
class LLMBenchmarkDashboard {
  /**
   * Sets up the default state and static lookup tables, then starts loading.
   */
  constructor() {
    this.currentJudge = 'gpt5.4';
    this.searchQuery = '';
    this.judgeData = {
      o3: { general: null, complicated: null },
      'gpt5.4': { general: null, complicated: null },
    };

    this.generalSort = { column: 'overall_score', direction: 'desc' };
    this.complicatedSort = { column: 'overall_score', direction: 'desc' };

    this.metricDisplayNames = {
      comprehension_score: 'Comprehension',
      structure_score: 'Coherence',
      prose_style_score: 'Style',
      creativity_score: 'Creativity',
      depth_score: 'Depth',
      helpfulness_score: 'Helpfulness',
      overall_score: 'Overall',
    };

    this.metricDescriptions = {
      comprehension_score: 'How well the response understands the prompt intent and stays on topic.',
      structure_score: 'How clear, logical, and well-organized the writing is.',
      prose_style_score: 'The quality of language, grammar, and adherence to the requested tone.',
      creativity_score: 'The novelty of ideas and uniqueness of perspective.',
      depth_score: 'The level of detail, analysis, and substance provided.',
      helpfulness_score: 'How effectively the response fulfills the user\'s overall goal.',
      overall_score: 'Average score across all six criteria.',
    };

    this.judgePaths = {
      o3: {
        general: 'data/all-scores.json',
        complicated: 'data/complicated-writing-scores.json',
      },
      'gpt5.4': {
        general: 'data/gpt5.4-judge-all-scores.json',
        complicated: 'data/gpt5.4-judge-complicated-writing-scores.json',
      },
    };

    this.modelLinks = {
      'Monomer-24B-Writer': 'https://huggingface.co/zake7749/Monomer-24B-Writer-Preview',
      'Monomer-8B-Writer': 'https://huggingface.co/zake7749/Monomer-8B-Writer-Preview',
    };

    // A constructor cannot await, so attach a rejection handler instead of
    // leaving the promise floating.
    this.init().catch((error) => {
      console.error('Dashboard initialisation failed:', error);
      this.showError('Failed to load benchmark data. Please try again later.');
    });
  }

  /**
   * Loads every judge/type dataset in parallel, renders both tables and
   * attaches the UI listeners. If no dataset could be loaded at all, an error
   * message replaces the loading indicator instead of hiding it.
   *
   * @returns {Promise<void>}
   */
  async init() {
    this.showLoading(true);

    const loads = [];
    for (const judge of Object.keys(this.judgePaths)) {
      for (const type of Object.keys(this.judgePaths[judge])) {
        loads.push(this.loadData(this.judgePaths[judge][type], judge, type));
      }
    }
    await Promise.all(loads);

    this.renderTable('general');
    this.renderTable('complicated');
    this.setupSearch();
    this.setupJudgeToggle();
    this.setupTabs();

    const hasAnyData = Object.values(this.judgeData).some((datasets) =>
      Object.values(datasets).some((dataset) => dataset !== null));
    if (hasAnyData) {
      this.showLoading(false);
    } else {
      this.showError('Failed to load benchmark data. Please try again later.');
    }
  }

  /**
   * Fetches one score file and stores the parsed JSON in `judgeData`.
   * Failures are logged and leave the slot as `null`; they never reject.
   *
   * @param {string} path - URL of the JSON file.
   * @param {string} judge - Key into `judgeData`.
   * @param {string} type - 'general' or 'complicated'.
   * @returns {Promise<void>}
   */
  async loadData(path, judge, type) {
    try {
      const response = await fetch(path);
      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }
      this.judgeData[judge][type] = await response.json();
    } catch (error) {
      console.error(`Error loading ${judge}/${type} data:`, error);
    }
  }

  /** General-benchmark dataset of the currently selected judge (or null). */
  get generalData() {
    return this.judgeData[this.currentJudge].general;
  }

  /** Complicated-writing dataset of the currently selected judge (or null). */
  get complicatedData() {
    return this.judgeData[this.currentJudge].complicated;
  }

  /**
   * Makes the `.judge-btn` buttons switch the active judge and re-render.
   * Buttons whose `data-judge` is not a known judge are ignored so the data
   * getters can never dereference a missing entry.
   */
  setupJudgeToggle() {
    const buttons = document.querySelectorAll('.judge-btn');
    buttons.forEach((btn) => {
      btn.addEventListener('click', () => {
        const judge = btn.dataset.judge;
        if (judge === this.currentJudge) return;
        if (!Object.prototype.hasOwnProperty.call(this.judgeData, judge)) return;

        this.currentJudge = judge;
        buttons.forEach((b) => b.classList.remove('active'));
        btn.classList.add('active');
        this.renderTable('general');
        this.renderTable('complicated');
      });
    });
  }

  /**
   * Re-renders both tables whenever the `#globalSearch` input changes.
   * The query is lower-cased and trimmed before being stored.
   */
  setupSearch() {
    const searchInput = document.getElementById('globalSearch');
    if (!searchInput) return;

    searchInput.addEventListener('input', (e) => {
      this.searchQuery = e.target.value.toLowerCase().trim();
      this.renderTable('general');
      this.renderTable('complicated');
    });
  }

  /**
   * Makes each `.tab-btn` activate the `.tab-content` element whose id equals
   * the button's `data-target`.
   */
  setupTabs() {
    const tabBtns = document.querySelectorAll('.tab-btn');
    const tabContents = document.querySelectorAll('.tab-content');

    tabBtns.forEach((btn) => {
      btn.addEventListener('click', () => {
        const targetId = btn.dataset.target;

        // Update active state of buttons
        tabBtns.forEach((b) => b.classList.remove('active'));
        btn.classList.add('active');

        // Update active state of content
        tabContents.forEach((content) => {
          content.classList.toggle('active', content.id === targetId);
        });
      });
    });
  }

  /**
   * Renders the table for one benchmark type into its container and attaches
   * the header click handlers. Does nothing when the container is missing or
   * the dataset has not loaded.
   *
   * @param {string} type - 'general' selects the general table; anything else
   *   selects the complicated table.
   */
  renderTable(type) {
    const isGeneral = type === 'general';
    const data = isGeneral ? this.generalData : this.complicatedData;
    const sortState = isGeneral ? this.generalSort : this.complicatedSort;
    const tableContainer = document.getElementById(isGeneral ? 'generalTable' : 'complicatedTable');
    if (!tableContainer || !data) return;

    const models = Object.keys(data);
    // An empty dataset has no first model to take the metric columns from.
    const metrics = models.length > 0 ? Object.keys(data[models[0]] || {}) : [];
    const rows = this.getSortedTableData(data, sortState, metrics);

    // Medals reflect the rank among ALL models on the sorted metric, so the
    // search filter and the sort direction cannot hand a medal to the wrong row.
    const ranks = sortState.column !== 'model' ? this.computeRanks(data, sortState.column) : null;

    // The header is rebuilt on every render, so the sort indicator class has to
    // be part of the markup to survive.
    const sortClass = (column) => (sortState.column === column ? ` sort-${sortState.direction}` : '');

    const headerCells = metrics.map((metric) => {
      const label = this.escapeHtml(this.metricDisplayNames[metric] || metric);
      const description = this.escapeHtml(this.metricDescriptions[metric] || '');
      return `<th class="sortable${sortClass(metric)}" data-column="${this.escapeHtml(metric)}" title="${description}">${label}</th>`;
    }).join('');

    const bodyRows = rows.map((row) => {
      // Own-property check: names such as "constructor" must not count as linked.
      const link = Object.prototype.hasOwnProperty.call(this.modelLinks, row.model)
        ? this.modelLinks[row.model]
        : null;
      const isBaseline = row.model === 'gpt-4.1-2025-04-14';

      let rowClass = '';
      if (link) rowClass = 'highlight-row';
      if (isBaseline) rowClass = 'baseline-row';

      // Generate rank medal for the sorted column if it's a metric
      let medalHtml = '';
      const rank = ranks ? ranks.get(row.model) : undefined;
      if (rank === 1) medalHtml = '🥇';
      else if (rank === 2) medalHtml = '🥈';
      else if (rank === 3) medalHtml = '🥉';

      const name = this.escapeHtml(row.model);
      const nameHtml = link
        ? `<a href="${this.escapeHtml(link)}" target="_blank" rel="noopener noreferrer">${name}</a>`
        : name;

      const scoreCells = metrics.map((metric) => {
        const background = this.getHeatmapColor(row[metric], type, metric);
        return `<td style="background-color: ${background}">${this.escapeHtml(this.formatScore(row[metric]))}</td>`;
      }).join('');

      return `<tr class="${rowClass}"><td>${medalHtml} ${nameHtml} ${isBaseline ? ' (Baseline)' : ''}</td>${scoreCells}</tr>`;
    }).join('');

    tableContainer.innerHTML = `
<table>
  <thead>
    <tr><th class="sortable${sortClass('model')}" data-column="model">Model</th>${headerCells}</tr>
  </thead>
  <tbody>${bodyRows}</tbody>
</table>
`;

    this.setupTableSorting(type);
  }

  /**
   * Builds the visible rows: filters models by the current search query, then
   * sorts them by `sortState`.
   *
   * @param {Object} data - Dataset keyed by model name.
   * @param {{column: string, direction: string}} sortState - Active sort.
   * @param {string[]} metrics - Metric keys (not used by the sort; kept for
   *   interface compatibility).
   * @returns {Object[]} Rows shaped `{ model, ...metricValues }`.
   */
  getSortedTableData(data, sortState, metrics) {
    const rows = Object.keys(data)
      .filter((model) => !this.searchQuery || model.toLowerCase().includes(this.searchQuery))
      .map((model) => ({ model, ...data[model] }));

    return this.sortRows(rows, sortState);
  }

  /**
   * Sorts rows in place by `sortState.column` and returns the same array.
   * The `model` column is compared case-insensitively; a falsy column leaves
   * the order untouched.
   *
   * @param {Object[]} rows - Rows to sort.
   * @param {{column: string, direction: string}} sortState - Active sort.
   * @returns {Object[]} The sorted `rows` array.
   */
  sortRows(rows, sortState) {
    if (!sortState.column) return rows;

    const ascending = sortState.direction === 'asc';
    rows.sort((a, b) => {
      let aVal = a[sortState.column];
      let bVal = b[sortState.column];
      if (sortState.column === 'model') {
        aVal = aVal.toLowerCase();
        bVal = bVal.toLowerCase();
      }
      if (aVal < bVal) return ascending ? -1 : 1;
      if (aVal > bVal) return ascending ? 1 : -1;
      return 0;
    });
    return rows;
  }

  /**
   * Ranks every model in `data` by `column`, highest value first.
   *
   * @param {Object} data - Dataset keyed by model name.
   * @param {string} column - Metric key to rank by.
   * @returns {Map<string, number>} Model name to 1-based rank.
   */
  computeRanks(data, column) {
    const allRows = Object.keys(data).map((model) => ({ model, ...data[model] }));
    this.sortRows(allRows, { column, direction: 'desc' });
    return new Map(allRows.map((row, index) => [row.model, index + 1]));
  }

  /**
   * Formats a score for display.
   *
   * @param {*} value - Raw cell value.
   * @returns {*} Numbers as strings with two decimals; anything else unchanged.
   */
  formatScore(value) {
    if (typeof value === 'number') {
      return value.toFixed(2);
    }
    return value;
  }

  /**
   * Maps a score to a faint background colour around the 5.0 baseline:
   * red below it, green above it, transparent at exactly 5.0 or when the
   * value is null/undefined.
   *
   * @param {number|null|undefined} val - Score to colour.
   * @param {string} type - Unused; kept for interface compatibility.
   * @param {string} metric - Unused; kept for interface compatibility.
   * @returns {string} A CSS colour (`transparent` or an `rgba(...)` string).
   */
  getHeatmapColor(val, type, metric) {
    if (val === null || val === undefined) return 'transparent';

    // 5.0 is the neutral midpoint; the tint saturates at 3.5 (red) and 8.5 (green).
    const minVal = 3.5;
    const maxVal = 8.5;
    const baseline = 5.0;

    // Alpha is capped at 0.2 so the text on top stays readable.
    if (val < baseline) {
      const intensity = Math.min(1, (baseline - val) / (baseline - minVal));
      return `rgba(239, 68, 68, ${intensity * 0.2})`;
    }
    if (val > baseline) {
      const intensity = Math.min(1, (val - baseline) / (maxVal - baseline));
      return `rgba(16, 185, 129, ${intensity * 0.2})`;
    }
    return 'transparent';
  }

  /**
   * Attaches click handlers to the freshly rendered `th.sortable` headers.
   *
   * @param {string} type - 'general' or 'complicated'.
   */
  setupTableSorting(type) {
    const tableContainer = document.getElementById(type === 'general' ? 'generalTable' : 'complicatedTable');
    if (!tableContainer) return;

    tableContainer.querySelectorAll('th.sortable').forEach((header) => {
      header.addEventListener('click', () => {
        this.handleSort(type, header.dataset.column, header);
      });
    });
  }

  /**
   * Updates the sort state for a table and re-renders it. Clicking the active
   * column flips the direction; clicking another column sorts it ascending.
   *
   * @param {string} type - 'general' or 'complicated'.
   * @param {string} column - Column key taken from the header's `data-column`.
   * @param {Element} [headerElement] - Kept for interface compatibility. The
   *   clicked header is discarded by the re-render, so the sort class is
   *   emitted by `renderTable` rather than set on this element.
   */
  handleSort(type, column, headerElement) {
    const sortState = type === 'general' ? this.generalSort : this.complicatedSort;

    if (sortState.column === column) {
      sortState.direction = sortState.direction === 'asc' ? 'desc' : 'asc';
    } else {
      sortState.column = column;
      sortState.direction = 'asc';
    }

    this.renderTable(type);
  }

  /**
   * Shows or hides the `#loading` element by toggling its `hidden` class.
   *
   * @param {boolean} show - True to show the indicator.
   */
  showLoading(show) {
    const loading = document.getElementById('loading');
    if (!loading) return;
    loading.classList.toggle('hidden', !show);
  }

  /**
   * Replaces the content of the `#loading` element with an error message.
   *
   * @param {string} message - Text to display; inserted HTML-escaped.
   */
  showError(message) {
    const loading = document.getElementById('loading');
    if (!loading) return;
    // NOTE(review): wrapper class name is assumed - confirm against the stylesheet.
    loading.innerHTML = `
<div class="error-message">
  <p>${this.escapeHtml(message)}</p>
</div>
`;
  }

  /**
   * Escapes a value for safe interpolation into HTML text or attribute values.
   *
   * @param {*} value - Value to escape; null/undefined become an empty string.
   * @returns {string} The escaped string.
   */
  escapeHtml(value) {
    const text = value === null || value === undefined ? '' : String(value);
    return text
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  }
}

// Start the dashboard once the DOM is parsed. If the script is loaded after
// DOMContentLoaded has already fired, start immediately; skip entirely where
// there is no DOM.
if (typeof document !== 'undefined') {
  if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', () => {
      new LLMBenchmarkDashboard();
    });
  } else {
    new LLMBenchmarkDashboard();
  }
}