// justin-ailabs
// UI Overhaul: Premium Slate Aesthetic, Tabbed Interface, and Grid Refinements
// 384e89d
/**
 * Controller for the LLM benchmark leaderboard page.
 *
 * Fetches per-judge score tables (JSON keyed by model name, each value an
 * object of metric -> score), renders them as sortable, searchable HTML
 * tables into `#generalTable` and `#complicatedTable`, and wires up the
 * search box, judge toggle buttons and tab buttons.
 */
class LLMBenchmarkDashboard {
    constructor() {
        // Judge whose scores are currently displayed; must be a key of judgeData.
        this.currentJudge = 'gpt5.4';
        // Lower-cased, trimmed search text; an empty string disables filtering.
        this.searchQuery = '';
        // Loaded score tables per judge and benchmark type.
        // An entry stays null until its fetch succeeds.
        this.judgeData = {
            o3: { general: null, complicated: null },
            'gpt5.4': { general: null, complicated: null }
        };
        // Independent sort state for each table.
        this.generalSort = { column: 'overall_score', direction: 'desc' };
        this.complicatedSort = { column: 'overall_score', direction: 'desc' };
        // Column header labels, keyed by the metric names used in the JSON data.
        this.metricDisplayNames = {
            comprehension_score: 'Comprehension',
            structure_score: 'Coherence',
            prose_style_score: 'Style',
            creativity_score: 'Creativity',
            depth_score: 'Depth',
            helpfulness_score: 'Helpfulness',
            overall_score: 'Overall'
        };
        // Human-readable metric explanations (not referenced by any method of this class).
        this.metricDescriptions = {
            comprehension_score: 'How well the response understands the prompt intent and stays on topic.',
            structure_score: 'How clear, logical, and well-organized the writing is.',
            prose_style_score: 'The quality of language, grammar, and adherence to the requested tone.',
            creativity_score: 'The novelty of ideas and uniqueness of perspective.',
            depth_score: 'The level of detail, analysis, and substance provided.',
            helpfulness_score: 'How effectively the response fulfills the user\'s overall goal.',
            overall_score: 'Average score across all six criteria.'
        };
        // Location of each judge's JSON score files.
        this.judgePaths = {
            o3: {
                general: 'data/all-scores.json',
                complicated: 'data/complicated-writing-scores.json'
            },
            'gpt5.4': {
                general: 'data/gpt5.4-judge-all-scores.json',
                complicated: 'data/gpt5.4-judge-complicated-writing-scores.json'
            }
        };
        // Models rendered as links and given the highlight row style.
        this.modelLinks = {
            'Monomer-24B-Writer': 'https://huggingface.co/zake7749/Monomer-24B-Writer-Preview',
            'Monomer-8B-Writer': 'https://huggingface.co/zake7749/Monomer-8B-Writer-Preview'
        };
        // A constructor cannot await, so attach a handler instead of leaving
        // the promise floating (an unhandled rejection would otherwise be silent).
        this.init().catch(error => {
            console.error('Error initializing dashboard:', error);
        });
    }

    /**
     * Loads every score file in parallel, renders both tables and attaches
     * the UI event handlers. The loading indicator is hidden even if
     * rendering throws.
     * @returns {Promise<void>}
     */
    async init() {
        this.showLoading(true);
        try {
            const loads = [];
            for (const judge of ['o3', 'gpt5.4']) {
                for (const type of ['general', 'complicated']) {
                    loads.push(this.loadData(this.judgePaths[judge][type], judge, type));
                }
            }
            await Promise.all(loads);
            this.renderTable('general');
            this.renderTable('complicated');
            this.setupSearch();
            this.setupJudgeToggle();
            this.setupTabs();
        } finally {
            this.showLoading(false);
        }
    }

    /**
     * Fetches one JSON score file and stores it in judgeData[judge][type].
     * Failures are logged and leave the slot as null, so one missing file
     * does not prevent the other tables from loading.
     * @param {string} path - URL of the JSON file.
     * @param {string} judge - Key in judgeData.
     * @param {string} type - 'general' or 'complicated'.
     * @returns {Promise<void>}
     */
    async loadData(path, judge, type) {
        try {
            const response = await fetch(path);
            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }
            this.judgeData[judge][type] = await response.json();
        } catch (error) {
            console.error(`Error loading ${judge}/${type} data:`, error);
        }
    }

    /** General-benchmark scores for the current judge, or null when unavailable. */
    get generalData() {
        const entry = this.judgeData[this.currentJudge];
        return entry ? entry.general : null;
    }

    /** Complicated-writing scores for the current judge, or null when unavailable. */
    get complicatedData() {
        const entry = this.judgeData[this.currentJudge];
        return entry ? entry.complicated : null;
    }

    /**
     * Makes each `.judge-btn` switch the active judge (taken from its
     * `data-judge` attribute) and re-render both tables.
     */
    setupJudgeToggle() {
        const buttons = document.querySelectorAll('.judge-btn');
        buttons.forEach(btn => {
            btn.addEventListener('click', () => {
                const judge = btn.dataset.judge;
                if (judge === this.currentJudge) return;
                // Ignore buttons naming a judge we hold no data slot for.
                if (!Object.prototype.hasOwnProperty.call(this.judgeData, judge)) return;
                this.currentJudge = judge;
                buttons.forEach(b => b.classList.remove('active'));
                btn.classList.add('active');
                this.renderTable('general');
                this.renderTable('complicated');
            });
        });
    }

    /** Re-renders both tables whenever the `#globalSearch` input changes. */
    setupSearch() {
        const searchInput = document.getElementById('globalSearch');
        if (searchInput) {
            searchInput.addEventListener('input', (e) => {
                this.searchQuery = e.target.value.toLowerCase().trim();
                this.renderTable('general');
                this.renderTable('complicated');
            });
        }
    }

    /**
     * Makes each `.tab-btn` activate the `.tab-content` element whose id
     * equals the button's `data-target`, deactivating the others.
     */
    setupTabs() {
        const tabBtns = document.querySelectorAll('.tab-btn');
        const tabContents = document.querySelectorAll('.tab-content');
        tabBtns.forEach(btn => {
            btn.addEventListener('click', () => {
                const targetId = btn.dataset.target;
                tabBtns.forEach(b => b.classList.remove('active'));
                btn.classList.add('active');
                tabContents.forEach(content => {
                    content.classList.toggle('active', content.id === targetId);
                });
            });
        });
    }

    /**
     * Renders the table for one benchmark type into its container and
     * (re)attaches the header sort handlers.
     *
     * When the current judge has no data for this type, a placeholder
     * replaces whatever was rendered before, so a stale table from another
     * judge is never left on screen.
     * @param {string} type - 'general' selects the general table; any other value selects the complicated one.
     */
    renderTable(type) {
        const isGeneral = type === 'general';
        const data = isGeneral ? this.generalData : this.complicatedData;
        const sortState = isGeneral ? this.generalSort : this.complicatedSort;
        const tableContainer = document.getElementById(isGeneral ? 'generalTable' : 'complicatedTable');
        if (!tableContainer) return;

        const models = data ? Object.keys(data) : [];
        if (models.length === 0) {
            tableContainer.innerHTML = `
                <div class="no-data">
                    <p>No data available.</p>
                </div>
            `;
            return;
        }

        // Columns are taken from the first model's entry.
        const metrics = Object.keys(data[models[0]] || {});
        // Medals reflect the rank within the full data set, so they stay
        // correct under a search filter and in either sort direction.
        const ranks = sortState.column !== 'model'
            ? this.getMetricRanks(data, sortState.column)
            : new Map();
        const medals = { 1: '🥇', 2: '🥈', 3: '🥉' };
        const sortClass = column => (sortState.column === column ? ` sort-${sortState.direction}` : '');
        const safeType = this.escapeHtml(type);

        const headerCells = metrics.map(metric => {
            const label = this.getOwn(this.metricDisplayNames, metric) || metric;
            return `<th class="sortable${sortClass(metric)}" data-type="${safeType}" data-column="${this.escapeHtml(metric)}">${this.escapeHtml(label)}</th>`;
        }).join('');

        const bodyRows = this.getSortedTableData(data, sortState, metrics).map(row => {
            const link = this.getOwn(this.modelLinks, row.model);
            const isBaseline = row.model === 'gpt-4.1-2025-04-14';
            let rowClass = '';
            if (link) rowClass = 'highlight-row';
            if (isBaseline) rowClass = 'baseline-row';

            const medal = medals[ranks.get(row.model)];
            const medalHtml = medal ? `<span class="rank-medal">${medal}</span>` : '';
            const safeModel = this.escapeHtml(row.model);
            const modelHtml = link
                ? `<a href="${this.escapeHtml(link)}" target="_blank" rel="noopener" class="model-link">${safeModel}</a>`
                : safeModel;
            const scoreCells = metrics.map(metric => {
                const value = row[metric];
                return `<td class="score-cell" style="background-color: ${this.getHeatmapColor(value, type, metric)}">${this.escapeHtml(this.formatScore(value))}</td>`;
            }).join('');

            return `
                <tr class="${rowClass}">
                    <td class="model-cell">
                        ${medalHtml}
                        ${modelHtml}
                        ${isBaseline ? ' (Baseline)' : ''}
                    </td>
                    ${scoreCells}
                </tr>
            `;
        }).join('');

        tableContainer.innerHTML = `
            <table>
                <thead>
                    <tr>
                        <th class="sortable${sortClass('model')}" data-type="${safeType}" data-column="model">Model</th>
                        ${headerCells}
                    </tr>
                </thead>
                <tbody>
                    ${bodyRows}
                </tbody>
            </table>
        `;
        this.setupTableSorting(type);
    }

    /**
     * Computes each model's 1-based rank for a metric, highest score first.
     * Models with equal scores share a rank (1, 1, 3, ...). Models whose
     * value for the metric is not a number are left out of the result.
     * @param {Object<string, Object>} data - Score table keyed by model name.
     * @param {string} column - Metric to rank by.
     * @returns {Map<string, number>} Model name -> rank.
     */
    getMetricRanks(data, column) {
        const scored = Object.keys(data)
            .map(model => ({ model, value: data[model] ? data[model][column] : undefined }))
            .filter(entry => typeof entry.value === 'number' && !Number.isNaN(entry.value))
            .sort((a, b) => b.value - a.value);
        const ranks = new Map();
        scored.forEach((entry, index) => {
            const previous = scored[index - 1];
            const isTie = previous !== undefined && previous.value === entry.value;
            ranks.set(entry.model, isTie ? ranks.get(previous.model) : index + 1);
        });
        return ranks;
    }

    /**
     * Builds the row objects for a table: filters models by the current
     * search query (case-insensitive substring match on the model name) and
     * sorts them according to sortState.
     * @param {Object<string, Object>} data - Score table keyed by model name.
     * @param {{column: string, direction: string}} sortState - Active sort.
     * @param {string[]} metrics - Metric names (accepted for interface compatibility; not read).
     * @returns {Object[]} Rows shaped as { model, ...scores }.
     */
    getSortedTableData(data, sortState, metrics) {
        const tableData = Object.keys(data)
            .filter(model => !this.searchQuery || model.toLowerCase().includes(this.searchQuery))
            .map(model => ({
                model,
                ...data[model]
            }));
        if (sortState.column) {
            const ascending = sortState.direction === 'asc';
            tableData.sort((a, b) => {
                let aVal = a[sortState.column];
                let bVal = b[sortState.column];
                if (sortState.column === 'model') {
                    // Model names sort case-insensitively.
                    aVal = aVal.toLowerCase();
                    bVal = bVal.toLowerCase();
                }
                if (aVal < bVal) return ascending ? -1 : 1;
                if (aVal > bVal) return ascending ? 1 : -1;
                return 0;
            });
        }
        return tableData;
    }

    /**
     * Formats a score for display.
     * @param {*} value - Raw cell value.
     * @returns {*} Numbers fixed to two decimals; any other value unchanged.
     */
    formatScore(value) {
        if (typeof value === 'number') {
            return value.toFixed(2);
        }
        return value;
    }

    /**
     * Maps a score to a faint background colour relative to the 5.0 baseline.
     * Scores below 5.0 shade towards red and reach full strength at 3.5 or
     * lower; scores above 5.0 shade towards green and reach full strength at
     * 8.5 or higher. Alpha is capped at 0.2 so the cell text stays readable.
     * @param {?number} val - Score; null/undefined yields 'transparent'.
     * @param {string} type - Table type (currently not used).
     * @param {string} metric - Metric name (currently not used).
     * @returns {string} CSS colour value.
     */
    getHeatmapColor(val, type, metric) {
        if (val === null || val === undefined) return 'transparent';
        const minVal = 3.5;
        const maxVal = 8.5;
        const baseline = 5.0;
        const maxAlpha = 0.2;
        if (val < baseline) {
            const intensity = Math.min(1, (baseline - val) / (baseline - minVal));
            return `rgba(239, 68, 68, ${intensity * maxAlpha})`;
        }
        if (val > baseline) {
            const intensity = Math.min(1, (val - baseline) / (maxVal - baseline));
            return `rgba(16, 185, 129, ${intensity * maxAlpha})`;
        }
        // Exactly at the baseline (or not comparable, e.g. NaN).
        return 'transparent';
    }

    /**
     * Attaches click-to-sort handlers to the sortable headers of a table.
     * Called after every render because rendering replaces the header elements.
     * @param {string} type - 'general' or 'complicated'.
     */
    setupTableSorting(type) {
        const tableContainer = document.getElementById(type === 'general' ? 'generalTable' : 'complicatedTable');
        if (!tableContainer) return;
        tableContainer.querySelectorAll('th.sortable').forEach(header => {
            header.addEventListener('click', () => {
                this.handleSort(type, header.dataset.column, header);
            });
        });
    }

    /**
     * Updates the sort state for a table and re-renders it. Clicking the
     * active column flips the direction; a new column starts ascending.
     * The header classes are produced by renderTable from the sort state,
     * so no direct DOM class manipulation is needed here.
     * @param {string} type - 'general' or 'complicated'.
     * @param {string} column - Column key ('model' or a metric name).
     * @param {Element} headerElement - Clicked header (kept for interface compatibility; not read).
     */
    handleSort(type, column, headerElement) {
        const sortState = type === 'general' ? this.generalSort : this.complicatedSort;
        if (sortState.column === column) {
            sortState.direction = sortState.direction === 'asc' ? 'desc' : 'asc';
        } else {
            sortState.column = column;
            sortState.direction = 'asc';
        }
        this.renderTable(type);
    }

    /**
     * Shows or hides the `#loading` element via its `hidden` class.
     * Does nothing when the element is absent.
     * @param {boolean} show
     */
    showLoading(show) {
        const loading = document.getElementById('loading');
        if (!loading) return;
        loading.classList.toggle('hidden', !show);
    }

    /**
     * Replaces the contents of `#loading` with an error message.
     * The message is inserted as text (HTML-escaped), not as markup.
     * @param {string} message
     */
    showError(message) {
        const loading = document.getElementById('loading');
        if (!loading) return;
        loading.innerHTML = `
            <div class="no-data">
                <i class="fas fa-exclamation-triangle"></i>
                <p>${this.escapeHtml(message)}</p>
            </div>
        `;
    }

    /**
     * Escapes a value for safe interpolation into HTML text or a
     * double-quoted attribute.
     * @param {*} value - Converted with String() before escaping.
     * @returns {string}
     */
    escapeHtml(value) {
        const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
        return String(value).replace(/[&<>"']/g, ch => entities[ch]);
    }

    /**
     * Reads an own property of a lookup object, ignoring inherited keys such
     * as 'constructor' that a data-supplied name could otherwise match.
     * @param {Object} map
     * @param {string} key
     * @returns {*} The value, or undefined when the key is not an own property.
     */
    getOwn(map, key) {
        return Object.prototype.hasOwnProperty.call(map, key) ? map[key] : undefined;
    }
}
// Start the dashboard once the DOM has been parsed. If this script runs
// after parsing has finished (readyState is no longer 'loading'), the
// DOMContentLoaded event may already have fired, so instantiate immediately
// instead of registering a listener that would never be called.
if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', () => {
        new LLMBenchmarkDashboard();
    });
} else {
    new LLMBenchmarkDashboard();
}