<!--
maxidl's picture
Upload index.html with huggingface_hub
a1fc6bd verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Eval Suite Visualization</title>
<script src="https://cdn.plot.ly/plotly-2.35.2.min.js" charset="utf-8"></script>
<style>
* { box-sizing: border-box; margin: 0; padding: 0; }
body {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
background: #f8f9fa;
color: #1a1a2e;
padding: 24px;
}
/* ── Page header ─────────────────────────────── */
.page-header {
display: flex;
align-items: center;
justify-content: space-between;
margin-bottom: 24px;
}
.page-header h1 {
font-size: 1.5rem;
font-weight: 600;
color: #1a1a2e;
}
.btn {
padding: 8px 16px;
border: 1px solid #dee2e6;
border-radius: 6px;
background: #fff;
font-size: 0.875rem;
color: #495057;
cursor: pointer;
transition: background 0.15s;
}
.btn:hover { background: #e9ecef; }
.btn-primary {
background: #4361ee;
color: #fff;
border-color: #4361ee;
}
.btn-primary:hover { background: #3a56d4; }
.btn-sm {
padding: 4px 10px;
font-size: 0.75rem;
}
.btn-danger { color: #e63946; border-color: #e6394640; }
.btn-danger:hover { background: #e6394610; }
/* ── Panels grid ────────────────────────────── */
#panels-container {
display: grid;
grid-template-columns: repeat(2, 1fr);
gap: 20px;
}
/* ── Panel ───────────────────────────────────── */
.panel {
background: #fff;
border: 1px solid #dee2e6;
border-radius: 8px;
overflow: hidden;
}
.panel-toolbar {
display: flex;
align-items: center;
justify-content: flex-end;
gap: 6px;
padding: 6px 10px;
border-bottom: 1px solid #dee2e6;
background: #f8f9fa;
}
.panel-controls {
padding: 16px;
border-bottom: 1px solid #dee2e6;
}
.panel-controls.collapsed { display: none; }
.controls-row {
display: flex;
flex-wrap: wrap;
gap: 12px;
align-items: flex-end;
}
.controls-row + .controls-row { margin-top: 12px; }
.control-group {
display: flex;
flex-direction: column;
gap: 4px;
}
.control-group label {
font-size: 0.7rem;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.05em;
color: #6c757d;
}
select {
padding: 6px 10px;
border: 1px solid #dee2e6;
border-radius: 6px;
background: #fff;
font-size: 0.8rem;
color: #1a1a2e;
min-width: 160px;
cursor: pointer;
}
select:focus {
outline: none;
border-color: #4361ee;
box-shadow: 0 0 0 3px rgba(67, 97, 238, 0.15);
}
/* ── Models section ──────────────────────────── */
.models-section {
margin-top: 12px;
}
.models-header {
display: flex;
align-items: center;
gap: 8px;
margin-bottom: 8px;
}
.models-header span {
font-size: 0.7rem;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.05em;
color: #6c757d;
}
.checkbox-grid {
display: flex;
flex-wrap: wrap;
gap: 6px 16px;
}
.checkbox-item {
display: flex;
align-items: center;
gap: 5px;
cursor: pointer;
font-size: 0.8rem;
}
.checkbox-item input[type="checkbox"] {
width: 14px;
height: 14px;
cursor: pointer;
accent-color: #4361ee;
}
.checkbox-item .model-name.missing {
text-decoration: line-through;
opacity: 0.5;
cursor: help;
}
.checkbox-item .model-name.missing:hover {
opacity: 0.8;
}
.model-separator {
width: 100%;
border-top: 1px solid #eee;
margin: 4px 0;
}
/* ── Chart ───────────────────────────────────── */
.panel-chart-wrapper {
position: relative;
}
.panel-chart {
min-height: 100px;
overflow: hidden;
}
.title-hover-zone {
position: absolute;
top: 0;
left: 50px;
right: 50px;
height: 40px;
cursor: pointer;
z-index: 10;
display: flex;
align-items: center;
justify-content: center;
}
.title-info-icon {
position: absolute;
top: 50%;
transform: translateY(-50%);
width: 18px;
height: 18px;
border-radius: 50%;
background: #e9ecef;
color: #495057;
font-size: 11px;
font-weight: 600;
display: flex;
align-items: center;
justify-content: center;
opacity: 0.6;
transition: opacity 0.15s;
}
.title-hover-zone:hover .title-info-icon {
opacity: 1;
}
/* ── Resize handle ──────────────────────────── */
.panel-resize-handle {
height: 6px;
cursor: ns-resize;
background: linear-gradient(to bottom, #dee2e6 1px, transparent 1px, transparent 3px, #dee2e6 3px);
background-size: 100% 4px;
background-position: center;
transition: background-color 0.15s;
}
.panel-resize-handle:hover,
.panel-resize-handle.active {
background-color: #e9ecef;
}
.loading {
display: flex;
align-items: center;
justify-content: center;
padding: 1rem 0;
color: #adb5bd;
font-size: 0.85rem;
}
/* ── Custom tooltip ──────────────────────────── */
.custom-tooltip {
position: fixed;
pointer-events: none;
background: rgba(0, 0, 0, 0.85);
color: #fff;
padding: 8px 12px 12px;
border-radius: 4px;
font-size: 11px;
line-height: 1.5;
z-index: 9999;
display: none;
white-space: nowrap;
}
.custom-tooltip.scrollable {
pointer-events: auto;
overflow-y: auto;
white-space: normal;
min-width: 200px;
max-width: 400px;
}
/* ── Add panel button ────────────────────────── */
.add-panel-row {
display: flex;
justify-content: center;
padding: 20px;
}
/* ── Init loading ────────────────────────────── */
#init-loading {
display: flex;
align-items: center;
justify-content: center;
height: 300px;
color: #6c757d;
font-size: 1rem;
}
</style>
</head>
<body>
<div class="page-header">
<h1>Eval Suite Visualization</h1>
</div>
<div id="init-loading">Initializing DuckDB...</div>
<div id="panels-container"></div>
<div class="custom-tooltip" id="custom-tooltip"></div>
<div class="add-panel-row" id="add-panel-row" style="display:none">
<button class="btn btn-primary" id="btn-add-panel">+ Add Panel</button>
</div>
<script type="module">
import * as duckdb from 'https://cdn.jsdelivr.net/npm/@duckdb/duckdb-wasm@1.29.0/+esm';
import jsyaml from 'https://cdn.jsdelivr.net/npm/js-yaml@4.1.0/+esm';
// ── Globals ─────────────────────────────────────────────────
// DuckDB-WASM database handle; assigned by initDuckDB().
let db = null;
// Connection opened on `db`; every SQL query goes through it.
let conn = null;
// Next panel id; addPanel() reads it and then increments it.
let panelCounter = 0;
// Live panels keyed by panel id (entries are added by addPanel, removed by Panel.remove).
const panels = new Map();
// Fallback palette for models without a config color
const COLOR_PALETTE = [
'#4361ee', '#e63946', '#2a9d8f', '#e9c46a', '#f4a261',
'#264653', '#7209b7', '#06d6a0', '#ef476f', '#ff6b6b',
'#48bfe3', '#d4a017', '#b5838d', '#588157', '#9d4edd',
'#f77f00', '#3a86a7', '#8338ec', '#ff006e', '#fb5607',
];
// Remote parquet file that loadParquet() downloads and exposes as the `scores` view.
const PARQUET_URL = 'https://huggingface.co/datasets/ellamind/eval-scores/resolve/main/scores.parquet';
// Shared model info (loaded once)
let ALL_MODELS = []; // [{model, model_display_name, is_checkpoint}]
// Color per model, keyed by model_display_name; filled in by loadModels().
let MODEL_COLORS = {};
let CONFIG = {}; // parsed config.yaml
// ── DuckDB init ─────────────────────────────────────────────
/**
 * Boots DuckDB-WASM from the jsDelivr bundles and opens a connection.
 *
 * Side effects: assigns the module-level `db` and `conn`.
 * The worker is started from a temporary blob URL that only wraps an
 * `importScripts(...)` call; that URL is always revoked, even when
 * instantiation fails, so a failed start does not leak the object URL.
 *
 * @returns {Promise<void>}
 */
async function initDuckDB() {
  const JSDELIVR_BUNDLES = duckdb.getJsDelivrBundles();
  const bundle = await duckdb.selectBundle(JSDELIVR_BUNDLES);
  const worker_url = URL.createObjectURL(
    new Blob([`importScripts("${bundle.mainWorker}");`], { type: 'text/javascript' })
  );
  try {
    const worker = new Worker(worker_url);
    const logger = new duckdb.ConsoleLogger();
    db = new duckdb.AsyncDuckDB(logger, worker);
    await db.instantiate(bundle.mainModule, bundle.pthreadWorker);
  } finally {
    // The blob URL is only needed to start the worker; release it on every path.
    URL.revokeObjectURL(worker_url);
  }
  conn = await db.connect();
}
/**
 * Downloads the scores parquet file, registers it with DuckDB under the
 * name `scores.parquet`, and creates the `scores` view over it.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the download does not return a successful HTTP status.
 */
async function loadParquet() {
  const response = await fetch(PARQUET_URL);
  // Without this check an HTTP error page would be registered as a parquet
  // file and only fail later with an unrelated parse error.
  if (!response.ok) {
    throw new Error(
      `Failed to download ${PARQUET_URL}: HTTP ${response.status} ${response.statusText}`
    );
  }
  const buffer = new Uint8Array(await response.arrayBuffer());
  await db.registerFileBuffer('scores.parquet', buffer);
  await conn.query(`CREATE VIEW scores AS SELECT * FROM 'scores.parquet'`);
}
// ── SQL helpers ─────────────────────────────────────────────
/**
 * Runs a SQL statement on the shared connection and returns plain objects.
 *
 * @param {string} sql - Statement to execute.
 * @returns {Promise<object[]>} One `toJSON()` object per result row.
 */
async function query(sql) {
  const table = await conn.query(sql);
  const records = [];
  for (const row of table.toArray()) {
    records.push(row.toJSON());
  }
  return records;
}
/**
 * Escapes a string for use inside a single-quoted SQL literal by doubling
 * every single quote.
 *
 * @param {string} s - Raw text.
 * @returns {string} Text with each `'` replaced by `''`.
 */
function esc(s) {
  const pieces = s.split("'");
  return pieces.join("''");
}
/**
 * Builds the comma-separated body of a SQL `IN (...)` list: each value is
 * escaped with `esc` and wrapped in single quotes.
 *
 * @param {string[]} vals - Values to quote.
 * @returns {string} e.g. `'a', 'b'`; the empty string for an empty list.
 */
function sqlIn(vals) {
  const quoted = [];
  for (const v of vals) {
    quoted.push("'" + esc(v) + "'");
  }
  return quoted.join(', ');
}
// ── Shared helpers ──────────────────────────────────────────
/**
 * Replaces the options of a select element.
 *
 * @param {HTMLSelectElement} el - Select to refill; its content is cleared first.
 * @param {Array<string|{value: string, label: string}>} options - Plain
 *   strings (used as both value and label) or `{value, label}` objects.
 * @param {string} [selected] - Value to pre-select; applied only when it is
 *   truthy and matches one of the option values.
 */
function populateSelect(el, options, selected) {
  const valueOf = (opt) => (typeof opt === 'object' ? opt.value : opt);

  el.innerHTML = '';
  for (const opt of options) {
    const node = document.createElement('option');
    node.value = valueOf(opt);
    node.textContent = typeof opt === 'object' ? opt.label : opt;
    el.appendChild(node);
  }

  if (!selected) return;
  const isKnown = options.some((opt) => valueOf(opt) === selected);
  if (isKnown) {
    el.value = selected;
  }
}
/**
 * Formats a token count with a T/B/M/K suffix for axis ticks and tooltips.
 *
 * Trillions always show one decimal ("1.0T"). Billions, millions and
 * thousands show one decimal only when it is non-zero, so whole values stay
 * compact ("2B") while in-between values stay distinguishable ("1.5B").
 * Rounding those to whole units made different ticks share one label.
 *
 * @param {number} value - Token count.
 * @returns {string} Compact label; values below 1000 are returned via `toString()`.
 */
function formatTokens(value) {
  if (value >= 1e12) return (value / 1e12).toFixed(1) + 'T';

  const units = [
    [1e9, 'B'],
    [1e6, 'M'],
    [1e3, 'K'],
  ];
  for (const [scale, suffix] of units) {
    if (value >= scale) {
      const text = (value / scale).toFixed(1);
      // Drop a trailing ".0" so whole values read "2B" rather than "2.0B".
      return (text.endsWith('.0') ? text.slice(0, -2) : text) + suffix;
    }
  }
  return value.toString();
}
/**
 * Computes integer tick positions covering [min, max] with a "nice" step.
 *
 * @param {number} min - Lower bound of the data range.
 * @param {number} max - Upper bound of the data range.
 * @param {number} [maxTicks=8] - Target number of intervals used to pick the step.
 * @returns {number[]} Ascending tick values without duplicates. Returns
 *   `[min]` unchanged when `min === max`; otherwise every tick is rounded to
 *   an integer.
 */
function niceTicks(min, max, maxTicks = 8) {
  if (min === max) return [min];
  const range = max - min;
  // Find a "nice" step size: 1, 2, 5 × 10^n
  const rawStep = range / maxTicks;
  const mag = Math.pow(10, Math.floor(Math.log10(rawStep)));
  const normalized = rawStep / mag;
  let step;
  if (normalized <= 1.5) step = 1 * mag;
  else if (normalized <= 3.5) step = 2 * mag;
  else if (normalized <= 7.5) step = 5 * mag;
  else step = 10 * mag;
  const start = Math.ceil(min / step) * step;
  const ticks = [];
  // Derive each tick from its index rather than repeatedly adding `step`,
  // so floating-point error does not accumulate along the axis.
  for (let i = 0; start + i * step <= max; i += 1) {
    ticks.push(Math.round(start + i * step));
  }
  // Always include min/max endpoints if not already close
  if (ticks.length === 0 || ticks[0] - min > step * 0.3) ticks.unshift(Math.round(min));
  if (max - ticks[ticks.length - 1] > step * 0.3) ticks.push(Math.round(max));
  // Rounding can collapse neighbours when the step is below 1; a Set keeps
  // the first occurrence of each value and preserves the ascending order.
  return [...new Set(ticks)];
}
/**
 * Trailing moving average.
 *
 * Each output element is the mean of the current value and up to `w - 1`
 * values before it; the first elements use a shorter window.
 *
 * @param {number[]} values - Input series.
 * @param {number} w - Window size; `w <= 1` returns `values` itself.
 * @returns {number[]} Smoothed series of the same length.
 */
function movingAverage(values, w) {
  if (w <= 1) return values;

  const smoothed = [];
  for (let end = 0; end < values.length; end++) {
    const count = Math.min(end + 1, w);
    const windowValues = values.slice(end - count + 1, end + 1);
    const total = windowValues.reduce((acc, v) => acc + v, 0);
    smoothed.push(total / count);
  }
  return smoothed;
}
/**
 * Best-effort load of `config.yaml` into the module-level `CONFIG`.
 *
 * A non-OK response leaves `CONFIG` untouched; a fetch or parse failure is
 * logged as a warning and also leaves `CONFIG` untouched. An empty document
 * yields `{}`.
 *
 * @returns {Promise<void>}
 */
async function loadConfig() {
  try {
    const resp = await fetch('config.yaml');
    if (!resp.ok) return;
    const yamlText = await resp.text();
    const parsed = jsyaml.load(yamlText);
    CONFIG = parsed || {};
  } catch (e) {
    console.warn('Could not load config.yaml, using defaults:', e);
  }
}
/**
 * Loads the model list into `ALL_MODELS` and assigns every model a color
 * in `MODEL_COLORS` (keyed by `model_display_name`).
 *
 * The query keeps checkpoint rows plus non-checkpoint rows of models that
 * have no checkpoint row at all, ordered checkpoints first, then by display
 * name. Colors come from `CONFIG.model_colors` when present; every other
 * model takes the next entry of `COLOR_PALETTE`, wrapping around.
 *
 * @returns {Promise<void>}
 */
async function loadModels() {
  ALL_MODELS = await query(`
WITH raw AS (
SELECT DISTINCT model, model_display_name, is_checkpoint
FROM scores
),
ckpt_models AS (
SELECT model FROM raw WHERE is_checkpoint = true
)
SELECT r.model, r.model_display_name, r.is_checkpoint
FROM raw r
WHERE r.is_checkpoint = true
OR r.model NOT IN (SELECT model FROM ckpt_models)
ORDER BY r.is_checkpoint DESC, r.model_display_name
`);
  // Assign colors: config overrides first, then fallback palette
  const overrides = CONFIG.model_colors || {};
  MODEL_COLORS = {};
  let nextFallback = 0;
  for (const { model_display_name: displayName } of ALL_MODELS) {
    const override = overrides[displayName];
    if (override) {
      MODEL_COLORS[displayName] = override;
      continue;
    }
    // Only models without an override consume a palette slot.
    MODEL_COLORS[displayName] = COLOR_PALETTE[nextFallback % COLOR_PALETTE.length];
    nextFallback += 1;
  }
}
// ── Panel class ─────────────────────────────────────────────
/**
 * One chart panel: a toolbar, a cascade of selectors
 * (suite -> group -> task -> metric), model checkboxes, and a Plotly chart
 * with a resize handle.
 *
 * The panel appends itself to `#panels-container` on construction and uses
 * the module-level `query`, `esc`, `sqlIn`, `populateSelect`, `niceTicks`,
 * `formatTokens`, `movingAverage`, `ALL_MODELS`, `MODEL_COLORS`, `panels`
 * and the global `Plotly`. All panels share the single `#custom-tooltip`
 * element; `tooltip._panelId` records which panel's subtask popup is open.
 */
class Panel {
  /**
   * @param {number} id - Unique panel id; used to namespace element ids.
   */
  constructor(id) {
    this.id = id;
    this.el = {};
    this.collapsed = false;
    // Bumped on every renderChart() call so older async renders can tell
    // they have been superseded.
    this._renderSeq = 0;
    // True while this.el.chart holds a live Plotly plot.
    this._hasPlot = false;
    this.build();
  }

  /**
   * Escapes text for interpolation into HTML markup.
   * @param {*} value - Converted with `String()` before escaping.
   * @returns {string}
   */
  static escapeHtml(value) {
    const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    return String(value).replace(/[&<>"']/g, (ch) => entities[ch]);
  }

  /** Creates the panel DOM, caches element references and wires events. */
  build() {
    const container = document.getElementById('panels-container');
    const panel = document.createElement('div');
    panel.className = 'panel';
    panel.id = `panel-${this.id}`;
    panel.innerHTML = `
<div class="panel-toolbar">
<button class="btn btn-sm" id="ptoggle-${this.id}">Collapse</button>
<button class="btn btn-sm" id="pexport-png-${this.id}">PNG</button>
<button class="btn btn-sm" id="pexport-svg-${this.id}">SVG</button>
<button class="btn btn-sm btn-danger" id="premove-${this.id}">Remove</button>
</div>
<div class="panel-controls" id="pcontrols-${this.id}">
<div class="controls-row">
<div class="control-group">
<label>Eval Suite</label>
<select id="psuite-${this.id}"></select>
</div>
<div class="control-group">
<label>Group</label>
<select id="pgroup-${this.id}"></select>
</div>
<div class="control-group">
<label>Task</label>
<select id="ptask-${this.id}"></select>
</div>
<div class="control-group">
<label>Metric</label>
<select id="pmetric-${this.id}"></select>
</div>
<div class="control-group">
<label>Smoothing</label>
<select id="psmooth-${this.id}">
<option value="1" selected>None</option>
<option value="2">2</option>
<option value="3">3</option>
<option value="4">4</option>
<option value="5">5</option>
</select>
</div>
<div class="control-group">
<label>Chart Type</label>
<select id="pchart-type-${this.id}">
<option value="auto" selected>Auto</option>
<option value="line">Line</option>
<option value="bar">Bar</option>
</select>
</div>
</div>
<div class="models-section">
<div class="models-header">
<span>Models</span>
<button class="btn btn-sm" id="pmodels-all-${this.id}">All</button>
<button class="btn btn-sm" id="pmodels-none-${this.id}">None</button>
<button class="btn btn-sm" id="pmodels-ckpt-${this.id}">Checkpoints</button>
<button class="btn btn-sm" id="pmodels-base-${this.id}">Baselines</button>
</div>
<div class="checkbox-grid" id="pmodels-${this.id}"></div>
</div>
</div>
<div class="panel-chart-wrapper">
<div class="title-hover-zone" id="ptitle-hover-${this.id}" style="display:none"></div>
<div class="panel-chart" id="pchart-${this.id}"></div>
</div>
<div class="panel-resize-handle" id="presize-${this.id}"></div>
`;
    container.appendChild(panel);
    // Cache refs
    this.el.panel = panel;
    this.el.controls = panel.querySelector(`#pcontrols-${this.id}`);
    this.el.suite = panel.querySelector(`#psuite-${this.id}`);
    this.el.group = panel.querySelector(`#pgroup-${this.id}`);
    this.el.task = panel.querySelector(`#ptask-${this.id}`);
    this.el.metric = panel.querySelector(`#pmetric-${this.id}`);
    this.el.smooth = panel.querySelector(`#psmooth-${this.id}`);
    this.el.chartType = panel.querySelector(`#pchart-type-${this.id}`);
    this.el.models = panel.querySelector(`#pmodels-${this.id}`);
    this.el.chart = panel.querySelector(`#pchart-${this.id}`);
    this.el.titleHover = panel.querySelector(`#ptitle-hover-${this.id}`);
    this.el.resize = panel.querySelector(`#presize-${this.id}`);
    this.chartHeight = null; // null = use default
    // Events
    panel.querySelector(`#ptoggle-${this.id}`).addEventListener('click', () => this.toggleControls());
    panel.querySelector(`#premove-${this.id}`).addEventListener('click', () => this.remove());
    panel.querySelector(`#pexport-png-${this.id}`).addEventListener('click', () => this.export('png'));
    panel.querySelector(`#pexport-svg-${this.id}`).addEventListener('click', () => this.export('svg'));
    this.el.suite.addEventListener('change', () => this.onSuiteChange());
    this.el.group.addEventListener('change', () => this.onGroupChange());
    this.el.task.addEventListener('change', () => this.onTaskChange());
    this.el.metric.addEventListener('change', () => this.renderChart());
    this.el.smooth.addEventListener('change', () => this.renderChart());
    this.el.chartType.addEventListener('change', () => this.renderChart());
    panel.querySelector(`#pmodels-all-${this.id}`).addEventListener('click', () => this.setModels(true));
    panel.querySelector(`#pmodels-none-${this.id}`).addEventListener('click', () => this.setModels(false));
    panel.querySelector(`#pmodels-ckpt-${this.id}`).addEventListener('click', () => this.setModelsByType(true));
    panel.querySelector(`#pmodels-base-${this.id}`).addEventListener('click', () => this.setModelsByType(false));
    // Resize handle drag
    this.el.resize.addEventListener('mousedown', (e) => this.startResize(e));
    this.buildModelCheckboxes();
  }

  /** Shows or hides the controls section and updates the toggle label. */
  toggleControls() {
    this.collapsed = !this.collapsed;
    this.el.controls.classList.toggle('collapsed', this.collapsed);
    this.el.panel.querySelector(`#ptoggle-${this.id}`).textContent =
      this.collapsed ? 'Expand' : 'Collapse';
  }

  /**
   * Removes the panel from the page and from the `panels` registry.
   * Also detaches the document/window listeners installed for the title
   * popup and releases the Plotly plot, which would otherwise outlive the
   * removed DOM node.
   */
  remove() {
    // Invalidate any render still awaiting a query so it cannot draw later.
    this._renderSeq = (this._renderSeq ?? 0) + 1;
    this.cleanupTooltip();
    Plotly.purge(this.el.chart);
    this._hasPlot = false;
    this.el.panel.remove();
    panels.delete(this.id);
  }

  /**
   * Builds one checkbox per entry of `ALL_MODELS`, all checked, with a
   * separator where `is_checkpoint` changes. Non-checkpoint names are
   * rendered in italics.
   */
  buildModelCheckboxes() {
    const container = this.el.models;
    container.innerHTML = '';
    let lastCkpt = null;
    for (const m of ALL_MODELS) {
      if (lastCkpt !== null && lastCkpt !== m.is_checkpoint) {
        const sep = document.createElement('div');
        sep.className = 'model-separator';
        container.appendChild(sep);
      }
      lastCkpt = m.is_checkpoint;
      const lbl = document.createElement('label');
      lbl.className = 'checkbox-item';
      const cb = document.createElement('input');
      cb.type = 'checkbox';
      cb.value = m.model_display_name;
      cb.checked = true;
      cb.dataset.isCheckpoint = m.is_checkpoint;
      cb.addEventListener('change', () => this.renderChart());
      const dot = document.createElement('span');
      dot.style.cssText = `display:inline-block;width:9px;height:9px;border-radius:50%;background:${MODEL_COLORS[m.model_display_name]}`;
      const name = document.createElement('span');
      name.className = 'model-name';
      name.dataset.modelName = m.model_display_name;
      name.textContent = ' ' + m.model_display_name;
      if (!m.is_checkpoint) {
        name.style.fontStyle = 'italic';
      }
      name.addEventListener('mouseenter', () => {
        const tip = name.dataset.missingTip;
        if (!tip) return;
        const tooltip = document.getElementById('custom-tooltip');
        if (tooltip.classList.contains('scrollable')) return;
        // The tip embeds a model name that comes from the data set, so it is
        // inserted as text rather than parsed as HTML.
        tooltip.textContent = tip;
        tooltip.style.display = 'block';
        tooltip._modelTip = true;
        const rect = name.getBoundingClientRect();
        tooltip.style.left = (rect.left) + 'px';
        tooltip.style.top = (rect.bottom + 4) + 'px';
      });
      name.addEventListener('mouseleave', () => {
        const tooltip = document.getElementById('custom-tooltip');
        if (tooltip._modelTip) {
          tooltip.style.display = 'none';
          tooltip._modelTip = false;
        }
      });
      lbl.append(cb, dot, name);
      container.appendChild(lbl);
    }
  }

  /** Checks or unchecks every model, then re-renders. */
  setModels(checked) {
    this.el.models.querySelectorAll('input').forEach(cb => cb.checked = checked);
    this.renderChart();
  }

  /** Checks exactly the models whose checkpoint flag equals `isCheckpoint`, then re-renders. */
  setModelsByType(isCheckpoint) {
    this.el.models.querySelectorAll('input').forEach(cb => {
      // dataset values are strings, hence the String() comparison.
      cb.checked = (cb.dataset.isCheckpoint === String(isCheckpoint));
    });
    this.renderChart();
  }

  /** @returns {string[]} Display names of the checked models. */
  getSelectedModels() {
    return Array.from(this.el.models.querySelectorAll('input:checked')).map(cb => cb.value);
  }

  /** @returns {number} Smoothing window; 1 when the control holds no valid number. */
  getSmoothing() {
    return parseInt(this.el.smooth.value, 10) || 1;
  }

  /** @returns {string} 'auto', 'line' or 'bar'. */
  getChartType() {
    return this.el.chartType.value;
  }

  /** @returns {string} Selected task, or the selected group when the aggregate entry is chosen. */
  getSelectedTask() {
    const v = this.el.task.value;
    return v === '__group__' ? this.el.group.value : v;
  }

  // ── Populate cascades ──────────────────────────────────────

  /**
   * Fills the suite selector and cascades down to group/task/metric.
   * @param {object} [defaults] - Optional `{suite, group, task, metric, chartType}` preselection.
   */
  async populateSuites(defaults) {
    const rows = await query(`
SELECT DISTINCT task AS value, task_display_name AS label
FROM scores
WHERE task_type = 'eval_suite' AND task != 'test_fix'
ORDER BY task
`);
    populateSelect(this.el.suite, rows, defaults?.suite);
    await this.onSuiteChange(defaults);
  }

  /** Refills the group selector for the current suite, then cascades. */
  async onSuiteChange(defaults) {
    const suite = this.el.suite.value;
    if (!suite) return;
    const rows = await query(`
SELECT DISTINCT task AS value, task_display_name AS label
FROM scores
WHERE parent_task = '${esc(suite)}'
AND task_type = 'task_group'
ORDER BY task
`);
    const options = [
      { value: suite, label: `${suite} (aggregate)` },
      ...rows,
    ];
    populateSelect(this.el.group, options, defaults?.group);
    await this.onGroupChange(defaults);
  }

  /** Refills the task selector for the current group, then cascades. */
  async onGroupChange(defaults) {
    const group = this.el.group.value;
    if (!group) return;
    const rows = await query(`
SELECT DISTINCT task AS value, task_display_name AS label
FROM scores
WHERE parent_task = '${esc(group)}'
AND task_type = 'benchmark'
ORDER BY task
`);
    if (rows.length === 0) {
      populateSelect(this.el.task, [{ value: '__group__', label: '(aggregate)' }]);
    } else {
      populateSelect(this.el.task, [
        { value: '__group__', label: `(aggregate: ${group})` },
        ...rows,
      ]);
    }
    if (defaults?.task) this.el.task.value = defaults.task;
    await this.onTaskChange(defaults);
  }

  /** Refills the metric selector for the current task (keeping the previous metric when still valid) and renders. */
  async onTaskChange(defaults) {
    const task = this.getSelectedTask();
    if (!task) return;
    const rows = await query(`
SELECT DISTINCT metric FROM scores WHERE task = '${esc(task)}' ORDER BY metric
`);
    const prev = defaults?.metric || this.el.metric.value;
    populateSelect(this.el.metric, rows.map(r => r.metric), prev);
    if (defaults?.chartType) this.el.chartType.value = defaults.chartType;
    await this.renderChart();
  }

  /**
   * Marks model names that have no score rows for the given task/metric
   * (class `missing` plus a tooltip text); clears all marks when either
   * argument is empty.
   */
  async updateMissingModels(task, metric) {
    const nameEls = this.el.models.querySelectorAll('.model-name');
    if (!task || !metric) {
      nameEls.forEach(el => {
        el.classList.remove('missing');
        delete el.dataset.missingTip;
      });
      return;
    }
    const available = await query(`
SELECT DISTINCT model_display_name FROM scores
WHERE task = '${esc(task)}' AND metric = '${esc(metric)}'
AND tokens_trained IS NOT NULL
`);
    const availableSet = new Set(available.map(r => r.model_display_name));
    nameEls.forEach(el => {
      const modelName = el.dataset.modelName;
      if (!availableSet.has(modelName)) {
        el.classList.add('missing');
        el.dataset.missingTip = `No scores for "${modelName}" on this task / metric`;
      } else {
        el.classList.remove('missing');
        delete el.dataset.missingTip;
      }
    });
  }

  // ── Chart rendering ────────────────────────────────────────

  /**
   * Releases the current plot (if any) and replaces the chart area with
   * the given markup. Only trusted, static markup must be passed.
   * @param {string} html - Placeholder markup, or '' for an empty chart.
   */
  _clearChart(html) {
    this.cleanupTooltip();
    // Purge first so Plotly does not keep state for DOM that is about to go.
    Plotly.purge(this.el.chart);
    this._hasPlot = false;
    this.el.chart.innerHTML = html;
  }

  /**
   * Prepares the chart element for drawing: detaches handlers of the
   * previous chart and removes a leftover placeholder.
   */
  _beginDraw() {
    this.cleanupTooltip();
    if (!this._hasPlot) this.el.chart.innerHTML = '';
  }

  /**
   * Queries the scores for the current selection and draws a bar or line
   * chart. Several awaits happen along the way; when a newer call has
   * started in the meantime, the older one stops without touching the chart
   * so results cannot arrive out of order.
   */
  async renderChart() {
    const seq = (this._renderSeq ?? 0) + 1;
    this._renderSeq = seq;
    const isStale = () => this._renderSeq !== seq;

    const task = this.getSelectedTask();
    const metric = this.el.metric.value;
    const models = this.getSelectedModels();
    await this.updateMissingModels(task, metric);
    if (isStale()) return;
    if (!task || !metric || models.length === 0) {
      this._clearChart('');
      return;
    }
    const rows = await query(`
SELECT model, model_display_name, tokens_trained, score, score_stderr,
is_checkpoint, higher_is_better, step
FROM scores
WHERE task = '${esc(task)}'
AND metric = '${esc(metric)}'
AND model_display_name IN (${sqlIn(models)})
AND tokens_trained IS NOT NULL
ORDER BY model_display_name, tokens_trained
`);
    if (isStale()) return;
    if (rows.length === 0) {
      this._clearChart('<div class="loading">No data for this selection</div>');
      return;
    }
    // Merge final checkpoints (step=null) into matching checkpoint series
    const mergedRows = this.mergeFinalCheckpoints(rows);
    // Determine chart type
    const chartType = this.resolveChartType(mergedRows);
    const higherIsBetter = mergedRows[0]?.higher_is_better;
    // Fetch subtask tree JSON from the data
    let subtaskTree = null;
    try {
      const stRows = await query(`
SELECT subtask_tree FROM scores
WHERE task = '${esc(task)}' AND metric = '${esc(metric)}'
AND subtask_tree IS NOT NULL
LIMIT 1
`);
      if (stRows.length > 0 && stRows[0].subtask_tree) {
        subtaskTree = JSON.parse(stRows[0].subtask_tree);
      }
    } catch (e) {
      // The subtask popup is optional; draw the chart without it.
      console.warn('Could not load subtask tree:', e);
    }
    if (isStale()) return;
    if (chartType === 'bar') {
      this.drawBarChart(mergedRows, task, metric, higherIsBetter, subtaskTree);
    } else {
      this.drawLineChart(mergedRows, task, metric, higherIsBetter, subtaskTree);
    }
  }

  /**
   * Final checkpoints have step=null. If a matching checkpoint series
   * exists (same `model` id), the final row is relabelled to that series.
   *
   * @param {object[]} rows - Score rows.
   * @returns {object[]} `rows` itself when there are no final rows;
   *   otherwise a new array with regular rows first, final rows last.
   */
  mergeFinalCheckpoints(rows) {
    const regular = [];
    const finals = [];
    for (const r of rows) {
      if (r.step === null || r.step === undefined) {
        finals.push(r);
      } else {
        regular.push(r);
      }
    }
    if (finals.length === 0) return rows;
    // Map model id -> series model_display_name for checkpoint series
    const modelToSeries = new Map();
    for (const r of regular) {
      if (r.is_checkpoint) {
        modelToSeries.set(r.model, r.model_display_name);
      }
    }
    const result = [...regular];
    for (const fc of finals) {
      const seriesName = modelToSeries.get(fc.model);
      if (seriesName) {
        // Append to matching checkpoint series
        result.push({ ...fc, model_display_name: seriesName, is_checkpoint: true });
      } else {
        // No matching series, keep as-is
        result.push(fc);
      }
    }
    return result;
  }

  /**
   * @param {object[]} rows - Score rows.
   * @returns {string} The explicit chart-type choice, or for 'auto': 'bar'
   *   when every model has at most one distinct `tokens_trained`, else 'line'.
   */
  resolveChartType(rows) {
    const pref = this.getChartType();
    if (pref !== 'auto') return pref;
    const byModel = {};
    for (const r of rows) {
      if (!byModel[r.model_display_name]) byModel[r.model_display_name] = new Set();
      byModel[r.model_display_name].add(Number(r.tokens_trained));
    }
    const allSingle = Object.values(byModel).every(s => s.size <= 1);
    return allSingle ? 'bar' : 'line';
  }

  /** @returns {string} "task — metric" plus an up/down arrow when `higherIsBetter` is a boolean. */
  formatChartTitle(task, metric, higherIsBetter) {
    const arrow = higherIsBetter === true ? ' \u2191' : higherIsBetter === false ? ' \u2193' : '';
    return `${task} \u2014 ${metric}${arrow}`;
  }

  /**
   * Renders a parent -> children map as nested, indented `<div>` rows.
   *
   * Keys are HTML-escaped because they come from the data set. A key that
   * is already being expanded further up the branch is printed but not
   * expanded again, so a cyclic map cannot recurse forever.
   *
   * @param {Object<string, string[]>} map - Children per key.
   * @param {string[]} keys - Keys to render at this level.
   * @param {number} [depth=0] - Nesting level (16px indent per level).
   * @param {Set<string>} [ancestors] - Keys currently being expanded; internal.
   * @returns {string} HTML markup, '' when there is nothing to render.
   */
  renderSubtaskTree(map, keys, depth = 0, ancestors = new Set()) {
    if (!Array.isArray(keys) || keys.length === 0) return '';
    const indent = depth * 16;
    return keys.map(key => {
      let html = `<div style="padding-left:${indent}px">${Panel.escapeHtml(key)}</div>`;
      const children = map[key];
      if (children && !ancestors.has(key)) {
        ancestors.add(key);
        html += this.renderSubtaskTree(map, children, depth + 1, ancestors);
        ancestors.delete(key);
      }
      return html;
    }).join('');
  }

  /**
   * Shows an info icon next to the chart title and wires a click popup
   * listing the subtask tree; hides the zone when there is no tree.
   * Handlers are stored on the instance so cleanupTooltip() can detach them.
   */
  setupTitleTooltip(subtaskTree) {
    const hoverZone = this.el.titleHover;
    hoverZone.innerHTML = '';
    if (!subtaskTree || typeof subtaskTree !== 'object' || Object.keys(subtaskTree).length === 0) {
      hoverZone.style.display = 'none';
      return;
    }
    hoverZone.style.display = '';
    // Position icon right before the title text
    const icon = document.createElement('span');
    icon.className = 'title-info-icon';
    icon.textContent = 'i';
    hoverZone.appendChild(icon);
    const titleEl = this.el.chart.querySelector('.gtitle');
    if (titleEl) {
      const wrapperRect = this.el.chart.closest('.panel-chart-wrapper').getBoundingClientRect();
      const titleRect = titleEl.getBoundingClientRect();
      icon.style.left = (titleRect.right - wrapperRect.left - 50 + 6) + 'px'; // 50 = hover zone left offset, 6 = gap
    } else {
      icon.style.right = '0px';
    }
    const tooltip = document.getElementById('custom-tooltip');
    // Find true roots: keys that never appear as a child value
    const allChildren = new Set(Object.values(subtaskTree).flat());
    const rootKeys = Object.keys(subtaskTree).filter(k => !allChildren.has(k));
    const html = this.renderSubtaskTree(subtaskTree, rootKeys);
    const positionTooltip = () => {
      const titleEl = this.el.chart.querySelector('.gtitle');
      const chartRect = this.el.chart.getBoundingClientRect();
      const tw = tooltip.offsetWidth;
      let tipTop;
      if (titleEl) {
        const titleRect = titleEl.getBoundingClientRect();
        const titleCenter = (titleRect.left + titleRect.right) / 2;
        tooltip.style.left = (titleCenter - tw / 2) + 'px';
        tipTop = titleRect.bottom + 4;
      } else {
        tooltip.style.left = (chartRect.left + chartRect.width / 2 - tw / 2) + 'px';
        tipTop = chartRect.top + 40;
      }
      tooltip.style.top = tipTop + 'px';
      tooltip.style.maxHeight = Math.max(0, chartRect.bottom - tipTop) + 'px';
    };
    this._titleClick = () => {
      // Toggle: if already visible for this panel, hide it
      if (tooltip.style.display === 'block' && tooltip._panelId === this.id) {
        tooltip.style.display = 'none';
        tooltip.classList.remove('scrollable');
        tooltip._panelId = null;
        window.removeEventListener('scroll', this._titleScroll, true);
        return;
      }
      tooltip.innerHTML = html;
      tooltip.classList.add('scrollable');
      tooltip.style.display = 'block';
      tooltip._panelId = this.id;
      positionTooltip();
      window.addEventListener('scroll', this._titleScroll, true);
    };
    this._titleScroll = () => {
      if (tooltip.style.display === 'block' && tooltip._panelId === this.id) {
        positionTooltip();
      }
    };
    this._titleOutsideClick = (e) => {
      if (tooltip._panelId !== this.id) return;
      if (tooltip.contains(e.target) || hoverZone.contains(e.target)) return;
      tooltip.style.display = 'none';
      tooltip.classList.remove('scrollable');
      tooltip._panelId = null;
      window.removeEventListener('scroll', this._titleScroll, true);
    };
    hoverZone.addEventListener('click', this._titleClick);
    document.addEventListener('mousedown', this._titleOutsideClick);
  }

  /**
   * Starts a drag on the resize handle. The chosen height (minimum 200px)
   * is remembered in `chartHeight`; the plot is resized live only when a
   * plot exists, since Plotly.relayout needs an initialized graph div.
   */
  startResize(e) {
    e.preventDefault();
    const startY = e.clientY;
    const startH = this.el.chart.offsetHeight;
    this.el.resize.classList.add('active');
    const onMove = (ev) => {
      const delta = ev.clientY - startY;
      const newH = Math.max(200, startH + delta);
      this.chartHeight = newH;
      if (this._hasPlot) {
        Plotly.relayout(this.el.chart, { height: newH });
      }
    };
    const onUp = () => {
      this.el.resize.classList.remove('active');
      document.removeEventListener('mousemove', onMove);
      document.removeEventListener('mouseup', onUp);
    };
    document.addEventListener('mousemove', onMove);
    document.addEventListener('mouseup', onUp);
  }

  /** @returns {number} The user-chosen height, or `fallback` when none was set. */
  getChartHeight(fallback) {
    return this.chartHeight || fallback;
  }

  /**
   * Detaches every tooltip-related handler this panel installed and resets
   * its share of the global tooltip. The shared tooltip is hidden unless
   * another panel's subtask popup currently owns it.
   */
  cleanupTooltip() {
    const tooltip = document.getElementById('custom-tooltip');
    const ownedByOtherPanel = tooltip._panelId != null && tooltip._panelId !== this.id;
    if (!ownedByOtherPanel) {
      tooltip.style.display = 'none';
    }
    const chart = this.el.chart;
    chart.removeAllListeners?.('plotly_hover');
    chart.removeAllListeners?.('plotly_unhover');
    if (this._tooltipMouseMove) {
      chart.removeEventListener('mousemove', this._tooltipMouseMove);
      this._tooltipMouseMove = null;
    }
    if (this._tooltipMouseLeave) {
      chart.removeEventListener('mouseleave', this._tooltipMouseLeave);
      this._tooltipMouseLeave = null;
    }
    // Clean up title click popup
    if (this._titleClick) {
      const hz = this.el.titleHover;
      hz.removeEventListener('click', this._titleClick);
      hz.style.display = 'none';
      this._titleClick = null;
    }
    if (this._titleScroll) {
      window.removeEventListener('scroll', this._titleScroll, true);
      this._titleScroll = null;
    }
    if (this._titleOutsideClick) {
      document.removeEventListener('mousedown', this._titleOutsideClick);
      this._titleOutsideClick = null;
    }
    if (tooltip._panelId === this.id) {
      tooltip.classList.remove('scrollable');
      tooltip._panelId = null;
    }
  }

  /**
   * Draws score over tokens trained. Checkpoint series with more than one
   * point become smoothed solid lines; every other model is drawn as a
   * dashed horizontal line at its first score across the checkpoint x-range.
   */
  drawLineChart(rows, task, metric, higherIsBetter, subtasks) {
    this._beginDraw();
    const w = this.getSmoothing();
    // Group by model
    const byModel = {};
    for (const r of rows) {
      const name = r.model_display_name;
      if (!byModel[name]) byModel[name] = { points: [], isCheckpoint: r.is_checkpoint };
      byModel[name].points.push({ x: Number(r.tokens_trained), y: r.score });
    }
    for (const d of Object.values(byModel)) d.points.sort((a, b) => a.x - b.x);
    // X range for baselines
    let xMin = Infinity;
    let xMax = -Infinity;
    for (const d of Object.values(byModel)) {
      if (d.isCheckpoint) {
        for (const p of d.points) {
          xMin = Math.min(xMin, p.x);
          xMax = Math.max(xMax, p.x);
        }
      }
    }
    // No checkpoint points at all: fall back to a unit range.
    if (!isFinite(xMin)) { xMin = 0; xMax = 1; }
    const traces = [];
    for (const [name, d] of Object.entries(byModel)) {
      const color = MODEL_COLORS[name] || '#999';
      if (d.isCheckpoint && d.points.length > 1) {
        traces.push({
          x: d.points.map(p => p.x),
          y: movingAverage(d.points.map(p => p.y), w),
          name, mode: 'lines+markers',
          line: { color, width: 2 }, marker: { size: 5 },
        });
      } else {
        const score = d.points[0]?.y;
        if (score != null) {
          traces.push({
            x: [xMin, xMax], y: [score, score],
            name, mode: 'lines',
            line: { color, width: 2, dash: 'dash' },
          });
        }
      }
    }
    // Compute nice tick values from data range
    const tickVals = niceTicks(xMin, xMax);
    Plotly.react(this.el.chart, traces, {
      title: { text: this.formatChartTitle(task, metric, higherIsBetter), font: { size: 14, color: '#1a1a2e' } },
      hoverlabel: { namelength: -1 },
      xaxis: {
        title: { text: 'Tokens Trained', font: { size: 12 } },
        tickfont: { size: 10 }, tickvals: tickVals, ticktext: tickVals.map(formatTokens),
        gridcolor: '#e9ecef', zeroline: false,
      },
      yaxis: {
        title: { text: 'Score', font: { size: 12 } },
        tickfont: { size: 10 }, gridcolor: '#e9ecef', zeroline: false, autorange: true,
      },
      legend: { orientation: 'h', yanchor: 'bottom', y: 1.05, x: 0, font: { size: 11 } },
      margin: { t: 80, r: 20, b: 70, l: 50 },
      plot_bgcolor: '#fff', paper_bgcolor: '#fff',
      font: { family: '-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif' },
      height: this.getChartHeight(600),
    }, { responsive: true });
    this._hasPlot = true;
    this.setupTitleTooltip(subtasks);
  }

  /**
   * Draws one horizontal bar per model using the row with the largest
   * `tokens_trained`, sorted best-first, with a cursor-following tooltip.
   * Scores that are not finite numbers are labelled "n/a" instead of
   * aborting the draw.
   */
  drawBarChart(rows, task, metric, higherIsBetter, subtasks) {
    this._beginDraw();
    const formatScore = (s) => (typeof s === 'number' && Number.isFinite(s) ? s.toFixed(4) : 'n/a');
    // For bar chart, use latest checkpoint per model
    const byModel = {};
    for (const r of rows) {
      const name = r.model_display_name;
      const tokens = Number(r.tokens_trained);
      if (!byModel[name] || tokens > byModel[name].tokens) {
        byModel[name] = { score: r.score, tokens, isCheckpoint: r.is_checkpoint };
      }
    }
    // Sort by score
    const sorted = Object.entries(byModel)
      .sort((a, b) => higherIsBetter !== false ? b[1].score - a[1].score : a[1].score - b[1].score);
    const names = sorted.map(([n]) => n);
    const scores = sorted.map(([, d]) => d.score);
    const colors = sorted.map(([n]) => MODEL_COLORS[n] || '#999');
    const tokens = sorted.map(([, d]) => formatTokens(d.tokens));
    // This text is later assigned to innerHTML (it needs the <br> breaks),
    // so the data-provided model name is escaped here.
    const hovertext = sorted.map(([n, d]) =>
      `${Panel.escapeHtml(n)}<br>Score: ${formatScore(d.score)}<br>Tokens: ${formatTokens(d.tokens)}`
    );
    // Annotations for tokens trained at the start of each bar
    const annotations = names.map((name, i) => ({
      x: 0,
      y: name,
      text: tokens[i],
      hovertext: 'Tokens Trained',
      xanchor: 'left',
      yanchor: 'middle',
      showarrow: false,
      font: { size: 10, color: '#000' },
      xshift: 4,
    }));
    Plotly.react(this.el.chart, [{
      type: 'bar',
      orientation: 'h',
      y: names,
      x: scores,
      marker: { color: colors },
      text: scores.map(formatScore),
      textposition: 'outside',
      textfont: { size: 11 },
      hoverinfo: 'none',
      customdata: hovertext,
    }], {
      title: { text: this.formatChartTitle(task, metric, higherIsBetter), font: { size: 14, color: '#1a1a2e' } },
      hovermode: 'closest',
      annotations,
      xaxis: {
        title: { text: 'Score', font: { size: 12 } },
        tickfont: { size: 10 }, gridcolor: '#e9ecef', zeroline: false,
      },
      yaxis: {
        tickfont: { size: 11 }, automargin: true,
        categoryorder: 'array', categoryarray: names.slice().reverse(),
      },
      margin: { t: 60, r: 80, b: 60, l: 10 },
      plot_bgcolor: '#fff', paper_bgcolor: '#fff',
      font: { family: '-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif' },
      height: this.getChartHeight(Math.max(400, names.length * 40 + 100)),
      showlegend: false,
    }, { responsive: true });
    this._hasPlot = true;
    // Custom cursor-following tooltip
    const tooltip = document.getElementById('custom-tooltip');
    const chart = this.el.chart;
    chart.on('plotly_hover', (data) => {
      if (tooltip.classList.contains('scrollable')) return;
      const pt = data.points[0];
      tooltip.innerHTML = pt.customdata;
      tooltip.style.display = 'block';
    });
    chart.on('plotly_unhover', () => {
      if (tooltip.classList.contains('scrollable')) return;
      tooltip.style.display = 'none';
    });
    this._tooltipMouseMove = (e) => {
      if (tooltip.classList.contains('scrollable')) return;
      if (tooltip.style.display === 'block') {
        tooltip.style.left = (e.clientX + 12) + 'px';
        tooltip.style.top = (e.clientY - 10) + 'px';
      }
    };
    this._tooltipMouseLeave = () => {
      if (tooltip.classList.contains('scrollable')) return;
      tooltip.style.display = 'none';
    };
    chart.addEventListener('mousemove', this._tooltipMouseMove);
    chart.addEventListener('mouseleave', this._tooltipMouseLeave);
    this.setupTitleTooltip(subtasks);
  }

  /**
   * Downloads the current chart as an image named after task and metric.
   * Does nothing when no plot is currently drawn.
   * @param {string} format - Image format passed to Plotly ('png' or 'svg').
   */
  export(format) {
    if (!this._hasPlot) return;
    const task = this.getSelectedTask();
    const metric = this.el.metric.value;
    const filename = `${task}_${metric}`.replace(/[^a-zA-Z0-9_-]/g, '_');
    Plotly.downloadImage(this.el.chart, { format, scale: 3, filename })
      .catch((err) => console.error('Export failed:', err));
  }
}
// ── Panel management ────────────────────────────────────────
/**
 * Creates a panel with the next free id, registers it in `panels`, and
 * fills its selectors (which also triggers the first render).
 *
 * @param {object} [defaults] - Optional preselection forwarded to `populateSuites`.
 * @returns {Promise<Panel>} The new panel once its selectors are populated.
 */
async function addPanel(defaults) {
  const id = panelCounter;
  panelCounter += 1;

  const created = new Panel(id);
  panels.set(id, created);
  await created.populateSuites(defaults);
  return created;
}
// ── Init ────────────────────────────────────────────────────
// Status line shown while the app boots; init() writes progress and error text into it.
const elInitLoading = document.getElementById('init-loading');
// Wrapper row of the "+ Add Panel" button; hidden in the markup until init() reveals it.
const elAddPanelRow = document.getElementById('add-panel-row');
/**
 * Application bootstrap: loads the config, starts DuckDB, loads the data
 * and the model list, then creates the four default panels.
 *
 * Progress is written to the `#init-loading` element. Any failure is
 * logged and shown there. The element is made visible again first, because
 * it is already hidden by the time the default panels are being created.
 * This function never rejects.
 *
 * @returns {Promise<void>}
 */
async function init() {
  try {
    elInitLoading.textContent = 'Loading config...';
    await loadConfig();
    elInitLoading.textContent = 'Initializing DuckDB...';
    await initDuckDB();
    elInitLoading.textContent = 'Loading data from HuggingFace...';
    await loadParquet();
    elInitLoading.textContent = 'Loading models...';
    await loadModels();
    elInitLoading.style.display = 'none';
    elAddPanelRow.style.display = '';
    // Create 2x2 default panels
    await Promise.all([
      addPanel({ suite: 'eng_base_easy', group: 'eng_base_easy_bpb', metric: 'bits_per_byte', chartType: 'bar' }),
      addPanel({ suite: 'deu_base_easy', group: 'deu_base_easy_bpb', metric: 'bits_per_byte', chartType: 'bar' }),
      addPanel({ suite: 'eng_base_easy', group: 'eng_base_easy_rc', metric: 'acc_norm', chartType: 'bar' }),
      addPanel({ suite: 'deu_base_easy', group: 'deu_base_easy_rc', metric: 'acc_norm', chartType: 'bar' }),
    ]);
  } catch (err) {
    // The message may contain arbitrary text (URLs, server replies), so it
    // is entity-escaped before being placed into markup.
    const safeMessage = String(err?.message ?? err)
      .replace(/[&<>"']/g, (ch) => `&#${ch.charCodeAt(0)};`);
    // Undo the inline `display: none` set above so the error is visible.
    elInitLoading.style.display = '';
    elInitLoading.innerHTML = `<span style="color:#e63946">
Error: ${safeMessage}<br>
<small>Check browser console for details.</small>
</span>`;
    console.error('Init failed:', err);
  }
}
// "+ Add Panel" creates a panel with no preselection. addPanel() is async,
// so a failure (e.g. a query error) is reported here instead of surfacing
// as an unhandled promise rejection.
document.getElementById('btn-add-panel').addEventListener('click', () => {
  addPanel().catch((err) => console.error('Could not add panel:', err));
});
// init() handles its own errors and never rejects, so it is not awaited.
init();
</script>
</body>
</html>