Upload index.html with huggingface_hub
Browse files- index.html +102 -7
index.html
CHANGED
|
@@ -148,6 +148,14 @@
|
|
| 148 |
cursor: pointer;
|
| 149 |
accent-color: #4361ee;
|
| 150 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
.model-separator {
|
| 152 |
width: 100%;
|
| 153 |
border-top: 1px solid #eee;
|
|
@@ -572,12 +580,31 @@
|
|
| 572 |
dot.style.cssText = `display:inline-block;width:9px;height:9px;border-radius:50%;background:${MODEL_COLORS[m.model_display_name]}`;
|
| 573 |
|
| 574 |
const name = document.createElement('span');
|
|
|
|
|
|
|
| 575 |
name.textContent = ' ' + m.model_display_name;
|
| 576 |
if (!m.is_checkpoint) {
|
| 577 |
name.style.fontStyle = 'italic';
|
| 578 |
-
name.title = 'Baseline';
|
| 579 |
}
|
| 580 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 581 |
lbl.append(cb, dot, name);
|
| 582 |
container.appendChild(lbl);
|
| 583 |
}
|
|
@@ -680,20 +707,49 @@
|
|
| 680 |
await this.renderChart();
|
| 681 |
}
|
| 682 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 683 |
// ── Chart rendering ────────────────────────────────────────
|
| 684 |
async renderChart() {
|
| 685 |
const task = this.getSelectedTask();
|
| 686 |
const metric = this.el.metric.value;
|
| 687 |
const models = this.getSelectedModels();
|
| 688 |
|
|
|
|
|
|
|
| 689 |
if (!task || !metric || models.length === 0) {
|
| 690 |
this.el.chart.innerHTML = '';
|
| 691 |
return;
|
| 692 |
}
|
| 693 |
|
| 694 |
const rows = await query(`
|
| 695 |
-
SELECT model_display_name, tokens_trained, score, score_stderr,
|
| 696 |
-
is_checkpoint, higher_is_better
|
| 697 |
FROM scores
|
| 698 |
WHERE task = '${esc(task)}'
|
| 699 |
AND metric = '${esc(metric)}'
|
|
@@ -707,9 +763,12 @@
|
|
| 707 |
return;
|
| 708 |
}
|
| 709 |
|
|
|
|
|
|
|
|
|
|
| 710 |
// Determine chart type
|
| 711 |
-
const chartType = this.resolveChartType(
|
| 712 |
-
const higherIsBetter =
|
| 713 |
|
| 714 |
// Fetch subtask tree JSON from the data
|
| 715 |
let subtaskTree = null;
|
|
@@ -728,10 +787,46 @@
|
|
| 728 |
}
|
| 729 |
|
| 730 |
if (chartType === 'bar') {
|
| 731 |
-
this.drawBarChart(
|
| 732 |
} else {
|
| 733 |
-
this.drawLineChart(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 734 |
}
|
|
|
|
| 735 |
}
|
| 736 |
|
| 737 |
resolveChartType(rows) {
|
|
|
|
| 148 |
cursor: pointer;
|
| 149 |
accent-color: #4361ee;
|
| 150 |
}
|
| 151 |
+
/* Model names flagged with the `missing` class are struck through and dimmed; the help cursor signals that hovering shows an explanation. */
.checkbox-item .model-name.missing {
|
| 152 |
+
text-decoration: line-through;
|
| 153 |
+
opacity: 0.5;
|
| 154 |
+
cursor: help;
|
| 155 |
+
}
|
| 156 |
+
/* Raise the opacity of a `missing` model name while it is hovered so the label is easier to read. */
.checkbox-item .model-name.missing:hover {
|
| 157 |
+
opacity: 0.8;
|
| 158 |
+
}
|
| 159 |
.model-separator {
|
| 160 |
width: 100%;
|
| 161 |
border-top: 1px solid #eee;
|
|
|
|
| 580 |
dot.style.cssText = `display:inline-block;width:9px;height:9px;border-radius:50%;background:${MODEL_COLORS[m.model_display_name]}`;
|
| 581 |
|
| 582 |
const name = document.createElement('span');
|
| 583 |
+
name.className = 'model-name';
|
| 584 |
+
name.dataset.modelName = m.model_display_name;
|
| 585 |
name.textContent = ' ' + m.model_display_name;
|
| 586 |
if (!m.is_checkpoint) {
|
| 587 |
name.style.fontStyle = 'italic';
|
|
|
|
| 588 |
}
|
| 589 |
|
| 590 |
+
name.addEventListener('mouseenter', (e) => {
|
| 591 |
+
const tip = name.dataset.missingTip;
|
| 592 |
+
if (!tip) return;
|
| 593 |
+
const tooltip = document.getElementById('custom-tooltip');
|
| 594 |
+
tooltip.textContent = tip;
|
| 595 |
+
tooltip.classList.remove('scrollable');
|
| 596 |
+
tooltip.style.display = 'block';
|
| 597 |
+
const rect = name.getBoundingClientRect();
|
| 598 |
+
tooltip.style.left = (rect.left) + 'px';
|
| 599 |
+
tooltip.style.top = (rect.bottom + 4) + 'px';
|
| 600 |
+
});
|
| 601 |
+
name.addEventListener('mouseleave', () => {
|
| 602 |
+
const tooltip = document.getElementById('custom-tooltip');
|
| 603 |
+
if (!tooltip.classList.contains('scrollable')) {
|
| 604 |
+
tooltip.style.display = 'none';
|
| 605 |
+
}
|
| 606 |
+
});
|
| 607 |
+
|
| 608 |
lbl.append(cb, dot, name);
|
| 609 |
container.appendChild(lbl);
|
| 610 |
}
|
|
|
|
| 707 |
await this.renderChart();
|
| 708 |
}
|
| 709 |
|
| 710 |
+
async updateMissingModels(task, metric) {
|
| 711 |
+
const nameEls = this.el.models.querySelectorAll('.model-name');
|
| 712 |
+
if (!task || !metric) {
|
| 713 |
+
nameEls.forEach(el => {
|
| 714 |
+
el.classList.remove('missing');
|
| 715 |
+
delete el.dataset.missingTip;
|
| 716 |
+
});
|
| 717 |
+
return;
|
| 718 |
+
}
|
| 719 |
+
const available = await query(`
|
| 720 |
+
SELECT DISTINCT model_display_name FROM scores
|
| 721 |
+
WHERE task = '${esc(task)}' AND metric = '${esc(metric)}'
|
| 722 |
+
AND tokens_trained IS NOT NULL
|
| 723 |
+
`);
|
| 724 |
+
const availableSet = new Set(available.map(r => r.model_display_name));
|
| 725 |
+
nameEls.forEach(el => {
|
| 726 |
+
const modelName = el.dataset.modelName;
|
| 727 |
+
if (!availableSet.has(modelName)) {
|
| 728 |
+
el.classList.add('missing');
|
| 729 |
+
el.dataset.missingTip = `No scores for "${modelName}" on this task / metric`;
|
| 730 |
+
} else {
|
| 731 |
+
el.classList.remove('missing');
|
| 732 |
+
delete el.dataset.missingTip;
|
| 733 |
+
}
|
| 734 |
+
});
|
| 735 |
+
}
|
| 736 |
+
|
| 737 |
// ── Chart rendering ────────────────────────────────────────
|
| 738 |
async renderChart() {
|
| 739 |
const task = this.getSelectedTask();
|
| 740 |
const metric = this.el.metric.value;
|
| 741 |
const models = this.getSelectedModels();
|
| 742 |
|
| 743 |
+
await this.updateMissingModels(task, metric);
|
| 744 |
+
|
| 745 |
if (!task || !metric || models.length === 0) {
|
| 746 |
this.el.chart.innerHTML = '';
|
| 747 |
return;
|
| 748 |
}
|
| 749 |
|
| 750 |
const rows = await query(`
|
| 751 |
+
SELECT model, model_display_name, tokens_trained, score, score_stderr,
|
| 752 |
+
is_checkpoint, higher_is_better, step
|
| 753 |
FROM scores
|
| 754 |
WHERE task = '${esc(task)}'
|
| 755 |
AND metric = '${esc(metric)}'
|
|
|
|
| 763 |
return;
|
| 764 |
}
|
| 765 |
|
| 766 |
+
// Merge final checkpoints (step=null) into matching checkpoint series
|
| 767 |
+
const mergedRows = this.mergeFinalCheckpoints(rows);
|
| 768 |
+
|
| 769 |
// Determine chart type
|
| 770 |
+
const chartType = this.resolveChartType(mergedRows);
|
| 771 |
+
const higherIsBetter = mergedRows[0]?.higher_is_better;
|
| 772 |
|
| 773 |
// Fetch subtask tree JSON from the data
|
| 774 |
let subtaskTree = null;
|
|
|
|
| 787 |
}
|
| 788 |
|
| 789 |
if (chartType === 'bar') {
|
| 790 |
+
this.drawBarChart(mergedRows, task, metric, higherIsBetter, subtaskTree);
|
| 791 |
} else {
|
| 792 |
+
this.drawLineChart(mergedRows, task, metric, higherIsBetter, subtaskTree);
|
| 793 |
+
}
|
| 794 |
+
}
|
| 795 |
+
|
| 796 |
+
mergeFinalCheckpoints(rows) {
|
| 797 |
+
// Final checkpoints have step=null. If a matching checkpoint series
|
| 798 |
+
// exists (same `model` id), append the final checkpoint to that series.
|
| 799 |
+
const regular = [];
|
| 800 |
+
const finals = [];
|
| 801 |
+
for (const r of rows) {
|
| 802 |
+
if (r.step === null || r.step === undefined) {
|
| 803 |
+
finals.push(r);
|
| 804 |
+
} else {
|
| 805 |
+
regular.push(r);
|
| 806 |
+
}
|
| 807 |
+
}
|
| 808 |
+
if (finals.length === 0) return rows;
|
| 809 |
+
|
| 810 |
+
// Map model id -> series model_display_name for checkpoint series
|
| 811 |
+
const modelToSeries = {};
|
| 812 |
+
for (const r of regular) {
|
| 813 |
+
if (r.is_checkpoint) {
|
| 814 |
+
modelToSeries[r.model] = r.model_display_name;
|
| 815 |
+
}
|
| 816 |
+
}
|
| 817 |
+
|
| 818 |
+
const result = [...regular];
|
| 819 |
+
for (const fc of finals) {
|
| 820 |
+
const seriesName = modelToSeries[fc.model];
|
| 821 |
+
if (seriesName) {
|
| 822 |
+
// Append to matching checkpoint series
|
| 823 |
+
result.push({ ...fc, model_display_name: seriesName, is_checkpoint: true });
|
| 824 |
+
} else {
|
| 825 |
+
// No matching series, keep as-is
|
| 826 |
+
result.push(fc);
|
| 827 |
+
}
|
| 828 |
}
|
| 829 |
+
return result;
|
| 830 |
}
|
| 831 |
|
| 832 |
resolveChartType(rows) {
|