Spaces:
Running
Running
Update index.html
Browse files- index.html +42 -93
index.html
CHANGED
|
@@ -230,7 +230,7 @@
|
|
| 230 |
.chart-card {
|
| 231 |
border-radius: var(--radius);
|
| 232 |
padding: 18px 18px 12px;
|
| 233 |
-
min-height:
|
| 234 |
overflow: hidden;
|
| 235 |
}
|
| 236 |
|
|
@@ -293,7 +293,7 @@
|
|
| 293 |
|
| 294 |
.canvas-wrap {
|
| 295 |
position: relative;
|
| 296 |
-
height:
|
| 297 |
}
|
| 298 |
|
| 299 |
canvas {
|
|
@@ -343,64 +343,17 @@
|
|
| 343 |
font-weight: 700;
|
| 344 |
}
|
| 345 |
|
| 346 |
-
.cta-grid {
|
| 347 |
-
display: grid;
|
| 348 |
-
grid-template-columns: repeat(2, minmax(0, 1fr));
|
| 349 |
-
gap: 14px;
|
| 350 |
-
margin-top: 14px;
|
| 351 |
-
}
|
| 352 |
-
|
| 353 |
-
.cta-card {
|
| 354 |
-
display: block;
|
| 355 |
-
text-decoration: none;
|
| 356 |
-
border: 1px solid rgba(255,255,255,0.10);
|
| 357 |
-
border-radius: 20px;
|
| 358 |
-
background: linear-gradient(180deg, rgba(255,255,255,0.04), rgba(255,255,255,0.02));
|
| 359 |
-
padding: 16px;
|
| 360 |
-
color: var(--text);
|
| 361 |
-
transition: transform 0.16s ease, border-color 0.16s ease, background 0.16s ease;
|
| 362 |
-
}
|
| 363 |
-
|
| 364 |
-
.cta-card:hover {
|
| 365 |
-
transform: translateY(-1px);
|
| 366 |
-
border-color: rgba(255,255,255,0.22);
|
| 367 |
-
background: linear-gradient(180deg, rgba(255,255,255,0.06), rgba(255,255,255,0.03));
|
| 368 |
-
}
|
| 369 |
-
|
| 370 |
-
.cta-kicker {
|
| 371 |
-
color: var(--muted2);
|
| 372 |
-
font-size: 11px;
|
| 373 |
-
text-transform: uppercase;
|
| 374 |
-
letter-spacing: 0.14em;
|
| 375 |
-
margin-bottom: 8px;
|
| 376 |
-
}
|
| 377 |
-
|
| 378 |
-
.cta-title {
|
| 379 |
-
color: var(--silver);
|
| 380 |
-
font-size: 16px;
|
| 381 |
-
font-weight: 800;
|
| 382 |
-
line-height: 1.35;
|
| 383 |
-
margin: 0 0 6px;
|
| 384 |
-
}
|
| 385 |
-
|
| 386 |
-
.cta-text {
|
| 387 |
-
color: var(--muted);
|
| 388 |
-
font-size: 13px;
|
| 389 |
-
line-height: 1.6;
|
| 390 |
-
margin: 0;
|
| 391 |
-
}
|
| 392 |
-
|
| 393 |
@media (max-width: 1120px) {
|
| 394 |
.layout { grid-template-columns: 1fr; }
|
| 395 |
.panel { position: static; }
|
| 396 |
-
.canvas-wrap { height:
|
| 397 |
.info-grid { grid-template-columns: repeat(3, minmax(0, 1fr)); }
|
| 398 |
}
|
| 399 |
|
| 400 |
@media (max-width: 760px) {
|
| 401 |
body { padding: 16px; }
|
| 402 |
.hero, .panel, .chart-card, .info-card { border-radius: 18px; }
|
| 403 |
-
.canvas-wrap { height:
|
| 404 |
.stat-grid { grid-template-columns: 1fr; }
|
| 405 |
.info-grid { grid-template-columns: 1fr; }
|
| 406 |
}
|
|
@@ -462,7 +415,7 @@
|
|
| 462 |
<p class="chart-sub" id="chartSub">Silver regression line over the visible models.</p>
|
| 463 |
<div class="callout">
|
| 464 |
<span class="dot"></span>
|
| 465 |
-
<span>Hover a point for details. Click a point to open the model page.</span>
|
| 466 |
</div>
|
| 467 |
</div>
|
| 468 |
<div class="badge" id="countBadge">0 models</div>
|
|
@@ -496,19 +449,6 @@
|
|
| 496 |
Residuals are measured in the same score units shown on the chart, after converting scores to percentages. <strong>RMSE</strong> is the square root of mean squared error, and <strong>R²</strong> describes how much of the score variation is explained by the line.
|
| 497 |
These metrics describe the fit of this subset, not model quality in general.
|
| 498 |
</section>
|
| 499 |
-
|
| 500 |
-
<section class="cta-grid">
|
| 501 |
-
<a class="cta-card" href="https://huggingface.co/spaces/fromziro/SLM_Regression_Line/discussions" target="_blank" rel="noopener noreferrer">
|
| 502 |
-
<div class="cta-kicker">Contribute</div>
|
| 503 |
-
<h3 class="cta-title">Want to add your model? Open a discussion or PR.</h3>
|
| 504 |
-
<p class="cta-text">Use the discussions tab to share a model that is not listed yet, or to suggest an update.</p>
|
| 505 |
-
</a>
|
| 506 |
-
<a class="cta-card" href="https://huggingface.co/spaces/AxiomicLabs/Open_SLM_Leaderboard" target="_blank" rel="noopener noreferrer">
|
| 507 |
-
<div class="cta-kicker">Deeper analysis</div>
|
| 508 |
-
<h3 class="cta-title">Need a more detailed breakdown of your model?</h3>
|
| 509 |
-
<p class="cta-text">Open the companion analysis space for a richer, more comprehensive view.</p>
|
| 510 |
-
</a>
|
| 511 |
-
</section>
|
| 512 |
</main>
|
| 513 |
</div>
|
| 514 |
</div>
|
|
@@ -615,8 +555,8 @@
|
|
| 615 |
{ name: 'PicoKittens/PicoStories-853K', org: 'PicoKittens', params: 853120, arc_easy: 0.255050505050505, arc_challenge: 0.22013651877133106, piqa: 0.5119695321001088, hellaswag: 0.26488747261501694, avg_4: 0.3128200823244073, url: 'https://huggingface.co/PicoKittens/PicoStories-853K' },
|
| 616 |
];
|
| 617 |
|
| 618 |
-
const BENCHMARKS = [
|
| 619 |
-
{ key: 'avg_4', label: 'Avg' },
|
| 620 |
{ key: 'arc_easy', label: 'ARC-Easy' },
|
| 621 |
{ key: 'arc_challenge', label: 'ARC-Challenge' },
|
| 622 |
{ key: 'hellaswag', label: 'HellaSwag' },
|
|
@@ -675,21 +615,24 @@
|
|
| 675 |
}
|
| 676 |
|
| 677 |
function buildFitSamples(data) {
|
| 678 |
-
const
|
|
|
|
|
|
|
|
|
|
|
|
|
| 679 |
for (const d of data) {
|
| 680 |
-
const
|
| 681 |
-
if (!
|
| 682 |
-
|
| 683 |
}
|
| 684 |
|
| 685 |
-
return [...
|
| 686 |
-
.map(
|
| 687 |
-
const xs =
|
| 688 |
-
const ys =
|
| 689 |
-
const mid = Math.floor(ys.length / 2);
|
| 690 |
-
const medianY = ys.length % 2 ? ys[mid] : (ys[mid - 1] + ys[mid]) / 2;
|
| 691 |
const meanX = xs.reduce((a, b) => a + b, 0) / xs.length;
|
| 692 |
-
|
|
|
|
| 693 |
})
|
| 694 |
.sort((a, b) => a.x - b.x);
|
| 695 |
}
|
|
@@ -737,7 +680,7 @@
|
|
| 737 |
document.getElementById('infoMAE').textContent = `${absMean.toFixed(2)} pts`;
|
| 738 |
document.getElementById('infoResidualSpread').textContent = `${spread.toFixed(2)} pts`;
|
| 739 |
document.getElementById('fitNote').textContent =
|
| 740 |
-
`Linear fit trained on ${bucketCount}
|
| 741 |
}
|
| 742 |
|
| 743 |
function render() {
|
|
@@ -764,11 +707,11 @@
|
|
| 764 |
|
| 765 |
document.getElementById('chartTitle').textContent = chartTitleMap[activeBenchmark] || 'Regression vs log parameters';
|
| 766 |
document.getElementById('chartSub').textContent =
|
| 767 |
-
`Silver dotted regression line trained on
|
| 768 |
|
| 769 |
if (data.length < 2) {
|
| 770 |
document.getElementById('chartSub').textContent = 'Add more visible models or clear the search filter to fit a line.';
|
| 771 |
-
document.getElementById('fitNote').textContent = 'Need at least 2 visible models to fit a line. The line is trained on
|
| 772 |
document.getElementById('countBadge').textContent = `${data.length} models`;
|
| 773 |
document.getElementById('infoBenchmark').textContent = BENCHMARK_NAMES[activeBenchmark] || activeBenchmark;
|
| 774 |
document.getElementById('infoCount').textContent = String(data.length);
|
|
@@ -792,16 +735,22 @@
|
|
| 792 |
});
|
| 793 |
updateStats(fit, data.length, residuals, fitSamples.length, rawFit);
|
| 794 |
|
| 795 |
-
const
|
| 796 |
-
const
|
| 797 |
-
const
|
| 798 |
-
const
|
| 799 |
-
const
|
| 800 |
-
|
| 801 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 802 |
|
| 803 |
-
const yMin = Math.min(...data.map(d => d.y),
|
| 804 |
-
const yMax = Math.max(...data.map(d => d.y),
|
| 805 |
|
| 806 |
if (chart) chart.destroy();
|
| 807 |
|
|
@@ -813,7 +762,7 @@
|
|
| 813 |
{
|
| 814 |
type: 'line',
|
| 815 |
label: 'Regression',
|
| 816 |
-
data:
|
| 817 |
borderColor: 'rgba(239,240,244,0.98)',
|
| 818 |
borderWidth: lineEmphasis ? 4 : 3,
|
| 819 |
borderDash: [2, 8],
|
|
@@ -906,8 +855,8 @@
|
|
| 906 |
scales: {
|
| 907 |
x: {
|
| 908 |
type: 'linear',
|
| 909 |
-
min:
|
| 910 |
-
max:
|
| 911 |
grid: { color: 'rgba(255,255,255,0.045)' },
|
| 912 |
ticks: {
|
| 913 |
color: '#8a8f9a',
|
|
|
|
| 230 |
.chart-card {
|
| 231 |
border-radius: var(--radius);
|
| 232 |
padding: 18px 18px 12px;
|
| 233 |
+
min-height: 920px;
|
| 234 |
overflow: hidden;
|
| 235 |
}
|
| 236 |
|
|
|
|
| 293 |
|
| 294 |
.canvas-wrap {
|
| 295 |
position: relative;
|
| 296 |
+
height: 780px;
|
| 297 |
}
|
| 298 |
|
| 299 |
canvas {
|
|
|
|
| 343 |
font-weight: 700;
|
| 344 |
}
|
| 345 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
@media (max-width: 1120px) {
|
| 347 |
.layout { grid-template-columns: 1fr; }
|
| 348 |
.panel { position: static; }
|
| 349 |
+
.canvas-wrap { height: 700px; }
|
| 350 |
.info-grid { grid-template-columns: repeat(3, minmax(0, 1fr)); }
|
| 351 |
}
|
| 352 |
|
| 353 |
@media (max-width: 760px) {
|
| 354 |
body { padding: 16px; }
|
| 355 |
.hero, .panel, .chart-card, .info-card { border-radius: 18px; }
|
| 356 |
+
.canvas-wrap { height: 520px; }
|
| 357 |
.stat-grid { grid-template-columns: 1fr; }
|
| 358 |
.info-grid { grid-template-columns: 1fr; }
|
| 359 |
}
|
|
|
|
| 415 |
<p class="chart-sub" id="chartSub">Silver regression line over the visible models.</p>
|
| 416 |
<div class="callout">
|
| 417 |
<span class="dot"></span>
|
| 418 |
+
<span>Hover a point for details. Click a point to open the model page. Tiny models are still included on the left side of the plot.</span>
|
| 419 |
</div>
|
| 420 |
</div>
|
| 421 |
<div class="badge" id="countBadge">0 models</div>
|
|
|
|
| 449 |
Residuals are measured in the same score units shown on the chart, after converting scores to percentages. <strong>RMSE</strong> is the square root of mean squared error, and <strong>R²</strong> describes how much of the score variation is explained by the line.
|
| 450 |
These metrics describe the fit of this subset, not model quality in general.
|
| 451 |
</section>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 452 |
</main>
|
| 453 |
</div>
|
| 454 |
</div>
|
|
|
|
| 555 |
{ name: 'PicoKittens/PicoStories-853K', org: 'PicoKittens', params: 853120, arc_easy: 0.255050505050505, arc_challenge: 0.22013651877133106, piqa: 0.5119695321001088, hellaswag: 0.26488747261501694, avg_4: 0.3128200823244073, url: 'https://huggingface.co/PicoKittens/PicoStories-853K' },
|
| 556 |
];
|
| 557 |
|
| 558 |
+
const BENCHMARKS = [
|
| 559 |
+
{ key: 'avg_4', label: 'Avg' },
|
| 560 |
{ key: 'arc_easy', label: 'ARC-Easy' },
|
| 561 |
{ key: 'arc_challenge', label: 'ARC-Challenge' },
|
| 562 |
{ key: 'hellaswag', label: 'HellaSwag' },
|
|
|
|
| 615 |
}
|
| 616 |
|
| 617 |
/**
 * Collapse the visible data points into one regression sample per exact
 * parameter count.
 *
 * Points are bucketed by `String(d.params)`. Each bucket becomes a single
 * sample whose `x` and `y` are the arithmetic means of the bucket's `x` and
 * `y` values, so several models that share the same parameter count do not
 * over-weight the fitted line. The regression itself stays linear; only the
 * training samples are de-duplicated.
 *
 * @param {Array<{params: *, x: number, y: number}>} data - Points to group.
 *   NOTE(review): `x` is presumably a log-scaled parameter count and `y` a
 *   percentage score; this function only averages them, so confirm against
 *   the caller.
 * @returns {Array<{x: number, y: number, count: number}>} One sample per
 *   distinct `params` value, sorted by ascending `x`. `count` is the number
 *   of input points merged into that sample. Empty input yields `[]`.
 */
function buildFitSamples(data) {
  const groups = new Map();

  // Bucket by the stringified parameter count so only exact matches merge.
  for (const d of data) {
    const key = String(d.params);
    if (!groups.has(key)) groups.set(key, []);
    groups.get(key).push(d);
  }

  return [...groups.values()]
    .map(group => {
      const xs = group.map(d => d.x);
      const ys = group.map(d => d.y);
      const meanX = xs.reduce((a, b) => a + b, 0) / xs.length;
      const meanY = ys.reduce((a, b) => a + b, 0) / ys.length;
      return { x: meanX, y: meanY, count: group.length };
    })
    .sort((a, b) => a.x - b.x);
}
|
|
|
|
| 680 |
document.getElementById('infoMAE').textContent = `${absMean.toFixed(2)} pts`;
|
| 681 |
document.getElementById('infoResidualSpread').textContent = `${spread.toFixed(2)} pts`;
|
| 682 |
document.getElementById('fitNote').textContent =
|
| 683 |
+
`Linear fit trained on ${bucketCount} exact parameter groups from ${count} visible models. Grouped RMSE is ${rmse.toFixed(2)} score points; raw-point RMSE is ${rawFit.rmse.toFixed(2)}. Very small models are kept in the dataset but may be clipped from the main plot to preserve readability.`;
|
| 684 |
}
|
| 685 |
|
| 686 |
function render() {
|
|
|
|
| 707 |
|
| 708 |
document.getElementById('chartTitle').textContent = chartTitleMap[activeBenchmark] || 'Regression vs log parameters';
|
| 709 |
document.getElementById('chartSub').textContent =
|
| 710 |
+
`Silver dotted regression line trained on exact parameter groups. The chart auto-zooms to the main cluster so tiny models do not crush the view; hover to inspect a point and click to open the model page.`;
|
| 711 |
|
| 712 |
if (data.length < 2) {
|
| 713 |
document.getElementById('chartSub').textContent = 'Add more visible models or clear the search filter to fit a line.';
|
| 714 |
+
document.getElementById('fitNote').textContent = 'Need at least 2 visible models to fit a line. The line is trained on exact parameter groups, not on repeated exact-size duplicates.';
|
| 715 |
document.getElementById('countBadge').textContent = `${data.length} models`;
|
| 716 |
document.getElementById('infoBenchmark').textContent = BENCHMARK_NAMES[activeBenchmark] || activeBenchmark;
|
| 717 |
document.getElementById('infoCount').textContent = String(data.length);
|
|
|
|
| 735 |
});
|
| 736 |
updateStats(fit, data.length, residuals, fitSamples.length, rawFit);
|
| 737 |
|
| 738 |
+
const rawXMin = Math.min(...data.map(d => d.x));
|
| 739 |
+
const rawXMax = Math.max(...data.map(d => d.x));
|
| 740 |
+
const rawPad = Math.max(0.18, (rawXMax - rawXMin) * 0.09);
|
| 741 |
+
const lineSteps = 40;
|
| 742 |
+
const regressionLine = [];
|
| 743 |
+
for (let i = 0; i <= lineSteps; i += 1) {
|
| 744 |
+
const t = i / lineSteps;
|
| 745 |
+
const rawX = (rawXMin - rawPad) + ((rawXMax + rawPad) - (rawXMin - rawPad)) * t;
|
| 746 |
+
regressionLine.push({
|
| 747 |
+
x: rawX,
|
| 748 |
+
y: fit.slope * rawX + fit.intercept
|
| 749 |
+
});
|
| 750 |
+
}
|
| 751 |
|
| 752 |
+
const yMin = Math.min(...data.map(d => d.y), ...regressionLine.map(p => p.y)) - 1.6;
|
| 753 |
+
const yMax = Math.max(...data.map(d => d.y), ...regressionLine.map(p => p.y)) + 1.6;
|
| 754 |
|
| 755 |
if (chart) chart.destroy();
|
| 756 |
|
|
|
|
| 762 |
{
|
| 763 |
type: 'line',
|
| 764 |
label: 'Regression',
|
| 765 |
+
data: regressionLine,
|
| 766 |
borderColor: 'rgba(239,240,244,0.98)',
|
| 767 |
borderWidth: lineEmphasis ? 4 : 3,
|
| 768 |
borderDash: [2, 8],
|
|
|
|
| 855 |
scales: {
|
| 856 |
x: {
|
| 857 |
type: 'linear',
|
| 858 |
+
min: Math.max(3.3, rawXMin - 0.20),
|
| 859 |
+
max: rawXMax + 0.15,
|
| 860 |
grid: { color: 'rgba(255,255,255,0.045)' },
|
| 861 |
ticks: {
|
| 862 |
color: '#8a8f9a',
|