CompactAI committed on
Commit
2ee1b90
·
verified ·
1 Parent(s): c1d5c42

Upload index.html

Browse files
Files changed (1) hide show
  1. index.html +179 -0
index.html CHANGED
@@ -123,6 +123,20 @@
123
  </div>
124
  </div>
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
 
128
  <div class="methodology" style="margin-top:1.5rem">
@@ -400,6 +414,16 @@ function getColor(value, min, max, lowerIsBetter, useLog = false) {
400
  return `color: rgb(${r}, ${g}, ${b})`;
401
  }
402
 
 
 
 
 
 
 
 
 
 
 
403
  function renderTable() {
404
  const tbody = document.getElementById('leaderboard-body');
405
  const sortedModels = [...models].sort((a, b) => getScore(b) - getScore(a));
@@ -489,11 +513,166 @@ function buildChart(canvasId, metric, label, reverse) {
489
  });
490
  }
491
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
492
  window.addEventListener('DOMContentLoaded', () => {
493
  renderTable();
494
  buildChart('blimpChart', 'blimp', 'BLiMP Accuracy', false);
495
  buildChart('arcChart', 'arc', 'ARC-Easy Accuracy', false);
496
  buildChart('wikiChart', 'wiki', 'WikiText-2 Perplexity', true);
 
497
  });
498
  </script>
499
  </body>
 
123
  </div>
124
  </div>
125
 
126
+ <h2 class="section-title">Model Efficiency</h2>
127
+
128
+ <div class="chart-grid">
129
+ <div class="chart-card full">
130
+ <h3>Parameters vs Avg Score — high efficiency zone (≥1σ above trend)</h3>
131
+ <canvas id="efficiencyChart" style="max-height:400px"></canvas>
132
+ <div style="display:flex;flex-direction:column;gap:2px;margin-top:.5rem;font-size:.75rem;color:#8b949e">
133
+ <span>Faint dashed line: average trend</span>
134
+ <span>Bold dashed line: high-efficiency threshold (trend + 1σ)</span>
135
+ <span>Yellow shaded area: models outperforming expectations for their size</span>
136
+ </div>
137
+ </div>
138
+ </div>
139
+
140
 
141
 
142
  <div class="methodology" style="margin-top:1.5rem">
 
414
  return `color: rgb(${r}, ${g}, ${b})`;
415
  }
416
 
417
/**
 * Convert a human-readable parameter count into a plain number.
 *
 * Accepts strings such as "125M", "1.5B", "10k" or "12,000". Thousands
 * separators are removed, the text is upper-cased, and a trailing B / M / K
 * multiplies the leading number by 1e9 / 1e6 / 1e3 respectively.
 *
 * @param {string} str - Parameter count as written in the model list.
 * @returns {number} The numeric count, or NaN when `str` is empty, is not a
 *   string, or does not start with a number.
 */
function parseParams(str) {
  if (!str || typeof str !== 'string') return NaN;

  // Trim before looking at the suffix: with surrounding whitespace,
  // endsWith() would miss the unit and "1.5B " would be read as 1.5.
  const s = str.trim().toUpperCase().replace(/,/g, '');

  const suffixFactors = [
    ['B', 1e9],
    ['M', 1e6],
    ['K', 1e3],
  ];
  for (const [suffix, factor] of suffixFactors) {
    // parseFloat stops at the first non-numeric character, so the suffix
    // itself does not need to be stripped.
    if (s.endsWith(suffix)) return parseFloat(s) * factor;
  }

  // No unit suffix: parseFloat already yields NaN for unparseable text.
  return parseFloat(s);
}
426
+
427
  function renderTable() {
428
  const tbody = document.getElementById('leaderboard-body');
429
  const sortedModels = [...models].sort((a, b) => getScore(b) - getScore(a));
 
513
  });
514
  }
515
 
516
/**
 * Render the "Parameters vs Avg Score" efficiency chart into the
 * #efficiencyChart canvas.
 *
 * Reads the file-level `models` list and keeps entries that have numeric
 * `blimp` and `arc` scores and a positive, parseable `params` value. It then
 * fits a least-squares line of average score against log10(parameters) and
 * draws:
 *   - one point per model, coloured through `colorMap[org]`;
 *   - the fitted trend line (faint dashes);
 *   - the trend shifted upwards by the threshold (bold dashes);
 *   - a translucent yellow fill above the threshold line.
 *
 * Does nothing when the canvas is missing or fewer than two models qualify.
 */
function buildEfficiencyChart() {
  const canvas = document.getElementById('efficiencyChart');
  // Without a canvas there is nothing to draw on; bail out quietly instead of
  // handing null to the Chart constructor.
  if (!canvas) return;

  const valid = models
    .filter(d => typeof d.blimp === 'number' && typeof d.arc === 'number')
    .map(d => ({
      ...d,
      paramsNum: parseParams(d.params),
      avgScore: (d.blimp + d.arc) / 2
    }))
    // log10 below needs strictly positive parameter counts.
    .filter(d => !Number.isNaN(d.paramsNum) && d.paramsNum > 0);

  if (valid.length < 2) return;

  const logParams = valid.map(d => Math.log10(d.paramsNum));
  const scores = valid.map(d => d.avgScore);

  // Ordinary least squares of score on log10(params).
  const n = valid.length;
  const sumX = logParams.reduce((s, v) => s + v, 0);
  const sumY = scores.reduce((s, v) => s + v, 0);
  const sumXY = logParams.reduce((s, v, i) => s + v * scores[i], 0);
  const sumX2 = logParams.reduce((s, v) => s + v * v, 0);
  const denominator = n * sumX2 - sumX * sumX;

  // When every model has the same size the slope is undefined (0 / 0).
  // Fall back to a flat trend through the mean score instead of letting NaN
  // propagate into every plotted value.
  const allSameSize = logParams.every(v => v === logParams[0]);
  const slope = allSameSize || denominator === 0
    ? 0
    : (n * sumXY - sumX * sumY) / denominator;
  const intercept = (sumY - slope * sumX) / n;

  // Population standard deviation of the residuals around the trend.
  const residuals = valid.map((d, i) => d.avgScore - (intercept + slope * logParams[i]));
  const resMean = residuals.reduce((s, v) => s + v, 0) / n;
  const resVariance = residuals.reduce((s, v) => s + (v - resMean) ** 2, 0) / n;
  const resStddev = Math.sqrt(resVariance);
  // The threshold sits one standard deviation above the trend, but never
  // closer than 3 score points (presumably so a very tight fit still leaves
  // a visible gap between the two lines).
  const thresholdShift = Math.max(resStddev, 3);

  const sorted = [...valid].sort((a, b) => a.paramsNum - b.paramsNum);

  // Sample both lines across 10^4..10^9, widened to whole decades when a
  // model falls outside that range so the lines always span every point.
  const regData = [];
  const thresholdData = [];
  const steps = 100;
  const logMin = Math.min(4, Math.floor(Math.min(...logParams)));
  const logMax = Math.max(9, Math.ceil(Math.max(...logParams)));
  for (let i = 0; i <= steps; i++) {
    const lx = logMin + (logMax - logMin) * (i / steps);
    const x = Math.pow(10, lx);
    const trendY = intercept + slope * lx;
    regData.push({ x, y: trendY });
    thresholdData.push({ x, y: trendY + thresholdShift });
  }

  // Inline plugin that shades the area above the threshold line before the
  // datasets are drawn.
  const zonePlugin = {
    id: 'efficiencyZone',
    beforeDraw(chart) {
      const ctx = chart.ctx;
      const xScale = chart.scales.x;
      const yScale = chart.scales.y;
      const { left, right, top, bottom } = chart.chartArea;

      // Threshold value at both horizontal ends of the axis; clamp to 1 so
      // log10 never sees zero or a negative number.
      const leftY = intercept + slope * Math.log10(Math.max(xScale.min, 1)) + thresholdShift;
      const rightY = intercept + slope * Math.log10(Math.max(xScale.max, 1)) + thresholdShift;
      const leftYPix = yScale.getPixelForValue(leftY);
      const rightYPix = yScale.getPixelForValue(rightY);

      ctx.save();
      // Clip to the plotting area so the fill cannot spill over the axes.
      ctx.beginPath();
      ctx.rect(left, top, right - left, bottom - top);
      ctx.clip();

      // Quadrilateral bounded below by the threshold line and above by the
      // top edge of the plotting area.
      ctx.beginPath();
      ctx.moveTo(left, leftYPix);
      ctx.lineTo(left, top);
      ctx.lineTo(right, top);
      ctx.lineTo(right, rightYPix);
      ctx.closePath();
      ctx.fillStyle = 'rgba(255, 230, 0, 0.12)';
      ctx.fill();
      ctx.restore();
    }
  };

  // Axis tick label: compact K / M / B suffix, with one decimal below ten
  // units of the suffix and none from ten upwards.
  const formatParamTick = (v) => {
    if (v >= 1e9) return `${(v / 1e9).toFixed(v >= 1e10 ? 0 : 1)}B`;
    if (v >= 1e6) return `${(v / 1e6).toFixed(v >= 1e7 ? 0 : 1)}M`;
    if (v >= 1e3) return `${(v / 1e3).toFixed(v >= 1e4 ? 0 : 1)}K`;
    return v.toString();
  };

  const pointColors = sorted.map(d => colorMap[d.org]);

  new Chart(canvas, {
    type: 'line',
    data: {
      datasets: [
        {
          label: 'Models',
          data: sorted.map(d => ({ x: d.paramsNum, y: d.avgScore })),
          showLine: false,
          backgroundColor: pointColors,
          borderColor: pointColors,
          pointRadius: 6,
          pointHoverRadius: 9,
        },
        {
          label: 'Trend',
          data: regData,
          showLine: true,
          borderColor: 'rgba(255, 200, 0, 0.5)',
          borderWidth: 1.5,
          borderDash: [4, 4],
          pointRadius: 0,
          fill: false,
          tension: 0,
        },
        {
          label: 'High Efficiency Threshold',
          data: thresholdData,
          showLine: true,
          borderColor: 'rgba(255, 200, 0, 0.8)',
          borderWidth: 2,
          borderDash: [6, 4],
          pointRadius: 0,
          fill: false,
          tension: 0,
        }
      ]
    },
    options: {
      // Data is already supplied as {x, y} points.
      parsing: false,
      responsive: true,
      maintainAspectRatio: true,
      scales: {
        x: {
          type: 'logarithmic',
          title: { display: true, text: 'Parameters', color: '#8b949e' },
          grid: { color: 'rgba(255,255,255,0.06)' },
          ticks: {
            color: '#8b949e',
            callback: formatParamTick
          }
        },
        y: {
          title: { display: true, text: 'Avg Score (BLiMP + ARC-Easy)', color: '#8b949e' },
          min: 20,
          max: 80,
          grid: { color: 'rgba(255,255,255,0.06)' },
          ticks: { color: '#8b949e', callback: v => v + '%' }
        }
      },
      plugins: {
        legend: { display: false },
        tooltip: {
          callbacks: {
            label: ctx => {
              // Only the model points carry a meaningful tooltip line.
              if (ctx.dataset.label !== 'Models') return '';
              const d = sorted[ctx.dataIndex];
              return `${d.name}: ${d.params} params, ${d.avgScore.toFixed(1)}% avg`;
            }
          }
        }
      }
    },
    plugins: [zonePlugin]
  });
}
669
+
670
  window.addEventListener('DOMContentLoaded', () => {
671
  renderTable();
672
  buildChart('blimpChart', 'blimp', 'BLiMP Accuracy', false);
673
  buildChart('arcChart', 'arc', 'ARC-Easy Accuracy', false);
674
  buildChart('wikiChart', 'wiki', 'WikiText-2 Perplexity', true);
675
+ buildEfficiencyChart();
676
  });
677
  </script>
678
  </body>