Harley-ml committed on
Commit
f856253
·
verified ·
1 Parent(s): c83e5f0

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +414 -64
index.html CHANGED
@@ -628,8 +628,8 @@
628
  <div class="eyebrow">Regression explorer</div>
629
  <h1><span class="title-silver">SLM Regression Line Benchmark</span></h1>
630
  <p class="sub">
631
- Choose a benchmark, fit a linear regression on <strong>log10(parameters)</strong>, and compare models to the size trend.
632
- Search and org filters only change what is visible. The regression line stays anchored to the full eligible dataset for the selected benchmark.
633
  Models below <strong>500k parameters</strong> are clipped from the plot and excluded from the fit so they do not crush the scale.
634
  </p>
635
  </div>
@@ -708,13 +708,13 @@
708
  <div class="section desktop-only">
709
  <span class="section-label">Regression stats</span>
710
  <div class="stat-grid">
711
- <div class="stat"><div class="k">Slope</div><div class="v" id="statSlope">—</div></div>
712
- <div class="stat"><div class="k">Intercept</div><div class="v" id="statIntercept">—</div></div>
713
- <div class="stat"><div class="k">MSE</div><div class="v" id="statMSE">—</div></div>
714
  <div class="stat"><div class="k">RMSE</div><div class="v" id="statRMSE">—</div></div>
715
  <div class="stat"><div class="k">R²</div><div class="v" id="statR2">—</div></div>
716
  </div>
717
- <div class="small-note" id="fitNote">The fit is locked to the full eligible dataset for the selected benchmark.</div>
718
  </div>
719
  </aside>
720
 
@@ -723,13 +723,13 @@
723
  <div class="chart-top">
724
  <div>
725
  <h2 class="chart-title" id="chartTitle">Average score vs log parameters</h2>
726
- <p class="chart-sub" id="chartSub">Silver regression line fit globally for the selected benchmark. Search and org filtering only hide points.</p>
727
  <div class="callout">
728
  <span class="dot"></span>
729
- <span>Hover for details. Click a point to open the model page. The line is fit globally for the selected benchmark.</span>
730
  </div>
731
  </div>
732
- <div class="badge" id="countBadge">0 visible / 0 fit</div>
733
  </div>
734
  <div class="canvas-wrap">
735
  <canvas id="scatterChart"></canvas>
@@ -739,7 +739,7 @@
739
  <section class="info-grid desktop-only">
740
  <div class="info-card"><div class="label">Selected benchmark</div><div class="big" id="infoBenchmark">Avg</div></div>
741
  <div class="info-card"><div class="label">Visible models</div><div class="big" id="infoCount">0</div></div>
742
- <div class="info-card"><div class="label">Fit models</div><div class="big" id="infoFitCount">0</div></div>
743
  <div class="info-card"><div class="label">Mean absolute residual</div><div class="big" id="infoMAE">—</div></div>
744
  <div class="info-card"><div class="label">Residual spread</div><div class="big" id="infoResidualSpread">—</div></div>
745
  <div class="info-card"><div class="label">Visible match rate</div><div class="big" id="infoMatchRate">—</div></div>
@@ -748,8 +748,8 @@
748
  </section>
749
 
750
  <section class="disclaimer desktop-only">
751
- <strong>Disclaimer:</strong> the regression line is fit only to the full eligible dataset for the selected benchmark, using <strong>log10(parameters)</strong> as x and the selected benchmark score as y.
752
- Search and org filtering only hide points from the chart. Residuals are measured in score points after converting scores to percentages.
753
  <strong>RMSE</strong> is the square root of mean squared error, and <strong>R²</strong> describes how much of the variation is explained by the line.
754
  Models with fewer than <strong>500k parameters</strong> are clipped from the view and excluded from the fit so they do not compress the plot.
755
 
@@ -959,6 +959,66 @@
959
  return 'gt500m';
960
  }
961
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
962
  function benchmarkFit(key) {
963
  const fitModels = MODELS.filter(m => {
964
  const score = getMetricValue(m, key);
@@ -984,16 +1044,19 @@
984
  };
985
  });
986
 
987
- const fitSamples = buildFitSamples(fitData);
988
- return { fitData, fit: linearRegression(fitSamples), rawFit: linearRegression(fitData) };
 
 
989
  }
990
 
 
991
  function buildPredictionRows(paramCount) {
992
  return BENCHMARKS.map(({ key, label }) => {
993
  const bundle = benchmarkFit(key);
994
  if (!bundle || bundle.fit.n < 2) return { key, label, predicted: null };
995
  const x = Math.log10(paramCount);
996
- return { key, label, predicted: bundle.fit.slope * x + bundle.fit.intercept };
997
  });
998
  }
999
 
@@ -1008,44 +1071,331 @@
1008
  return model[key];
1009
  }
1010
 
1011
- function linearRegression(points) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1012
  const n = points.length;
1013
- let sumX = 0, sumY = 0, sumXY = 0, sumXX = 0;
1014
- for (const p of points) {
1015
- sumX += p.x;
1016
- sumY += p.y;
1017
- sumXY += p.x * p.y;
1018
- sumXX += p.x * p.x;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1019
  }
1020
- const denom = n * sumXX - sumX * sumX;
1021
- const slope = Math.abs(denom) < 1e-12 ? 0 : (n * sumXY - sumX * sumY) / denom;
1022
- const intercept = n ? (sumY - slope * sumX) / n : 0;
1023
- const yMean = n ? sumY / n : 0;
1024
- const sst = points.reduce((acc, p) => acc + (p.y - yMean) ** 2, 0);
1025
- const sse = points.reduce((acc, p) => {
1026
- const resid = p.y - (slope * p.x + intercept);
1027
- return acc + resid * resid;
 
 
 
 
1028
  }, 0);
1029
- const mse = n ? sse / n : 0;
1030
  const rmse = Math.sqrt(mse);
1031
  const r2 = sst > 0 ? 1 - (sse / sst) : 0;
1032
- return { slope, intercept, mse, rmse, r2, n };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1033
  }
1034
 
1035
- function buildFitSamples(data) {
1036
- const groups = new Map();
1037
- for (const d of data) {
1038
- const key = String(d.params);
1039
- if (!groups.has(key)) groups.set(key, []);
1040
- groups.get(key).push(d);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1041
  }
1042
- return [...groups.values()]
1043
- .map(group => {
1044
- const meanX = group.reduce((a, b) => a + b.x, 0) / group.length;
1045
- const meanY = group.reduce((a, b) => a + b.y, 0) / group.length;
1046
- return { x: meanX, y: meanY, count: group.length };
1047
- })
1048
- .sort((a, b) => a.x - b.x);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1049
  }
1050
 
1051
  function getEligibleModels() {
@@ -1175,17 +1525,17 @@
1175
  }).join('');
1176
  }
1177
 
1178
- function updateStats(fit, visibleCount, residuals, fitCount, rawFit) {
1179
- document.getElementById('statSlope').textContent = fit.n >= 2 ? fit.slope.toFixed(4) : 'β€”';
1180
- document.getElementById('statIntercept').textContent = fit.n >= 2 ? fit.intercept.toFixed(2) : 'β€”';
1181
- document.getElementById('statMSE').textContent = fit.n >= 2 ? fit.mse.toFixed(3) : 'β€”';
1182
  document.getElementById('statRMSE').textContent = fit.n >= 2 ? fit.rmse.toFixed(2) : 'β€”';
1183
  document.getElementById('statR2').textContent = fit.n >= 2 ? fit.r2.toFixed(3) : 'β€”';
1184
 
1185
- document.getElementById('countBadge').textContent = `${visibleCount} visible / ${fitCount} fit`;
1186
  document.getElementById('infoBenchmark').textContent = BENCHMARK_NAMES[activeBenchmark] || activeBenchmark;
1187
  document.getElementById('infoCount').textContent = String(visibleCount);
1188
- document.getElementById('infoFitCount').textContent = String(fitCount);
1189
 
1190
  const absMean = residuals.length ? residuals.reduce((a, b) => a + Math.abs(b), 0) / residuals.length : 0;
1191
  const mean = residuals.length ? residuals.reduce((a, b) => a + b, 0) / residuals.length : 0;
@@ -1196,11 +1546,11 @@
1196
 
1197
  const orgCount = new Set(getVisibleModels().map(m => m.org)).size;
1198
  document.getElementById('infoOrgs').textContent = `${orgCount} orgs`;
1199
- document.getElementById('infoMatchRate').textContent = `${visibleCount}/${fitCount}`;
1200
  document.getElementById('infoMode').textContent = activeMode === 'mobile' ? 'Mobile' : 'Computer';
1201
 
1202
  document.getElementById('fitNote').textContent =
1203
- `Fit uses ${fitCount} eligible models for ${BENCHMARK_NAMES[activeBenchmark] || activeBenchmark}. Raw-point RMSE is ${rawFit.n >= 2 ? rawFit.rmse.toFixed(2) : 'β€”'}; grouped RMSE is ${fit.n >= 2 ? fit.rmse.toFixed(2) : 'β€”'}. Search and org filters only affect visibility.`;
1204
  }
1205
 
1206
  function render() {
@@ -1243,11 +1593,11 @@
1243
 
1244
  document.getElementById('chartTitle').textContent = chartTitleMap[activeBenchmark] || 'Regression vs log parameters';
1245
  document.getElementById('chartSub').textContent =
1246
- 'Silver regression line fit globally for the selected benchmark. Search and org filtering only hide points, so the line stays stable.';
1247
 
1248
  if (fitData.length < 2) {
1249
  document.getElementById('chartSub').textContent = 'Need at least 2 eligible models to fit a line.';
1250
- document.getElementById('countBadge').textContent = `${visibleModels.length} visible / ${fitData.length} fit`;
1251
  document.getElementById('infoBenchmark').textContent = BENCHMARK_NAMES[activeBenchmark] || activeBenchmark;
1252
  document.getElementById('infoCount').textContent = String(visibleModels.length);
1253
  document.getElementById('infoFitCount').textContent = String(fitData.length);
@@ -1261,17 +1611,17 @@
1261
  return;
1262
  }
1263
 
1264
- const fitSamples = buildFitSamples(fitData);
1265
- const fit = linearRegression(fitSamples);
1266
- const rawFit = linearRegression(fitData);
1267
 
1268
- const visibleResiduals = data.map(d => d.y - (fit.slope * d.x + fit.intercept));
1269
  data.forEach((d, i) => {
1270
  d.residual = visibleResiduals[i];
1271
- d.prediction = fit.slope * d.x + fit.intercept;
1272
  });
1273
 
1274
- updateStats(fit, data.length, visibleResiduals, fitData.length, rawFit);
1275
  updatePredictionPanel();
1276
 
1277
  const xMin = Math.min(...fitData.map(d => d.x));
@@ -1283,7 +1633,7 @@
1283
  for (let i = 0; i <= lineSteps; i += 1) {
1284
  const t = i / lineSteps;
1285
  const rawX = (xMin - xPad) + ((xMax + xPad) - (xMin - xPad)) * t;
1286
- regressionLine.push({ x: rawX, y: fit.slope * rawX + fit.intercept });
1287
  }
1288
 
1289
  const yMin = Math.min(...fitData.map(d => d.y), ...regressionLine.map(p => p.y)) - 1.6;
@@ -1378,14 +1728,14 @@
1378
  title: (items) => items[0]?.raw?.name || '',
1379
  label: (item) => {
1380
  const d = item.raw;
1381
- const predicted = fit.slope * d.x + fit.intercept;
1382
  const resid = d.y - predicted;
1383
  return [
1384
  `Org: ${d.org}`,
1385
  `Params: ${fmtParams(d.params)} (${d.params.toLocaleString()})`,
1386
  `Score: ${d.score.toFixed(2)}%`,
1387
  `Residual: ${resid >= 0 ? '+' : ''}${resid.toFixed(2)} pts`,
1388
- `Predicted: ${predicted.toFixed(2)}%`
1389
  ];
1390
  },
1391
  afterLabel: (item) => {
 
628
  <div class="eyebrow">Regression explorer</div>
629
  <h1><span class="title-silver">SLM Regression Line Benchmark</span></h1>
630
  <p class="sub">
631
+ Choose a benchmark, fit a robust regression line on <strong>log10(parameters)</strong>, and compare models to the size trend.
632
+ Search and org filters only change what is visible. The line stays anchored to the full eligible dataset for the selected benchmark.
633
  Models below <strong>500k parameters</strong> are clipped from the plot and excluded from the fit so they do not crush the scale.
634
  </p>
635
  </div>
 
708
  <div class="section desktop-only">
709
  <span class="section-label">Regression stats</span>
710
  <div class="stat-grid">
711
+ <div class="stat"><div class="k">Slope</div><div class="v" id="statSlope">—</div></div>
712
+ <div class="stat"><div class="k">Intercept</div><div class="v" id="statIntercept">—</div></div>
713
+ <div class="stat"><div class="k">MSE</div><div class="v" id="statMSE">—</div></div>
714
  <div class="stat"><div class="k">RMSE</div><div class="v" id="statRMSE">—</div></div>
715
  <div class="stat"><div class="k">R²</div><div class="v" id="statR2">—</div></div>
716
  </div>
717
+ <div class="small-note" id="fitNote">The line is locked to the full eligible dataset for the selected benchmark.</div>
718
  </div>
719
  </aside>
720
 
 
723
  <div class="chart-top">
724
  <div>
725
  <h2 class="chart-title" id="chartTitle">Average score vs log parameters</h2>
726
+ <p class="chart-sub" id="chartSub">Binned linear regression on log10(parameters) for the selected benchmark. Each size bin contributes one equally weighted sample, so the line tracks the average score by parameter region instead of point density.</p>
727
  <div class="callout">
728
  <span class="dot"></span>
729
+ <span>Hover for details. Click a point to open the model page. The line is fit globally with a robust linear estimator for the selected benchmark.</span>
730
  </div>
731
  </div>
732
+ <div class="badge" id="countBadge">0 visible / 0 bins</div>
733
  </div>
734
  <div class="canvas-wrap">
735
  <canvas id="scatterChart"></canvas>
 
739
  <section class="info-grid desktop-only">
740
  <div class="info-card"><div class="label">Selected benchmark</div><div class="big" id="infoBenchmark">Avg</div></div>
741
  <div class="info-card"><div class="label">Visible models</div><div class="big" id="infoCount">0</div></div>
742
+ <div class="info-card"><div class="label">Fit bins</div><div class="big" id="infoFitCount">0</div></div>
743
  <div class="info-card"><div class="label">Mean absolute residual</div><div class="big" id="infoMAE">—</div></div>
744
  <div class="info-card"><div class="label">Residual spread</div><div class="big" id="infoResidualSpread">—</div></div>
745
  <div class="info-card"><div class="label">Visible match rate</div><div class="big" id="infoMatchRate">—</div></div>
 
748
  </section>
749
 
750
  <section class="disclaimer desktop-only">
751
+ <strong>Disclaimer:</strong> the regression line is fit using <strong>log10(parameters)</strong> as x and the selected benchmark score as y, but the samples are first collapsed into equal-weight size bins so the line reflects the average score by parameter region rather than point density.
752
+ The displayed line uses a robust binned fit that downweights outliers while keeping each parameter-size region equally represented. Search and org filtering only hide points from the chart. Residuals are measured in score points after converting scores to percentages.
753
  <strong>RMSE</strong> is the square root of mean squared error, and <strong>R²</strong> describes how much of the variation is explained by the line.
754
  Models with fewer than <strong>500k parameters</strong> are clipped from the view and excluded from the fit so they do not compress the plot.
755
 
 
959
  return 'gt500m';
960
  }
961
 
962
+
963
/**
 * Mean of the finite entries of `values` after discarding the same number of
 * entries from the low and the high end.
 *
 * @param {number[]} values - Samples; non-finite entries are ignored.
 * @param {number} [trimFraction=0.1] - Share of the samples dropped from EACH
 *   end. Negative or NaN fractions are treated as 0; fractions that would
 *   remove everything are capped so the middle one or two samples remain.
 * @returns {number} The trimmed mean, or 0 when no finite sample exists.
 */
function trimmedMean(values, trimFraction = 0.1) {
  const sorted = values.filter((v) => Number.isFinite(v)).sort((a, b) => a - b);
  if (sorted.length === 0) return 0;

  // A negative trim count would make slice() count from the END of the array
  // (returning only the largest values), so clamp the fraction at zero.
  const fraction = Number.isNaN(trimFraction) ? 0 : Math.max(0, trimFraction);

  // Never trim past the middle: keeps the window symmetric and non-empty.
  const maxTrim = Math.floor((sorted.length - 1) / 2);
  const trim = Math.min(Math.floor(sorted.length * fraction), maxTrim);

  const kept = sorted.slice(trim, sorted.length - trim);
  return kept.reduce((sum, v) => sum + v, 0) / kept.length;
}
971
+
972
/**
 * Collapses raw points into one sample per x-axis bin.
 *
 * The bin width is the x-range divided by a target bin count (the rounded
 * square root of the point count, clamped to 5..14), but never below 0.08.
 * Each non-empty bin yields one sample whose x is the mean x of its members
 * and whose y is the mean y (a 15% trimmed mean once the bin holds at least
 * four finite y values).
 *
 * @param {Array<{x: number, y: number, params?: *, url?: *}>} points
 * @returns {Array<object>} Bin samples sorted by ascending x; empty when fewer
 *   than two points are given or no point has a finite x.
 */
function buildBinnedFitSamples(points) {
  if (!Array.isArray(points) || points.length < 2) return [];

  const finiteXs = points.map((pt) => pt.x).filter(Number.isFinite);
  if (!finiteXs.length) return [];

  const average = (list) => list.reduce((a, b) => a + b, 0) / list.length;

  const lowest = Math.min(...finiteXs);
  const highest = Math.max(...finiteXs);
  const span = Math.max(1e-6, highest - lowest);

  const wantedBins = Math.max(5, Math.min(14, Math.round(Math.sqrt(points.length))));
  const width = Math.max(0.08, span / wantedBins);
  const origin = Math.floor(lowest / width) * width;

  // Group every point under the index of the bin its x falls into.
  const grouped = new Map();
  points.forEach((pt) => {
    const binKey = Math.floor((pt.x - origin) / width);
    const members = grouped.get(binKey);
    if (members) {
      members.push(pt);
    } else {
      grouped.set(binKey, [pt]);
    }
  });

  const ordered = [...grouped.entries()].sort((a, b) => a[0] - b[0]);
  const result = [];
  ordered.forEach(([binKey, members]) => {
    const memberXs = members.map((pt) => pt.x).filter(Number.isFinite);
    const memberYs = members.map((pt) => pt.y).filter(Number.isFinite);
    if (!memberXs.length || !memberYs.length) return;

    let centerY;
    if (memberYs.length >= 4) {
      centerY = trimmedMean(memberYs, 0.15);
    } else {
      centerY = average(memberYs);
    }

    result.push({
      x: average(memberXs),
      y: centerY,
      count: 1,
      modelCount: members.length,
      binIndex: binKey,
      params: members[0]?.params,
      name: `${members.length} models in size bin`,
      org: 'bin',
      url: members[0]?.url
    });
  });

  result.sort((a, b) => a.x - b.x);
  return result;
}
1020
+
1021
+
1022
  function benchmarkFit(key) {
1023
  const fitModels = MODELS.filter(m => {
1024
  const score = getMetricValue(m, key);
 
1044
  };
1045
  });
1046
 
1047
+ const fitSamples = buildBinnedFitSamples(fitData);
1048
+ const fit = robustLinearRegression(fitSamples);
1049
+ const rawFit = weightedLinearRegression(fitData);
1050
+ return { fitData, fit, rawFit, fitSamples };
1051
  }
1052
 
1053
+
1054
/**
 * Predicts a score for every benchmark at a given model size.
 *
 * @param {number} paramCount - Parameter count of the hypothetical model.
 * @returns {Array<{key: *, label: *, predicted: (number|null)}>} One row per
 *   entry of BENCHMARKS. `predicted` is null when the size is not a positive
 *   finite number or when the benchmark's fit has fewer than two samples.
 */
function buildPredictionRows(paramCount) {
  // log10 is only defined for positive sizes; computing it once also avoids
  // repeating the same work for every benchmark.
  const count = Number(paramCount);
  const x = Number.isFinite(count) && count > 0 ? Math.log10(count) : null;

  return BENCHMARKS.map(({ key, label }) => {
    if (x === null) return { key, label, predicted: null };
    const bundle = benchmarkFit(key);
    if (!bundle || bundle.fit.n < 2) return { key, label, predicted: null };
    return { key, label, predicted: evaluateFit(bundle.fit, x) };
  });
}
1062
 
 
1071
  return model[key];
1072
  }
1073
 
1074
+
1075
/**
 * Median of the finite entries of `values`.
 *
 * Non-finite entries (NaN, ±Infinity) are ignored, matching trimmedMean and
 * weightedMedian; a NaN would otherwise make the sort comparator return NaN
 * and leave the ordering unspecified.
 *
 * @param {number[]} values
 * @returns {number} The middle value (mean of the two middle values for an
 *   even count), or 0 when there is no finite value.
 */
function median(values) {
  const sorted = values.filter((v) => Number.isFinite(v)).sort((a, b) => a - b);
  if (sorted.length === 0) return 0;
  const mid = Math.floor(sorted.length / 2);
  return sorted.length % 2 === 1 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
}
1081
+
1082
/**
 * Weighted median of `values`.
 *
 * @param {number[]} values - Samples; non-finite entries are ignored.
 * @param {number[]} [weights] - Weight per sample. A missing or non-finite
 *   weight counts as 1; negative weights are clamped to 0 and zero-weight
 *   samples are dropped.
 * @returns {number} The value at which the cumulative weight first exceeds
 *   half of the total. When the cumulative weight lands exactly on the half,
 *   the result is the mean of that value and the next one, so unit weights
 *   agree with `median`. Returns 0 when no usable sample exists.
 */
function weightedMedian(values, weights) {
  const pairs = values
    .map((v, i) => [v, Math.max(0, Number.isFinite(weights?.[i]) ? weights[i] : 1)])
    .filter(([v, w]) => Number.isFinite(v) && w > 0)
    .sort((a, b) => a[0] - b[0]);

  if (pairs.length === 0) return 0;

  const total = pairs.reduce((sum, [, w]) => sum + w, 0);
  const half = total / 2;
  // Relative tolerance so accumulated rounding does not hide an exact tie.
  const tolerance = total * 1e-12;

  let running = 0;
  for (let i = 0; i < pairs.length; i += 1) {
    const [value, weight] = pairs[i];
    running += weight;
    if (running > half + tolerance) return value;
    if (running >= half - tolerance) {
      // Exactly half of the weight lies at or below `value`: split the tie.
      const next = pairs[i + 1];
      return next ? (value + next[0]) / 2 : value;
    }
  }
  return pairs[pairs.length - 1][0];
}
1098
+
1099
/**
 * Evaluates a polynomial at `x` using Horner's scheme.
 *
 * @param {number[]} coefficients - Highest-degree coefficient first.
 * @param {number} x - Point of evaluation.
 * @returns {number} The polynomial value; 0 for a missing or empty list.
 */
function evaluatePolynomial(coefficients, x) {
  if (!coefficients?.length) return 0;
  let total = 0;
  for (const coefficient of coefficients) {
    total = total * x + coefficient;
  }
  return total;
}
1107
+
1108
/**
 * Evaluates the first derivative of a polynomial at `x`.
 *
 * @param {number[]} coefficients - Highest-degree coefficient first.
 * @param {number} x - Point of evaluation.
 * @returns {number} The derivative value; 0 when fewer than two coefficients
 *   are supplied (a constant or missing polynomial).
 */
function polynomialDerivative(coefficients, x) {
  const count = coefficients?.length ?? 0;
  if (!count || count < 2) return 0;

  let slope = 0;
  let power = count - 1;
  // Horner accumulation over the derivative's coefficients c[i] * power.
  while (power > 0) {
    slope = slope * x + coefficients[count - 1 - power] * power;
    power -= 1;
  }
  return slope;
}
1118
+
1119
/**
 * Solves `matrix * solution = vector` by Gauss-Jordan elimination with
 * partial (row) pivoting. The inputs are not modified.
 *
 * @param {number[][]} matrix - Square coefficient matrix.
 * @param {number[]} vector - Right-hand side; its length sets the system size.
 * @returns {(number[]|null)} The solution, or null when the largest available
 *   pivot in some column is below 1e-12 in absolute value.
 */
function solveLinearSystem(matrix, vector) {
  const size = vector.length;
  // Augmented working copy: each row gets its right-hand side appended.
  const aug = matrix.map((cells, r) => [...cells, vector[r]]);

  for (let c = 0; c < size; c += 1) {
    // Pick the row (at or below the diagonal) with the largest entry in column c.
    let best = c;
    let bestMagnitude = Math.abs(aug[c][c]);
    for (let r = c + 1; r < size; r += 1) {
      const magnitude = Math.abs(aug[r][c]);
      if (magnitude > bestMagnitude) {
        bestMagnitude = magnitude;
        best = r;
      }
    }

    if (bestMagnitude < 1e-12) return null;

    if (best !== c) {
      [aug[c], aug[best]] = [aug[best], aug[c]];
    }

    // Scale the pivot row so the diagonal entry becomes 1.
    const pivotRow = aug[c];
    const divisor = pivotRow[c];
    for (let k = c; k <= size; k += 1) {
      pivotRow[k] /= divisor;
    }

    // Clear column c from every other row.
    for (let r = 0; r < size; r += 1) {
      if (r !== c) {
        const multiplier = aug[r][c];
        if (Math.abs(multiplier) >= 1e-12) {
          for (let k = c; k <= size; k += 1) {
            aug[r][k] -= multiplier * pivotRow[k];
          }
        }
      }
    }
  }

  return aug.map((cells) => cells[size]);
}
1161
+
1162
/**
 * Weighted least-squares polynomial fit of y on x.
 *
 * Builds the weighted normal equations and solves them with
 * solveLinearSystem. If the system is singular the fit is retried one degree
 * lower; at degree 0 it falls back to the weighted mean of y.
 *
 * @param {Array<{x: number, y: number}>} points - Samples to fit.
 * @param {number} [degree=1] - Requested degree; capped at points.length - 1.
 * @param {?number[]} [weights=null] - Weight per point. A missing or
 *   non-finite weight counts as 1; negative weights are clamped to 0.
 * @returns {{degree: number, coefficients: number[], mse: number,
 *   rmse: number, r2: number, n: number, weightSum: number}} Coefficients are
 *   ordered highest degree first. `mse`, `rmse` and `r2` are weighted; `n` is
 *   the number of points passed in.
 */
function weightedPolynomialRegression(points, degree = 1, weights = null) {
  const n = points.length;
  if (!n) return { degree, coefficients: [0], mse: 0, rmse: 0, r2: 0, n: 0, weightSum: 0 };

  // Sanitize the weights once; every pass below reuses the same values.
  const safeWeights = points.map((_, i) => Math.max(0, Number.isFinite(weights?.[i]) ? weights[i] : 1));

  const actualDegree = Math.max(0, Math.min(degree, n - 1));
  const size = actualDegree + 1;
  const matrix = Array.from({ length: size }, () => Array(size).fill(0));
  const vector = Array(size).fill(0);

  for (let i = 0; i < n; i += 1) {
    const p = points[i];
    const w = safeWeights[i];
    if (!w) continue;

    // Powers of x, highest first, so the solution comes out in the same order.
    const basis = [];
    for (let d = actualDegree; d >= 0; d -= 1) {
      basis.push(p.x ** d);
    }

    for (let r = 0; r < size; r += 1) {
      vector[r] += w * basis[r] * p.y;
      for (let c = 0; c < size; c += 1) {
        matrix[r][c] += w * basis[r] * basis[c];
      }
    }
  }

  const weightSum = safeWeights.reduce((acc, w) => acc + w, 0);
  const weightedYSum = points.reduce((acc, p, i) => acc + safeWeights[i] * p.y, 0);

  let coefficients = solveLinearSystem(matrix, vector);
  if (!coefficients) {
    if (actualDegree > 0) {
      // Singular system: retry with a simpler model on the caller's weights.
      return weightedPolynomialRegression(points, actualDegree - 1, weights);
    }
    coefficients = [weightSum ? weightedYSum / weightSum : 0];
  }

  const predictions = points.map((p) => evaluatePolynomial(coefficients, p.x));
  const yMean = weightSum ? weightedYSum / weightSum : 0;
  const sse = points.reduce((acc, p, i) => {
    const resid = p.y - predictions[i];
    return acc + safeWeights[i] * resid * resid;
  }, 0);
  const sst = points.reduce((acc, p, i) => acc + safeWeights[i] * (p.y - yMean) ** 2, 0);

  const mse = weightSum ? sse / weightSum : 0;
  const rmse = Math.sqrt(mse);
  const r2 = sst > 0 ? 1 - (sse / sst) : 0;
  return { degree: actualDegree, coefficients, mse, rmse, r2, n, weightSum };
}
1221
+
1222
/**
 * Degree-1 convenience wrapper around weightedPolynomialRegression.
 *
 * @param {Array<{x: number, y: number}>} points - Samples to fit.
 * @param {?number[]} [weights=null] - Optional weight per point.
 * @returns {object} Whatever weightedPolynomialRegression returns for degree 1.
 */
function weightedLinearRegression(points, weights = null) {
  const linearDegree = 1;
  return weightedPolynomialRegression(points, linearDegree, weights);
}
1225
+
1226
/**
 * Degree-1 fit that passes no weights to weightedPolynomialRegression.
 *
 * @param {Array<{x: number, y: number}>} points - Samples to fit.
 * @returns {object} Whatever weightedPolynomialRegression returns for degree 1.
 */
function linearRegression(points) {
  const linearDegree = 1;
  return weightedPolynomialRegression(points, linearDegree);
}
1229
+
1230
+
1231
/**
 * Iteratively reweighted polynomial fit that downweights large residuals and
 * points far from the median x.
 *
 * Each of up to 10 rounds computes residuals against the current fit, derives
 * a scale from 1.4826 x the median absolute deviation of those residuals,
 * shrinks the weight of any point whose deviation exceeds 1.345 x that scale
 * (weight multiplied by threshold / deviation), applies an x-distance penalty,
 * and refits. Iteration stops early once the coefficients change by less than
 * 1e-10 in total.
 *
 * @param {Array<{x: number, y: number, count?: number}>} points - Samples; a
 *   finite `count` of at least 1 is used as the base weight, otherwise 1.
 * @param {number} [degree=2] - Requested polynomial degree.
 * @returns {object} The final weightedPolynomialRegression result, extended
 *   with `xMean`, `centerValue`, `centerSlope`, `curvature`, `method` and
 *   `effectiveN`. With fewer than two points the plain fit is returned
 *   without those extra fields.
 */
function robustPolynomialRegression(points, degree = 2) {
  const n = points.length;
  if (n < 2) return weightedPolynomialRegression(points, degree);

  const baseWeights = points.map((p) => Math.max(1, Number.isFinite(p.count) ? p.count : 1));

  // The x-distance penalty depends only on the inputs, so compute it once
  // instead of on every reweighting round.
  const xMedian = median(points.map((p) => p.x));
  const xScale = Math.max(1e-6, 1.4826 * median(points.map((p) => Math.abs(p.x - xMedian))));
  const leverageFactors = points.map((p) => {
    const leverage = Math.abs(p.x - xMedian) / xScale;
    return 1 / (1 + 0.18 * leverage + 0.02 * leverage * leverage);
  });

  let weights = [...baseWeights];
  let fit = weightedPolynomialRegression(points, degree, weights);

  for (let iter = 0; iter < 10; iter += 1) {
    const residuals = points.map((p) => p.y - evaluateFit(fit, p.x));
    const residMedian = median(residuals);
    const absDeviations = residuals.map((r) => Math.abs(r - residMedian));
    const scale = Math.max(1e-6, 1.4826 * median(absDeviations));
    const huberK = 1.345 * scale;

    const nextWeights = points.map((p, i) => {
      const resid = absDeviations[i];
      let w = baseWeights[i];
      if (resid > huberK) w *= huberK / resid;
      w *= leverageFactors[i];
      // Keep every point marginally in play so the system stays solvable.
      return Math.max(w, 1e-6);
    });

    const next = weightedPolynomialRegression(points, degree, nextWeights);
    const delta = next.coefficients.reduce(
      (acc, coef, i) => acc + Math.abs((fit.coefficients[i] ?? 0) - coef),
      0
    );
    fit = next;
    weights = nextWeights;
    if (delta < 1e-10) break;
  }

  const totalWeight = weights.reduce((a, b) => a + b, 0);
  const xMean = points.reduce((acc, p, i) => acc + weights[i] * p.x, 0) / Math.max(1e-12, totalWeight);

  fit.xMean = xMean;
  fit.centerValue = evaluateFit(fit, xMean);
  fit.centerSlope = polynomialDerivative(fit.coefficients, xMean);
  fit.curvature = fit.coefficients.length >= 3 ? fit.coefficients[0] : 0;
  fit.method = `robust-degree-${fit.degree}`;
  fit.effectiveN = totalWeight;
  return fit;
}
1276
 
1277
/**
 * Robust straight-line fit of y on x.
 *
 * The starting line uses the weighted median of all pairwise slopes
 * (Theil-Sen style) and the weighted median of the matching intercepts. It is
 * then refined by up to 6 rounds of reweighted least squares: points whose
 * residual deviation exceeds 1.345 x (1.4826 x median absolute deviation) are
 * downweighted, as are points far from the median x.
 *
 * @param {Array<{x: number, y: number, count?: number}>} points - Samples; a
 *   finite `count` of at least 1 is used as the base weight, otherwise 1.
 * @returns {object} The final weightedPolynomialRegression result, extended
 *   with `slope`, `intercept`, `xMean`, `centerValue`, `centerSlope`,
 *   `curvature` (always 0), `effectiveN` and `method`. With fewer than two
 *   points the plain fit is returned without those extra fields.
 */
function robustLinearRegression(points) {
  const n = points.length;
  if (n < 2) return weightedPolynomialRegression(points, 1);

  // weightedPolynomialRegression can fall back to a constant (one coefficient)
  // when the x values cannot support a line. That single coefficient is a
  // level, not a slope, so it must be read as the intercept with slope 0.
  const linearTerms = (coefficients) => (coefficients.length >= 2
    ? [coefficients[0] ?? 0, coefficients[1] ?? 0]
    : [0, coefficients[0] ?? 0]);

  const baseWeights = points.map((p) => Math.max(1, Number.isFinite(p.count) ? p.count : 1));

  // Pairwise slopes, each weighted by the product of its endpoints' weights.
  const slopes = [];
  const slopeWeights = [];
  for (let i = 0; i < n; i += 1) {
    for (let j = i + 1; j < n; j += 1) {
      const dx = points[j].x - points[i].x;
      if (Math.abs(dx) < 1e-12) continue;
      slopes.push((points[j].y - points[i].y) / dx);
      slopeWeights.push(baseWeights[i] * baseWeights[j]);
    }
  }

  let slope = slopes.length ? weightedMedian(slopes, slopeWeights) : 0;
  if (!Number.isFinite(slope)) {
    [slope] = linearTerms(weightedLinearRegression(points, baseWeights).coefficients);
  }

  let intercept = weightedMedian(
    points.map((p) => p.y - slope * p.x),
    baseWeights
  );
  if (!Number.isFinite(intercept)) intercept = 0;

  let fit = {
    degree: 1,
    coefficients: [slope, intercept],
    mse: 0,
    rmse: 0,
    r2: 0,
    n,
    weightSum: baseWeights.reduce((a, b) => a + b, 0),
    method: 'theil-sen-seeded-linear'
  };

  // The x-distance penalty depends only on the inputs, so compute it once.
  const xMedian = median(points.map((p) => p.x));
  const xScale = Math.max(1e-6, 1.4826 * median(points.map((p) => Math.abs(p.x - xMedian))));
  const leverageFactors = points.map((p) => {
    const leverage = Math.abs(p.x - xMedian) / xScale;
    return 1 / (1 + 0.14 * leverage + 0.015 * leverage * leverage);
  });

  let weights = [...baseWeights];
  for (let iter = 0; iter < 6; iter += 1) {
    const residuals = points.map((p) => p.y - evaluateFit(fit, p.x));
    const residMedian = median(residuals);
    const absDeviations = residuals.map((r) => Math.abs(r - residMedian));
    const scale = Math.max(1e-6, 1.4826 * median(absDeviations));
    const huberK = 1.345 * scale;

    const nextWeights = points.map((p, i) => {
      const resid = absDeviations[i];
      let w = baseWeights[i];
      if (resid > huberK) w *= huberK / resid;
      w *= leverageFactors[i];
      // Keep every point marginally in play so the system stays solvable.
      return Math.max(w, 1e-6);
    });

    fit = weightedPolynomialRegression(points, 1, nextWeights);
    weights = nextWeights;

    const [nextSlope, nextIntercept] = linearTerms(fit.coefficients);
    // The first round replaces the median-based seed, so only later rounds
    // are compared for convergence.
    if (iter > 0) {
      const delta = Math.abs(nextSlope - slope) + Math.abs(nextIntercept - intercept);
      if (delta < 1e-10) break;
    }
    slope = nextSlope;
    intercept = nextIntercept;
  }

  // Report the terms of the fit that is actually returned.
  [slope, intercept] = linearTerms(fit.coefficients);

  const totalWeight = weights.reduce((a, b) => a + b, 0);
  const xMean = points.reduce((acc, p, i) => acc + weights[i] * p.x, 0) / Math.max(1e-12, totalWeight);

  fit.xMean = xMean;
  fit.centerValue = evaluateFit(fit, xMean);
  fit.centerSlope = slope;
  fit.slope = slope;
  fit.intercept = intercept;
  fit.curvature = 0;
  fit.effectiveN = totalWeight;
  fit.method = 'robust-linear';
  return fit;
}
1361
+
1362
/**
 * Evaluates a fit object at `x`.
 *
 * @param {?{coefficients?: number[]}} fit - Fit whose `coefficients` are
 *   passed to evaluatePolynomial; `[0]` is used when they are missing.
 * @param {number} x - Point of evaluation.
 * @returns {number} The result of evaluatePolynomial.
 */
function evaluateFit(fit, x) {
  const coefficients = fit?.coefficients || [0];
  return evaluatePolynomial(coefficients, x);
}
1365
+
1366
/**
 * Evaluates the derivative of a fit object at `x`.
 *
 * @param {?{coefficients?: number[]}} fit - Fit whose `coefficients` are
 *   passed to polynomialDerivative; `[0]` is used when they are missing.
 * @param {number} x - Point of evaluation.
 * @returns {number} The result of polynomialDerivative.
 */
function fitSlopeAt(fit, x) {
  const coefficients = fit?.coefficients || [0];
  return polynomialDerivative(coefficients, x);
}
1369
+
1370
/**
 * Reads the `xMean` stored on a fit object.
 *
 * @param {?{xMean?: number}} fit
 * @returns {number} `fit.xMean`, or 0 when the fit or the field is null or
 *   undefined.
 */
function fitCenter(fit) {
  const center = fit?.xMean;
  if (center === null || center === undefined) return 0;
  return center;
}
1373
+
1374
/**
 * Reads the `centerValue` stored on a fit object.
 *
 * @param {?{centerValue?: number}} fit
 * @returns {number} `fit.centerValue`, or 0 when the fit or the field is null
 *   or undefined.
 */
function fitSummaryValue(fit) {
  const value = fit?.centerValue;
  if (value === null || value === undefined) return 0;
  return value;
}
1377
+
1378
/**
 * Formats a fit as a human-readable equation in x.
 *
 * @param {?{degree?: number, coefficients?: number[]}} fit - Coefficients are
 *   ordered highest degree first.
 * @returns {string} An em dash when there are no coefficients; "ax + b" for a
 *   degree-1 fit; "ax² + bx + c" for degree 2 or higher with at least three
 *   coefficients; otherwise the coefficients joined by ", ". Negative terms
 *   are written as a subtraction ("x - 2.00") rather than "x + -2.00".
 */
function formatFitEquation(fit) {
  // Escapes keep the placeholder and superscript intact regardless of how the
  // file itself is encoded or re-served.
  const EM_DASH = '\u2014';
  const SUPERSCRIPT_TWO = '\u00b2';

  if (!fit?.coefficients?.length) return EM_DASH;

  // Renders a trailing term with an explicit sign and an unsigned magnitude.
  const signedTerm = (value, digits, suffix = '') => {
    const sign = value < 0 ? '-' : '+';
    return `${sign} ${Math.abs(value).toFixed(digits)}${suffix}`;
  };

  const coeffs = fit.coefficients;
  if (fit.degree === 1 && coeffs.length >= 2) {
    return `${coeffs[0].toFixed(4)}x ${signedTerm(coeffs[1], 2)}`;
  }
  if (fit.degree >= 2 && coeffs.length >= 3) {
    return `${coeffs[0].toFixed(4)}x${SUPERSCRIPT_TWO} ${signedTerm(coeffs[1], 4, 'x')} ${signedTerm(coeffs[2], 2)}`;
  }
  return coeffs.map((v) => v.toFixed(4)).join(', ');
}
1389
+
1390
/**
 * Lists the models that can be plotted and fitted for the active benchmark.
 *
 * A model qualifies when its `params` is a finite number of at least
 * MIN_PLOT_PARAMS and its score for `activeBenchmark` is a finite number.
 *
 * NOTE(review): another `function getEligibleModels()` declaration begins
 * right after this one in the visible diff. If both live in the same scope the
 * later declaration is the one that runs - confirm whether this copy is needed.
 *
 * @returns {Array<object>} The qualifying entries of MODELS, in original order.
 */
function getEligibleModels() {
  const qualifies = (model) => {
    const score = getMetricValue(model, activeBenchmark);
    if (!Number.isFinite(model.params)) return false;
    if (!(model.params >= MIN_PLOT_PARAMS)) return false;
    // Number.isFinite is false for null and undefined, so it also covers the
    // "no score recorded" cases.
    return Number.isFinite(score);
  };
  return MODELS.filter(qualifies);
}
1400
 
1401
  function getEligibleModels() {
 
1525
  }).join('');
1526
  }
1527
 
1528
+ function updateStats(fit, visibleCount, residuals, rawCount, binCount, rawFit) {
1529
+ document.getElementById('statSlope').textContent = fit.n >= 2 ? (fit.slope ?? fit.coefficients?.[0] ?? 0).toFixed(6) : 'β€”';
1530
+ document.getElementById('statIntercept').textContent = fit.n >= 2 ? (fit.intercept ?? fit.coefficients?.[1] ?? 0).toFixed(4) : 'β€”';
1531
+ document.getElementById('statMSE').textContent = fit.n >= 2 ? fit.mse.toFixed(2) : 'β€”';
1532
  document.getElementById('statRMSE').textContent = fit.n >= 2 ? fit.rmse.toFixed(2) : 'β€”';
1533
  document.getElementById('statR2').textContent = fit.n >= 2 ? fit.r2.toFixed(3) : 'β€”';
1534
 
1535
+ document.getElementById('countBadge').textContent = `${visibleCount} visible / ${binCount} bins`;
1536
  document.getElementById('infoBenchmark').textContent = BENCHMARK_NAMES[activeBenchmark] || activeBenchmark;
1537
  document.getElementById('infoCount').textContent = String(visibleCount);
1538
+ document.getElementById('infoFitCount').textContent = String(binCount);
1539
 
1540
  const absMean = residuals.length ? residuals.reduce((a, b) => a + Math.abs(b), 0) / residuals.length : 0;
1541
  const mean = residuals.length ? residuals.reduce((a, b) => a + b, 0) / residuals.length : 0;
 
1546
 
1547
  const orgCount = new Set(getVisibleModels().map(m => m.org)).size;
1548
  document.getElementById('infoOrgs').textContent = `${orgCount} orgs`;
1549
+ document.getElementById('infoMatchRate').textContent = `${visibleCount}/${rawCount}`;
1550
  document.getElementById('infoMode').textContent = activeMode === 'mobile' ? 'Mobile' : 'Computer';
1551
 
1552
  document.getElementById('fitNote').textContent =
1553
+ `Fit uses ${rawCount} eligible models collapsed into ${binCount} size bins for ${BENCHMARK_NAMES[activeBenchmark] || activeBenchmark}. Raw-point RMSE is ${rawFit.n >= 2 ? rawFit.rmse.toFixed(2) : 'β€”'}; binned fit RMSE is ${fit.n >= 2 ? fit.rmse.toFixed(2) : 'β€”'}. Search and org filters only affect visibility.`;
1554
  }
1555
 
1556
  function render() {
 
1593
 
1594
  document.getElementById('chartTitle').textContent = chartTitleMap[activeBenchmark] || 'Regression vs log parameters';
1595
  document.getElementById('chartSub').textContent =
1596
+ 'Binned linear regression on log10(parameters) for the selected benchmark. Each size bin contributes one equally weighted sample, so the line tracks the average score by parameter region instead of point density.';
1597
 
1598
  if (fitData.length < 2) {
1599
  document.getElementById('chartSub').textContent = 'Need at least 2 eligible models to fit a line.';
1600
+ document.getElementById('countBadge').textContent = `${visibleModels.length} visible / ${fitData.length} bins`;
1601
  document.getElementById('infoBenchmark').textContent = BENCHMARK_NAMES[activeBenchmark] || activeBenchmark;
1602
  document.getElementById('infoCount').textContent = String(visibleModels.length);
1603
  document.getElementById('infoFitCount').textContent = String(fitData.length);
 
1611
  return;
1612
  }
1613
 
1614
+ const fitSamples = buildBinnedFitSamples(fitData);
1615
+ const fit = robustLinearRegression(fitSamples);
1616
+ const rawFit = weightedLinearRegression(fitData);
1617
 
1618
+ const visibleResiduals = data.map(d => d.y - evaluateFit(fit, d.x));
1619
  data.forEach((d, i) => {
1620
  d.residual = visibleResiduals[i];
1621
+ d.prediction = evaluateFit(fit, d.x);
1622
  });
1623
 
1624
+ updateStats(fit, data.length, visibleResiduals, fitData.length, fitSamples.length, rawFit);
1625
  updatePredictionPanel();
1626
 
1627
  const xMin = Math.min(...fitData.map(d => d.x));
 
1633
  for (let i = 0; i <= lineSteps; i += 1) {
1634
  const t = i / lineSteps;
1635
  const rawX = (xMin - xPad) + ((xMax + xPad) - (xMin - xPad)) * t;
1636
+ regressionLine.push({ x: rawX, y: evaluateFit(fit, rawX) });
1637
  }
1638
 
1639
  const yMin = Math.min(...fitData.map(d => d.y), ...regressionLine.map(p => p.y)) - 1.6;
 
1728
  title: (items) => items[0]?.raw?.name || '',
1729
  label: (item) => {
1730
  const d = item.raw;
1731
+ const predicted = evaluateFit(fit, d.x);
1732
  const resid = d.y - predicted;
1733
  return [
1734
  `Org: ${d.org}`,
1735
  `Params: ${fmtParams(d.params)} (${d.params.toLocaleString()})`,
1736
  `Score: ${d.score.toFixed(2)}%`,
1737
  `Residual: ${resid >= 0 ? '+' : ''}${resid.toFixed(2)} pts`,
1738
+ `Predicted (robust line): ${predicted.toFixed(2)}%`
1739
  ];
1740
  },
1741
  afterLabel: (item) => {