Kristinx0351 committed on
Commit
1444582
·
1 Parent(s): 9e30538

Remove overview and highlights section

Browse files
Files changed (1) hide show
  1. index.html +0 -110
index.html CHANGED
@@ -868,46 +868,6 @@
868
  </article>
869
  </section>
870
 
871
- <section class="grid-2 plain-section" id="findings">
872
- <article>
873
- <div class="section-head">
874
- <div>
875
- <div class="section-kicker">Work overview</div>
876
- <h2 style="font-size:1.45rem;">What this work measures</h2>
877
- </div>
878
- </div>
879
- <div class="plain-list">
880
- <div class="plain-item">
881
- <strong>Source quality dimensions</strong>
882
- <span>Each cited source is judged on semantic relevance, factual accuracy, freshness, objectivity, layout/ad density, accountability, transparency, and authority.</span>
883
- </div>
884
- <div class="plain-item">
885
- <strong>Main leaderboard target</strong>
886
- <span>The leaderboard emphasizes quality of referenced web sources, because a model can produce fluent answers while still relying on weak or misleading citations.</span>
887
- </div>
888
- <div class="plain-item">
889
- <strong>Why this matters</strong>
890
- <span>For web-grounded assistants, citation quality is a first-class property. A stronger source profile should make answers more trustworthy and auditable.</span>
891
- </div>
892
- </div>
893
- </article>
894
-
895
- <article>
896
- <div class="section-head">
897
- <div>
898
- <div class="section-kicker">Simple conclusions</div>
899
- <h2 style="font-size:1.45rem;">Current board highlights</h2>
900
- </div>
901
- </div>
902
- <div class="findings" id="findings-list">
903
- <div class="finding">
904
- <strong>Load leaderboard data to see findings</strong>
905
- <span>The page will summarize the top models, hardest query types, and search-overlap patterns from the current artifact.</span>
906
- </div>
907
- </div>
908
- </article>
909
- </section>
910
-
911
  <section class="panel section" id="leaderboard">
912
  <div class="section-head">
913
  <div>
@@ -1133,7 +1093,6 @@
1133
  const tableCount = document.getElementById("table-count");
1134
  const tableHead = document.querySelector("#leaderboard-table thead");
1135
  const tableBody = document.querySelector("#leaderboard-table tbody");
1136
- const findingsList = document.getElementById("findings-list");
1137
  const deepseekFindings = document.getElementById("deepseek-findings");
1138
  const deepseekTableHead = document.querySelector("#deepseek-table thead");
1139
  const deepseekTableBody = document.querySelector("#deepseek-table tbody");
@@ -1241,74 +1200,6 @@
1241
  tableGenerated.textContent = `Generated at ${formatPacificTimestamp(payload.metadata?.generated_at)}`;
1242
  }
1243
 
1244
- function computeFindings(payload) {
1245
- const overall = (payload.overall || []).filter((row) => isMainBoardModel(row.model_name));
1246
- const byType = (payload.by_query_type || []).filter((row) => isMainBoardModel(row.model_name));
1247
- if (!overall.length || !byType.length) {
1248
- return [{
1249
- title: "No findings available",
1250
- body: "Load a leaderboard artifact to generate summary findings."
1251
- }];
1252
- }
1253
-
1254
- const top = overall[0];
1255
- const bestFreshness = [...overall].sort((a, b) => (b.freshness || 0) - (a.freshness || 0))[0];
1256
- const bestOverlap = [...overall]
1257
- .filter((row) => row.percentage_ge_sources_in_se_sources !== null && row.percentage_ge_sources_in_se_sources !== undefined)
1258
- .sort((a, b) => b.percentage_ge_sources_in_se_sources - a.percentage_ge_sources_in_se_sources)[0];
1259
- const qualityLeaders = {
1260
- transparency: [...overall].sort((a, b) => (b.transparency || 0) - (a.transparency || 0))[0],
1261
- authority: [...overall].sort((a, b) => (b.authority || 0) - (a.authority || 0))[0],
1262
- accountability: [...overall].sort((a, b) => (b.accountability || 0) - (a.accountability || 0))[0],
1263
- };
1264
-
1265
- const groupedByType = new Map();
1266
- for (const row of byType) {
1267
- if (!groupedByType.has(row.query_type)) groupedByType.set(row.query_type, []);
1268
- groupedByType.get(row.query_type).push(row);
1269
- }
1270
-
1271
- const typeAverages = Array.from(groupedByType.entries()).map(([queryType, rows]) => {
1272
- const avg = rows.reduce((sum, row) => sum + (row.weighted_total_content_score || 0), 0) / rows.length;
1273
- return { queryType, avg };
1274
- }).sort((a, b) => a.avg - b.avg);
1275
-
1276
- const hardest = typeAverages[0];
1277
- const easiest = typeAverages[typeAverages.length - 1];
1278
-
1279
- return [
1280
- {
1281
- title: "Overall source quality remains meaningfully separated across systems",
1282
- body: `${formatModelName(top.model_name)} is the current overall leader with a weighted score of ${formatNumber(top.weighted_total_content_score)}. The spread across the current board suggests that citation quality is not saturated: systems still differ substantially once source relevance, accuracy, transparency, and authority are scored directly.`
1283
- },
1284
- {
1285
- title: "Question type matters, and multi-hop fact synthesis is still the hardest slice",
1286
- body: `Across the current artifact, ${hardest.queryType} has the lowest average weighted score (${formatNumber(hardest.avg)}), while ${easiest.queryType} is the easiest (${formatNumber(easiest.avg)}). This matches the broader SourceBench framing that harder query types expose source-selection weaknesses even when answers may still look fluent.`
1287
- },
1288
- {
1289
- title: "High search overlap is not the same thing as high source quality",
1290
- body: bestOverlap
1291
- ? `${formatModelName(bestOverlap.model_name)} has the highest visible search overlap at ${formatNumber(bestOverlap.percentage_ge_sources_in_se_sources)}% In SE, but the best overall weighted score still belongs to ${formatModelName(top.model_name)}. This mirrors the paper's emphasis that leaderboard quality should not be reduced to overlap with search results alone.`
1292
- : "The current artifact includes quality metrics beyond simple overlap with search-engine results, which is one of the main design points of SourceBench."
1293
- },
1294
- {
1295
- title: "Dimension scores reveal different strengths behind similar overall rankings",
1296
- body: `${formatModelName(bestFreshness.model_name)} currently leads freshness at ${formatNumber(bestFreshness.freshness)}, while ${formatModelName(qualityLeaders.transparency.model_name)}, ${formatModelName(qualityLeaders.authority.model_name)}, and ${formatModelName(qualityLeaders.accountability.model_name)} lead key trust-related dimensions such as transparency, authority, and accountability. These per-dimension columns make it easier to see why two systems with similar overall scores can still have very different citation profiles.`
1297
- }
1298
- ];
1299
- }
1300
-
1301
- function renderFindings(payload) {
1302
- const findings = computeFindings(payload);
1303
- findingsList.innerHTML = "";
1304
- for (const finding of findings) {
1305
- const div = document.createElement("div");
1306
- div.className = "finding";
1307
- div.innerHTML = `<strong>${finding.title}</strong><span>${finding.body}</span>`;
1308
- findingsList.appendChild(div);
1309
- }
1310
- }
1311
-
1312
  function renderDeepSeekStudy(payload) {
1313
  const rows = (payload.overall || [])
1314
  .filter((row) => typeof row.model_name === "string" && row.model_name.startsWith("deepseek"));
@@ -1428,7 +1319,6 @@
1428
  populateQueryTypeFilter(payload);
1429
  updateQueryTypeControls();
1430
  updateTopStats(payload);
1431
- renderFindings(payload);
1432
  renderDeepSeekStudy(payload);
1433
  renderTable();
1434
  setLoadStatus("Leaderboard data loaded.");
 
868
  </article>
869
  </section>
870
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
871
  <section class="panel section" id="leaderboard">
872
  <div class="section-head">
873
  <div>
 
1093
  const tableCount = document.getElementById("table-count");
1094
  const tableHead = document.querySelector("#leaderboard-table thead");
1095
  const tableBody = document.querySelector("#leaderboard-table tbody");
 
1096
  const deepseekFindings = document.getElementById("deepseek-findings");
1097
  const deepseekTableHead = document.querySelector("#deepseek-table thead");
1098
  const deepseekTableBody = document.querySelector("#deepseek-table tbody");
 
1200
  tableGenerated.textContent = `Generated at ${formatPacificTimestamp(payload.metadata?.generated_at)}`;
1201
  }
1202
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1203
  function renderDeepSeekStudy(payload) {
1204
  const rows = (payload.overall || [])
1205
  .filter((row) => typeof row.model_name === "string" && row.model_name.startsWith("deepseek"));
 
1319
  populateQueryTypeFilter(payload);
1320
  updateQueryTypeControls();
1321
  updateTopStats(payload);
 
1322
  renderDeepSeekStudy(payload);
1323
  renderTable();
1324
  setLoadStatus("Leaderboard data loaded.");