Spaces:

WukLab
/

SourceBench-Leaderboard

Running

App Files Community

Kristinx0351 committed 3 days ago

Commit

1444582

1 Parent(s): 9e30538

Remove overview and highlights section

Browse files

Files changed (1) hide show

index.html +0 -110

index.html CHANGED Viewed

@@ -868,46 +868,6 @@
       </article>
     </section>
-    <section class="grid-2 plain-section" id="findings">
-      <article>
-        <div class="section-head">
-          <div>
-            <div class="section-kicker">Work overview</div>
-            <h2 style="font-size:1.45rem;">What this work measures</h2>
-          </div>
-        </div>
-        <div class="plain-list">
-          <div class="plain-item">
-            <strong>Source quality dimensions</strong>
-            <span>Each cited source is judged on semantic relevance, factual accuracy, freshness, objectivity, layout/ad density, accountability, transparency, and authority.</span>
-          </div>
-          <div class="plain-item">
-            <strong>Main leaderboard target</strong>
-            <span>The leaderboard emphasizes quality of referenced web sources, because a model can produce fluent answers while still relying on weak or misleading citations.</span>
-          </div>
-          <div class="plain-item">
-            <strong>Why this matters</strong>
-            <span>For web-grounded assistants, citation quality is a first-class property. A stronger source profile should make answers more trustworthy and auditable.</span>
-          </div>
-        </div>
-      </article>
-      <article>
-        <div class="section-head">
-          <div>
-            <div class="section-kicker">Simple conclusions</div>
-            <h2 style="font-size:1.45rem;">Current board highlights</h2>
-          </div>
-        </div>
-        <div class="findings" id="findings-list">
-          <div class="finding">
-            <strong>Load leaderboard data to see findings</strong>
-            <span>The page will summarize the top models, hardest query types, and search-overlap patterns from the current artifact.</span>
-          </div>
-        </div>
-      </article>
-    </section>
     <section class="panel section" id="leaderboard">
       <div class="section-head">
         <div>
@@ -1133,7 +1093,6 @@
     const tableCount = document.getElementById("table-count");
     const tableHead = document.querySelector("#leaderboard-table thead");
     const tableBody = document.querySelector("#leaderboard-table tbody");
-    const findingsList = document.getElementById("findings-list");
     const deepseekFindings = document.getElementById("deepseek-findings");
     const deepseekTableHead = document.querySelector("#deepseek-table thead");
     const deepseekTableBody = document.querySelector("#deepseek-table tbody");
@@ -1241,74 +1200,6 @@
       tableGenerated.textContent = `Generated at ${formatPacificTimestamp(payload.metadata?.generated_at)}`;
     }
-    function computeFindings(payload) {
-      const overall = (payload.overall || []).filter((row) => isMainBoardModel(row.model_name));
-      const byType = (payload.by_query_type || []).filter((row) => isMainBoardModel(row.model_name));
-      if (!overall.length || !byType.length) {
-        return [{
-          title: "No findings available",
-          body: "Load a leaderboard artifact to generate summary findings."
-        }];
-      }
-      const top = overall[0];
-      const bestFreshness = [...overall].sort((a, b) => (b.freshness || 0) - (a.freshness || 0))[0];
-      const bestOverlap = [...overall]
-        .filter((row) => row.percentage_ge_sources_in_se_sources !== null && row.percentage_ge_sources_in_se_sources !== undefined)
-        .sort((a, b) => b.percentage_ge_sources_in_se_sources - a.percentage_ge_sources_in_se_sources)[0];
-      const qualityLeaders = {
-        transparency: [...overall].sort((a, b) => (b.transparency || 0) - (a.transparency || 0))[0],
-        authority: [...overall].sort((a, b) => (b.authority || 0) - (a.authority || 0))[0],
-        accountability: [...overall].sort((a, b) => (b.accountability || 0) - (a.accountability || 0))[0],
-      };
-      const groupedByType = new Map();
-      for (const row of byType) {
-        if (!groupedByType.has(row.query_type)) groupedByType.set(row.query_type, []);
-        groupedByType.get(row.query_type).push(row);
-      }
-      const typeAverages = Array.from(groupedByType.entries()).map(([queryType, rows]) => {
-        const avg = rows.reduce((sum, row) => sum + (row.weighted_total_content_score || 0), 0) / rows.length;
-        return { queryType, avg };
-      }).sort((a, b) => a.avg - b.avg);
-      const hardest = typeAverages[0];
-      const easiest = typeAverages[typeAverages.length - 1];
-      return [
-        {
-          title: "Overall source quality remains meaningfully separated across systems",
-          body: `${formatModelName(top.model_name)} is the current overall leader with a weighted score of ${formatNumber(top.weighted_total_content_score)}. The spread across the current board suggests that citation quality is not saturated: systems still differ substantially once source relevance, accuracy, transparency, and authority are scored directly.`
-        },
-        {
-          title: "Question type matters, and multi-hop fact synthesis is still the hardest slice",
-          body: `Across the current artifact, ${hardest.queryType} has the lowest average weighted score (${formatNumber(hardest.avg)}), while ${easiest.queryType} is the easiest (${formatNumber(easiest.avg)}). This matches the broader SourceBench framing that harder query types expose source-selection weaknesses even when answers may still look fluent.`
-        },
-        {
-          title: "High search overlap is not the same thing as high source quality",
-          body: bestOverlap
-            ? `${formatModelName(bestOverlap.model_name)} has the highest visible search overlap at ${formatNumber(bestOverlap.percentage_ge_sources_in_se_sources)}% In SE, but the best overall weighted score still belongs to ${formatModelName(top.model_name)}. This mirrors the paper's emphasis that leaderboard quality should not be reduced to overlap with search results alone.`
-            : "The current artifact includes quality metrics beyond simple overlap with search-engine results, which is one of the main design points of SourceBench."
-        },
-        {
-          title: "Dimension scores reveal different strengths behind similar overall rankings",
-          body: `${formatModelName(bestFreshness.model_name)} currently leads freshness at ${formatNumber(bestFreshness.freshness)}, while ${formatModelName(qualityLeaders.transparency.model_name)}, ${formatModelName(qualityLeaders.authority.model_name)}, and ${formatModelName(qualityLeaders.accountability.model_name)} lead key trust-related dimensions such as transparency, authority, and accountability. These per-dimension columns make it easier to see why two systems with similar overall scores can still have very different citation profiles.`
-        }
-      ];
-    }
-    function renderFindings(payload) {
-      const findings = computeFindings(payload);
-      findingsList.innerHTML = "";
-      for (const finding of findings) {
-        const div = document.createElement("div");
-        div.className = "finding";
-        div.innerHTML = `<strong>${finding.title}</strong><span>${finding.body}</span>`;
-        findingsList.appendChild(div);
-      }
-    }
     function renderDeepSeekStudy(payload) {
       const rows = (payload.overall || [])
         .filter((row) => typeof row.model_name === "string" && row.model_name.startsWith("deepseek"));
@@ -1428,7 +1319,6 @@
       populateQueryTypeFilter(payload);
       updateQueryTypeControls();
       updateTopStats(payload);
-      renderFindings(payload);
       renderDeepSeekStudy(payload);
       renderTable();
       setLoadStatus("Leaderboard data loaded.");

       </article>
     </section>
     <section class="panel section" id="leaderboard">
       <div class="section-head">
         <div>
     const tableCount = document.getElementById("table-count");
     const tableHead = document.querySelector("#leaderboard-table thead");
     const tableBody = document.querySelector("#leaderboard-table tbody");
     const deepseekFindings = document.getElementById("deepseek-findings");
     const deepseekTableHead = document.querySelector("#deepseek-table thead");
     const deepseekTableBody = document.querySelector("#deepseek-table tbody");
       tableGenerated.textContent = `Generated at ${formatPacificTimestamp(payload.metadata?.generated_at)}`;
     }
     function renderDeepSeekStudy(payload) {
       const rows = (payload.overall || [])
         .filter((row) => typeof row.model_name === "string" && row.model_name.startsWith("deepseek"));
       populateQueryTypeFilter(payload);
       updateQueryTypeControls();
       updateTopStats(payload);
       renderDeepSeekStudy(payload);
       renderTable();
       setLoadStatus("Leaderboard data loaded.");