Spaces:
Running
Running
Commit ·
1d7a096
1
Parent(s): ff74e24
Simplify by-query-type leaderboard view
Browse files- index.html +173 -18
index.html
CHANGED
|
@@ -593,11 +593,19 @@
|
|
| 593 |
.controls {
|
| 594 |
display: grid;
|
| 595 |
gap: 12px;
|
| 596 |
-
grid-template-columns: repeat(
|
| 597 |
padding: 16px;
|
| 598 |
margin-bottom: 14px;
|
| 599 |
}
|
| 600 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 601 |
.toggle-row {
|
| 602 |
display: flex;
|
| 603 |
justify-content: space-between;
|
|
@@ -607,6 +615,10 @@
|
|
| 607 |
flex-wrap: wrap;
|
| 608 |
}
|
| 609 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 610 |
.toggle {
|
| 611 |
display: inline-flex;
|
| 612 |
align-items: center;
|
|
@@ -692,6 +704,58 @@
|
|
| 692 |
line-height: 1.5;
|
| 693 |
}
|
| 694 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 695 |
.markdown-note {
|
| 696 |
color: var(--muted);
|
| 697 |
font-size: 1rem;
|
|
@@ -918,7 +982,7 @@
|
|
| 918 |
<select id="sort-key">
|
| 919 |
<option value="weighted_total_content_score">Weighted score</option>
|
| 920 |
<option value="unweighted_mean_score">Unweighted mean</option>
|
| 921 |
-
<option value="percentage_ge_sources_in_se_sources">% In SE</option>
|
| 922 |
</select>
|
| 923 |
</label>
|
| 924 |
<label>
|
|
@@ -927,7 +991,11 @@
|
|
| 927 |
</label>
|
| 928 |
</section>
|
| 929 |
|
| 930 |
-
<div class="
|
|
|
|
|
|
|
|
|
|
|
|
|
| 931 |
<label class="toggle">
|
| 932 |
<input id="show-dimensions" type="checkbox">
|
| 933 |
<span>Show dimension scores</span>
|
|
@@ -1017,6 +1085,7 @@
|
|
| 1017 |
payload: null,
|
| 1018 |
currentView: "overall",
|
| 1019 |
showDimensions: false,
|
|
|
|
| 1020 |
};
|
| 1021 |
|
| 1022 |
const baseColumns = [
|
|
@@ -1061,10 +1130,21 @@
|
|
| 1061 |
"deepseek-chat-tavily",
|
| 1062 |
]);
|
| 1063 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1064 |
const viewSelect = document.getElementById("view-select");
|
| 1065 |
const sortKey = document.getElementById("sort-key");
|
| 1066 |
const searchInput = document.getElementById("search-input");
|
| 1067 |
const showDimensionsInput = document.getElementById("show-dimensions");
|
|
|
|
|
|
|
|
|
|
| 1068 |
const tableTitle = document.getElementById("table-title");
|
| 1069 |
const tableCount = document.getElementById("table-count");
|
| 1070 |
const tableHead = document.querySelector("#leaderboard-table thead");
|
|
@@ -1114,6 +1194,53 @@
|
|
| 1114 |
return !isDeepSeekStudyModel(modelName);
|
| 1115 |
}
|
| 1116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1117 |
function updateTopStats(payload) {
|
| 1118 |
const overall = (payload.overall || []).filter((row) => isMainBoardModel(row.model_name));
|
| 1119 |
const byType = (payload.by_query_type || []).filter((row) => isMainBoardModel(row.model_name));
|
|
@@ -1239,6 +1366,10 @@
|
|
| 1239 |
|
| 1240 |
rows = rows.filter((row) => isMainBoardModel(row.model_name));
|
| 1241 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1242 |
const q = searchInput.value.trim().toLowerCase();
|
| 1243 |
if (q) {
|
| 1244 |
rows = rows.filter((row) => String(row.model_name || "").toLowerCase().includes(q));
|
|
@@ -1258,27 +1389,43 @@
|
|
| 1258 |
|
| 1259 |
function renderTable() {
|
| 1260 |
const rows = getRows();
|
| 1261 |
-
const cols =
|
| 1262 |
-
|
| 1263 |
-
|
| 1264 |
-
|
| 1265 |
-
|
| 1266 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1267 |
tableCount.textContent = `${rows.length} rows`;
|
| 1268 |
tableHead.innerHTML = `<tr>${cols.map((key) => `<th>${labels[key] || key}</th>`).join("")}</tr>`;
|
| 1269 |
-
|
| 1270 |
-
|
| 1271 |
-
|
| 1272 |
-
|
| 1273 |
-
|
| 1274 |
-
|
| 1275 |
-
|
| 1276 |
-
|
| 1277 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1278 |
}
|
| 1279 |
|
| 1280 |
function applyPayload(payload) {
|
| 1281 |
state.payload = payload;
|
|
|
|
|
|
|
| 1282 |
updateTopStats(payload);
|
| 1283 |
renderFindings(payload);
|
| 1284 |
renderDeepSeekStudy(payload);
|
|
@@ -1306,6 +1453,10 @@
|
|
| 1306 |
tab.addEventListener("click", () => {
|
| 1307 |
state.currentView = tab.dataset.view;
|
| 1308 |
viewSelect.value = state.currentView;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1309 |
document.querySelectorAll(".tab").forEach((node) => node.classList.toggle("active", node === tab));
|
| 1310 |
renderTable();
|
| 1311 |
});
|
|
@@ -1313,6 +1464,10 @@
|
|
| 1313 |
|
| 1314 |
viewSelect.addEventListener("change", (event) => {
|
| 1315 |
state.currentView = event.target.value;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1316 |
document.querySelectorAll(".tab").forEach((node) => node.classList.toggle("active", node.dataset.view === state.currentView));
|
| 1317 |
renderTable();
|
| 1318 |
});
|
|
|
|
| 593 |
.controls {
|
| 594 |
display: grid;
|
| 595 |
gap: 12px;
|
| 596 |
+
grid-template-columns: repeat(4, minmax(0, 1fr));
|
| 597 |
padding: 16px;
|
| 598 |
margin-bottom: 14px;
|
| 599 |
}
|
| 600 |
|
| 601 |
+
.query-filter-wrap {
|
| 602 |
+
display: none;
|
| 603 |
+
}
|
| 604 |
+
|
| 605 |
+
.query-filter-wrap.visible {
|
| 606 |
+
display: block;
|
| 607 |
+
}
|
| 608 |
+
|
| 609 |
.toggle-row {
|
| 610 |
display: flex;
|
| 611 |
justify-content: space-between;
|
|
|
|
| 615 |
flex-wrap: wrap;
|
| 616 |
}
|
| 617 |
|
| 618 |
+
.toggle-row.hidden {
|
| 619 |
+
display: none;
|
| 620 |
+
}
|
| 621 |
+
|
| 622 |
.toggle {
|
| 623 |
display: inline-flex;
|
| 624 |
align-items: center;
|
|
|
|
| 704 |
line-height: 1.5;
|
| 705 |
}
|
| 706 |
|
| 707 |
+
.dataset-legend {
|
| 708 |
+
display: none;
|
| 709 |
+
gap: 10px;
|
| 710 |
+
flex-wrap: wrap;
|
| 711 |
+
margin: 0 0 14px;
|
| 712 |
+
}
|
| 713 |
+
|
| 714 |
+
.dataset-legend.visible {
|
| 715 |
+
display: flex;
|
| 716 |
+
}
|
| 717 |
+
|
| 718 |
+
.dataset-chip {
|
| 719 |
+
display: inline-flex;
|
| 720 |
+
align-items: center;
|
| 721 |
+
gap: 8px;
|
| 722 |
+
padding: 8px 12px;
|
| 723 |
+
border-radius: 999px;
|
| 724 |
+
background: rgba(255,255,255,0.78);
|
| 725 |
+
border: 1px solid rgba(23, 32, 51, 0.1);
|
| 726 |
+
color: var(--muted);
|
| 727 |
+
font-size: 0.93rem;
|
| 728 |
+
line-height: 1.4;
|
| 729 |
+
}
|
| 730 |
+
|
| 731 |
+
.dataset-chip strong {
|
| 732 |
+
color: var(--ink);
|
| 733 |
+
font-size: 0.94rem;
|
| 734 |
+
}
|
| 735 |
+
|
| 736 |
+
.dataset-chip.active {
|
| 737 |
+
background: var(--ink);
|
| 738 |
+
border-color: var(--ink);
|
| 739 |
+
color: rgba(255,255,255,0.85);
|
| 740 |
+
}
|
| 741 |
+
|
| 742 |
+
.dataset-chip.active strong {
|
| 743 |
+
color: white;
|
| 744 |
+
}
|
| 745 |
+
|
| 746 |
+
.sort-option-overall-only.hidden {
|
| 747 |
+
display: none;
|
| 748 |
+
}
|
| 749 |
+
|
| 750 |
+
.group-row td {
|
| 751 |
+
background: rgba(23, 32, 51, 0.05);
|
| 752 |
+
color: var(--ink);
|
| 753 |
+
font-weight: 700;
|
| 754 |
+
font-size: 0.9rem;
|
| 755 |
+
letter-spacing: 0.02em;
|
| 756 |
+
border-bottom: 1px solid rgba(23, 32, 51, 0.08);
|
| 757 |
+
}
|
| 758 |
+
|
| 759 |
.markdown-note {
|
| 760 |
color: var(--muted);
|
| 761 |
font-size: 1rem;
|
|
|
|
| 982 |
<select id="sort-key">
|
| 983 |
<option value="weighted_total_content_score">Weighted score</option>
|
| 984 |
<option value="unweighted_mean_score">Unweighted mean</option>
|
| 985 |
+
<option class="sort-option-overall-only" value="percentage_ge_sources_in_se_sources">% In SE</option>
|
| 986 |
</select>
|
| 987 |
</label>
|
| 988 |
<label>
|
|
|
|
| 991 |
</label>
|
| 992 |
</section>
|
| 993 |
|
| 994 |
+
<div class="query-filter-wrap" id="query-filter-wrap">
|
| 995 |
+
<div class="dataset-legend" id="dataset-legend"></div>
|
| 996 |
+
</div>
|
| 997 |
+
|
| 998 |
+
<div class="toggle-row" id="toggle-row">
|
| 999 |
<label class="toggle">
|
| 1000 |
<input id="show-dimensions" type="checkbox">
|
| 1001 |
<span>Show dimension scores</span>
|
|
|
|
| 1085 |
payload: null,
|
| 1086 |
currentView: "overall",
|
| 1087 |
showDimensions: false,
|
| 1088 |
+
selectedQueryType: "all",
|
| 1089 |
};
|
| 1090 |
|
| 1091 |
const baseColumns = [
|
|
|
|
| 1130 |
"deepseek-chat-tavily",
|
| 1131 |
]);
|
| 1132 |
|
| 1133 |
+
/**
 * Maps each known `query_type` value (dataset name) to the short category tag
 * displayed next to it in the by-query-type view. Key order is the display
 * order used when building the dataset chips.
 *
 * The map has a null prototype so that data-driven lookups such as
 * `QUERY_TYPE_META[name]` or `name in QUERY_TYPE_META` never match inherited
 * members like "constructor" or "toString", and it is frozen because it is a
 * shared lookup table that is only ever read.
 */
const QUERY_TYPE_META = Object.freeze(Object.assign(Object.create(null), {
  "DebateQA": "Argument",
  "HotpotQA": "Informational",
  "Pinocchios": "Factual",
  "QuoraQuestions": "Social",
  "VA-COS NLQ": "Shopping",
}));
|
| 1140 |
+
|
| 1141 |
const viewSelect = document.getElementById("view-select");
|
| 1142 |
const sortKey = document.getElementById("sort-key");
|
| 1143 |
const searchInput = document.getElementById("search-input");
|
| 1144 |
const showDimensionsInput = document.getElementById("show-dimensions");
|
| 1145 |
+
const toggleRow = document.getElementById("toggle-row");
|
| 1146 |
+
const queryFilterWrap = document.getElementById("query-filter-wrap");
|
| 1147 |
+
const datasetLegend = document.getElementById("dataset-legend");
|
| 1148 |
const tableTitle = document.getElementById("table-title");
|
| 1149 |
const tableCount = document.getElementById("table-count");
|
| 1150 |
const tableHead = document.querySelector("#leaderboard-table thead");
|
|
|
|
| 1194 |
return !isDeepSeekStudyModel(modelName);
|
| 1195 |
}
|
| 1196 |
|
| 1197 |
+
/**
 * Builds the human-readable label for a query type.
 *
 * @param {*} queryType - Query type name as found in the payload rows.
 * @returns {string} "<name> (<tag>)" when the name has its own entry in
 *   QUERY_TYPE_META, the name itself when it has none, or "-" when the name
 *   is empty or missing.
 */
function formatQueryTypeLabel(queryType) {
  // Own-property check: a plain bracket lookup would also resolve inherited
  // members (e.g. "constructor"), producing a label with a function's source.
  const hasTag = Object.prototype.hasOwnProperty.call(QUERY_TYPE_META, queryType);
  const tag = hasTag ? QUERY_TYPE_META[queryType] : undefined;
  return tag ? `${queryType} (${tag})` : String(queryType || "-");
}
|
| 1201 |
+
|
| 1202 |
+
/**
 * Synchronises the controls with the current view.
 *
 * In the "by_query_type" view the dataset legend and its wrapper are shown,
 * the dimension toggle row is hidden, sort options marked
 * `.sort-option-overall-only` are made unavailable, a now-unavailable sort
 * selection falls back to the weighted score, and the "show dimensions" state
 * is reset. In any other view the visibility changes are reverted; the sort
 * selection and dimension state are left as they are.
 */
function updateQueryTypeControls() {
  const visible = state.currentView === "by_query_type";

  queryFilterWrap.classList.toggle("visible", visible);
  datasetLegend.classList.toggle("visible", visible);
  toggleRow.classList.toggle("hidden", visible);

  for (const option of document.querySelectorAll(".sort-option-overall-only")) {
    option.classList.toggle("hidden", visible);
    // Not every browser honours `display: none` on <option>, so the class
    // alone can leave the entry selectable. Marking it hidden + disabled
    // makes it unavailable regardless of how the option list is rendered.
    option.hidden = visible;
    option.disabled = visible;
  }

  if (visible) {
    // The overall-only sort key has no column in this view; fall back.
    if (sortKey.value === "percentage_ge_sources_in_se_sources") {
      sortKey.value = "weighted_total_content_score";
    }
    // Dimension columns are not rendered in this view, so clear the toggle.
    state.showDimensions = false;
    showDimensionsInput.checked = false;
  }
}
|
| 1218 |
+
|
| 1219 |
+
/**
 * Rebuilds the dataset chips in `datasetLegend` from the payload's
 * `by_query_type` rows and wires their click handlers.
 *
 * Chips are ordered with the query types known to QUERY_TYPE_META first (in
 * that map's key order), followed by any other types in first-seen order.
 * When the current selection is "all" or no longer present, the first chip
 * becomes the selection. With no query types at all the legend is emptied
 * and the selection is left untouched.
 *
 * @param {object} payload - Leaderboard payload; only `by_query_type` is read.
 */
function populateQueryTypeFilter(payload) {
  const HTML_ESCAPES = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  // Query type names come from the loaded payload, so they must be escaped
  // before being interpolated into markup assigned to innerHTML.
  const escapeHtml = (text) => String(text).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
  // Own-property check so names like "constructor" are not mistaken for
  // known query types via the prototype chain.
  const hasMeta = (key) => Object.prototype.hasOwnProperty.call(QUERY_TYPE_META, key);

  const values = Array.from(
    new Set((payload?.by_query_type || []).map((row) => row.query_type).filter(Boolean)),
  );
  const ordered = Object.keys(QUERY_TYPE_META)
    .filter((key) => values.includes(key))
    .concat(values.filter((key) => !hasMeta(key)));

  if (!ordered.length) {
    datasetLegend.innerHTML = "";
    return;
  }

  if (state.selectedQueryType === "all" || !ordered.includes(state.selectedQueryType)) {
    state.selectedQueryType = ordered[0];
  }

  datasetLegend.innerHTML = ordered.map((value) => {
    const activeClass = value === state.selectedQueryType ? 'active' : '';
    const tag = hasMeta(value) ? QUERY_TYPE_META[value] : '';
    return `
      <button class="dataset-chip ${activeClass}" data-query-type="${escapeHtml(value)}" type="button">
        <strong>${escapeHtml(value)}</strong>
        <span>${escapeHtml(tag || '')}</span>
      </button>
    `;
  }).join('');

  for (const button of datasetLegend.querySelectorAll('[data-query-type]')) {
    button.addEventListener('click', () => {
      // dataset values are read back unescaped, so this matches row.query_type.
      state.selectedQueryType = button.dataset.queryType;
      // Re-render the chips so the active highlight follows the selection.
      populateQueryTypeFilter(state.payload);
      renderTable();
    });
  }
}
|
| 1243 |
+
|
| 1244 |
function updateTopStats(payload) {
|
| 1245 |
const overall = (payload.overall || []).filter((row) => isMainBoardModel(row.model_name));
|
| 1246 |
const byType = (payload.by_query_type || []).filter((row) => isMainBoardModel(row.model_name));
|
|
|
|
| 1366 |
|
| 1367 |
rows = rows.filter((row) => isMainBoardModel(row.model_name));
|
| 1368 |
|
| 1369 |
+
if (state.currentView === "by_query_type") {
|
| 1370 |
+
rows = rows.filter((row) => row.query_type === state.selectedQueryType);
|
| 1371 |
+
}
|
| 1372 |
+
|
| 1373 |
const q = searchInput.value.trim().toLowerCase();
|
| 1374 |
if (q) {
|
| 1375 |
rows = rows.filter((row) => String(row.model_name || "").toLowerCase().includes(q));
|
|
|
|
| 1389 |
|
| 1390 |
/**
 * Renders the leaderboard table (title, row count, header and body) for the
 * current view from the rows returned by `getRows()`.
 *
 * The "overall" view shows the base columns except `query_type`, plus the
 * dimension columns when `state.showDimensions` is set. Any other view shows
 * only the model name and the two aggregate scores, titled after the selected
 * query type.
 */
function renderTable() {
  const HTML_ESCAPES = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  // Cell text originates from the loaded payload (e.g. model names), so it is
  // escaped before being placed into markup assigned to innerHTML.
  const escapeHtml = (text) => String(text).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

  const rows = getRows();
  const isOverall = state.currentView === "overall";
  const cols = isOverall
    ? [
        ...baseColumns.filter((key) => key !== "query_type"),
        ...(state.showDimensions ? dimensionColumns : []),
      ]
    : [
        "model_name",
        "weighted_total_content_score",
        "unweighted_mean_score",
      ];

  tableTitle.textContent = isOverall
    ? "Overall ranking"
    : `Ranking for ${formatQueryTypeLabel(state.selectedQueryType)}`;
  tableCount.textContent = `${rows.length} rows`;
  tableHead.innerHTML = `<tr>${cols.map((key) => `<th>${labels[key] || key}</th>`).join("")}</tr>`;

  const tableRows = rows.map((row) => {
    const cells = cols.map((key) => {
      const val = row[key];
      // Numbers go through the shared formatter; everything else is treated
      // as plain text, with "-" standing in for null/undefined.
      const rendered = typeof val === "number" ? formatNumber(val) : escapeHtml(val ?? "-");
      return `<td>${rendered}</td>`;
    }).join("");
    return `
        <tr>
          ${cells}
        </tr>
      `;
  });
  tableBody.innerHTML = tableRows.join("");
}
|
| 1424 |
|
| 1425 |
function applyPayload(payload) {
|
| 1426 |
state.payload = payload;
|
| 1427 |
+
populateQueryTypeFilter(payload);
|
| 1428 |
+
updateQueryTypeControls();
|
| 1429 |
updateTopStats(payload);
|
| 1430 |
renderFindings(payload);
|
| 1431 |
renderDeepSeekStudy(payload);
|
|
|
|
| 1453 |
tab.addEventListener("click", () => {
|
| 1454 |
state.currentView = tab.dataset.view;
|
| 1455 |
viewSelect.value = state.currentView;
|
| 1456 |
+
if (state.currentView === "by_query_type" && state.payload) {
|
| 1457 |
+
populateQueryTypeFilter(state.payload);
|
| 1458 |
+
}
|
| 1459 |
+
updateQueryTypeControls();
|
| 1460 |
document.querySelectorAll(".tab").forEach((node) => node.classList.toggle("active", node === tab));
|
| 1461 |
renderTable();
|
| 1462 |
});
|
|
|
|
| 1464 |
|
| 1465 |
// When the view dropdown changes: record the new view, refresh the dataset
// chips if the by-query-type view was chosen and data is loaded, update the
// dependent controls, mirror the choice onto the tab strip, and redraw.
viewSelect.addEventListener("change", ({ target }) => {
  state.currentView = target.value;

  const wantsQueryTypes = state.currentView === "by_query_type";
  if (wantsQueryTypes && state.payload) {
    populateQueryTypeFilter(state.payload);
  }
  updateQueryTypeControls();

  for (const node of document.querySelectorAll(".tab")) {
    const isCurrent = node.dataset.view === state.currentView;
    node.classList.toggle("active", isCurrent);
  }
  renderTable();
});
|