Spaces:
Running
Running
Commit ·
ff74e24
1
Parent(s): 9cbc730
Update percent-in-SE metric and keep google-search on main board
Browse files- index.html +2 -2
- leaderboard_data.js +0 -0
- leaderboard_data.json +61 -61
index.html
CHANGED
|
@@ -887,7 +887,7 @@
|
|
| 887 |
<ul class="metric-core">
|
| 888 |
<li><strong>Weighted Score.</strong> The main leaderboard score, combining the judged dimensions into one overall source-quality metric.</li>
|
| 889 |
<li><strong>Unweighted Mean.</strong> The simple average across the judged dimension scores, without weighting.</li>
|
| 890 |
-
<li><strong>% In SE.</strong>
|
| 891 |
</ul>
|
| 892 |
<ul class="metric-defs">
|
| 893 |
<li><strong>Semantic Relevance.</strong> Whether the cited source is directly relevant to the query and answer.</li>
|
|
@@ -1111,7 +1111,7 @@
|
|
| 1111 |
|
| 1112 |
function isMainBoardModel(name) {
|
| 1113 |
const modelName = String(name || "");
|
| 1114 |
-
return !isDeepSeekStudyModel(modelName)
|
| 1115 |
}
|
| 1116 |
|
| 1117 |
function updateTopStats(payload) {
|
|
|
|
| 887 |
<ul class="metric-core">
|
| 888 |
<li><strong>Weighted Score.</strong> The main leaderboard score, combining the judged dimensions into one overall source-quality metric.</li>
|
| 889 |
<li><strong>Unweighted Mean.</strong> The simple average across the judged dimension scores, without weighting.</li>
|
| 890 |
+
<li><strong>% In SE.</strong> Percentage of model cited sources appearing in the first five pages of Google Search.</li>
|
| 891 |
</ul>
|
| 892 |
<ul class="metric-defs">
|
| 893 |
<li><strong>Semantic Relevance.</strong> Whether the cited source is directly relevant to the query and answer.</li>
|
|
|
|
| 1111 |
|
| 1112 |
function isMainBoardModel(name) {
|
| 1113 |
const modelName = String(name || "");
|
| 1114 |
+
return !isDeepSeekStudyModel(modelName);
|
| 1115 |
}
|
| 1116 |
|
| 1117 |
function updateTopStats(payload) {
|
leaderboard_data.js
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
leaderboard_data.json
CHANGED
|
@@ -129,7 +129,7 @@
|
|
| 129 |
"normalized_reciprocal_se_rank": 0.07444993783835872,
|
| 130 |
"reciprocal_se_rank": 0.027598407393197893,
|
| 131 |
"percentage_ge_sources_not_in_se_sources": 87.34177215189875,
|
| 132 |
-
"percentage_ge_sources_in_se_sources":
|
| 133 |
},
|
| 134 |
{
|
| 135 |
"model_name": "gpt-4o",
|
|
@@ -151,7 +151,7 @@
|
|
| 151 |
"normalized_reciprocal_se_rank": 0.12262328761538778,
|
| 152 |
"reciprocal_se_rank": 0.03917404241243542,
|
| 153 |
"percentage_ge_sources_not_in_se_sources": 74.82993197278913,
|
| 154 |
-
"percentage_ge_sources_in_se_sources":
|
| 155 |
},
|
| 156 |
{
|
| 157 |
"model_name": "grok-4.1-fast-non-reasoning",
|
|
@@ -173,7 +173,7 @@
|
|
| 173 |
"normalized_reciprocal_se_rank": 0.14088939196167136,
|
| 174 |
"reciprocal_se_rank": 0.043563227680110374,
|
| 175 |
"percentage_ge_sources_not_in_se_sources": 69.15584415584416,
|
| 176 |
-
"percentage_ge_sources_in_se_sources":
|
| 177 |
},
|
| 178 |
{
|
| 179 |
"model_name": "claude",
|
|
@@ -195,7 +195,7 @@
|
|
| 195 |
"normalized_reciprocal_se_rank": 0.1641520228549373,
|
| 196 |
"reciprocal_se_rank": 0.04915303461805529,
|
| 197 |
"percentage_ge_sources_not_in_se_sources": 62.934362934362916,
|
| 198 |
-
"percentage_ge_sources_in_se_sources": 37.
|
| 199 |
},
|
| 200 |
{
|
| 201 |
"model_name": "Gemini-3-Pro-Preview",
|
|
@@ -217,7 +217,7 @@
|
|
| 217 |
"normalized_reciprocal_se_rank": 0.08733801529571,
|
| 218 |
"reciprocal_se_rank": 0.03069529979193034,
|
| 219 |
"percentage_ge_sources_not_in_se_sources": 79.50450450450452,
|
| 220 |
-
"percentage_ge_sources_in_se_sources": 20.
|
| 221 |
},
|
| 222 |
{
|
| 223 |
"model_name": "Gemini-3-Flash-Preview",
|
|
@@ -239,7 +239,7 @@
|
|
| 239 |
"normalized_reciprocal_se_rank": 0.10236888641393292,
|
| 240 |
"reciprocal_se_rank": 0.03430708678393044,
|
| 241 |
"percentage_ge_sources_not_in_se_sources": 76.7543859649123,
|
| 242 |
-
"percentage_ge_sources_in_se_sources": 23.
|
| 243 |
},
|
| 244 |
{
|
| 245 |
"model_name": "Gemini-2.5-Flash-Preview",
|
|
@@ -261,7 +261,7 @@
|
|
| 261 |
"normalized_reciprocal_se_rank": null,
|
| 262 |
"reciprocal_se_rank": null,
|
| 263 |
"percentage_ge_sources_not_in_se_sources": null,
|
| 264 |
-
"percentage_ge_sources_in_se_sources":
|
| 265 |
},
|
| 266 |
{
|
| 267 |
"model_name": "Perplexity-Sonar-Pro",
|
|
@@ -283,7 +283,7 @@
|
|
| 283 |
"normalized_reciprocal_se_rank": 0.16776928125494017,
|
| 284 |
"reciprocal_se_rank": 0.050022230204463676,
|
| 285 |
"percentage_ge_sources_not_in_se_sources": 60.949868073878655,
|
| 286 |
-
"percentage_ge_sources_in_se_sources":
|
| 287 |
},
|
| 288 |
{
|
| 289 |
"model_name": "google-search",
|
|
@@ -327,7 +327,7 @@
|
|
| 327 |
"normalized_reciprocal_se_rank": 0.20452762803005117,
|
| 328 |
"reciprocal_se_rank": 0.058854939745084926,
|
| 329 |
"percentage_ge_sources_not_in_se_sources": 57.176470588235304,
|
| 330 |
-
"percentage_ge_sources_in_se_sources":
|
| 331 |
},
|
| 332 |
{
|
| 333 |
"model_name": "tavily",
|
|
@@ -349,7 +349,7 @@
|
|
| 349 |
"normalized_reciprocal_se_rank": 0.2743286477154668,
|
| 350 |
"reciprocal_se_rank": 0.07562751486366782,
|
| 351 |
"percentage_ge_sources_not_in_se_sources": 45.316455696202524,
|
| 352 |
-
"percentage_ge_sources_in_se_sources":
|
| 353 |
},
|
| 354 |
{
|
| 355 |
"model_name": "gensee",
|
|
@@ -371,7 +371,7 @@
|
|
| 371 |
"normalized_reciprocal_se_rank": 0.1299587450378004,
|
| 372 |
"reciprocal_se_rank": 0.040936688734811204,
|
| 373 |
"percentage_ge_sources_not_in_se_sources": 71.46596858638743,
|
| 374 |
-
"percentage_ge_sources_in_se_sources": 28.
|
| 375 |
},
|
| 376 |
{
|
| 377 |
"model_name": "deepseek-chat-gensee",
|
|
@@ -484,7 +484,7 @@
|
|
| 484 |
"normalized_reciprocal_se_rank": 0.024647681783897292,
|
| 485 |
"reciprocal_se_rank": 0.015631360428654928,
|
| 486 |
"percentage_ge_sources_not_in_se_sources": 89.41176470588235,
|
| 487 |
-
"percentage_ge_sources_in_se_sources":
|
| 488 |
},
|
| 489 |
{
|
| 490 |
"model_name": "gpt-5",
|
|
@@ -507,7 +507,7 @@
|
|
| 507 |
"normalized_reciprocal_se_rank": 0.24588844317711944,
|
| 508 |
"reciprocal_se_rank": 0.06879358221974471,
|
| 509 |
"percentage_ge_sources_not_in_se_sources": 71.05263157894737,
|
| 510 |
-
"percentage_ge_sources_in_se_sources":
|
| 511 |
},
|
| 512 |
{
|
| 513 |
"model_name": "gpt-5",
|
|
@@ -530,7 +530,7 @@
|
|
| 530 |
"normalized_reciprocal_se_rank": 0.16050373868555687,
|
| 531 |
"reciprocal_se_rank": 0.048276383810364386,
|
| 532 |
"percentage_ge_sources_not_in_se_sources": 74.54545454545455,
|
| 533 |
-
"percentage_ge_sources_in_se_sources":
|
| 534 |
},
|
| 535 |
{
|
| 536 |
"model_name": "gpt-5",
|
|
@@ -553,7 +553,7 @@
|
|
| 553 |
"normalized_reciprocal_se_rank": 0.04254727146439185,
|
| 554 |
"reciprocal_se_rank": 0.019932475424696096,
|
| 555 |
"percentage_ge_sources_not_in_se_sources": 91.37931034482759,
|
| 556 |
-
"percentage_ge_sources_in_se_sources":
|
| 557 |
},
|
| 558 |
{
|
| 559 |
"model_name": "gpt-5",
|
|
@@ -576,7 +576,7 @@
|
|
| 576 |
"normalized_reciprocal_se_rank": 0.0098989898989899,
|
| 577 |
"reciprocal_se_rank": 0.012087378640776695,
|
| 578 |
"percentage_ge_sources_not_in_se_sources": 98.75,
|
| 579 |
-
"percentage_ge_sources_in_se_sources":
|
| 580 |
},
|
| 581 |
{
|
| 582 |
"model_name": "gpt-4o",
|
|
@@ -599,7 +599,7 @@
|
|
| 599 |
"normalized_reciprocal_se_rank": 0.12281919725976252,
|
| 600 |
"reciprocal_se_rank": 0.03922111778814682,
|
| 601 |
"percentage_ge_sources_not_in_se_sources": 71.7948717948718,
|
| 602 |
-
"percentage_ge_sources_in_se_sources":
|
| 603 |
},
|
| 604 |
{
|
| 605 |
"model_name": "gpt-4o",
|
|
@@ -622,7 +622,7 @@
|
|
| 622 |
"normalized_reciprocal_se_rank": 0.08335991493886231,
|
| 623 |
"reciprocal_se_rank": 0.029739397036280018,
|
| 624 |
"percentage_ge_sources_not_in_se_sources": 89.47368421052632,
|
| 625 |
-
"percentage_ge_sources_in_se_sources":
|
| 626 |
},
|
| 627 |
{
|
| 628 |
"model_name": "gpt-4o",
|
|
@@ -645,7 +645,7 @@
|
|
| 645 |
"normalized_reciprocal_se_rank": 0.28054167213258124,
|
| 646 |
"reciprocal_se_rank": 0.07712045034253773,
|
| 647 |
"percentage_ge_sources_not_in_se_sources": 62.5,
|
| 648 |
-
"percentage_ge_sources_in_se_sources":
|
| 649 |
},
|
| 650 |
{
|
| 651 |
"model_name": "gpt-4o",
|
|
@@ -668,7 +668,7 @@
|
|
| 668 |
"normalized_reciprocal_se_rank": 0.0996490754594708,
|
| 669 |
"reciprocal_se_rank": 0.03365353997691167,
|
| 670 |
"percentage_ge_sources_not_in_se_sources": 71.05263157894738,
|
| 671 |
-
"percentage_ge_sources_in_se_sources":
|
| 672 |
},
|
| 673 |
{
|
| 674 |
"model_name": "gpt-4o",
|
|
@@ -691,7 +691,7 @@
|
|
| 691 |
"normalized_reciprocal_se_rank": 0.07521622430371759,
|
| 692 |
"reciprocal_se_rank": 0.027782539335116603,
|
| 693 |
"percentage_ge_sources_not_in_se_sources": 83.95061728395062,
|
| 694 |
-
"percentage_ge_sources_in_se_sources":
|
| 695 |
},
|
| 696 |
{
|
| 697 |
"model_name": "Grok-4.1-Fast",
|
|
@@ -829,7 +829,7 @@
|
|
| 829 |
"normalized_reciprocal_se_rank": 0.10523315112286376,
|
| 830 |
"reciprocal_se_rank": 0.03499534456593085,
|
| 831 |
"percentage_ge_sources_not_in_se_sources": 72.0,
|
| 832 |
-
"percentage_ge_sources_in_se_sources":
|
| 833 |
},
|
| 834 |
{
|
| 835 |
"model_name": "Gemini-3-Pro-Preview",
|
|
@@ -852,7 +852,7 @@
|
|
| 852 |
"normalized_reciprocal_se_rank": 0.056782624848369385,
|
| 853 |
"reciprocal_se_rank": 0.02335310645628293,
|
| 854 |
"percentage_ge_sources_not_in_se_sources": 83.13253012048195,
|
| 855 |
-
"percentage_ge_sources_in_se_sources":
|
| 856 |
},
|
| 857 |
{
|
| 858 |
"model_name": "Gemini-3-Pro-Preview",
|
|
@@ -875,7 +875,7 @@
|
|
| 875 |
"normalized_reciprocal_se_rank": 0.15368587669053982,
|
| 876 |
"reciprocal_se_rank": 0.04663811114651323,
|
| 877 |
"percentage_ge_sources_not_in_se_sources": 73.25581395348837,
|
| 878 |
-
"percentage_ge_sources_in_se_sources":
|
| 879 |
},
|
| 880 |
{
|
| 881 |
"model_name": "Gemini-3-Pro-Preview",
|
|
@@ -898,7 +898,7 @@
|
|
| 898 |
"normalized_reciprocal_se_rank": 0.11912889330801175,
|
| 899 |
"reciprocal_se_rank": 0.038334369993915436,
|
| 900 |
"percentage_ge_sources_not_in_se_sources": 71.60493827160494,
|
| 901 |
-
"percentage_ge_sources_in_se_sources":
|
| 902 |
},
|
| 903 |
{
|
| 904 |
"model_name": "Gemini-3-Pro-Preview",
|
|
@@ -921,7 +921,7 @@
|
|
| 921 |
"normalized_reciprocal_se_rank": 0.007184894289987778,
|
| 922 |
"reciprocal_se_rank": 0.01143520518133201,
|
| 923 |
"percentage_ge_sources_not_in_se_sources": 96.80851063829788,
|
| 924 |
-
"percentage_ge_sources_in_se_sources":
|
| 925 |
},
|
| 926 |
{
|
| 927 |
"model_name": "Gemini-3-Flash-Preview",
|
|
@@ -944,7 +944,7 @@
|
|
| 944 |
"normalized_reciprocal_se_rank": 0.12549266202008144,
|
| 945 |
"reciprocal_se_rank": 0.039863528009679766,
|
| 946 |
"percentage_ge_sources_not_in_se_sources": 71.0,
|
| 947 |
-
"percentage_ge_sources_in_se_sources":
|
| 948 |
},
|
| 949 |
{
|
| 950 |
"model_name": "Gemini-3-Flash-Preview",
|
|
@@ -967,7 +967,7 @@
|
|
| 967 |
"normalized_reciprocal_se_rank": 0.06427878374204764,
|
| 968 |
"reciprocal_se_rank": 0.0251543679380163,
|
| 969 |
"percentage_ge_sources_not_in_se_sources": 78.82352941176471,
|
| 970 |
-
"percentage_ge_sources_in_se_sources":
|
| 971 |
},
|
| 972 |
{
|
| 973 |
"model_name": "Gemini-3-Flash-Preview",
|
|
@@ -990,7 +990,7 @@
|
|
| 990 |
"normalized_reciprocal_se_rank": 0.14693960626866004,
|
| 991 |
"reciprocal_se_rank": 0.04501704131212949,
|
| 992 |
"percentage_ge_sources_not_in_se_sources": 73.03370786516854,
|
| 993 |
-
"percentage_ge_sources_in_se_sources":
|
| 994 |
},
|
| 995 |
{
|
| 996 |
"model_name": "Gemini-3-Flash-Preview",
|
|
@@ -1013,7 +1013,7 @@
|
|
| 1013 |
"normalized_reciprocal_se_rank": 0.14047144175948556,
|
| 1014 |
"reciprocal_se_rank": 0.04346279789851716,
|
| 1015 |
"percentage_ge_sources_not_in_se_sources": 72.22222222222223,
|
| 1016 |
-
"percentage_ge_sources_in_se_sources":
|
| 1017 |
},
|
| 1018 |
{
|
| 1019 |
"model_name": "Gemini-3-Flash-Preview",
|
|
@@ -1036,7 +1036,7 @@
|
|
| 1036 |
"normalized_reciprocal_se_rank": 0.032034724656595535,
|
| 1037 |
"reciprocal_se_rank": 0.017406402283987762,
|
| 1038 |
"percentage_ge_sources_not_in_se_sources": 89.1304347826087,
|
| 1039 |
-
"percentage_ge_sources_in_se_sources":
|
| 1040 |
},
|
| 1041 |
{
|
| 1042 |
"model_name": "Gemini-2.5-Flash-Preview",
|
|
@@ -1059,7 +1059,7 @@
|
|
| 1059 |
"normalized_reciprocal_se_rank": null,
|
| 1060 |
"reciprocal_se_rank": null,
|
| 1061 |
"percentage_ge_sources_not_in_se_sources": null,
|
| 1062 |
-
"percentage_ge_sources_in_se_sources":
|
| 1063 |
},
|
| 1064 |
{
|
| 1065 |
"model_name": "Gemini-2.5-Flash-Preview",
|
|
@@ -1082,7 +1082,7 @@
|
|
| 1082 |
"normalized_reciprocal_se_rank": null,
|
| 1083 |
"reciprocal_se_rank": null,
|
| 1084 |
"percentage_ge_sources_not_in_se_sources": null,
|
| 1085 |
-
"percentage_ge_sources_in_se_sources":
|
| 1086 |
},
|
| 1087 |
{
|
| 1088 |
"model_name": "Gemini-2.5-Flash-Preview",
|
|
@@ -1105,7 +1105,7 @@
|
|
| 1105 |
"normalized_reciprocal_se_rank": null,
|
| 1106 |
"reciprocal_se_rank": null,
|
| 1107 |
"percentage_ge_sources_not_in_se_sources": null,
|
| 1108 |
-
"percentage_ge_sources_in_se_sources":
|
| 1109 |
},
|
| 1110 |
{
|
| 1111 |
"model_name": "Gemini-2.5-Flash-Preview",
|
|
@@ -1128,7 +1128,7 @@
|
|
| 1128 |
"normalized_reciprocal_se_rank": null,
|
| 1129 |
"reciprocal_se_rank": null,
|
| 1130 |
"percentage_ge_sources_not_in_se_sources": null,
|
| 1131 |
-
"percentage_ge_sources_in_se_sources":
|
| 1132 |
},
|
| 1133 |
{
|
| 1134 |
"model_name": "Gemini-2.5-Flash-Preview",
|
|
@@ -1151,7 +1151,7 @@
|
|
| 1151 |
"normalized_reciprocal_se_rank": null,
|
| 1152 |
"reciprocal_se_rank": null,
|
| 1153 |
"percentage_ge_sources_not_in_se_sources": null,
|
| 1154 |
-
"percentage_ge_sources_in_se_sources":
|
| 1155 |
},
|
| 1156 |
{
|
| 1157 |
"model_name": "claude",
|
|
@@ -1174,7 +1174,7 @@
|
|
| 1174 |
"normalized_reciprocal_se_rank": 0.21041652104583275,
|
| 1175 |
"reciprocal_se_rank": 0.06026998928043071,
|
| 1176 |
"percentage_ge_sources_not_in_se_sources": 54.28571428571426,
|
| 1177 |
-
"percentage_ge_sources_in_se_sources":
|
| 1178 |
},
|
| 1179 |
{
|
| 1180 |
"model_name": "claude",
|
|
@@ -1197,7 +1197,7 @@
|
|
| 1197 |
"normalized_reciprocal_se_rank": 0.1327922077922078,
|
| 1198 |
"reciprocal_se_rank": 0.04161754507628294,
|
| 1199 |
"percentage_ge_sources_not_in_se_sources": 70.00000000000001,
|
| 1200 |
-
"percentage_ge_sources_in_se_sources":
|
| 1201 |
},
|
| 1202 |
{
|
| 1203 |
"model_name": "claude",
|
|
@@ -1220,7 +1220,7 @@
|
|
| 1220 |
"normalized_reciprocal_se_rank": 0.28256007847697834,
|
| 1221 |
"reciprocal_se_rank": 0.07760545575053605,
|
| 1222 |
"percentage_ge_sources_not_in_se_sources": 53.84615384615383,
|
| 1223 |
-
"percentage_ge_sources_in_se_sources":
|
| 1224 |
},
|
| 1225 |
{
|
| 1226 |
"model_name": "claude",
|
|
@@ -1243,7 +1243,7 @@
|
|
| 1243 |
"normalized_reciprocal_se_rank": 0.18350554762304847,
|
| 1244 |
"reciprocal_se_rank": 0.0538035175113636,
|
| 1245 |
"percentage_ge_sources_not_in_se_sources": 49.15254237288135,
|
| 1246 |
-
"percentage_ge_sources_in_se_sources":
|
| 1247 |
},
|
| 1248 |
{
|
| 1249 |
"model_name": "claude",
|
|
@@ -1266,7 +1266,7 @@
|
|
| 1266 |
"normalized_reciprocal_se_rank": 0.056933641949831956,
|
| 1267 |
"reciprocal_se_rank": 0.02338939454619748,
|
| 1268 |
"percentage_ge_sources_not_in_se_sources": 83.95061728395062,
|
| 1269 |
-
"percentage_ge_sources_in_se_sources":
|
| 1270 |
},
|
| 1271 |
{
|
| 1272 |
"model_name": "Perplexity-Sonar-Pro",
|
|
@@ -1289,7 +1289,7 @@
|
|
| 1289 |
"normalized_reciprocal_se_rank": 0.2274442114543135,
|
| 1290 |
"reciprocal_se_rank": 0.0643615944999443,
|
| 1291 |
"percentage_ge_sources_not_in_se_sources": 46.34146341463415,
|
| 1292 |
-
"percentage_ge_sources_in_se_sources":
|
| 1293 |
},
|
| 1294 |
{
|
| 1295 |
"model_name": "Perplexity-Sonar-Pro",
|
|
@@ -1312,7 +1312,7 @@
|
|
| 1312 |
"normalized_reciprocal_se_rank": 0.13063939371395492,
|
| 1313 |
"reciprocal_se_rank": 0.04110024266427558,
|
| 1314 |
"percentage_ge_sources_not_in_se_sources": 63.29113924050633,
|
| 1315 |
-
"percentage_ge_sources_in_se_sources":
|
| 1316 |
},
|
| 1317 |
{
|
| 1318 |
"model_name": "Perplexity-Sonar-Pro",
|
|
@@ -1335,7 +1335,7 @@
|
|
| 1335 |
"normalized_reciprocal_se_rank": 0.20414708640646176,
|
| 1336 |
"reciprocal_se_rank": 0.05876349891805757,
|
| 1337 |
"percentage_ge_sources_not_in_se_sources": 62.5,
|
| 1338 |
-
"percentage_ge_sources_in_se_sources":
|
| 1339 |
},
|
| 1340 |
{
|
| 1341 |
"model_name": "Perplexity-Sonar-Pro",
|
|
@@ -1358,7 +1358,7 @@
|
|
| 1358 |
"normalized_reciprocal_se_rank": 0.2024394067077523,
|
| 1359 |
"reciprocal_se_rank": 0.05835315840793079,
|
| 1360 |
"percentage_ge_sources_not_in_se_sources": 52.4390243902439,
|
| 1361 |
-
"percentage_ge_sources_in_se_sources":
|
| 1362 |
},
|
| 1363 |
{
|
| 1364 |
"model_name": "Perplexity-Sonar-Pro",
|
|
@@ -1381,7 +1381,7 @@
|
|
| 1381 |
"normalized_reciprocal_se_rank": 0.051796852838519515,
|
| 1382 |
"reciprocal_se_rank": 0.02215506900731415,
|
| 1383 |
"percentage_ge_sources_not_in_se_sources": 85.93750000000001,
|
| 1384 |
-
"percentage_ge_sources_in_se_sources":
|
| 1385 |
},
|
| 1386 |
{
|
| 1387 |
"model_name": "google-search",
|
|
@@ -1519,7 +1519,7 @@
|
|
| 1519 |
"normalized_reciprocal_se_rank": 0.28063309301928224,
|
| 1520 |
"reciprocal_se_rank": 0.07714241798278879,
|
| 1521 |
"percentage_ge_sources_not_in_se_sources": 38.20224719101124,
|
| 1522 |
-
"percentage_ge_sources_in_se_sources":
|
| 1523 |
},
|
| 1524 |
{
|
| 1525 |
"model_name": "exa",
|
|
@@ -1542,7 +1542,7 @@
|
|
| 1542 |
"normalized_reciprocal_se_rank": 0.22448376867351463,
|
| 1543 |
"reciprocal_se_rank": 0.06365022596766494,
|
| 1544 |
"percentage_ge_sources_not_in_se_sources": 54.21686746987952,
|
| 1545 |
-
"percentage_ge_sources_in_se_sources":
|
| 1546 |
},
|
| 1547 |
{
|
| 1548 |
"model_name": "exa",
|
|
@@ -1565,7 +1565,7 @@
|
|
| 1565 |
"normalized_reciprocal_se_rank": 0.2313966587355651,
|
| 1566 |
"reciprocal_se_rank": 0.06531133304568201,
|
| 1567 |
"percentage_ge_sources_not_in_se_sources": 62.06896551724138,
|
| 1568 |
-
"percentage_ge_sources_in_se_sources":
|
| 1569 |
},
|
| 1570 |
{
|
| 1571 |
"model_name": "exa",
|
|
@@ -1588,7 +1588,7 @@
|
|
| 1588 |
"normalized_reciprocal_se_rank": 0.1994493307755928,
|
| 1589 |
"reciprocal_se_rank": 0.05763466928830994,
|
| 1590 |
"percentage_ge_sources_not_in_se_sources": 50.588235294117645,
|
| 1591 |
-
"percentage_ge_sources_in_se_sources":
|
| 1592 |
},
|
| 1593 |
{
|
| 1594 |
"model_name": "exa",
|
|
@@ -1611,7 +1611,7 @@
|
|
| 1611 |
"normalized_reciprocal_se_rank": 0.07692643713869617,
|
| 1612 |
"reciprocal_se_rank": 0.028193488535754666,
|
| 1613 |
"percentage_ge_sources_not_in_se_sources": 82.71604938271606,
|
| 1614 |
-
"percentage_ge_sources_in_se_sources":
|
| 1615 |
},
|
| 1616 |
{
|
| 1617 |
"model_name": "tavily",
|
|
@@ -1634,7 +1634,7 @@
|
|
| 1634 |
"normalized_reciprocal_se_rank": 0.3434220529106368,
|
| 1635 |
"reciprocal_se_rank": 0.09223005640328419,
|
| 1636 |
"percentage_ge_sources_not_in_se_sources": 36.8421052631579,
|
| 1637 |
-
"percentage_ge_sources_in_se_sources":
|
| 1638 |
},
|
| 1639 |
{
|
| 1640 |
"model_name": "tavily",
|
|
@@ -1657,7 +1657,7 @@
|
|
| 1657 |
"normalized_reciprocal_se_rank": 0.27249775517340985,
|
| 1658 |
"reciprocal_se_rank": 0.07518756738390195,
|
| 1659 |
"percentage_ge_sources_not_in_se_sources": 41.55844155844156,
|
| 1660 |
-
"percentage_ge_sources_in_se_sources":
|
| 1661 |
},
|
| 1662 |
{
|
| 1663 |
"model_name": "tavily",
|
|
@@ -1680,7 +1680,7 @@
|
|
| 1680 |
"normalized_reciprocal_se_rank": 0.3128941728047197,
|
| 1681 |
"reciprocal_se_rank": 0.08489447356229925,
|
| 1682 |
"percentage_ge_sources_not_in_se_sources": 45.67901234567901,
|
| 1683 |
-
"percentage_ge_sources_in_se_sources":
|
| 1684 |
},
|
| 1685 |
{
|
| 1686 |
"model_name": "tavily",
|
|
@@ -1703,7 +1703,7 @@
|
|
| 1703 |
"normalized_reciprocal_se_rank": 0.31851954838074426,
|
| 1704 |
"reciprocal_se_rank": 0.08624620215945074,
|
| 1705 |
"percentage_ge_sources_not_in_se_sources": 32.05128205128205,
|
| 1706 |
-
"percentage_ge_sources_in_se_sources":
|
| 1707 |
},
|
| 1708 |
{
|
| 1709 |
"model_name": "tavily",
|
|
@@ -1726,7 +1726,7 @@
|
|
| 1726 |
"normalized_reciprocal_se_rank": 0.1335959024960005,
|
| 1727 |
"reciprocal_se_rank": 0.04181066589102925,
|
| 1728 |
"percentage_ge_sources_not_in_se_sources": 68.67469879518072,
|
| 1729 |
-
"percentage_ge_sources_in_se_sources":
|
| 1730 |
},
|
| 1731 |
{
|
| 1732 |
"model_name": "gensee",
|
|
@@ -1749,7 +1749,7 @@
|
|
| 1749 |
"normalized_reciprocal_se_rank": 0.17012042957025347,
|
| 1750 |
"reciprocal_se_rank": 0.05058719060061921,
|
| 1751 |
"percentage_ge_sources_not_in_se_sources": 58.42696629213483,
|
| 1752 |
-
"percentage_ge_sources_in_se_sources":
|
| 1753 |
},
|
| 1754 |
{
|
| 1755 |
"model_name": "gensee",
|
|
@@ -1772,7 +1772,7 @@
|
|
| 1772 |
"normalized_reciprocal_se_rank": 0.06456158601930041,
|
| 1773 |
"reciprocal_se_rank": 0.025222322854152286,
|
| 1774 |
"percentage_ge_sources_not_in_se_sources": 84.44444444444443,
|
| 1775 |
-
"percentage_ge_sources_in_se_sources":
|
| 1776 |
},
|
| 1777 |
{
|
| 1778 |
"model_name": "gensee",
|
|
@@ -1795,7 +1795,7 @@
|
|
| 1795 |
"normalized_reciprocal_se_rank": 0.19747215967140244,
|
| 1796 |
"reciprocal_se_rank": 0.05715957234822537,
|
| 1797 |
"percentage_ge_sources_not_in_se_sources": 70.12987012987011,
|
| 1798 |
-
"percentage_ge_sources_in_se_sources":
|
| 1799 |
},
|
| 1800 |
{
|
| 1801 |
"model_name": "gensee",
|
|
@@ -1818,7 +1818,7 @@
|
|
| 1818 |
"normalized_reciprocal_se_rank": 0.12268532386073742,
|
| 1819 |
"reciprocal_se_rank": 0.03918894918012865,
|
| 1820 |
"percentage_ge_sources_not_in_se_sources": 62.650602409638545,
|
| 1821 |
-
"percentage_ge_sources_in_se_sources":
|
| 1822 |
},
|
| 1823 |
{
|
| 1824 |
"model_name": "gensee",
|
|
@@ -1841,7 +1841,7 @@
|
|
| 1841 |
"normalized_reciprocal_se_rank": 0.07056832757590334,
|
| 1842 |
"reciprocal_se_rank": 0.02666569036411269,
|
| 1843 |
"percentage_ge_sources_not_in_se_sources": 87.5,
|
| 1844 |
-
"percentage_ge_sources_in_se_sources":
|
| 1845 |
}
|
| 1846 |
],
|
| 1847 |
"queries": [
|
|
|
|
| 129 |
"normalized_reciprocal_se_rank": 0.07444993783835872,
|
| 130 |
"reciprocal_se_rank": 0.027598407393197893,
|
| 131 |
"percentage_ge_sources_not_in_se_sources": 87.34177215189875,
|
| 132 |
+
"percentage_ge_sources_in_se_sources": 15.99
|
| 133 |
},
|
| 134 |
{
|
| 135 |
"model_name": "gpt-4o",
|
|
|
|
| 151 |
"normalized_reciprocal_se_rank": 0.12262328761538778,
|
| 152 |
"reciprocal_se_rank": 0.03917404241243542,
|
| 153 |
"percentage_ge_sources_not_in_se_sources": 74.82993197278913,
|
| 154 |
+
"percentage_ge_sources_in_se_sources": 27.53
|
| 155 |
},
|
| 156 |
{
|
| 157 |
"model_name": "grok-4.1-fast-non-reasoning",
|
|
|
|
| 173 |
"normalized_reciprocal_se_rank": 0.14088939196167136,
|
| 174 |
"reciprocal_se_rank": 0.043563227680110374,
|
| 175 |
"percentage_ge_sources_not_in_se_sources": 69.15584415584416,
|
| 176 |
+
"percentage_ge_sources_in_se_sources": 29.67
|
| 177 |
},
|
| 178 |
{
|
| 179 |
"model_name": "claude",
|
|
|
|
| 195 |
"normalized_reciprocal_se_rank": 0.1641520228549373,
|
| 196 |
"reciprocal_se_rank": 0.04915303461805529,
|
| 197 |
"percentage_ge_sources_not_in_se_sources": 62.934362934362916,
|
| 198 |
+
"percentage_ge_sources_in_se_sources": 37.1
|
| 199 |
},
|
| 200 |
{
|
| 201 |
"model_name": "Gemini-3-Pro-Preview",
|
|
|
|
| 217 |
"normalized_reciprocal_se_rank": 0.08733801529571,
|
| 218 |
"reciprocal_se_rank": 0.03069529979193034,
|
| 219 |
"percentage_ge_sources_not_in_se_sources": 79.50450450450452,
|
| 220 |
+
"percentage_ge_sources_in_se_sources": 20.95
|
| 221 |
},
|
| 222 |
{
|
| 223 |
"model_name": "Gemini-3-Flash-Preview",
|
|
|
|
| 239 |
"normalized_reciprocal_se_rank": 0.10236888641393292,
|
| 240 |
"reciprocal_se_rank": 0.03430708678393044,
|
| 241 |
"percentage_ge_sources_not_in_se_sources": 76.7543859649123,
|
| 242 |
+
"percentage_ge_sources_in_se_sources": 23.5
|
| 243 |
},
|
| 244 |
{
|
| 245 |
"model_name": "Gemini-2.5-Flash-Preview",
|
|
|
|
| 261 |
"normalized_reciprocal_se_rank": null,
|
| 262 |
"reciprocal_se_rank": null,
|
| 263 |
"percentage_ge_sources_not_in_se_sources": null,
|
| 264 |
+
"percentage_ge_sources_in_se_sources": 31.96
|
| 265 |
},
|
| 266 |
{
|
| 267 |
"model_name": "Perplexity-Sonar-Pro",
|
|
|
|
| 283 |
"normalized_reciprocal_se_rank": 0.16776928125494017,
|
| 284 |
"reciprocal_se_rank": 0.050022230204463676,
|
| 285 |
"percentage_ge_sources_not_in_se_sources": 60.949868073878655,
|
| 286 |
+
"percentage_ge_sources_in_se_sources": 40.0
|
| 287 |
},
|
| 288 |
{
|
| 289 |
"model_name": "google-search",
|
|
|
|
| 327 |
"normalized_reciprocal_se_rank": 0.20452762803005117,
|
| 328 |
"reciprocal_se_rank": 0.058854939745084926,
|
| 329 |
"percentage_ge_sources_not_in_se_sources": 57.176470588235304,
|
| 330 |
+
"percentage_ge_sources_in_se_sources": 44.66
|
| 331 |
},
|
| 332 |
{
|
| 333 |
"model_name": "tavily",
|
|
|
|
| 349 |
"normalized_reciprocal_se_rank": 0.2743286477154668,
|
| 350 |
"reciprocal_se_rank": 0.07562751486366782,
|
| 351 |
"percentage_ge_sources_not_in_se_sources": 45.316455696202524,
|
| 352 |
+
"percentage_ge_sources_in_se_sources": 55.45
|
| 353 |
},
|
| 354 |
{
|
| 355 |
"model_name": "gensee",
|
|
|
|
| 371 |
"normalized_reciprocal_se_rank": 0.1299587450378004,
|
| 372 |
"reciprocal_se_rank": 0.040936688734811204,
|
| 373 |
"percentage_ge_sources_not_in_se_sources": 71.46596858638743,
|
| 374 |
+
"percentage_ge_sources_in_se_sources": 28.4
|
| 375 |
},
|
| 376 |
{
|
| 377 |
"model_name": "deepseek-chat-gensee",
|
|
|
|
| 484 |
"normalized_reciprocal_se_rank": 0.024647681783897292,
|
| 485 |
"reciprocal_se_rank": 0.015631360428654928,
|
| 486 |
"percentage_ge_sources_not_in_se_sources": 89.41176470588235,
|
| 487 |
+
"percentage_ge_sources_in_se_sources": 15.99
|
| 488 |
},
|
| 489 |
{
|
| 490 |
"model_name": "gpt-5",
|
|
|
|
| 507 |
"normalized_reciprocal_se_rank": 0.24588844317711944,
|
| 508 |
"reciprocal_se_rank": 0.06879358221974471,
|
| 509 |
"percentage_ge_sources_not_in_se_sources": 71.05263157894737,
|
| 510 |
+
"percentage_ge_sources_in_se_sources": 15.99
|
| 511 |
},
|
| 512 |
{
|
| 513 |
"model_name": "gpt-5",
|
|
|
|
| 530 |
"normalized_reciprocal_se_rank": 0.16050373868555687,
|
| 531 |
"reciprocal_se_rank": 0.048276383810364386,
|
| 532 |
"percentage_ge_sources_not_in_se_sources": 74.54545454545455,
|
| 533 |
+
"percentage_ge_sources_in_se_sources": 15.99
|
| 534 |
},
|
| 535 |
{
|
| 536 |
"model_name": "gpt-5",
|
|
|
|
| 553 |
"normalized_reciprocal_se_rank": 0.04254727146439185,
|
| 554 |
"reciprocal_se_rank": 0.019932475424696096,
|
| 555 |
"percentage_ge_sources_not_in_se_sources": 91.37931034482759,
|
| 556 |
+
"percentage_ge_sources_in_se_sources": 15.99
|
| 557 |
},
|
| 558 |
{
|
| 559 |
"model_name": "gpt-5",
|
|
|
|
| 576 |
"normalized_reciprocal_se_rank": 0.0098989898989899,
|
| 577 |
"reciprocal_se_rank": 0.012087378640776695,
|
| 578 |
"percentage_ge_sources_not_in_se_sources": 98.75,
|
| 579 |
+
"percentage_ge_sources_in_se_sources": 15.99
|
| 580 |
},
|
| 581 |
{
|
| 582 |
"model_name": "gpt-4o",
|
|
|
|
| 599 |
"normalized_reciprocal_se_rank": 0.12281919725976252,
|
| 600 |
"reciprocal_se_rank": 0.03922111778814682,
|
| 601 |
"percentage_ge_sources_not_in_se_sources": 71.7948717948718,
|
| 602 |
+
"percentage_ge_sources_in_se_sources": 27.53
|
| 603 |
},
|
| 604 |
{
|
| 605 |
"model_name": "gpt-4o",
|
|
|
|
| 622 |
"normalized_reciprocal_se_rank": 0.08335991493886231,
|
| 623 |
"reciprocal_se_rank": 0.029739397036280018,
|
| 624 |
"percentage_ge_sources_not_in_se_sources": 89.47368421052632,
|
| 625 |
+
"percentage_ge_sources_in_se_sources": 27.53
|
| 626 |
},
|
| 627 |
{
|
| 628 |
"model_name": "gpt-4o",
|
|
|
|
| 645 |
"normalized_reciprocal_se_rank": 0.28054167213258124,
|
| 646 |
"reciprocal_se_rank": 0.07712045034253773,
|
| 647 |
"percentage_ge_sources_not_in_se_sources": 62.5,
|
| 648 |
+
"percentage_ge_sources_in_se_sources": 27.53
|
| 649 |
},
|
| 650 |
{
|
| 651 |
"model_name": "gpt-4o",
|
|
|
|
| 668 |
"normalized_reciprocal_se_rank": 0.0996490754594708,
|
| 669 |
"reciprocal_se_rank": 0.03365353997691167,
|
| 670 |
"percentage_ge_sources_not_in_se_sources": 71.05263157894738,
|
| 671 |
+
"percentage_ge_sources_in_se_sources": 27.53
|
| 672 |
},
|
| 673 |
{
|
| 674 |
"model_name": "gpt-4o",
|
|
|
|
| 691 |
"normalized_reciprocal_se_rank": 0.07521622430371759,
|
| 692 |
"reciprocal_se_rank": 0.027782539335116603,
|
| 693 |
"percentage_ge_sources_not_in_se_sources": 83.95061728395062,
|
| 694 |
+
"percentage_ge_sources_in_se_sources": 27.53
|
| 695 |
},
|
| 696 |
{
|
| 697 |
"model_name": "Grok-4.1-Fast",
|
|
|
|
| 829 |
"normalized_reciprocal_se_rank": 0.10523315112286376,
|
| 830 |
"reciprocal_se_rank": 0.03499534456593085,
|
| 831 |
"percentage_ge_sources_not_in_se_sources": 72.0,
|
| 832 |
+
"percentage_ge_sources_in_se_sources": 20.95
|
| 833 |
},
|
| 834 |
{
|
| 835 |
"model_name": "Gemini-3-Pro-Preview",
|
|
|
|
| 852 |
"normalized_reciprocal_se_rank": 0.056782624848369385,
|
| 853 |
"reciprocal_se_rank": 0.02335310645628293,
|
| 854 |
"percentage_ge_sources_not_in_se_sources": 83.13253012048195,
|
| 855 |
+
"percentage_ge_sources_in_se_sources": 20.95
|
| 856 |
},
|
| 857 |
{
|
| 858 |
"model_name": "Gemini-3-Pro-Preview",
|
|
|
|
| 875 |
"normalized_reciprocal_se_rank": 0.15368587669053982,
|
| 876 |
"reciprocal_se_rank": 0.04663811114651323,
|
| 877 |
"percentage_ge_sources_not_in_se_sources": 73.25581395348837,
|
| 878 |
+
"percentage_ge_sources_in_se_sources": 20.95
|
| 879 |
},
|
| 880 |
{
|
| 881 |
"model_name": "Gemini-3-Pro-Preview",
|
|
|
|
| 898 |
"normalized_reciprocal_se_rank": 0.11912889330801175,
|
| 899 |
"reciprocal_se_rank": 0.038334369993915436,
|
| 900 |
"percentage_ge_sources_not_in_se_sources": 71.60493827160494,
|
| 901 |
+
"percentage_ge_sources_in_se_sources": 20.95
|
| 902 |
},
|
| 903 |
{
|
| 904 |
"model_name": "Gemini-3-Pro-Preview",
|
|
|
|
| 921 |
"normalized_reciprocal_se_rank": 0.007184894289987778,
|
| 922 |
"reciprocal_se_rank": 0.01143520518133201,
|
| 923 |
"percentage_ge_sources_not_in_se_sources": 96.80851063829788,
|
| 924 |
+
"percentage_ge_sources_in_se_sources": 20.95
|
| 925 |
},
|
| 926 |
{
|
| 927 |
"model_name": "Gemini-3-Flash-Preview",
|
|
|
|
| 944 |
"normalized_reciprocal_se_rank": 0.12549266202008144,
|
| 945 |
"reciprocal_se_rank": 0.039863528009679766,
|
| 946 |
"percentage_ge_sources_not_in_se_sources": 71.0,
|
| 947 |
+
"percentage_ge_sources_in_se_sources": 23.5
|
| 948 |
},
|
| 949 |
{
|
| 950 |
"model_name": "Gemini-3-Flash-Preview",
|
|
|
|
| 967 |
"normalized_reciprocal_se_rank": 0.06427878374204764,
|
| 968 |
"reciprocal_se_rank": 0.0251543679380163,
|
| 969 |
"percentage_ge_sources_not_in_se_sources": 78.82352941176471,
|
| 970 |
+
"percentage_ge_sources_in_se_sources": 23.5
|
| 971 |
},
|
| 972 |
{
|
| 973 |
"model_name": "Gemini-3-Flash-Preview",
|
|
|
|
| 990 |
"normalized_reciprocal_se_rank": 0.14693960626866004,
|
| 991 |
"reciprocal_se_rank": 0.04501704131212949,
|
| 992 |
"percentage_ge_sources_not_in_se_sources": 73.03370786516854,
|
| 993 |
+
"percentage_ge_sources_in_se_sources": 23.5
|
| 994 |
},
|
| 995 |
{
|
| 996 |
"model_name": "Gemini-3-Flash-Preview",
|
|
|
|
| 1013 |
"normalized_reciprocal_se_rank": 0.14047144175948556,
|
| 1014 |
"reciprocal_se_rank": 0.04346279789851716,
|
| 1015 |
"percentage_ge_sources_not_in_se_sources": 72.22222222222223,
|
| 1016 |
+
"percentage_ge_sources_in_se_sources": 23.5
|
| 1017 |
},
|
| 1018 |
{
|
| 1019 |
"model_name": "Gemini-3-Flash-Preview",
|
|
|
|
| 1036 |
"normalized_reciprocal_se_rank": 0.032034724656595535,
|
| 1037 |
"reciprocal_se_rank": 0.017406402283987762,
|
| 1038 |
"percentage_ge_sources_not_in_se_sources": 89.1304347826087,
|
| 1039 |
+
"percentage_ge_sources_in_se_sources": 23.5
|
| 1040 |
},
|
| 1041 |
{
|
| 1042 |
"model_name": "Gemini-2.5-Flash-Preview",
|
|
|
|
| 1059 |
"normalized_reciprocal_se_rank": null,
|
| 1060 |
"reciprocal_se_rank": null,
|
| 1061 |
"percentage_ge_sources_not_in_se_sources": null,
|
| 1062 |
+
"percentage_ge_sources_in_se_sources": 31.96
|
| 1063 |
},
|
| 1064 |
{
|
| 1065 |
"model_name": "Gemini-2.5-Flash-Preview",
|
|
|
|
| 1082 |
"normalized_reciprocal_se_rank": null,
|
| 1083 |
"reciprocal_se_rank": null,
|
| 1084 |
"percentage_ge_sources_not_in_se_sources": null,
|
| 1085 |
+
"percentage_ge_sources_in_se_sources": 31.96
|
| 1086 |
},
|
| 1087 |
{
|
| 1088 |
"model_name": "Gemini-2.5-Flash-Preview",
|
|
|
|
| 1105 |
"normalized_reciprocal_se_rank": null,
|
| 1106 |
"reciprocal_se_rank": null,
|
| 1107 |
"percentage_ge_sources_not_in_se_sources": null,
|
| 1108 |
+
"percentage_ge_sources_in_se_sources": 31.96
|
| 1109 |
},
|
| 1110 |
{
|
| 1111 |
"model_name": "Gemini-2.5-Flash-Preview",
|
|
|
|
| 1128 |
"normalized_reciprocal_se_rank": null,
|
| 1129 |
"reciprocal_se_rank": null,
|
| 1130 |
"percentage_ge_sources_not_in_se_sources": null,
|
| 1131 |
+
"percentage_ge_sources_in_se_sources": 31.96
|
| 1132 |
},
|
| 1133 |
{
|
| 1134 |
"model_name": "Gemini-2.5-Flash-Preview",
|
|
|
|
| 1151 |
"normalized_reciprocal_se_rank": null,
|
| 1152 |
"reciprocal_se_rank": null,
|
| 1153 |
"percentage_ge_sources_not_in_se_sources": null,
|
| 1154 |
+
"percentage_ge_sources_in_se_sources": 31.96
|
| 1155 |
},
|
| 1156 |
{
|
| 1157 |
"model_name": "claude",
|
|
|
|
| 1174 |
"normalized_reciprocal_se_rank": 0.21041652104583275,
|
| 1175 |
"reciprocal_se_rank": 0.06026998928043071,
|
| 1176 |
"percentage_ge_sources_not_in_se_sources": 54.28571428571426,
|
| 1177 |
+
"percentage_ge_sources_in_se_sources": 37.1
|
| 1178 |
},
|
| 1179 |
{
|
| 1180 |
"model_name": "claude",
|
|
|
|
| 1197 |
"normalized_reciprocal_se_rank": 0.1327922077922078,
|
| 1198 |
"reciprocal_se_rank": 0.04161754507628294,
|
| 1199 |
"percentage_ge_sources_not_in_se_sources": 70.00000000000001,
|
| 1200 |
+
"percentage_ge_sources_in_se_sources": 37.1
|
| 1201 |
},
|
| 1202 |
{
|
| 1203 |
"model_name": "claude",
|
|
|
|
| 1220 |
"normalized_reciprocal_se_rank": 0.28256007847697834,
|
| 1221 |
"reciprocal_se_rank": 0.07760545575053605,
|
| 1222 |
"percentage_ge_sources_not_in_se_sources": 53.84615384615383,
|
| 1223 |
+
"percentage_ge_sources_in_se_sources": 37.1
|
| 1224 |
},
|
| 1225 |
{
|
| 1226 |
"model_name": "claude",
|
|
|
|
| 1243 |
"normalized_reciprocal_se_rank": 0.18350554762304847,
|
| 1244 |
"reciprocal_se_rank": 0.0538035175113636,
|
| 1245 |
"percentage_ge_sources_not_in_se_sources": 49.15254237288135,
|
| 1246 |
+
"percentage_ge_sources_in_se_sources": 37.1
|
| 1247 |
},
|
| 1248 |
{
|
| 1249 |
"model_name": "claude",
|
|
|
|
| 1266 |
"normalized_reciprocal_se_rank": 0.056933641949831956,
|
| 1267 |
"reciprocal_se_rank": 0.02338939454619748,
|
| 1268 |
"percentage_ge_sources_not_in_se_sources": 83.95061728395062,
|
| 1269 |
+
"percentage_ge_sources_in_se_sources": 37.1
|
| 1270 |
},
|
| 1271 |
{
|
| 1272 |
"model_name": "Perplexity-Sonar-Pro",
|
|
|
|
| 1289 |
"normalized_reciprocal_se_rank": 0.2274442114543135,
|
| 1290 |
"reciprocal_se_rank": 0.0643615944999443,
|
| 1291 |
"percentage_ge_sources_not_in_se_sources": 46.34146341463415,
|
| 1292 |
+
"percentage_ge_sources_in_se_sources": 40.0
|
| 1293 |
},
|
| 1294 |
{
|
| 1295 |
"model_name": "Perplexity-Sonar-Pro",
|
|
|
|
| 1312 |
"normalized_reciprocal_se_rank": 0.13063939371395492,
|
| 1313 |
"reciprocal_se_rank": 0.04110024266427558,
|
| 1314 |
"percentage_ge_sources_not_in_se_sources": 63.29113924050633,
|
| 1315 |
+
"percentage_ge_sources_in_se_sources": 40.0
|
| 1316 |
},
|
| 1317 |
{
|
| 1318 |
"model_name": "Perplexity-Sonar-Pro",
|
|
|
|
| 1335 |
"normalized_reciprocal_se_rank": 0.20414708640646176,
|
| 1336 |
"reciprocal_se_rank": 0.05876349891805757,
|
| 1337 |
"percentage_ge_sources_not_in_se_sources": 62.5,
|
| 1338 |
+
"percentage_ge_sources_in_se_sources": 40.0
|
| 1339 |
},
|
| 1340 |
{
|
| 1341 |
"model_name": "Perplexity-Sonar-Pro",
|
|
|
|
| 1358 |
"normalized_reciprocal_se_rank": 0.2024394067077523,
|
| 1359 |
"reciprocal_se_rank": 0.05835315840793079,
|
| 1360 |
"percentage_ge_sources_not_in_se_sources": 52.4390243902439,
|
| 1361 |
+
"percentage_ge_sources_in_se_sources": 40.0
|
| 1362 |
},
|
| 1363 |
{
|
| 1364 |
"model_name": "Perplexity-Sonar-Pro",
|
|
|
|
| 1381 |
"normalized_reciprocal_se_rank": 0.051796852838519515,
|
| 1382 |
"reciprocal_se_rank": 0.02215506900731415,
|
| 1383 |
"percentage_ge_sources_not_in_se_sources": 85.93750000000001,
|
| 1384 |
+
"percentage_ge_sources_in_se_sources": 40.0
|
| 1385 |
},
|
| 1386 |
{
|
| 1387 |
"model_name": "google-search",
|
|
|
|
| 1519 |
"normalized_reciprocal_se_rank": 0.28063309301928224,
|
| 1520 |
"reciprocal_se_rank": 0.07714241798278879,
|
| 1521 |
"percentage_ge_sources_not_in_se_sources": 38.20224719101124,
|
| 1522 |
+
"percentage_ge_sources_in_se_sources": 44.66
|
| 1523 |
},
|
| 1524 |
{
|
| 1525 |
"model_name": "exa",
|
|
|
|
| 1542 |
"normalized_reciprocal_se_rank": 0.22448376867351463,
|
| 1543 |
"reciprocal_se_rank": 0.06365022596766494,
|
| 1544 |
"percentage_ge_sources_not_in_se_sources": 54.21686746987952,
|
| 1545 |
+
"percentage_ge_sources_in_se_sources": 44.66
|
| 1546 |
},
|
| 1547 |
{
|
| 1548 |
"model_name": "exa",
|
|
|
|
| 1565 |
"normalized_reciprocal_se_rank": 0.2313966587355651,
|
| 1566 |
"reciprocal_se_rank": 0.06531133304568201,
|
| 1567 |
"percentage_ge_sources_not_in_se_sources": 62.06896551724138,
|
| 1568 |
+
"percentage_ge_sources_in_se_sources": 44.66
|
| 1569 |
},
|
| 1570 |
{
|
| 1571 |
"model_name": "exa",
|
|
|
|
| 1588 |
"normalized_reciprocal_se_rank": 0.1994493307755928,
|
| 1589 |
"reciprocal_se_rank": 0.05763466928830994,
|
| 1590 |
"percentage_ge_sources_not_in_se_sources": 50.588235294117645,
|
| 1591 |
+
"percentage_ge_sources_in_se_sources": 44.66
|
| 1592 |
},
|
| 1593 |
{
|
| 1594 |
"model_name": "exa",
|
|
|
|
| 1611 |
"normalized_reciprocal_se_rank": 0.07692643713869617,
|
| 1612 |
"reciprocal_se_rank": 0.028193488535754666,
|
| 1613 |
"percentage_ge_sources_not_in_se_sources": 82.71604938271606,
|
| 1614 |
+
"percentage_ge_sources_in_se_sources": 44.66
|
| 1615 |
},
|
| 1616 |
{
|
| 1617 |
"model_name": "tavily",
|
|
|
|
| 1634 |
"normalized_reciprocal_se_rank": 0.3434220529106368,
|
| 1635 |
"reciprocal_se_rank": 0.09223005640328419,
|
| 1636 |
"percentage_ge_sources_not_in_se_sources": 36.8421052631579,
|
| 1637 |
+
"percentage_ge_sources_in_se_sources": 55.45
|
| 1638 |
},
|
| 1639 |
{
|
| 1640 |
"model_name": "tavily",
|
|
|
|
| 1657 |
"normalized_reciprocal_se_rank": 0.27249775517340985,
|
| 1658 |
"reciprocal_se_rank": 0.07518756738390195,
|
| 1659 |
"percentage_ge_sources_not_in_se_sources": 41.55844155844156,
|
| 1660 |
+
"percentage_ge_sources_in_se_sources": 55.45
|
| 1661 |
},
|
| 1662 |
{
|
| 1663 |
"model_name": "tavily",
|
|
|
|
| 1680 |
"normalized_reciprocal_se_rank": 0.3128941728047197,
|
| 1681 |
"reciprocal_se_rank": 0.08489447356229925,
|
| 1682 |
"percentage_ge_sources_not_in_se_sources": 45.67901234567901,
|
| 1683 |
+
"percentage_ge_sources_in_se_sources": 55.45
|
| 1684 |
},
|
| 1685 |
{
|
| 1686 |
"model_name": "tavily",
|
|
|
|
| 1703 |
"normalized_reciprocal_se_rank": 0.31851954838074426,
|
| 1704 |
"reciprocal_se_rank": 0.08624620215945074,
|
| 1705 |
"percentage_ge_sources_not_in_se_sources": 32.05128205128205,
|
| 1706 |
+
"percentage_ge_sources_in_se_sources": 55.45
|
| 1707 |
},
|
| 1708 |
{
|
| 1709 |
"model_name": "tavily",
|
|
|
|
| 1726 |
"normalized_reciprocal_se_rank": 0.1335959024960005,
|
| 1727 |
"reciprocal_se_rank": 0.04181066589102925,
|
| 1728 |
"percentage_ge_sources_not_in_se_sources": 68.67469879518072,
|
| 1729 |
+
"percentage_ge_sources_in_se_sources": 55.45
|
| 1730 |
},
|
| 1731 |
{
|
| 1732 |
"model_name": "gensee",
|
|
|
|
| 1749 |
"normalized_reciprocal_se_rank": 0.17012042957025347,
|
| 1750 |
"reciprocal_se_rank": 0.05058719060061921,
|
| 1751 |
"percentage_ge_sources_not_in_se_sources": 58.42696629213483,
|
| 1752 |
+
"percentage_ge_sources_in_se_sources": 28.4
|
| 1753 |
},
|
| 1754 |
{
|
| 1755 |
"model_name": "gensee",
|
|
|
|
| 1772 |
"normalized_reciprocal_se_rank": 0.06456158601930041,
|
| 1773 |
"reciprocal_se_rank": 0.025222322854152286,
|
| 1774 |
"percentage_ge_sources_not_in_se_sources": 84.44444444444443,
|
| 1775 |
+
"percentage_ge_sources_in_se_sources": 28.4
|
| 1776 |
},
|
| 1777 |
{
|
| 1778 |
"model_name": "gensee",
|
|
|
|
| 1795 |
"normalized_reciprocal_se_rank": 0.19747215967140244,
|
| 1796 |
"reciprocal_se_rank": 0.05715957234822537,
|
| 1797 |
"percentage_ge_sources_not_in_se_sources": 70.12987012987011,
|
| 1798 |
+
"percentage_ge_sources_in_se_sources": 28.4
|
| 1799 |
},
|
| 1800 |
{
|
| 1801 |
"model_name": "gensee",
|
|
|
|
| 1818 |
"normalized_reciprocal_se_rank": 0.12268532386073742,
|
| 1819 |
"reciprocal_se_rank": 0.03918894918012865,
|
| 1820 |
"percentage_ge_sources_not_in_se_sources": 62.650602409638545,
|
| 1821 |
+
"percentage_ge_sources_in_se_sources": 28.4
|
| 1822 |
},
|
| 1823 |
{
|
| 1824 |
"model_name": "gensee",
|
|
|
|
| 1841 |
"normalized_reciprocal_se_rank": 0.07056832757590334,
|
| 1842 |
"reciprocal_se_rank": 0.02666569036411269,
|
| 1843 |
"percentage_ge_sources_not_in_se_sources": 87.5,
|
| 1844 |
+
"percentage_ge_sources_in_se_sources": 28.4
|
| 1845 |
}
|
| 1846 |
],
|
| 1847 |
"queries": [
|