Kristinx0351 committed on
Commit
ff74e24
·
1 Parent(s): 9cbc730

Update percent-in-SE metric and keep google-search on main board

Browse files
Files changed (3) hide show
  1. index.html +2 -2
  2. leaderboard_data.js +0 -0
  3. leaderboard_data.json +61 -61
index.html CHANGED
@@ -887,7 +887,7 @@
887
  <ul class="metric-core">
888
  <li><strong>Weighted Score.</strong> The main leaderboard score, combining the judged dimensions into one overall source-quality metric.</li>
889
  <li><strong>Unweighted Mean.</strong> The simple average across the judged dimension scores, without weighting.</li>
890
- <li><strong>% In SE.</strong> The percentage of cited GE sources that also appear in the matched search-engine source set.</li>
891
  </ul>
892
  <ul class="metric-defs">
893
  <li><strong>Semantic Relevance.</strong> Whether the cited source is directly relevant to the query and answer.</li>
@@ -1111,7 +1111,7 @@
1111
 
1112
  function isMainBoardModel(name) {
1113
  const modelName = String(name || "");
1114
- return !isDeepSeekStudyModel(modelName) && modelName !== "google-search";
1115
  }
1116
 
1117
  function updateTopStats(payload) {
 
887
  <ul class="metric-core">
888
  <li><strong>Weighted Score.</strong> The main leaderboard score, combining the judged dimensions into one overall source-quality metric.</li>
889
  <li><strong>Unweighted Mean.</strong> The simple average across the judged dimension scores, without weighting.</li>
890
+ <li><strong>% In SE.</strong> Percentage of model-cited sources appearing in the first five pages of Google Search.</li>
891
  </ul>
892
  <ul class="metric-defs">
893
  <li><strong>Semantic Relevance.</strong> Whether the cited source is directly relevant to the query and answer.</li>
 
1111
 
1112
  function isMainBoardModel(name) {
1113
  const modelName = String(name || "");
1114
+ return !isDeepSeekStudyModel(modelName);
1115
  }
1116
 
1117
  function updateTopStats(payload) {
leaderboard_data.js CHANGED
The diff for this file is too large to render. See raw diff
 
leaderboard_data.json CHANGED
@@ -129,7 +129,7 @@
129
  "normalized_reciprocal_se_rank": 0.07444993783835872,
130
  "reciprocal_se_rank": 0.027598407393197893,
131
  "percentage_ge_sources_not_in_se_sources": 87.34177215189875,
132
- "percentage_ge_sources_in_se_sources": 12.658227848101266
133
  },
134
  {
135
  "model_name": "gpt-4o",
@@ -151,7 +151,7 @@
151
  "normalized_reciprocal_se_rank": 0.12262328761538778,
152
  "reciprocal_se_rank": 0.03917404241243542,
153
  "percentage_ge_sources_not_in_se_sources": 74.82993197278913,
154
- "percentage_ge_sources_in_se_sources": 25.170068027210874
155
  },
156
  {
157
  "model_name": "grok-4.1-fast-non-reasoning",
@@ -173,7 +173,7 @@
173
  "normalized_reciprocal_se_rank": 0.14088939196167136,
174
  "reciprocal_se_rank": 0.043563227680110374,
175
  "percentage_ge_sources_not_in_se_sources": 69.15584415584416,
176
- "percentage_ge_sources_in_se_sources": 30.844155844155843
177
  },
178
  {
179
  "model_name": "claude",
@@ -195,7 +195,7 @@
195
  "normalized_reciprocal_se_rank": 0.1641520228549373,
196
  "reciprocal_se_rank": 0.04915303461805529,
197
  "percentage_ge_sources_not_in_se_sources": 62.934362934362916,
198
- "percentage_ge_sources_in_se_sources": 37.06563706563707
199
  },
200
  {
201
  "model_name": "Gemini-3-Pro-Preview",
@@ -217,7 +217,7 @@
217
  "normalized_reciprocal_se_rank": 0.08733801529571,
218
  "reciprocal_se_rank": 0.03069529979193034,
219
  "percentage_ge_sources_not_in_se_sources": 79.50450450450452,
220
- "percentage_ge_sources_in_se_sources": 20.495495495495497
221
  },
222
  {
223
  "model_name": "Gemini-3-Flash-Preview",
@@ -239,7 +239,7 @@
239
  "normalized_reciprocal_se_rank": 0.10236888641393292,
240
  "reciprocal_se_rank": 0.03430708678393044,
241
  "percentage_ge_sources_not_in_se_sources": 76.7543859649123,
242
- "percentage_ge_sources_in_se_sources": 23.245614035087723
243
  },
244
  {
245
  "model_name": "Gemini-2.5-Flash-Preview",
@@ -261,7 +261,7 @@
261
  "normalized_reciprocal_se_rank": null,
262
  "reciprocal_se_rank": null,
263
  "percentage_ge_sources_not_in_se_sources": null,
264
- "percentage_ge_sources_in_se_sources": null
265
  },
266
  {
267
  "model_name": "Perplexity-Sonar-Pro",
@@ -283,7 +283,7 @@
283
  "normalized_reciprocal_se_rank": 0.16776928125494017,
284
  "reciprocal_se_rank": 0.050022230204463676,
285
  "percentage_ge_sources_not_in_se_sources": 60.949868073878655,
286
- "percentage_ge_sources_in_se_sources": 39.05013192612139
287
  },
288
  {
289
  "model_name": "google-search",
@@ -327,7 +327,7 @@
327
  "normalized_reciprocal_se_rank": 0.20452762803005117,
328
  "reciprocal_se_rank": 0.058854939745084926,
329
  "percentage_ge_sources_not_in_se_sources": 57.176470588235304,
330
- "percentage_ge_sources_in_se_sources": 42.8235294117647
331
  },
332
  {
333
  "model_name": "tavily",
@@ -349,7 +349,7 @@
349
  "normalized_reciprocal_se_rank": 0.2743286477154668,
350
  "reciprocal_se_rank": 0.07562751486366782,
351
  "percentage_ge_sources_not_in_se_sources": 45.316455696202524,
352
- "percentage_ge_sources_in_se_sources": 54.683544303797476
353
  },
354
  {
355
  "model_name": "gensee",
@@ -371,7 +371,7 @@
371
  "normalized_reciprocal_se_rank": 0.1299587450378004,
372
  "reciprocal_se_rank": 0.040936688734811204,
373
  "percentage_ge_sources_not_in_se_sources": 71.46596858638743,
374
- "percentage_ge_sources_in_se_sources": 28.53403141361256
375
  },
376
  {
377
  "model_name": "deepseek-chat-gensee",
@@ -484,7 +484,7 @@
484
  "normalized_reciprocal_se_rank": 0.024647681783897292,
485
  "reciprocal_se_rank": 0.015631360428654928,
486
  "percentage_ge_sources_not_in_se_sources": 89.41176470588235,
487
- "percentage_ge_sources_in_se_sources": 10.588235294117647
488
  },
489
  {
490
  "model_name": "gpt-5",
@@ -507,7 +507,7 @@
507
  "normalized_reciprocal_se_rank": 0.24588844317711944,
508
  "reciprocal_se_rank": 0.06879358221974471,
509
  "percentage_ge_sources_not_in_se_sources": 71.05263157894737,
510
- "percentage_ge_sources_in_se_sources": 28.94736842105263
511
  },
512
  {
513
  "model_name": "gpt-5",
@@ -530,7 +530,7 @@
530
  "normalized_reciprocal_se_rank": 0.16050373868555687,
531
  "reciprocal_se_rank": 0.048276383810364386,
532
  "percentage_ge_sources_not_in_se_sources": 74.54545454545455,
533
- "percentage_ge_sources_in_se_sources": 25.454545454545453
534
  },
535
  {
536
  "model_name": "gpt-5",
@@ -553,7 +553,7 @@
553
  "normalized_reciprocal_se_rank": 0.04254727146439185,
554
  "reciprocal_se_rank": 0.019932475424696096,
555
  "percentage_ge_sources_not_in_se_sources": 91.37931034482759,
556
- "percentage_ge_sources_in_se_sources": 8.620689655172415
557
  },
558
  {
559
  "model_name": "gpt-5",
@@ -576,7 +576,7 @@
576
  "normalized_reciprocal_se_rank": 0.0098989898989899,
577
  "reciprocal_se_rank": 0.012087378640776695,
578
  "percentage_ge_sources_not_in_se_sources": 98.75,
579
- "percentage_ge_sources_in_se_sources": 1.2499999999999998
580
  },
581
  {
582
  "model_name": "gpt-4o",
@@ -599,7 +599,7 @@
599
  "normalized_reciprocal_se_rank": 0.12281919725976252,
600
  "reciprocal_se_rank": 0.03922111778814682,
601
  "percentage_ge_sources_not_in_se_sources": 71.7948717948718,
602
- "percentage_ge_sources_in_se_sources": 28.205128205128204
603
  },
604
  {
605
  "model_name": "gpt-4o",
@@ -622,7 +622,7 @@
622
  "normalized_reciprocal_se_rank": 0.08335991493886231,
623
  "reciprocal_se_rank": 0.029739397036280018,
624
  "percentage_ge_sources_not_in_se_sources": 89.47368421052632,
625
- "percentage_ge_sources_in_se_sources": 10.526315789473685
626
  },
627
  {
628
  "model_name": "gpt-4o",
@@ -645,7 +645,7 @@
645
  "normalized_reciprocal_se_rank": 0.28054167213258124,
646
  "reciprocal_se_rank": 0.07712045034253773,
647
  "percentage_ge_sources_not_in_se_sources": 62.5,
648
- "percentage_ge_sources_in_se_sources": 37.5
649
  },
650
  {
651
  "model_name": "gpt-4o",
@@ -668,7 +668,7 @@
668
  "normalized_reciprocal_se_rank": 0.0996490754594708,
669
  "reciprocal_se_rank": 0.03365353997691167,
670
  "percentage_ge_sources_not_in_se_sources": 71.05263157894738,
671
- "percentage_ge_sources_in_se_sources": 28.947368421052637
672
  },
673
  {
674
  "model_name": "gpt-4o",
@@ -691,7 +691,7 @@
691
  "normalized_reciprocal_se_rank": 0.07521622430371759,
692
  "reciprocal_se_rank": 0.027782539335116603,
693
  "percentage_ge_sources_not_in_se_sources": 83.95061728395062,
694
- "percentage_ge_sources_in_se_sources": 16.049382716049383
695
  },
696
  {
697
  "model_name": "Grok-4.1-Fast",
@@ -829,7 +829,7 @@
829
  "normalized_reciprocal_se_rank": 0.10523315112286376,
830
  "reciprocal_se_rank": 0.03499534456593085,
831
  "percentage_ge_sources_not_in_se_sources": 72.0,
832
- "percentage_ge_sources_in_se_sources": 28.0
833
  },
834
  {
835
  "model_name": "Gemini-3-Pro-Preview",
@@ -852,7 +852,7 @@
852
  "normalized_reciprocal_se_rank": 0.056782624848369385,
853
  "reciprocal_se_rank": 0.02335310645628293,
854
  "percentage_ge_sources_not_in_se_sources": 83.13253012048195,
855
- "percentage_ge_sources_in_se_sources": 16.86746987951807
856
  },
857
  {
858
  "model_name": "Gemini-3-Pro-Preview",
@@ -875,7 +875,7 @@
875
  "normalized_reciprocal_se_rank": 0.15368587669053982,
876
  "reciprocal_se_rank": 0.04663811114651323,
877
  "percentage_ge_sources_not_in_se_sources": 73.25581395348837,
878
- "percentage_ge_sources_in_se_sources": 26.74418604651163
879
  },
880
  {
881
  "model_name": "Gemini-3-Pro-Preview",
@@ -898,7 +898,7 @@
898
  "normalized_reciprocal_se_rank": 0.11912889330801175,
899
  "reciprocal_se_rank": 0.038334369993915436,
900
  "percentage_ge_sources_not_in_se_sources": 71.60493827160494,
901
- "percentage_ge_sources_in_se_sources": 28.395061728395063
902
  },
903
  {
904
  "model_name": "Gemini-3-Pro-Preview",
@@ -921,7 +921,7 @@
921
  "normalized_reciprocal_se_rank": 0.007184894289987778,
922
  "reciprocal_se_rank": 0.01143520518133201,
923
  "percentage_ge_sources_not_in_se_sources": 96.80851063829788,
924
- "percentage_ge_sources_in_se_sources": 3.1914893617021276
925
  },
926
  {
927
  "model_name": "Gemini-3-Flash-Preview",
@@ -944,7 +944,7 @@
944
  "normalized_reciprocal_se_rank": 0.12549266202008144,
945
  "reciprocal_se_rank": 0.039863528009679766,
946
  "percentage_ge_sources_not_in_se_sources": 71.0,
947
- "percentage_ge_sources_in_se_sources": 29.0
948
  },
949
  {
950
  "model_name": "Gemini-3-Flash-Preview",
@@ -967,7 +967,7 @@
967
  "normalized_reciprocal_se_rank": 0.06427878374204764,
968
  "reciprocal_se_rank": 0.0251543679380163,
969
  "percentage_ge_sources_not_in_se_sources": 78.82352941176471,
970
- "percentage_ge_sources_in_se_sources": 21.176470588235293
971
  },
972
  {
973
  "model_name": "Gemini-3-Flash-Preview",
@@ -990,7 +990,7 @@
990
  "normalized_reciprocal_se_rank": 0.14693960626866004,
991
  "reciprocal_se_rank": 0.04501704131212949,
992
  "percentage_ge_sources_not_in_se_sources": 73.03370786516854,
993
- "percentage_ge_sources_in_se_sources": 26.96629213483146
994
  },
995
  {
996
  "model_name": "Gemini-3-Flash-Preview",
@@ -1013,7 +1013,7 @@
1013
  "normalized_reciprocal_se_rank": 0.14047144175948556,
1014
  "reciprocal_se_rank": 0.04346279789851716,
1015
  "percentage_ge_sources_not_in_se_sources": 72.22222222222223,
1016
- "percentage_ge_sources_in_se_sources": 27.77777777777778
1017
  },
1018
  {
1019
  "model_name": "Gemini-3-Flash-Preview",
@@ -1036,7 +1036,7 @@
1036
  "normalized_reciprocal_se_rank": 0.032034724656595535,
1037
  "reciprocal_se_rank": 0.017406402283987762,
1038
  "percentage_ge_sources_not_in_se_sources": 89.1304347826087,
1039
- "percentage_ge_sources_in_se_sources": 10.869565217391303
1040
  },
1041
  {
1042
  "model_name": "Gemini-2.5-Flash-Preview",
@@ -1059,7 +1059,7 @@
1059
  "normalized_reciprocal_se_rank": null,
1060
  "reciprocal_se_rank": null,
1061
  "percentage_ge_sources_not_in_se_sources": null,
1062
- "percentage_ge_sources_in_se_sources": null
1063
  },
1064
  {
1065
  "model_name": "Gemini-2.5-Flash-Preview",
@@ -1082,7 +1082,7 @@
1082
  "normalized_reciprocal_se_rank": null,
1083
  "reciprocal_se_rank": null,
1084
  "percentage_ge_sources_not_in_se_sources": null,
1085
- "percentage_ge_sources_in_se_sources": null
1086
  },
1087
  {
1088
  "model_name": "Gemini-2.5-Flash-Preview",
@@ -1105,7 +1105,7 @@
1105
  "normalized_reciprocal_se_rank": null,
1106
  "reciprocal_se_rank": null,
1107
  "percentage_ge_sources_not_in_se_sources": null,
1108
- "percentage_ge_sources_in_se_sources": null
1109
  },
1110
  {
1111
  "model_name": "Gemini-2.5-Flash-Preview",
@@ -1128,7 +1128,7 @@
1128
  "normalized_reciprocal_se_rank": null,
1129
  "reciprocal_se_rank": null,
1130
  "percentage_ge_sources_not_in_se_sources": null,
1131
- "percentage_ge_sources_in_se_sources": null
1132
  },
1133
  {
1134
  "model_name": "Gemini-2.5-Flash-Preview",
@@ -1151,7 +1151,7 @@
1151
  "normalized_reciprocal_se_rank": null,
1152
  "reciprocal_se_rank": null,
1153
  "percentage_ge_sources_not_in_se_sources": null,
1154
- "percentage_ge_sources_in_se_sources": null
1155
  },
1156
  {
1157
  "model_name": "claude",
@@ -1174,7 +1174,7 @@
1174
  "normalized_reciprocal_se_rank": 0.21041652104583275,
1175
  "reciprocal_se_rank": 0.06026998928043071,
1176
  "percentage_ge_sources_not_in_se_sources": 54.28571428571426,
1177
- "percentage_ge_sources_in_se_sources": 45.71428571428574
1178
  },
1179
  {
1180
  "model_name": "claude",
@@ -1197,7 +1197,7 @@
1197
  "normalized_reciprocal_se_rank": 0.1327922077922078,
1198
  "reciprocal_se_rank": 0.04161754507628294,
1199
  "percentage_ge_sources_not_in_se_sources": 70.00000000000001,
1200
- "percentage_ge_sources_in_se_sources": 30.0
1201
  },
1202
  {
1203
  "model_name": "claude",
@@ -1220,7 +1220,7 @@
1220
  "normalized_reciprocal_se_rank": 0.28256007847697834,
1221
  "reciprocal_se_rank": 0.07760545575053605,
1222
  "percentage_ge_sources_not_in_se_sources": 53.84615384615383,
1223
- "percentage_ge_sources_in_se_sources": 46.153846153846175
1224
  },
1225
  {
1226
  "model_name": "claude",
@@ -1243,7 +1243,7 @@
1243
  "normalized_reciprocal_se_rank": 0.18350554762304847,
1244
  "reciprocal_se_rank": 0.0538035175113636,
1245
  "percentage_ge_sources_not_in_se_sources": 49.15254237288135,
1246
- "percentage_ge_sources_in_se_sources": 50.84745762711865
1247
  },
1248
  {
1249
  "model_name": "claude",
@@ -1266,7 +1266,7 @@
1266
  "normalized_reciprocal_se_rank": 0.056933641949831956,
1267
  "reciprocal_se_rank": 0.02338939454619748,
1268
  "percentage_ge_sources_not_in_se_sources": 83.95061728395062,
1269
- "percentage_ge_sources_in_se_sources": 16.049382716049383
1270
  },
1271
  {
1272
  "model_name": "Perplexity-Sonar-Pro",
@@ -1289,7 +1289,7 @@
1289
  "normalized_reciprocal_se_rank": 0.2274442114543135,
1290
  "reciprocal_se_rank": 0.0643615944999443,
1291
  "percentage_ge_sources_not_in_se_sources": 46.34146341463415,
1292
- "percentage_ge_sources_in_se_sources": 53.65853658536585
1293
  },
1294
  {
1295
  "model_name": "Perplexity-Sonar-Pro",
@@ -1312,7 +1312,7 @@
1312
  "normalized_reciprocal_se_rank": 0.13063939371395492,
1313
  "reciprocal_se_rank": 0.04110024266427558,
1314
  "percentage_ge_sources_not_in_se_sources": 63.29113924050633,
1315
- "percentage_ge_sources_in_se_sources": 36.70886075949367
1316
  },
1317
  {
1318
  "model_name": "Perplexity-Sonar-Pro",
@@ -1335,7 +1335,7 @@
1335
  "normalized_reciprocal_se_rank": 0.20414708640646176,
1336
  "reciprocal_se_rank": 0.05876349891805757,
1337
  "percentage_ge_sources_not_in_se_sources": 62.5,
1338
- "percentage_ge_sources_in_se_sources": 37.5
1339
  },
1340
  {
1341
  "model_name": "Perplexity-Sonar-Pro",
@@ -1358,7 +1358,7 @@
1358
  "normalized_reciprocal_se_rank": 0.2024394067077523,
1359
  "reciprocal_se_rank": 0.05835315840793079,
1360
  "percentage_ge_sources_not_in_se_sources": 52.4390243902439,
1361
- "percentage_ge_sources_in_se_sources": 47.5609756097561
1362
  },
1363
  {
1364
  "model_name": "Perplexity-Sonar-Pro",
@@ -1381,7 +1381,7 @@
1381
  "normalized_reciprocal_se_rank": 0.051796852838519515,
1382
  "reciprocal_se_rank": 0.02215506900731415,
1383
  "percentage_ge_sources_not_in_se_sources": 85.93750000000001,
1384
- "percentage_ge_sources_in_se_sources": 14.0625
1385
  },
1386
  {
1387
  "model_name": "google-search",
@@ -1519,7 +1519,7 @@
1519
  "normalized_reciprocal_se_rank": 0.28063309301928224,
1520
  "reciprocal_se_rank": 0.07714241798278879,
1521
  "percentage_ge_sources_not_in_se_sources": 38.20224719101124,
1522
- "percentage_ge_sources_in_se_sources": 61.79775280898876
1523
  },
1524
  {
1525
  "model_name": "exa",
@@ -1542,7 +1542,7 @@
1542
  "normalized_reciprocal_se_rank": 0.22448376867351463,
1543
  "reciprocal_se_rank": 0.06365022596766494,
1544
  "percentage_ge_sources_not_in_se_sources": 54.21686746987952,
1545
- "percentage_ge_sources_in_se_sources": 45.78313253012048
1546
  },
1547
  {
1548
  "model_name": "exa",
@@ -1565,7 +1565,7 @@
1565
  "normalized_reciprocal_se_rank": 0.2313966587355651,
1566
  "reciprocal_se_rank": 0.06531133304568201,
1567
  "percentage_ge_sources_not_in_se_sources": 62.06896551724138,
1568
- "percentage_ge_sources_in_se_sources": 37.93103448275862
1569
  },
1570
  {
1571
  "model_name": "exa",
@@ -1588,7 +1588,7 @@
1588
  "normalized_reciprocal_se_rank": 0.1994493307755928,
1589
  "reciprocal_se_rank": 0.05763466928830994,
1590
  "percentage_ge_sources_not_in_se_sources": 50.588235294117645,
1591
- "percentage_ge_sources_in_se_sources": 49.411764705882355
1592
  },
1593
  {
1594
  "model_name": "exa",
@@ -1611,7 +1611,7 @@
1611
  "normalized_reciprocal_se_rank": 0.07692643713869617,
1612
  "reciprocal_se_rank": 0.028193488535754666,
1613
  "percentage_ge_sources_not_in_se_sources": 82.71604938271606,
1614
- "percentage_ge_sources_in_se_sources": 17.28395061728395
1615
  },
1616
  {
1617
  "model_name": "tavily",
@@ -1634,7 +1634,7 @@
1634
  "normalized_reciprocal_se_rank": 0.3434220529106368,
1635
  "reciprocal_se_rank": 0.09223005640328419,
1636
  "percentage_ge_sources_not_in_se_sources": 36.8421052631579,
1637
- "percentage_ge_sources_in_se_sources": 63.1578947368421
1638
  },
1639
  {
1640
  "model_name": "tavily",
@@ -1657,7 +1657,7 @@
1657
  "normalized_reciprocal_se_rank": 0.27249775517340985,
1658
  "reciprocal_se_rank": 0.07518756738390195,
1659
  "percentage_ge_sources_not_in_se_sources": 41.55844155844156,
1660
- "percentage_ge_sources_in_se_sources": 58.44155844155844
1661
  },
1662
  {
1663
  "model_name": "tavily",
@@ -1680,7 +1680,7 @@
1680
  "normalized_reciprocal_se_rank": 0.3128941728047197,
1681
  "reciprocal_se_rank": 0.08489447356229925,
1682
  "percentage_ge_sources_not_in_se_sources": 45.67901234567901,
1683
- "percentage_ge_sources_in_se_sources": 54.32098765432099
1684
  },
1685
  {
1686
  "model_name": "tavily",
@@ -1703,7 +1703,7 @@
1703
  "normalized_reciprocal_se_rank": 0.31851954838074426,
1704
  "reciprocal_se_rank": 0.08624620215945074,
1705
  "percentage_ge_sources_not_in_se_sources": 32.05128205128205,
1706
- "percentage_ge_sources_in_se_sources": 67.94871794871796
1707
  },
1708
  {
1709
  "model_name": "tavily",
@@ -1726,7 +1726,7 @@
1726
  "normalized_reciprocal_se_rank": 0.1335959024960005,
1727
  "reciprocal_se_rank": 0.04181066589102925,
1728
  "percentage_ge_sources_not_in_se_sources": 68.67469879518072,
1729
- "percentage_ge_sources_in_se_sources": 31.325301204819276
1730
  },
1731
  {
1732
  "model_name": "gensee",
@@ -1749,7 +1749,7 @@
1749
  "normalized_reciprocal_se_rank": 0.17012042957025347,
1750
  "reciprocal_se_rank": 0.05058719060061921,
1751
  "percentage_ge_sources_not_in_se_sources": 58.42696629213483,
1752
- "percentage_ge_sources_in_se_sources": 41.57303370786517
1753
  },
1754
  {
1755
  "model_name": "gensee",
@@ -1772,7 +1772,7 @@
1772
  "normalized_reciprocal_se_rank": 0.06456158601930041,
1773
  "reciprocal_se_rank": 0.025222322854152286,
1774
  "percentage_ge_sources_not_in_se_sources": 84.44444444444443,
1775
- "percentage_ge_sources_in_se_sources": 15.555555555555555
1776
  },
1777
  {
1778
  "model_name": "gensee",
@@ -1795,7 +1795,7 @@
1795
  "normalized_reciprocal_se_rank": 0.19747215967140244,
1796
  "reciprocal_se_rank": 0.05715957234822537,
1797
  "percentage_ge_sources_not_in_se_sources": 70.12987012987011,
1798
- "percentage_ge_sources_in_se_sources": 29.870129870129865
1799
  },
1800
  {
1801
  "model_name": "gensee",
@@ -1818,7 +1818,7 @@
1818
  "normalized_reciprocal_se_rank": 0.12268532386073742,
1819
  "reciprocal_se_rank": 0.03918894918012865,
1820
  "percentage_ge_sources_not_in_se_sources": 62.650602409638545,
1821
- "percentage_ge_sources_in_se_sources": 37.34939759036144
1822
  },
1823
  {
1824
  "model_name": "gensee",
@@ -1841,7 +1841,7 @@
1841
  "normalized_reciprocal_se_rank": 0.07056832757590334,
1842
  "reciprocal_se_rank": 0.02666569036411269,
1843
  "percentage_ge_sources_not_in_se_sources": 87.5,
1844
- "percentage_ge_sources_in_se_sources": 12.5
1845
  }
1846
  ],
1847
  "queries": [
 
129
  "normalized_reciprocal_se_rank": 0.07444993783835872,
130
  "reciprocal_se_rank": 0.027598407393197893,
131
  "percentage_ge_sources_not_in_se_sources": 87.34177215189875,
132
+ "percentage_ge_sources_in_se_sources": 15.99
133
  },
134
  {
135
  "model_name": "gpt-4o",
 
151
  "normalized_reciprocal_se_rank": 0.12262328761538778,
152
  "reciprocal_se_rank": 0.03917404241243542,
153
  "percentage_ge_sources_not_in_se_sources": 74.82993197278913,
154
+ "percentage_ge_sources_in_se_sources": 27.53
155
  },
156
  {
157
  "model_name": "grok-4.1-fast-non-reasoning",
 
173
  "normalized_reciprocal_se_rank": 0.14088939196167136,
174
  "reciprocal_se_rank": 0.043563227680110374,
175
  "percentage_ge_sources_not_in_se_sources": 69.15584415584416,
176
+ "percentage_ge_sources_in_se_sources": 29.67
177
  },
178
  {
179
  "model_name": "claude",
 
195
  "normalized_reciprocal_se_rank": 0.1641520228549373,
196
  "reciprocal_se_rank": 0.04915303461805529,
197
  "percentage_ge_sources_not_in_se_sources": 62.934362934362916,
198
+ "percentage_ge_sources_in_se_sources": 37.1
199
  },
200
  {
201
  "model_name": "Gemini-3-Pro-Preview",
 
217
  "normalized_reciprocal_se_rank": 0.08733801529571,
218
  "reciprocal_se_rank": 0.03069529979193034,
219
  "percentage_ge_sources_not_in_se_sources": 79.50450450450452,
220
+ "percentage_ge_sources_in_se_sources": 20.95
221
  },
222
  {
223
  "model_name": "Gemini-3-Flash-Preview",
 
239
  "normalized_reciprocal_se_rank": 0.10236888641393292,
240
  "reciprocal_se_rank": 0.03430708678393044,
241
  "percentage_ge_sources_not_in_se_sources": 76.7543859649123,
242
+ "percentage_ge_sources_in_se_sources": 23.5
243
  },
244
  {
245
  "model_name": "Gemini-2.5-Flash-Preview",
 
261
  "normalized_reciprocal_se_rank": null,
262
  "reciprocal_se_rank": null,
263
  "percentage_ge_sources_not_in_se_sources": null,
264
+ "percentage_ge_sources_in_se_sources": 31.96
265
  },
266
  {
267
  "model_name": "Perplexity-Sonar-Pro",
 
283
  "normalized_reciprocal_se_rank": 0.16776928125494017,
284
  "reciprocal_se_rank": 0.050022230204463676,
285
  "percentage_ge_sources_not_in_se_sources": 60.949868073878655,
286
+ "percentage_ge_sources_in_se_sources": 40.0
287
  },
288
  {
289
  "model_name": "google-search",
 
327
  "normalized_reciprocal_se_rank": 0.20452762803005117,
328
  "reciprocal_se_rank": 0.058854939745084926,
329
  "percentage_ge_sources_not_in_se_sources": 57.176470588235304,
330
+ "percentage_ge_sources_in_se_sources": 44.66
331
  },
332
  {
333
  "model_name": "tavily",
 
349
  "normalized_reciprocal_se_rank": 0.2743286477154668,
350
  "reciprocal_se_rank": 0.07562751486366782,
351
  "percentage_ge_sources_not_in_se_sources": 45.316455696202524,
352
+ "percentage_ge_sources_in_se_sources": 55.45
353
  },
354
  {
355
  "model_name": "gensee",
 
371
  "normalized_reciprocal_se_rank": 0.1299587450378004,
372
  "reciprocal_se_rank": 0.040936688734811204,
373
  "percentage_ge_sources_not_in_se_sources": 71.46596858638743,
374
+ "percentage_ge_sources_in_se_sources": 28.4
375
  },
376
  {
377
  "model_name": "deepseek-chat-gensee",
 
484
  "normalized_reciprocal_se_rank": 0.024647681783897292,
485
  "reciprocal_se_rank": 0.015631360428654928,
486
  "percentage_ge_sources_not_in_se_sources": 89.41176470588235,
487
+ "percentage_ge_sources_in_se_sources": 15.99
488
  },
489
  {
490
  "model_name": "gpt-5",
 
507
  "normalized_reciprocal_se_rank": 0.24588844317711944,
508
  "reciprocal_se_rank": 0.06879358221974471,
509
  "percentage_ge_sources_not_in_se_sources": 71.05263157894737,
510
+ "percentage_ge_sources_in_se_sources": 15.99
511
  },
512
  {
513
  "model_name": "gpt-5",
 
530
  "normalized_reciprocal_se_rank": 0.16050373868555687,
531
  "reciprocal_se_rank": 0.048276383810364386,
532
  "percentage_ge_sources_not_in_se_sources": 74.54545454545455,
533
+ "percentage_ge_sources_in_se_sources": 15.99
534
  },
535
  {
536
  "model_name": "gpt-5",
 
553
  "normalized_reciprocal_se_rank": 0.04254727146439185,
554
  "reciprocal_se_rank": 0.019932475424696096,
555
  "percentage_ge_sources_not_in_se_sources": 91.37931034482759,
556
+ "percentage_ge_sources_in_se_sources": 15.99
557
  },
558
  {
559
  "model_name": "gpt-5",
 
576
  "normalized_reciprocal_se_rank": 0.0098989898989899,
577
  "reciprocal_se_rank": 0.012087378640776695,
578
  "percentage_ge_sources_not_in_se_sources": 98.75,
579
+ "percentage_ge_sources_in_se_sources": 15.99
580
  },
581
  {
582
  "model_name": "gpt-4o",
 
599
  "normalized_reciprocal_se_rank": 0.12281919725976252,
600
  "reciprocal_se_rank": 0.03922111778814682,
601
  "percentage_ge_sources_not_in_se_sources": 71.7948717948718,
602
+ "percentage_ge_sources_in_se_sources": 27.53
603
  },
604
  {
605
  "model_name": "gpt-4o",
 
622
  "normalized_reciprocal_se_rank": 0.08335991493886231,
623
  "reciprocal_se_rank": 0.029739397036280018,
624
  "percentage_ge_sources_not_in_se_sources": 89.47368421052632,
625
+ "percentage_ge_sources_in_se_sources": 27.53
626
  },
627
  {
628
  "model_name": "gpt-4o",
 
645
  "normalized_reciprocal_se_rank": 0.28054167213258124,
646
  "reciprocal_se_rank": 0.07712045034253773,
647
  "percentage_ge_sources_not_in_se_sources": 62.5,
648
+ "percentage_ge_sources_in_se_sources": 27.53
649
  },
650
  {
651
  "model_name": "gpt-4o",
 
668
  "normalized_reciprocal_se_rank": 0.0996490754594708,
669
  "reciprocal_se_rank": 0.03365353997691167,
670
  "percentage_ge_sources_not_in_se_sources": 71.05263157894738,
671
+ "percentage_ge_sources_in_se_sources": 27.53
672
  },
673
  {
674
  "model_name": "gpt-4o",
 
691
  "normalized_reciprocal_se_rank": 0.07521622430371759,
692
  "reciprocal_se_rank": 0.027782539335116603,
693
  "percentage_ge_sources_not_in_se_sources": 83.95061728395062,
694
+ "percentage_ge_sources_in_se_sources": 27.53
695
  },
696
  {
697
  "model_name": "Grok-4.1-Fast",
 
829
  "normalized_reciprocal_se_rank": 0.10523315112286376,
830
  "reciprocal_se_rank": 0.03499534456593085,
831
  "percentage_ge_sources_not_in_se_sources": 72.0,
832
+ "percentage_ge_sources_in_se_sources": 20.95
833
  },
834
  {
835
  "model_name": "Gemini-3-Pro-Preview",
 
852
  "normalized_reciprocal_se_rank": 0.056782624848369385,
853
  "reciprocal_se_rank": 0.02335310645628293,
854
  "percentage_ge_sources_not_in_se_sources": 83.13253012048195,
855
+ "percentage_ge_sources_in_se_sources": 20.95
856
  },
857
  {
858
  "model_name": "Gemini-3-Pro-Preview",
 
875
  "normalized_reciprocal_se_rank": 0.15368587669053982,
876
  "reciprocal_se_rank": 0.04663811114651323,
877
  "percentage_ge_sources_not_in_se_sources": 73.25581395348837,
878
+ "percentage_ge_sources_in_se_sources": 20.95
879
  },
880
  {
881
  "model_name": "Gemini-3-Pro-Preview",
 
898
  "normalized_reciprocal_se_rank": 0.11912889330801175,
899
  "reciprocal_se_rank": 0.038334369993915436,
900
  "percentage_ge_sources_not_in_se_sources": 71.60493827160494,
901
+ "percentage_ge_sources_in_se_sources": 20.95
902
  },
903
  {
904
  "model_name": "Gemini-3-Pro-Preview",
 
921
  "normalized_reciprocal_se_rank": 0.007184894289987778,
922
  "reciprocal_se_rank": 0.01143520518133201,
923
  "percentage_ge_sources_not_in_se_sources": 96.80851063829788,
924
+ "percentage_ge_sources_in_se_sources": 20.95
925
  },
926
  {
927
  "model_name": "Gemini-3-Flash-Preview",
 
944
  "normalized_reciprocal_se_rank": 0.12549266202008144,
945
  "reciprocal_se_rank": 0.039863528009679766,
946
  "percentage_ge_sources_not_in_se_sources": 71.0,
947
+ "percentage_ge_sources_in_se_sources": 23.5
948
  },
949
  {
950
  "model_name": "Gemini-3-Flash-Preview",
 
967
  "normalized_reciprocal_se_rank": 0.06427878374204764,
968
  "reciprocal_se_rank": 0.0251543679380163,
969
  "percentage_ge_sources_not_in_se_sources": 78.82352941176471,
970
+ "percentage_ge_sources_in_se_sources": 23.5
971
  },
972
  {
973
  "model_name": "Gemini-3-Flash-Preview",
 
990
  "normalized_reciprocal_se_rank": 0.14693960626866004,
991
  "reciprocal_se_rank": 0.04501704131212949,
992
  "percentage_ge_sources_not_in_se_sources": 73.03370786516854,
993
+ "percentage_ge_sources_in_se_sources": 23.5
994
  },
995
  {
996
  "model_name": "Gemini-3-Flash-Preview",
 
1013
  "normalized_reciprocal_se_rank": 0.14047144175948556,
1014
  "reciprocal_se_rank": 0.04346279789851716,
1015
  "percentage_ge_sources_not_in_se_sources": 72.22222222222223,
1016
+ "percentage_ge_sources_in_se_sources": 23.5
1017
  },
1018
  {
1019
  "model_name": "Gemini-3-Flash-Preview",
 
1036
  "normalized_reciprocal_se_rank": 0.032034724656595535,
1037
  "reciprocal_se_rank": 0.017406402283987762,
1038
  "percentage_ge_sources_not_in_se_sources": 89.1304347826087,
1039
+ "percentage_ge_sources_in_se_sources": 23.5
1040
  },
1041
  {
1042
  "model_name": "Gemini-2.5-Flash-Preview",
 
1059
  "normalized_reciprocal_se_rank": null,
1060
  "reciprocal_se_rank": null,
1061
  "percentage_ge_sources_not_in_se_sources": null,
1062
+ "percentage_ge_sources_in_se_sources": 31.96
1063
  },
1064
  {
1065
  "model_name": "Gemini-2.5-Flash-Preview",
 
1082
  "normalized_reciprocal_se_rank": null,
1083
  "reciprocal_se_rank": null,
1084
  "percentage_ge_sources_not_in_se_sources": null,
1085
+ "percentage_ge_sources_in_se_sources": 31.96
1086
  },
1087
  {
1088
  "model_name": "Gemini-2.5-Flash-Preview",
 
1105
  "normalized_reciprocal_se_rank": null,
1106
  "reciprocal_se_rank": null,
1107
  "percentage_ge_sources_not_in_se_sources": null,
1108
+ "percentage_ge_sources_in_se_sources": 31.96
1109
  },
1110
  {
1111
  "model_name": "Gemini-2.5-Flash-Preview",
 
1128
  "normalized_reciprocal_se_rank": null,
1129
  "reciprocal_se_rank": null,
1130
  "percentage_ge_sources_not_in_se_sources": null,
1131
+ "percentage_ge_sources_in_se_sources": 31.96
1132
  },
1133
  {
1134
  "model_name": "Gemini-2.5-Flash-Preview",
 
1151
  "normalized_reciprocal_se_rank": null,
1152
  "reciprocal_se_rank": null,
1153
  "percentage_ge_sources_not_in_se_sources": null,
1154
+ "percentage_ge_sources_in_se_sources": 31.96
1155
  },
1156
  {
1157
  "model_name": "claude",
 
1174
  "normalized_reciprocal_se_rank": 0.21041652104583275,
1175
  "reciprocal_se_rank": 0.06026998928043071,
1176
  "percentage_ge_sources_not_in_se_sources": 54.28571428571426,
1177
+ "percentage_ge_sources_in_se_sources": 37.1
1178
  },
1179
  {
1180
  "model_name": "claude",
 
1197
  "normalized_reciprocal_se_rank": 0.1327922077922078,
1198
  "reciprocal_se_rank": 0.04161754507628294,
1199
  "percentage_ge_sources_not_in_se_sources": 70.00000000000001,
1200
+ "percentage_ge_sources_in_se_sources": 37.1
1201
  },
1202
  {
1203
  "model_name": "claude",
 
1220
  "normalized_reciprocal_se_rank": 0.28256007847697834,
1221
  "reciprocal_se_rank": 0.07760545575053605,
1222
  "percentage_ge_sources_not_in_se_sources": 53.84615384615383,
1223
+ "percentage_ge_sources_in_se_sources": 37.1
1224
  },
1225
  {
1226
  "model_name": "claude",
 
1243
  "normalized_reciprocal_se_rank": 0.18350554762304847,
1244
  "reciprocal_se_rank": 0.0538035175113636,
1245
  "percentage_ge_sources_not_in_se_sources": 49.15254237288135,
1246
+ "percentage_ge_sources_in_se_sources": 37.1
1247
  },
1248
  {
1249
  "model_name": "claude",
 
1266
  "normalized_reciprocal_se_rank": 0.056933641949831956,
1267
  "reciprocal_se_rank": 0.02338939454619748,
1268
  "percentage_ge_sources_not_in_se_sources": 83.95061728395062,
1269
+ "percentage_ge_sources_in_se_sources": 37.1
1270
  },
1271
  {
1272
  "model_name": "Perplexity-Sonar-Pro",
 
1289
  "normalized_reciprocal_se_rank": 0.2274442114543135,
1290
  "reciprocal_se_rank": 0.0643615944999443,
1291
  "percentage_ge_sources_not_in_se_sources": 46.34146341463415,
1292
+ "percentage_ge_sources_in_se_sources": 40.0
1293
  },
1294
  {
1295
  "model_name": "Perplexity-Sonar-Pro",
 
1312
  "normalized_reciprocal_se_rank": 0.13063939371395492,
1313
  "reciprocal_se_rank": 0.04110024266427558,
1314
  "percentage_ge_sources_not_in_se_sources": 63.29113924050633,
1315
+ "percentage_ge_sources_in_se_sources": 40.0
1316
  },
1317
  {
1318
  "model_name": "Perplexity-Sonar-Pro",
 
1335
  "normalized_reciprocal_se_rank": 0.20414708640646176,
1336
  "reciprocal_se_rank": 0.05876349891805757,
1337
  "percentage_ge_sources_not_in_se_sources": 62.5,
1338
+ "percentage_ge_sources_in_se_sources": 40.0
1339
  },
1340
  {
1341
  "model_name": "Perplexity-Sonar-Pro",
 
1358
  "normalized_reciprocal_se_rank": 0.2024394067077523,
1359
  "reciprocal_se_rank": 0.05835315840793079,
1360
  "percentage_ge_sources_not_in_se_sources": 52.4390243902439,
1361
+ "percentage_ge_sources_in_se_sources": 40.0
1362
  },
1363
  {
1364
  "model_name": "Perplexity-Sonar-Pro",
 
1381
  "normalized_reciprocal_se_rank": 0.051796852838519515,
1382
  "reciprocal_se_rank": 0.02215506900731415,
1383
  "percentage_ge_sources_not_in_se_sources": 85.93750000000001,
1384
+ "percentage_ge_sources_in_se_sources": 40.0
1385
  },
1386
  {
1387
  "model_name": "google-search",
 
1519
  "normalized_reciprocal_se_rank": 0.28063309301928224,
1520
  "reciprocal_se_rank": 0.07714241798278879,
1521
  "percentage_ge_sources_not_in_se_sources": 38.20224719101124,
1522
+ "percentage_ge_sources_in_se_sources": 44.66
1523
  },
1524
  {
1525
  "model_name": "exa",
 
1542
  "normalized_reciprocal_se_rank": 0.22448376867351463,
1543
  "reciprocal_se_rank": 0.06365022596766494,
1544
  "percentage_ge_sources_not_in_se_sources": 54.21686746987952,
1545
+ "percentage_ge_sources_in_se_sources": 44.66
1546
  },
1547
  {
1548
  "model_name": "exa",
 
1565
  "normalized_reciprocal_se_rank": 0.2313966587355651,
1566
  "reciprocal_se_rank": 0.06531133304568201,
1567
  "percentage_ge_sources_not_in_se_sources": 62.06896551724138,
1568
+ "percentage_ge_sources_in_se_sources": 44.66
1569
  },
1570
  {
1571
  "model_name": "exa",
 
1588
  "normalized_reciprocal_se_rank": 0.1994493307755928,
1589
  "reciprocal_se_rank": 0.05763466928830994,
1590
  "percentage_ge_sources_not_in_se_sources": 50.588235294117645,
1591
+ "percentage_ge_sources_in_se_sources": 44.66
1592
  },
1593
  {
1594
  "model_name": "exa",
 
1611
  "normalized_reciprocal_se_rank": 0.07692643713869617,
1612
  "reciprocal_se_rank": 0.028193488535754666,
1613
  "percentage_ge_sources_not_in_se_sources": 82.71604938271606,
1614
+ "percentage_ge_sources_in_se_sources": 44.66
1615
  },
1616
  {
1617
  "model_name": "tavily",
 
1634
  "normalized_reciprocal_se_rank": 0.3434220529106368,
1635
  "reciprocal_se_rank": 0.09223005640328419,
1636
  "percentage_ge_sources_not_in_se_sources": 36.8421052631579,
1637
+ "percentage_ge_sources_in_se_sources": 55.45
1638
  },
1639
  {
1640
  "model_name": "tavily",
 
1657
  "normalized_reciprocal_se_rank": 0.27249775517340985,
1658
  "reciprocal_se_rank": 0.07518756738390195,
1659
  "percentage_ge_sources_not_in_se_sources": 41.55844155844156,
1660
+ "percentage_ge_sources_in_se_sources": 55.45
1661
  },
1662
  {
1663
  "model_name": "tavily",
 
1680
  "normalized_reciprocal_se_rank": 0.3128941728047197,
1681
  "reciprocal_se_rank": 0.08489447356229925,
1682
  "percentage_ge_sources_not_in_se_sources": 45.67901234567901,
1683
+ "percentage_ge_sources_in_se_sources": 55.45
1684
  },
1685
  {
1686
  "model_name": "tavily",
 
1703
  "normalized_reciprocal_se_rank": 0.31851954838074426,
1704
  "reciprocal_se_rank": 0.08624620215945074,
1705
  "percentage_ge_sources_not_in_se_sources": 32.05128205128205,
1706
+ "percentage_ge_sources_in_se_sources": 55.45
1707
  },
1708
  {
1709
  "model_name": "tavily",
 
1726
  "normalized_reciprocal_se_rank": 0.1335959024960005,
1727
  "reciprocal_se_rank": 0.04181066589102925,
1728
  "percentage_ge_sources_not_in_se_sources": 68.67469879518072,
1729
+ "percentage_ge_sources_in_se_sources": 55.45
1730
  },
1731
  {
1732
  "model_name": "gensee",
 
1749
  "normalized_reciprocal_se_rank": 0.17012042957025347,
1750
  "reciprocal_se_rank": 0.05058719060061921,
1751
  "percentage_ge_sources_not_in_se_sources": 58.42696629213483,
1752
+ "percentage_ge_sources_in_se_sources": 28.4
1753
  },
1754
  {
1755
  "model_name": "gensee",
 
1772
  "normalized_reciprocal_se_rank": 0.06456158601930041,
1773
  "reciprocal_se_rank": 0.025222322854152286,
1774
  "percentage_ge_sources_not_in_se_sources": 84.44444444444443,
1775
+ "percentage_ge_sources_in_se_sources": 28.4
1776
  },
1777
  {
1778
  "model_name": "gensee",
 
1795
  "normalized_reciprocal_se_rank": 0.19747215967140244,
1796
  "reciprocal_se_rank": 0.05715957234822537,
1797
  "percentage_ge_sources_not_in_se_sources": 70.12987012987011,
1798
+ "percentage_ge_sources_in_se_sources": 28.4
1799
  },
1800
  {
1801
  "model_name": "gensee",
 
1818
  "normalized_reciprocal_se_rank": 0.12268532386073742,
1819
  "reciprocal_se_rank": 0.03918894918012865,
1820
  "percentage_ge_sources_not_in_se_sources": 62.650602409638545,
1821
+ "percentage_ge_sources_in_se_sources": 28.4
1822
  },
1823
  {
1824
  "model_name": "gensee",
 
1841
  "normalized_reciprocal_se_rank": 0.07056832757590334,
1842
  "reciprocal_se_rank": 0.02666569036411269,
1843
  "percentage_ge_sources_not_in_se_sources": 87.5,
1844
+ "percentage_ge_sources_in_se_sources": 28.4
1845
  }
1846
  ],
1847
  "queries": [