Spaces:
Paused
Paused
github-actions[bot] commited on
Commit ·
b128a99
1
Parent(s): b89bdfe
chore: update benchmark data [skip ci]
Browse files- data/benchmarks.json +924 -829
data/benchmarks.json
CHANGED
|
@@ -7,8 +7,8 @@
|
|
| 7 |
"gpqa": 0.369,
|
| 8 |
"arc": 0.93,
|
| 9 |
"gsm8k": 0.87,
|
| 10 |
-
"arena_elo": 1288.
|
| 11 |
-
"arena_rank":
|
| 12 |
"arena_votes": 8659
|
| 13 |
},
|
| 14 |
{
|
|
@@ -19,8 +19,8 @@
|
|
| 19 |
"gpqa": 0.323,
|
| 20 |
"arc": 0.857,
|
| 21 |
"gsm8k": 0.758,
|
| 22 |
-
"arena_elo": 1238.
|
| 23 |
-
"arena_rank":
|
| 24 |
"arena_votes": 8854
|
| 25 |
},
|
| 26 |
{
|
|
@@ -82,9 +82,9 @@
|
|
| 82 |
"lb_language": 0.39707333333333333,
|
| 83 |
"lb_if": 0.6187925,
|
| 84 |
"lb_data_analysis": 0.5411900000000001,
|
| 85 |
-
"arena_elo": 1323.
|
| 86 |
-
"arena_rank":
|
| 87 |
-
"arena_votes":
|
| 88 |
"aider_pass_rate": 0.617
|
| 89 |
},
|
| 90 |
{
|
|
@@ -106,8 +106,8 @@
|
|
| 106 |
"lb_language": 0.56937,
|
| 107 |
"lb_if": 0.7229999999999999,
|
| 108 |
"lb_data_analysis": 0.5411,
|
| 109 |
-
"arena_elo": 1342.
|
| 110 |
-
"arena_rank":
|
| 111 |
"arena_votes": 82417,
|
| 112 |
"aider_pass_rate": 0.5710000000000001
|
| 113 |
},
|
|
@@ -132,9 +132,9 @@
|
|
| 132 |
"lb_language": 0.54477,
|
| 133 |
"lb_if": 0.69296,
|
| 134 |
"lb_data_analysis": 0.5618650000000001,
|
| 135 |
-
"arena_elo":
|
| 136 |
-
"arena_rank":
|
| 137 |
-
"arena_votes":
|
| 138 |
"aider_pass_rate": 0.6920000000000001
|
| 139 |
},
|
| 140 |
{
|
|
@@ -157,8 +157,8 @@
|
|
| 157 |
"lb_language": 0.30073333333333335,
|
| 158 |
"lb_if": 0.6402924999999999,
|
| 159 |
"lb_data_analysis": 0.3731,
|
| 160 |
-
"arena_elo":
|
| 161 |
-
"arena_rank":
|
| 162 |
"arena_votes": 117705,
|
| 163 |
"aider_pass_rate": 0.406
|
| 164 |
},
|
|
@@ -183,8 +183,8 @@
|
|
| 183 |
"lb_language": 0.53574,
|
| 184 |
"lb_if": 0.6388750000000001,
|
| 185 |
"lb_data_analysis": 0.5784,
|
| 186 |
-
"arena_elo": 1321.
|
| 187 |
-
"arena_rank":
|
| 188 |
"arena_votes": 194904,
|
| 189 |
"aider_pass_rate": 0.534
|
| 190 |
},
|
|
@@ -209,8 +209,8 @@
|
|
| 209 |
"lb_language": 0.38083333333333336,
|
| 210 |
"lb_if": 0.6500425,
|
| 211 |
"lb_data_analysis": 0.38839999999999997,
|
| 212 |
-
"arena_elo": 1280.
|
| 213 |
-
"arena_rank":
|
| 214 |
"arena_votes": 109289,
|
| 215 |
"aider_pass_rate": 0.436
|
| 216 |
},
|
|
@@ -228,8 +228,8 @@
|
|
| 228 |
"lb_language": 0.14644333333333334,
|
| 229 |
"lb_if": 0.571625,
|
| 230 |
"lb_data_analysis": 0.2354,
|
| 231 |
-
"arena_elo": 1226.
|
| 232 |
-
"arena_rank":
|
| 233 |
"arena_votes": 54038
|
| 234 |
},
|
| 235 |
{
|
|
@@ -249,8 +249,8 @@
|
|
| 249 |
"lb_language": 0.5477066666666667,
|
| 250 |
"lb_if": 0.8050825,
|
| 251 |
"lb_data_analysis": 0.69625,
|
| 252 |
-
"arena_elo": 1397.
|
| 253 |
-
"arena_rank":
|
| 254 |
"arena_votes": 18537
|
| 255 |
},
|
| 256 |
{
|
|
@@ -268,8 +268,8 @@
|
|
| 268 |
"lb_language": 0.3518266666666667,
|
| 269 |
"lb_if": 0.6915024999999999,
|
| 270 |
"lb_data_analysis": 0.4417,
|
| 271 |
-
"arena_elo": 1306.
|
| 272 |
-
"arena_rank":
|
| 273 |
"arena_votes": 24574,
|
| 274 |
"aider_pass_rate": 0.5489999999999999
|
| 275 |
},
|
|
@@ -289,8 +289,8 @@
|
|
| 289 |
"lb_language": 0.47484666666666664,
|
| 290 |
"lb_if": 0.75246,
|
| 291 |
"lb_data_analysis": 0.6241,
|
| 292 |
-
"arena_elo": 1358.
|
| 293 |
-
"arena_rank":
|
| 294 |
"arena_votes": 21788
|
| 295 |
},
|
| 296 |
{
|
|
@@ -385,8 +385,8 @@
|
|
| 385 |
"lb_language": 0.32621666666666665,
|
| 386 |
"lb_if": 0.5810025000000001,
|
| 387 |
"lb_data_analysis": 0.4481,
|
| 388 |
-
"arena_elo":
|
| 389 |
-
"arena_rank":
|
| 390 |
"arena_votes": 75764
|
| 391 |
},
|
| 392 |
{
|
|
@@ -415,8 +415,8 @@
|
|
| 415 |
"lb_language": 0.25531,
|
| 416 |
"lb_if": 0.52621,
|
| 417 |
"lb_data_analysis": 0.2959,
|
| 418 |
-
"arena_elo": 1265.
|
| 419 |
-
"arena_rank":
|
| 420 |
"arena_votes": 54615
|
| 421 |
},
|
| 422 |
{
|
|
@@ -452,8 +452,8 @@
|
|
| 452 |
"hf_math_lvl5": 0.3806646525679758,
|
| 453 |
"hf_musr": 0.45806250000000004,
|
| 454 |
"hf_avg": 43.409948245645786,
|
| 455 |
-
"arena_elo": 1293.
|
| 456 |
-
"arena_rank":
|
| 457 |
"arena_votes": 55234,
|
| 458 |
"aider_pass_rate": 0.436
|
| 459 |
},
|
|
@@ -477,8 +477,8 @@
|
|
| 477 |
"hf_math_lvl5": 0.1729607250755287,
|
| 478 |
"hf_musr": 0.3845416666666666,
|
| 479 |
"hf_avg": 28.01011138792457,
|
| 480 |
-
"arena_elo": 1211.
|
| 481 |
-
"arena_rank":
|
| 482 |
"arena_votes": 49605,
|
| 483 |
"aider_pass_rate": 0.263
|
| 484 |
},
|
|
@@ -509,8 +509,8 @@
|
|
| 509 |
"hf_math_lvl5": 0.17673716012084592,
|
| 510 |
"hf_musr": 0.3528541666666667,
|
| 511 |
"hf_avg": 24.204650807793456,
|
| 512 |
-
"arena_elo": 1166.
|
| 513 |
-
"arena_rank":
|
| 514 |
"arena_votes": 7936
|
| 515 |
},
|
| 516 |
{
|
|
@@ -538,9 +538,9 @@
|
|
| 538 |
"hf_math_lvl5": 0.48338368580060426,
|
| 539 |
"hf_musr": 0.44612500000000005,
|
| 540 |
"hf_avg": 44.84747145129876,
|
| 541 |
-
"arena_elo":
|
| 542 |
-
"arena_rank":
|
| 543 |
-
"arena_votes":
|
| 544 |
"aider_pass_rate": 0.42100000000000004
|
| 545 |
},
|
| 546 |
{
|
|
@@ -628,8 +628,8 @@
|
|
| 628 |
"lb_language": 0.29333666666666663,
|
| 629 |
"lb_if": 0.5838349999999999,
|
| 630 |
"lb_data_analysis": 0.4376,
|
| 631 |
-
"arena_elo":
|
| 632 |
-
"arena_rank":
|
| 633 |
"arena_votes": 24126
|
| 634 |
},
|
| 635 |
{
|
|
@@ -729,8 +729,8 @@
|
|
| 729 |
"lb_language": 0.31805,
|
| 730 |
"lb_if": 0.6957099999999999,
|
| 731 |
"lb_data_analysis": 0.33599999999999997,
|
| 732 |
-
"arena_elo": 1298.
|
| 733 |
-
"arena_rank":
|
| 734 |
"arena_votes": 7136
|
| 735 |
},
|
| 736 |
{
|
|
@@ -750,8 +750,8 @@
|
|
| 750 |
"lb_language": 0.2421633333333333,
|
| 751 |
"lb_if": 0.604665,
|
| 752 |
"lb_data_analysis": 0.3682,
|
| 753 |
-
"arena_elo": 1223.
|
| 754 |
-
"arena_rank":
|
| 755 |
"arena_votes": 66191,
|
| 756 |
"aider_pass_rate": 0.414
|
| 757 |
},
|
|
@@ -773,8 +773,8 @@
|
|
| 773 |
"lb_language": 0.4956833333333333,
|
| 774 |
"lb_if": 0.717875,
|
| 775 |
"lb_data_analysis": 0.39039999999999997,
|
| 776 |
-
"arena_elo":
|
| 777 |
-
"arena_rank":
|
| 778 |
"arena_votes": 88721,
|
| 779 |
"aider_pass_rate": 0.466
|
| 780 |
},
|
|
@@ -794,8 +794,8 @@
|
|
| 794 |
"lb_language": 0.45262,
|
| 795 |
"lb_if": 0.713875,
|
| 796 |
"lb_data_analysis": 0.4998,
|
| 797 |
-
"arena_elo":
|
| 798 |
-
"arena_rank":
|
| 799 |
"arena_votes": 98130
|
| 800 |
},
|
| 801 |
{
|
|
@@ -815,8 +815,8 @@
|
|
| 815 |
"lb_language": 0.5393533333333334,
|
| 816 |
"lb_if": 0.7217100000000001,
|
| 817 |
"lb_data_analysis": 0.4661,
|
| 818 |
-
"arena_elo": 1345.
|
| 819 |
-
"arena_rank":
|
| 820 |
"arena_votes": 112863,
|
| 821 |
"aider_pass_rate": 0.602
|
| 822 |
},
|
|
@@ -834,8 +834,8 @@
|
|
| 834 |
"lb_language": 0.4563466666666667,
|
| 835 |
"lb_if": 0.6858299999999999,
|
| 836 |
"lb_data_analysis": 0.6236999999999999,
|
| 837 |
-
"arena_elo": 1334.
|
| 838 |
-
"arena_rank":
|
| 839 |
"arena_votes": 45498,
|
| 840 |
"aider_pass_rate": 0.5710000000000001
|
| 841 |
},
|
|
@@ -857,8 +857,8 @@
|
|
| 857 |
"lb_language": 0.29879333333333336,
|
| 858 |
"lb_if": 0.5679974999999999,
|
| 859 |
"lb_data_analysis": 0.55099,
|
| 860 |
-
"arena_elo": 1317.
|
| 861 |
-
"arena_rank":
|
| 862 |
"arena_votes": 68794,
|
| 863 |
"aider_pass_rate": 0.406
|
| 864 |
},
|
|
@@ -880,8 +880,8 @@
|
|
| 880 |
"hf_math_lvl5": 0.37462235649546827,
|
| 881 |
"hf_musr": 0.41384375,
|
| 882 |
"hf_avg": 27.639223265636087,
|
| 883 |
-
"arena_elo": 1401.
|
| 884 |
-
"arena_rank":
|
| 885 |
"arena_votes": 27822,
|
| 886 |
"aider_pass_rate": 0.654
|
| 887 |
},
|
|
@@ -892,8 +892,8 @@
|
|
| 892 |
"mmlu": 0.852,
|
| 893 |
"gpqa": 0.6,
|
| 894 |
"math": 0.9,
|
| 895 |
-
"arena_elo": 1336.
|
| 896 |
-
"arena_rank":
|
| 897 |
"arena_votes": 51986,
|
| 898 |
"aider_pass_rate": 0.5
|
| 899 |
},
|
|
@@ -904,8 +904,8 @@
|
|
| 904 |
"mmlu": 0.908,
|
| 905 |
"gpqa": 0.733,
|
| 906 |
"mgsm": 0.908,
|
| 907 |
-
"arena_elo": 1388.
|
| 908 |
-
"arena_rank":
|
| 909 |
"arena_votes": 31120,
|
| 910 |
"aider_pass_rate": 0.579
|
| 911 |
},
|
|
@@ -921,9 +921,9 @@
|
|
| 921 |
"mmlu": 0.869,
|
| 922 |
"math": 0.979,
|
| 923 |
"mgsm": 0.92,
|
| 924 |
-
"arena_elo":
|
| 925 |
-
"arena_rank":
|
| 926 |
-
"arena_votes":
|
| 927 |
},
|
| 928 |
{
|
| 929 |
"slug": "openai/o3",
|
|
@@ -997,8 +997,8 @@
|
|
| 997 |
"lb_language": 0.38114999999999993,
|
| 998 |
"lb_if": 0.7550025,
|
| 999 |
"lb_data_analysis": 0.4718,
|
| 1000 |
-
"arena_elo": 1302.
|
| 1001 |
-
"arena_rank":
|
| 1002 |
"arena_votes": 39409
|
| 1003 |
},
|
| 1004 |
{
|
|
@@ -1044,8 +1044,8 @@
|
|
| 1044 |
"lb_language": 0.23245333333333332,
|
| 1045 |
"lb_if": 0.5869175,
|
| 1046 |
"lb_data_analysis": 0.48810000000000003,
|
| 1047 |
-
"arena_elo": 1270.
|
| 1048 |
-
"arena_rank":
|
| 1049 |
"arena_votes": 5430,
|
| 1050 |
"aider_pass_rate": 0.594
|
| 1051 |
},
|
|
@@ -1096,8 +1096,8 @@
|
|
| 1096 |
"lb_language": 0.29213666666666666,
|
| 1097 |
"lb_if": 0.68271,
|
| 1098 |
"lb_data_analysis": 0.1636,
|
| 1099 |
-
"arena_elo": 1261.
|
| 1100 |
-
"arena_rank":
|
| 1101 |
"arena_votes": 37325,
|
| 1102 |
"aider_pass_rate": 0.444
|
| 1103 |
},
|
|
@@ -1160,8 +1160,8 @@
|
|
| 1160 |
"lb_language": 0.21091000000000001,
|
| 1161 |
"lb_if": 0.35587499999999994,
|
| 1162 |
"lb_data_analysis": 0.25925,
|
| 1163 |
-
"arena_elo":
|
| 1164 |
-
"arena_rank":
|
| 1165 |
"arena_votes": 3233
|
| 1166 |
},
|
| 1167 |
{
|
|
@@ -1263,8 +1263,8 @@
|
|
| 1263 |
"hf_math_lvl5": 0.277190332326284,
|
| 1264 |
"hf_musr": 0.4281979166666667,
|
| 1265 |
"hf_avg": 33.35799367075618,
|
| 1266 |
-
"arena_elo": 1213.
|
| 1267 |
-
"arena_rank":
|
| 1268 |
"arena_votes": 24142
|
| 1269 |
},
|
| 1270 |
{
|
|
@@ -1386,8 +1386,8 @@
|
|
| 1386 |
"hf_math_lvl5": 0.06268882175226587,
|
| 1387 |
"hf_musr": 0.39784375,
|
| 1388 |
"hf_avg": 24.226662652803373,
|
| 1389 |
-
"arena_elo": 1183.
|
| 1390 |
-
"arena_rank":
|
| 1391 |
"arena_votes": 15483
|
| 1392 |
},
|
| 1393 |
{
|
|
@@ -1673,8 +1673,8 @@
|
|
| 1673 |
"hf_math_lvl5": 0.03851963746223565,
|
| 1674 |
"hf_musr": 0.47709375000000004,
|
| 1675 |
"hf_avg": 14.343669671742774,
|
| 1676 |
-
"arena_elo":
|
| 1677 |
-
"arena_rank":
|
| 1678 |
"arena_votes": 104636
|
| 1679 |
},
|
| 1680 |
{
|
|
@@ -6142,8 +6142,8 @@
|
|
| 6142 |
"lb_language": 0.07196666666666666,
|
| 6143 |
"lb_if": 0.5279175,
|
| 6144 |
"lb_data_analysis": 0.081,
|
| 6145 |
-
"arena_elo": 1126.
|
| 6146 |
-
"arena_rank":
|
| 6147 |
"arena_votes": 1785
|
| 6148 |
},
|
| 6149 |
{
|
|
@@ -6164,8 +6164,8 @@
|
|
| 6164 |
"lb_language": 0.042846666666666665,
|
| 6165 |
"lb_if": 0.48317,
|
| 6166 |
"lb_data_analysis": 0.0762,
|
| 6167 |
-
"arena_elo":
|
| 6168 |
-
"arena_rank":
|
| 6169 |
"arena_votes": 11116
|
| 6170 |
},
|
| 6171 |
{
|
|
@@ -6191,8 +6191,8 @@
|
|
| 6191 |
"hf_math_lvl5": 0.20468277945619334,
|
| 6192 |
"hf_musr": 0.4465208333333333,
|
| 6193 |
"hf_avg": 34.125963384670946,
|
| 6194 |
-
"arena_elo": 1212.
|
| 6195 |
-
"arena_rank":
|
| 6196 |
"arena_votes": 4653
|
| 6197 |
},
|
| 6198 |
{
|
|
@@ -6290,8 +6290,8 @@
|
|
| 6290 |
"hf_math_lvl5": 0.0581570996978852,
|
| 6291 |
"hf_musr": 0.342125,
|
| 6292 |
"hf_avg": 15.02227766709556,
|
| 6293 |
-
"arena_elo":
|
| 6294 |
-
"arena_rank":
|
| 6295 |
"arena_votes": 2201
|
| 6296 |
},
|
| 6297 |
{
|
|
@@ -10127,8 +10127,8 @@
|
|
| 10127 |
"hf_math_lvl5": 0.12235649546827794,
|
| 10128 |
"hf_musr": 0.4595416666666667,
|
| 10129 |
"hf_avg": 27.353190438571634,
|
| 10130 |
-
"arena_elo": 1164.
|
| 10131 |
-
"arena_rank":
|
| 10132 |
"arena_votes": 3776
|
| 10133 |
},
|
| 10134 |
{
|
|
@@ -11176,9 +11176,9 @@
|
|
| 11176 |
"lb_language": 0.5148133333333332,
|
| 11177 |
"lb_if": 0.81829,
|
| 11178 |
"lb_data_analysis": 0.69529,
|
| 11179 |
-
"arena_elo": 1335.
|
| 11180 |
-
"arena_rank":
|
| 11181 |
-
"arena_votes":
|
| 11182 |
},
|
| 11183 |
{
|
| 11184 |
"hf_id": "Qwen/Qwen1.5-0.5B",
|
|
@@ -11272,8 +11272,8 @@
|
|
| 11272 |
"lb_language": 0.13224333333333332,
|
| 11273 |
"lb_if": 0.5526275,
|
| 11274 |
"lb_data_analysis": 0.20179999999999998,
|
| 11275 |
-
"arena_elo": 1233.
|
| 11276 |
-
"arena_rank":
|
| 11277 |
"arena_votes": 26191,
|
| 11278 |
"aider_pass_rate": 0.308
|
| 11279 |
},
|
|
@@ -11300,8 +11300,8 @@
|
|
| 11300 |
"hf_math_lvl5": 0.15256797583081572,
|
| 11301 |
"hf_musr": 0.43997916666666664,
|
| 11302 |
"hf_avg": 23.566106475051374,
|
| 11303 |
-
"arena_elo": 1190.
|
| 11304 |
-
"arena_rank":
|
| 11305 |
"arena_votes": 17841
|
| 11306 |
},
|
| 11307 |
{
|
|
@@ -11327,8 +11327,8 @@
|
|
| 11327 |
"hf_math_lvl5": 0.19561933534743203,
|
| 11328 |
"hf_musr": 0.4159791666666666,
|
| 11329 |
"hf_avg": 29.25746822860332,
|
| 11330 |
-
"arena_elo": 1203.
|
| 11331 |
-
"arena_rank":
|
| 11332 |
"arena_votes": 21744
|
| 11333 |
},
|
| 11334 |
{
|
|
@@ -11361,8 +11361,8 @@
|
|
| 11361 |
"lb_language": 0.05798333333333333,
|
| 11362 |
"lb_if": 0.27749999999999997,
|
| 11363 |
"lb_data_analysis": 0.0469,
|
| 11364 |
-
"arena_elo":
|
| 11365 |
-
"arena_rank":
|
| 11366 |
"arena_votes": 7598
|
| 11367 |
},
|
| 11368 |
{
|
|
@@ -11395,8 +11395,8 @@
|
|
| 11395 |
"lb_language": 0.061816666666666666,
|
| 11396 |
"lb_if": 0.4411675,
|
| 11397 |
"lb_data_analysis": 0.0435,
|
| 11398 |
-
"arena_elo": 1143.
|
| 11399 |
-
"arena_rank":
|
| 11400 |
"arena_votes": 4735
|
| 11401 |
},
|
| 11402 |
{
|
|
@@ -16644,8 +16644,8 @@
|
|
| 16644 |
"hf_math_lvl5": 0.4501510574018127,
|
| 16645 |
"hf_musr": 0.4948333333333334,
|
| 16646 |
"hf_avg": 42.33178738532094,
|
| 16647 |
-
"arena_elo": 1286.
|
| 16648 |
-
"arena_rank":
|
| 16649 |
"arena_votes": 2846
|
| 16650 |
},
|
| 16651 |
{
|
|
@@ -16695,8 +16695,8 @@
|
|
| 16695 |
"hf_math_lvl5": 0.19637462235649547,
|
| 16696 |
"hf_musr": 0.41746875,
|
| 16697 |
"hf_avg": 26.034998081672143,
|
| 16698 |
-
"arena_elo": 1220.
|
| 16699 |
-
"arena_rank":
|
| 16700 |
"arena_votes": 2895
|
| 16701 |
},
|
| 16702 |
{
|
|
@@ -18637,8 +18637,8 @@
|
|
| 18637 |
"hf_math_lvl5": 0.08383685800604229,
|
| 18638 |
"hf_musr": 0.41201041666666666,
|
| 18639 |
"hf_avg": 20.83936104726783,
|
| 18640 |
-
"arena_elo": 1167.
|
| 18641 |
-
"arena_rank":
|
| 18642 |
"arena_votes": 10224
|
| 18643 |
},
|
| 18644 |
{
|
|
@@ -19608,8 +19608,8 @@
|
|
| 19608 |
"hf_math_lvl5": 0.013595166163141994,
|
| 19609 |
"hf_musr": 0.37390625000000005,
|
| 19610 |
"hf_avg": 6.3704357034963754,
|
| 19611 |
-
"arena_elo": 979.
|
| 19612 |
-
"arena_rank":
|
| 19613 |
"arena_votes": 3412
|
| 19614 |
},
|
| 19615 |
{
|
|
@@ -19759,8 +19759,8 @@
|
|
| 19759 |
"hf_math_lvl5": 0.09290030211480363,
|
| 19760 |
"hf_musr": 0.5058645833333334,
|
| 19761 |
"hf_avg": 27.310631874736753,
|
| 19762 |
-
"arena_elo":
|
| 19763 |
-
"arena_rank":
|
| 19764 |
"arena_votes": 4933
|
| 19765 |
},
|
| 19766 |
{
|
|
@@ -21237,8 +21237,8 @@
|
|
| 21237 |
"hf_math_lvl5": 0.01812688821752266,
|
| 21238 |
"hf_musr": 0.33939583333333334,
|
| 21239 |
"hf_avg": 8.053373854341979,
|
| 21240 |
-
"arena_elo":
|
| 21241 |
-
"arena_rank":
|
| 21242 |
"arena_votes": 10853
|
| 21243 |
},
|
| 21244 |
{
|
|
@@ -21259,8 +21259,8 @@
|
|
| 21259 |
"lb_language": 0.10647333333333332,
|
| 21260 |
"lb_if": 0.443375,
|
| 21261 |
"lb_data_analysis": 0.0726,
|
| 21262 |
-
"arena_elo":
|
| 21263 |
-
"arena_rank":
|
| 21264 |
"arena_votes": 23893
|
| 21265 |
},
|
| 21266 |
{
|
|
@@ -21298,8 +21298,8 @@
|
|
| 21298 |
"hf_math_lvl5": 0.0007552870090634441,
|
| 21299 |
"hf_musr": 0.39288541666666665,
|
| 21300 |
"hf_avg": 17.046939294966545,
|
| 21301 |
-
"arena_elo": 1198.
|
| 21302 |
-
"arena_rank":
|
| 21303 |
"arena_votes": 46618
|
| 21304 |
},
|
| 21305 |
{
|
|
@@ -21349,8 +21349,8 @@
|
|
| 21349 |
"hf_math_lvl5": 0.02039274924471299,
|
| 21350 |
"hf_musr": 0.334125,
|
| 21351 |
"hf_avg": 7.485804130315127,
|
| 21352 |
-
"arena_elo": 1091.
|
| 21353 |
-
"arena_rank":
|
| 21354 |
"arena_votes": 4779
|
| 21355 |
},
|
| 21356 |
{
|
|
@@ -21376,8 +21376,8 @@
|
|
| 21376 |
"hf_math_lvl5": 0.02945619335347432,
|
| 21377 |
"hf_musr": 0.42742708333333335,
|
| 21378 |
"hf_avg": 13.067087110466217,
|
| 21379 |
-
"arena_elo": 1135.
|
| 21380 |
-
"arena_rank":
|
| 21381 |
"arena_votes": 8925
|
| 21382 |
},
|
| 21383 |
{
|
|
@@ -22381,8 +22381,8 @@
|
|
| 22381 |
"hf_math_lvl5": 0.02039274924471299,
|
| 22382 |
"hf_musr": 0.34621875,
|
| 22383 |
"hf_avg": 9.39218439885523,
|
| 22384 |
-
"arena_elo":
|
| 22385 |
-
"arena_rank":
|
| 22386 |
"arena_votes": 2391
|
| 22387 |
},
|
| 22388 |
{
|
|
@@ -22696,8 +22696,8 @@
|
|
| 22696 |
"hf_math_lvl5": 0.09214501510574018,
|
| 22697 |
"hf_musr": 0.35148958333333336,
|
| 22698 |
"hf_avg": 18.396095114284222,
|
| 22699 |
-
"arena_elo": 1155.
|
| 22700 |
-
"arena_rank":
|
| 22701 |
"arena_votes": 6837
|
| 22702 |
},
|
| 22703 |
{
|
|
@@ -22747,8 +22747,8 @@
|
|
| 22747 |
"hf_math_lvl5": 0.1419939577039275,
|
| 22748 |
"hf_musr": 0.3900625,
|
| 22749 |
"hf_avg": 24.027678753483297,
|
| 22750 |
-
"arena_elo": 1181.
|
| 22751 |
-
"arena_rank":
|
| 22752 |
"arena_votes": 6643
|
| 22753 |
},
|
| 22754 |
{
|
|
@@ -22798,8 +22798,8 @@
|
|
| 22798 |
"hf_math_lvl5": 0.15256797583081572,
|
| 22799 |
"hf_musr": 0.3605416666666667,
|
| 22800 |
"hf_avg": 21.712212822028288,
|
| 22801 |
-
"arena_elo":
|
| 22802 |
-
"arena_rank":
|
| 22803 |
"arena_votes": 3191
|
| 22804 |
},
|
| 22805 |
{
|
|
@@ -22849,8 +22849,8 @@
|
|
| 22849 |
"hf_math_lvl5": 0.21978851963746224,
|
| 22850 |
"hf_musr": 0.47070833333333334,
|
| 22851 |
"hf_avg": 30.6030430081627,
|
| 22852 |
-
"arena_elo": 1208.
|
| 22853 |
-
"arena_rank":
|
| 22854 |
"arena_votes": 3092
|
| 22855 |
},
|
| 22856 |
{
|
|
@@ -23318,8 +23318,8 @@
|
|
| 23318 |
"hf_math_lvl5": 0.4078549848942598,
|
| 23319 |
"hf_musr": 0.4558229166666667,
|
| 23320 |
"hf_avg": 38.87959582082076,
|
| 23321 |
-
"arena_elo": 1191.
|
| 23322 |
-
"arena_rank":
|
| 23323 |
"arena_votes": 9902
|
| 23324 |
},
|
| 23325 |
{
|
|
@@ -25538,8 +25538,8 @@
|
|
| 25538 |
"hf_math_lvl5": 0.0702416918429003,
|
| 25539 |
"hf_musr": 0.3328541666666667,
|
| 25540 |
"hf_avg": 14.443126333711135,
|
| 25541 |
-
"arena_elo":
|
| 25542 |
-
"arena_rank":
|
| 25543 |
"arena_votes": 8045
|
| 25544 |
},
|
| 25545 |
{
|
|
@@ -25719,8 +25719,8 @@
|
|
| 25719 |
"lb_language": 0.13909000000000002,
|
| 25720 |
"lb_if": 0.5330400000000001,
|
| 25721 |
"lb_data_analysis": 0.2044,
|
| 25722 |
-
"arena_elo": 1197.
|
| 25723 |
-
"arena_rank":
|
| 25724 |
"arena_votes": 25055
|
| 25725 |
},
|
| 25726 |
{
|
|
@@ -25741,8 +25741,8 @@
|
|
| 25741 |
"lb_language": 0.09153666666666667,
|
| 25742 |
"lb_if": 0.39083500000000004,
|
| 25743 |
"lb_data_analysis": 0.26030000000000003,
|
| 25744 |
-
"arena_elo":
|
| 25745 |
-
"arena_rank":
|
| 25746 |
"arena_votes": 20691
|
| 25747 |
},
|
| 25748 |
{
|
|
@@ -25763,8 +25763,8 @@
|
|
| 25763 |
"lb_language": 0.08559,
|
| 25764 |
"lb_if": 0.363625,
|
| 25765 |
"lb_data_analysis": 0.2232,
|
| 25766 |
-
"arena_elo": 1128
|
| 25767 |
-
"arena_rank":
|
| 25768 |
"arena_votes": 20115
|
| 25769 |
},
|
| 25770 |
{
|
|
@@ -25823,8 +25823,8 @@
|
|
| 25823 |
"lb_language": 0.12944,
|
| 25824 |
"lb_if": 0.472,
|
| 25825 |
"lb_data_analysis": 0.2343,
|
| 25826 |
-
"arena_elo":
|
| 25827 |
-
"arena_rank":
|
| 25828 |
"arena_votes": 17763
|
| 25829 |
},
|
| 25830 |
{
|
|
@@ -26072,8 +26072,8 @@
|
|
| 26072 |
"lb_language": 0.09055,
|
| 26073 |
"lb_if": 0.5165025,
|
| 26074 |
"lb_data_analysis": 0.059300000000000005,
|
| 26075 |
-
"arena_elo": 1149.
|
| 26076 |
-
"arena_rank":
|
| 26077 |
"arena_votes": 19402
|
| 26078 |
},
|
| 26079 |
{
|
|
@@ -26197,8 +26197,8 @@
|
|
| 26197 |
"lb_language": 0.26477666666666666,
|
| 26198 |
"lb_if": 0.63167,
|
| 26199 |
"lb_data_analysis": 0.255,
|
| 26200 |
-
"arena_elo": 1229.
|
| 26201 |
-
"arena_rank":
|
| 26202 |
"arena_votes": 51417
|
| 26203 |
},
|
| 26204 |
{
|
|
@@ -26231,8 +26231,8 @@
|
|
| 26231 |
"lb_language": 0.13761333333333334,
|
| 26232 |
"lb_if": 0.4480825,
|
| 26233 |
"lb_data_analysis": 0.1619,
|
| 26234 |
-
"arena_elo":
|
| 26235 |
-
"arena_rank":
|
| 26236 |
"arena_votes": 73505
|
| 26237 |
},
|
| 26238 |
{
|
|
@@ -28559,8 +28559,8 @@
|
|
| 28559 |
"hf_math_lvl5": 0.07628398791540786,
|
| 28560 |
"hf_musr": 0.42543749999999997,
|
| 28561 |
"hf_avg": 22.70925524673515,
|
| 28562 |
-
"arena_elo":
|
| 28563 |
-
"arena_rank":
|
| 28564 |
"arena_votes": 12636
|
| 28565 |
},
|
| 28566 |
{
|
|
@@ -28598,8 +28598,8 @@
|
|
| 28598 |
"hf_math_lvl5": 0.07250755287009064,
|
| 28599 |
"hf_musr": 0.4228645833333333,
|
| 28600 |
"hf_avg": 21.635827111564595,
|
| 28601 |
-
"arena_elo":
|
| 28602 |
-
"arena_rank":
|
| 28603 |
"arena_votes": 7967
|
| 28604 |
},
|
| 28605 |
{
|
|
@@ -29752,8 +29752,8 @@
|
|
| 29752 |
"hf_math_lvl5": 0.07099697885196375,
|
| 29753 |
"hf_musr": 0.41232291666666665,
|
| 29754 |
"hf_avg": 22.3449346084354,
|
| 29755 |
-
"arena_elo": 1279.
|
| 29756 |
-
"arena_rank":
|
| 29757 |
"arena_votes": 10069
|
| 29758 |
},
|
| 29759 |
{
|
|
@@ -33248,8 +33248,8 @@
|
|
| 33248 |
"lb_language": 0.11368333333333334,
|
| 33249 |
"lb_if": 0.52779,
|
| 33250 |
"lb_data_analysis": 0.1738,
|
| 33251 |
-
"arena_elo":
|
| 33252 |
-
"arena_rank":
|
| 33253 |
"arena_votes": 5006
|
| 33254 |
},
|
| 33255 |
{
|
|
@@ -34302,8 +34302,8 @@
|
|
| 34302 |
"hf_math_lvl5": 0.05664652567975831,
|
| 34303 |
"hf_musr": 0.3899375,
|
| 34304 |
"hf_avg": 20.57236409322395,
|
| 34305 |
-
"arena_elo":
|
| 34306 |
-
"arena_rank":
|
| 34307 |
"arena_votes": 4155
|
| 34308 |
},
|
| 34309 |
{
|
|
@@ -35718,8 +35718,8 @@
|
|
| 35718 |
"lb_language": 0.2793333333333334,
|
| 35719 |
"lb_if": 0.5561674999999999,
|
| 35720 |
"lb_data_analysis": 0.39766500000000005,
|
| 35721 |
-
"arena_elo":
|
| 35722 |
-
"arena_rank":
|
| 35723 |
"arena_votes": 10141
|
| 35724 |
},
|
| 35725 |
{
|
|
@@ -35731,8 +35731,8 @@
|
|
| 35731 |
"lb_language": 0.23921666666666663,
|
| 35732 |
"lb_if": 0.7150825,
|
| 35733 |
"lb_data_analysis": 0.179,
|
| 35734 |
-
"arena_elo": 1261.
|
| 35735 |
-
"arena_rank":
|
| 35736 |
"arena_votes": 77556,
|
| 35737 |
"aider_pass_rate": 0.218
|
| 35738 |
},
|
|
@@ -35746,8 +35746,8 @@
|
|
| 35746 |
"lb_language": 0.3086066666666667,
|
| 35747 |
"lb_if": 0.5761225,
|
| 35748 |
"lb_data_analysis": 0.492345,
|
| 35749 |
-
"arena_elo": 1276.
|
| 35750 |
-
"arena_rank":
|
| 35751 |
"arena_votes": 9869
|
| 35752 |
},
|
| 35753 |
{
|
|
@@ -35759,8 +35759,8 @@
|
|
| 35759 |
"lb_language": 0.33044,
|
| 35760 |
"lb_if": 0.6718325,
|
| 35761 |
"lb_data_analysis": 0.3438,
|
| 35762 |
-
"arena_elo":
|
| 35763 |
-
"arena_rank":
|
| 35764 |
"arena_votes": 15147
|
| 35765 |
},
|
| 35766 |
{
|
|
@@ -35865,8 +35865,8 @@
|
|
| 35865 |
"lb_language": 0.43553333333333333,
|
| 35866 |
"lb_if": 0.6392074999999999,
|
| 35867 |
"lb_data_analysis": 0.5308999999999999,
|
| 35868 |
-
"arena_elo":
|
| 35869 |
-
"arena_rank":
|
| 35870 |
"arena_votes": 93439,
|
| 35871 |
"aider_pass_rate": 0.556
|
| 35872 |
},
|
|
@@ -35919,8 +35919,8 @@
|
|
| 35919 |
"lb_language": 0.28744,
|
| 35920 |
"lb_if": 0.6819175000000001,
|
| 35921 |
"lb_data_analysis": 0.3983,
|
| 35922 |
-
"arena_elo": 1242.
|
| 35923 |
-
"arena_rank":
|
| 35924 |
"arena_votes": 62437
|
| 35925 |
},
|
| 35926 |
{
|
|
@@ -35932,8 +35932,8 @@
|
|
| 35932 |
"lb_language": 0.3979266666666667,
|
| 35933 |
"lb_if": 0.7184575000000001,
|
| 35934 |
"lb_data_analysis": 0.42910000000000004,
|
| 35935 |
-
"arena_elo":
|
| 35936 |
-
"arena_rank":
|
| 35937 |
"arena_votes": 45460
|
| 35938 |
},
|
| 35939 |
{
|
|
@@ -35965,8 +35965,8 @@
|
|
| 35965 |
"lb_language": 0.11368333333333332,
|
| 35966 |
"lb_if": 0.5824975,
|
| 35967 |
"lb_data_analysis": 0.2347,
|
| 35968 |
-
"arena_elo":
|
| 35969 |
-
"arena_rank":
|
| 35970 |
"arena_votes": 39296
|
| 35971 |
},
|
| 35972 |
{
|
|
@@ -35988,8 +35988,8 @@
|
|
| 35988 |
"lb_language": 0.07264333333333332,
|
| 35989 |
"lb_if": 0.3832075,
|
| 35990 |
"lb_data_analysis": 0.03,
|
| 35991 |
-
"arena_elo": 1171.
|
| 35992 |
-
"arena_rank":
|
| 35993 |
"arena_votes": 16057
|
| 35994 |
},
|
| 35995 |
{
|
|
@@ -36044,8 +36044,8 @@
|
|
| 36044 |
"lb_language": 0.29534,
|
| 36045 |
"lb_if": 0.8454575,
|
| 36046 |
"lb_data_analysis": 0.39359999999999995,
|
| 36047 |
-
"arena_elo": 1309.
|
| 36048 |
-
"arena_rank":
|
| 36049 |
"arena_votes": 34909
|
| 36050 |
},
|
| 36051 |
{
|
|
@@ -36057,8 +36057,8 @@
|
|
| 36057 |
"lb_language": 0.4740566666666666,
|
| 36058 |
"lb_if": 0.7774575,
|
| 36059 |
"lb_data_analysis": 0.5145,
|
| 36060 |
-
"arena_elo": 1351.
|
| 36061 |
-
"arena_rank":
|
| 36062 |
"arena_votes": 55607,
|
| 36063 |
"aider_pass_rate": 0.496
|
| 36064 |
},
|
|
@@ -36212,8 +36212,8 @@
|
|
| 36212 |
"lb_language": 0.3427633333333333,
|
| 36213 |
"lb_if": 0.782835,
|
| 36214 |
"lb_data_analysis": 0.5621,
|
| 36215 |
-
"arena_elo": 1353.
|
| 36216 |
-
"arena_rank":
|
| 36217 |
"arena_votes": 24951
|
| 36218 |
},
|
| 36219 |
{
|
|
@@ -36298,8 +36298,8 @@
|
|
| 36298 |
"lb_language": 0.40453333333333336,
|
| 36299 |
"lb_if": 0.67929,
|
| 36300 |
"lb_data_analysis": 0.541955,
|
| 36301 |
-
"arena_elo":
|
| 36302 |
-
"arena_rank":
|
| 36303 |
"arena_votes": 28081,
|
| 36304 |
"aider_pass_rate": 0.466
|
| 36305 |
},
|
|
@@ -36404,9 +36404,9 @@
|
|
| 36404 |
"lb_language": 0.5836933333333333,
|
| 36405 |
"lb_if": 0.7534574999999999,
|
| 36406 |
"lb_data_analysis": 0.64271,
|
| 36407 |
-
"arena_elo": 1374.
|
| 36408 |
-
"arena_rank":
|
| 36409 |
-
"arena_votes":
|
| 36410 |
},
|
| 36411 |
{
|
| 36412 |
"name": "gpt-4.5-preview",
|
|
@@ -36449,9 +36449,9 @@
|
|
| 36449 |
"lb_language": 0.41314333333333336,
|
| 36450 |
"lb_if": 0.7490399999999999,
|
| 36451 |
"lb_data_analysis": 0.387965,
|
| 36452 |
-
"arena_elo": 1365.
|
| 36453 |
-
"arena_rank":
|
| 36454 |
-
"arena_votes":
|
| 36455 |
},
|
| 36456 |
{
|
| 36457 |
"name": "gemini-2.5-pro-exp-03-25",
|
|
@@ -36473,9 +36473,9 @@
|
|
| 36473 |
"lb_language": 0.4682266666666666,
|
| 36474 |
"lb_if": 0.8147075,
|
| 36475 |
"lb_data_analysis": 0.64019,
|
| 36476 |
-
"arena_elo": 1394.
|
| 36477 |
-
"arena_rank":
|
| 36478 |
-
"arena_votes":
|
| 36479 |
},
|
| 36480 |
{
|
| 36481 |
"name": "chatgpt-4o-latest-2025-03-27",
|
|
@@ -36519,9 +36519,9 @@
|
|
| 36519 |
"lb_language": 0.6319400000000001,
|
| 36520 |
"lb_if": 0.764915,
|
| 36521 |
"lb_data_analysis": 0.599645,
|
| 36522 |
-
"arena_elo":
|
| 36523 |
-
"arena_rank":
|
| 36524 |
-
"arena_votes":
|
| 36525 |
},
|
| 36526 |
{
|
| 36527 |
"name": "command-a-03-2025",
|
|
@@ -36533,9 +36533,9 @@
|
|
| 36533 |
"lb_language": 0.36696,
|
| 36534 |
"lb_if": 0.82904,
|
| 36535 |
"lb_data_analysis": 0.48457000000000006,
|
| 36536 |
-
"arena_elo": 1353.
|
| 36537 |
-
"arena_rank":
|
| 36538 |
-
"arena_votes":
|
| 36539 |
},
|
| 36540 |
{
|
| 36541 |
"name": "gemini-1.5-flash-8b-001",
|
|
@@ -36546,8 +36546,8 @@
|
|
| 36546 |
"lb_language": 0.22867666666666667,
|
| 36547 |
"lb_if": 0.6971649999999999,
|
| 36548 |
"lb_data_analysis": 0.4241,
|
| 36549 |
-
"arena_elo": 1258.
|
| 36550 |
-
"arena_rank":
|
| 36551 |
"arena_votes": 35556
|
| 36552 |
},
|
| 36553 |
{
|
|
@@ -36559,9 +36559,9 @@
|
|
| 36559 |
"lb_language": 0.42386999999999997,
|
| 36560 |
"lb_if": 0.8578749999999999,
|
| 36561 |
"lb_data_analysis": 0.7332000000000001,
|
| 36562 |
-
"arena_elo": 1360.
|
| 36563 |
-
"arena_rank":
|
| 36564 |
-
"arena_votes":
|
| 36565 |
},
|
| 36566 |
{
|
| 36567 |
"name": "gemini-2.0-flash-lite-001",
|
|
@@ -36583,8 +36583,8 @@
|
|
| 36583 |
"lb_language": 0.3126966666666667,
|
| 36584 |
"lb_if": 0.7382925,
|
| 36585 |
"lb_data_analysis": 0.4284,
|
| 36586 |
-
"arena_elo": 1341.
|
| 36587 |
-
"arena_rank":
|
| 36588 |
"arena_votes": 3829
|
| 36589 |
},
|
| 36590 |
{
|
|
@@ -36596,8 +36596,8 @@
|
|
| 36596 |
"lb_language": 0.15059333333333333,
|
| 36597 |
"lb_if": 0.6358325,
|
| 36598 |
"lb_data_analysis": 0.36950000000000005,
|
| 36599 |
-
"arena_elo": 1303.
|
| 36600 |
-
"arena_rank":
|
| 36601 |
"arena_votes": 4177
|
| 36602 |
},
|
| 36603 |
{
|
|
@@ -36610,8 +36610,8 @@
|
|
| 36610 |
"lb_language": 0.6475866666666666,
|
| 36611 |
"lb_if": 0.72325,
|
| 36612 |
"lb_data_analysis": 0.600695,
|
| 36613 |
-
"arena_elo": 1444.
|
| 36614 |
-
"arena_rank":
|
| 36615 |
"arena_votes": 14549
|
| 36616 |
},
|
| 36617 |
{
|
|
@@ -36623,9 +36623,9 @@
|
|
| 36623 |
"lb_language": 0.4964766666666667,
|
| 36624 |
"lb_if": 0.7574575,
|
| 36625 |
"lb_data_analysis": 0.6155,
|
| 36626 |
-
"arena_elo": 1327.
|
| 36627 |
-
"arena_rank":
|
| 36628 |
-
"arena_votes":
|
| 36629 |
},
|
| 36630 |
{
|
| 36631 |
"name": "grok-3-beta",
|
|
@@ -36659,9 +36659,9 @@
|
|
| 36659 |
"lb_language": 0.54551,
|
| 36660 |
"lb_if": 0.7704575,
|
| 36661 |
"lb_data_analysis": 0.66404,
|
| 36662 |
-
"arena_elo": 1413.
|
| 36663 |
-
"arena_rank":
|
| 36664 |
-
"arena_votes":
|
| 36665 |
},
|
| 36666 |
{
|
| 36667 |
"name": "gpt-4.1-mini-2025-04-14",
|
|
@@ -36673,9 +36673,9 @@
|
|
| 36673 |
"lb_language": 0.37996,
|
| 36674 |
"lb_if": 0.7030825,
|
| 36675 |
"lb_data_analysis": 0.6133799999999999,
|
| 36676 |
-
"arena_elo": 1381.
|
| 36677 |
-
"arena_rank":
|
| 36678 |
-
"arena_votes":
|
| 36679 |
},
|
| 36680 |
{
|
| 36681 |
"name": "gpt-4.1-nano-2025-04-14",
|
|
@@ -36687,8 +36687,8 @@
|
|
| 36687 |
"lb_language": 0.30958,
|
| 36688 |
"lb_if": 0.5753725,
|
| 36689 |
"lb_data_analysis": 0.498195,
|
| 36690 |
-
"arena_elo": 1321.
|
| 36691 |
-
"arena_rank":
|
| 36692 |
"arena_votes": 6107
|
| 36693 |
},
|
| 36694 |
{
|
|
@@ -36778,9 +36778,9 @@
|
|
| 36778 |
"lb_language": 0.6482266666666666,
|
| 36779 |
"lb_if": 0.79954,
|
| 36780 |
"lb_data_analysis": 0.7153849999999999,
|
| 36781 |
-
"arena_elo": 1419.
|
| 36782 |
-
"arena_rank":
|
| 36783 |
-
"arena_votes":
|
| 36784 |
},
|
| 36785 |
{
|
| 36786 |
"name": "gemini-2.5-flash-preview-05-20",
|
|
@@ -36847,9 +36847,9 @@
|
|
| 36847 |
"lb_language": 0.44743666666666665,
|
| 36848 |
"lb_if": 0.7139575,
|
| 36849 |
"lb_data_analysis": 0.602025,
|
| 36850 |
-
"arena_elo": 1384.
|
| 36851 |
-
"arena_rank":
|
| 36852 |
-
"arena_votes":
|
| 36853 |
},
|
| 36854 |
{
|
| 36855 |
"name": "phi-4-reasoning-plus",
|
|
@@ -36883,9 +36883,9 @@
|
|
| 36883 |
"lb_language": 0.60609,
|
| 36884 |
"lb_if": 0.8772925,
|
| 36885 |
"lb_data_analysis": 0.68308,
|
| 36886 |
-
"arena_elo": 1374.
|
| 36887 |
-
"arena_rank":
|
| 36888 |
-
"arena_votes":
|
| 36889 |
},
|
| 36890 |
{
|
| 36891 |
"name": "qwen3-30b-a3b",
|
|
@@ -36897,9 +36897,9 @@
|
|
| 36897 |
"lb_language": 0.54465,
|
| 36898 |
"lb_if": 0.21108249999999998,
|
| 36899 |
"lb_data_analysis": 0.44922666666666666,
|
| 36900 |
-
"arena_elo": 1328.
|
| 36901 |
-
"arena_rank":
|
| 36902 |
-
"arena_votes":
|
| 36903 |
},
|
| 36904 |
{
|
| 36905 |
"name": "qwen3-32b",
|
|
@@ -36911,8 +36911,8 @@
|
|
| 36911 |
"lb_language": 0.5554233333333333,
|
| 36912 |
"lb_if": 0.1777075,
|
| 36913 |
"lb_data_analysis": 0.4654,
|
| 36914 |
-
"arena_elo": 1347,
|
| 36915 |
-
"arena_rank":
|
| 36916 |
"arena_votes": 3932
|
| 36917 |
},
|
| 36918 |
{
|
|
@@ -36936,9 +36936,9 @@
|
|
| 36936 |
"lb_language": 0.7599833333333333,
|
| 36937 |
"lb_if": 0.2352075,
|
| 36938 |
"lb_data_analysis": 0.47005,
|
| 36939 |
-
"arena_elo":
|
| 36940 |
"arena_rank": 21,
|
| 36941 |
-
"arena_votes":
|
| 36942 |
},
|
| 36943 |
{
|
| 36944 |
"name": "deepseek-v3.1-terminus",
|
|
@@ -36950,9 +36950,9 @@
|
|
| 36950 |
"lb_language": 0.63882,
|
| 36951 |
"lb_if": 0.8189575,
|
| 36952 |
"lb_data_analysis": 0.67298,
|
| 36953 |
-
"arena_elo":
|
| 36954 |
-
"arena_rank":
|
| 36955 |
-
"arena_votes":
|
| 36956 |
},
|
| 36957 |
{
|
| 36958 |
"name": "gemini-2.5-flash-06-05",
|
|
@@ -36997,9 +36997,9 @@
|
|
| 36997 |
"lb_language": 0.6534300000000001,
|
| 36998 |
"lb_if": 0.2767925,
|
| 36999 |
"lb_data_analysis": 0.6098266666666666,
|
| 37000 |
-
"arena_elo": 1404.
|
| 37001 |
-
"arena_rank":
|
| 37002 |
-
"arena_votes":
|
| 37003 |
},
|
| 37004 |
{
|
| 37005 |
"name": "gemini-2.5-pro-06-05",
|
|
@@ -37022,9 +37022,9 @@
|
|
| 37022 |
"lb_language": 0.6162266666666666,
|
| 37023 |
"lb_if": 0.8157925,
|
| 37024 |
"lb_data_analysis": 0.6628999999999999,
|
| 37025 |
-
"arena_elo": 1410.
|
| 37026 |
-
"arena_rank":
|
| 37027 |
-
"arena_votes":
|
| 37028 |
},
|
| 37029 |
{
|
| 37030 |
"name": "glm-4.5-air",
|
|
@@ -37036,9 +37036,9 @@
|
|
| 37036 |
"lb_language": 0.44289666666666666,
|
| 37037 |
"lb_if": 0.7883775000000001,
|
| 37038 |
"lb_data_analysis": 0.65962,
|
| 37039 |
-
"arena_elo": 1371.
|
| 37040 |
-
"arena_rank":
|
| 37041 |
-
"arena_votes":
|
| 37042 |
},
|
| 37043 |
{
|
| 37044 |
"name": "glm-4.6",
|
|
@@ -37050,9 +37050,9 @@
|
|
| 37050 |
"lb_language": 0.5898633333333333,
|
| 37051 |
"lb_if": 0.26192,
|
| 37052 |
"lb_data_analysis": 0.5194766666666667,
|
| 37053 |
-
"arena_elo": 1425.
|
| 37054 |
-
"arena_rank":
|
| 37055 |
-
"arena_votes":
|
| 37056 |
},
|
| 37057 |
{
|
| 37058 |
"name": "gpt-5",
|
|
@@ -37086,9 +37086,9 @@
|
|
| 37086 |
"lb_language": 0.8082699999999999,
|
| 37087 |
"lb_if": 0.8811249999999999,
|
| 37088 |
"lb_data_analysis": 0.716345,
|
| 37089 |
-
"arena_elo":
|
| 37090 |
-
"arena_rank":
|
| 37091 |
-
"arena_votes":
|
| 37092 |
},
|
| 37093 |
{
|
| 37094 |
"name": "gpt-5-low",
|
|
@@ -37122,9 +37122,9 @@
|
|
| 37122 |
"lb_language": 0.7552066666666667,
|
| 37123 |
"lb_if": 0.65271,
|
| 37124 |
"lb_data_analysis": 0.55195,
|
| 37125 |
-
"arena_elo": 1390
|
| 37126 |
-
"arena_rank":
|
| 37127 |
-
"arena_votes":
|
| 37128 |
},
|
| 37129 |
{
|
| 37130 |
"name": "gpt-5-mini-low",
|
|
@@ -37180,9 +37180,9 @@
|
|
| 37180 |
"lb_language": 0.46841666666666665,
|
| 37181 |
"lb_if": 0.5569975,
|
| 37182 |
"lb_data_analysis": 0.4340566666666667,
|
| 37183 |
-
"arena_elo": 1337.
|
| 37184 |
-
"arena_rank":
|
| 37185 |
-
"arena_votes":
|
| 37186 |
},
|
| 37187 |
{
|
| 37188 |
"name": "gpt-5-nano-low",
|
|
@@ -37216,9 +37216,9 @@
|
|
| 37216 |
"lb_language": 0.62963,
|
| 37217 |
"lb_if": 0.7300425,
|
| 37218 |
"lb_data_analysis": 0.644815,
|
| 37219 |
-
"arena_elo":
|
| 37220 |
-
"arena_rank":
|
| 37221 |
-
"arena_votes":
|
| 37222 |
},
|
| 37223 |
{
|
| 37224 |
"name": "gpt-oss-120b",
|
|
@@ -37230,9 +37230,9 @@
|
|
| 37230 |
"lb_language": 0.48590666666666665,
|
| 37231 |
"lb_if": 0.5029175,
|
| 37232 |
"lb_data_analysis": 0.38804999999999995,
|
| 37233 |
-
"arena_elo":
|
| 37234 |
-
"arena_rank":
|
| 37235 |
-
"arena_votes":
|
| 37236 |
},
|
| 37237 |
{
|
| 37238 |
"name": "grok-4-0709",
|
|
@@ -37244,9 +37244,9 @@
|
|
| 37244 |
"lb_language": 0.76388,
|
| 37245 |
"lb_if": 0.29075,
|
| 37246 |
"lb_data_analysis": 0.6337666666666667,
|
| 37247 |
-
"arena_elo": 1409.
|
| 37248 |
-
"arena_rank":
|
| 37249 |
-
"arena_votes":
|
| 37250 |
},
|
| 37251 |
{
|
| 37252 |
"name": "grok-code-fast-1-0825",
|
|
@@ -37280,9 +37280,9 @@
|
|
| 37280 |
"lb_language": 0.6606966666666668,
|
| 37281 |
"lb_if": 0.2172075,
|
| 37282 |
"lb_data_analysis": 0.4471566666666667,
|
| 37283 |
-
"arena_elo": 1422.
|
| 37284 |
-
"arena_rank":
|
| 37285 |
-
"arena_votes":
|
| 37286 |
},
|
| 37287 |
{
|
| 37288 |
"name": "qwen3-235b-a22b-thinking-2507",
|
|
@@ -37294,9 +37294,9 @@
|
|
| 37294 |
"lb_language": 0.6952366666666666,
|
| 37295 |
"lb_if": 0.40641999999999995,
|
| 37296 |
"lb_data_analysis": 0.5218266666666667,
|
| 37297 |
-
"arena_elo": 1398.
|
| 37298 |
-
"arena_rank":
|
| 37299 |
-
"arena_votes":
|
| 37300 |
},
|
| 37301 |
{
|
| 37302 |
"name": "qwen3-coder-480b-a35b-instruct",
|
|
@@ -37308,9 +37308,9 @@
|
|
| 37308 |
"lb_language": 0.6426233333333333,
|
| 37309 |
"lb_if": 0.741625,
|
| 37310 |
"lb_data_analysis": 0.64683,
|
| 37311 |
-
"arena_elo": 1386.
|
| 37312 |
-
"arena_rank":
|
| 37313 |
-
"arena_votes":
|
| 37314 |
},
|
| 37315 |
{
|
| 37316 |
"name": "qwen3-max-2025-09-23",
|
|
@@ -37322,9 +37322,9 @@
|
|
| 37322 |
"lb_language": 0.7144733333333333,
|
| 37323 |
"lb_if": 0.76546,
|
| 37324 |
"lb_data_analysis": 0.6536649999999999,
|
| 37325 |
-
"arena_elo": 1424.
|
| 37326 |
-
"arena_rank":
|
| 37327 |
-
"arena_votes":
|
| 37328 |
},
|
| 37329 |
{
|
| 37330 |
"name": "qwen3-next-80b-a3b-instruct",
|
|
@@ -37336,8 +37336,8 @@
|
|
| 37336 |
"lb_language": 0.6633766666666666,
|
| 37337 |
"lb_if": 0.191875,
|
| 37338 |
"lb_data_analysis": 0.49784,
|
| 37339 |
-
"arena_elo": 1401.
|
| 37340 |
-
"arena_rank":
|
| 37341 |
"arena_votes": 22670
|
| 37342 |
},
|
| 37343 |
{
|
|
@@ -37350,8 +37350,8 @@
|
|
| 37350 |
"lb_language": 0.5631166666666667,
|
| 37351 |
"lb_if": 0.41541999999999996,
|
| 37352 |
"lb_data_analysis": 0.5358333333333333,
|
| 37353 |
-
"arena_elo": 1368.
|
| 37354 |
-
"arena_rank":
|
| 37355 |
"arena_votes": 13767
|
| 37356 |
},
|
| 37357 |
{
|
|
@@ -37364,9 +37364,9 @@
|
|
| 37364 |
"lb_language": 0.5704566666666667,
|
| 37365 |
"lb_if": 0.17754250000000002,
|
| 37366 |
"lb_data_analysis": 0.45124999999999993,
|
| 37367 |
-
"arena_elo":
|
| 37368 |
-
"arena_rank":
|
| 37369 |
-
"arena_votes":
|
| 37370 |
},
|
| 37371 |
{
|
| 37372 |
"name": "deepseek-v3.2-exp",
|
|
@@ -37378,9 +37378,9 @@
|
|
| 37378 |
"lb_language": 0.65596,
|
| 37379 |
"lb_if": 0.1932925,
|
| 37380 |
"lb_data_analysis": 0.4425866666666667,
|
| 37381 |
-
"arena_elo": 1423.
|
| 37382 |
-
"arena_rank":
|
| 37383 |
-
"arena_votes":
|
| 37384 |
},
|
| 37385 |
{
|
| 37386 |
"name": "minimax-m2",
|
|
@@ -37392,9 +37392,9 @@
|
|
| 37392 |
"lb_language": 0.47647666666666666,
|
| 37393 |
"lb_if": 0.810165,
|
| 37394 |
"lb_data_analysis": 0.6755800000000001,
|
| 37395 |
-
"arena_elo": 1346.
|
| 37396 |
-
"arena_rank":
|
| 37397 |
-
"arena_votes":
|
| 37398 |
},
|
| 37399 |
{
|
| 37400 |
"name": "kimi-k2",
|
|
@@ -37516,9 +37516,9 @@
|
|
| 37516 |
"lb_language": 0.7432699999999999,
|
| 37517 |
"lb_if": 0.28204,
|
| 37518 |
"lb_data_analysis": 0.52238,
|
| 37519 |
-
"arena_elo": 1430.
|
| 37520 |
-
"arena_rank":
|
| 37521 |
-
"arena_votes":
|
| 37522 |
},
|
| 37523 |
{
|
| 37524 |
"name": "claude-opus-4-5-20251101",
|
|
@@ -37530,9 +37530,9 @@
|
|
| 37530 |
"lb_language": 0.7709166666666668,
|
| 37531 |
"lb_if": 0.26591750000000003,
|
| 37532 |
"lb_data_analysis": 0.4561233333333334,
|
| 37533 |
-
"arena_elo": 1467.
|
| 37534 |
"arena_rank": 11,
|
| 37535 |
-
"arena_votes":
|
| 37536 |
},
|
| 37537 |
{
|
| 37538 |
"name": "deepseek-v3.2",
|
|
@@ -37544,9 +37544,9 @@
|
|
| 37544 |
"lb_language": 0.6423933333333333,
|
| 37545 |
"lb_if": 0.230625,
|
| 37546 |
"lb_data_analysis": 0.45034,
|
| 37547 |
-
"arena_elo":
|
| 37548 |
-
"arena_rank":
|
| 37549 |
-
"arena_votes":
|
| 37550 |
},
|
| 37551 |
{
|
| 37552 |
"name": "deepseek-v3.2-speciale",
|
|
@@ -37602,9 +37602,9 @@
|
|
| 37602 |
"lb_language": 0.49737666666666663,
|
| 37603 |
"lb_if": 0.1706225,
|
| 37604 |
"lb_data_analysis": 0.46410666666666667,
|
| 37605 |
-
"arena_elo": 1377.
|
| 37606 |
-
"arena_rank":
|
| 37607 |
-
"arena_votes":
|
| 37608 |
},
|
| 37609 |
{
|
| 37610 |
"name": "gpt-5.1-2025-11-13-low",
|
|
@@ -37715,9 +37715,9 @@
|
|
| 37715 |
"lb_language": 0.6522633333333333,
|
| 37716 |
"lb_if": 0.3565825,
|
| 37717 |
"lb_data_analysis": 0.5517133333333334,
|
| 37718 |
-
"arena_elo": 1440.
|
| 37719 |
-
"arena_rank":
|
| 37720 |
-
"arena_votes":
|
| 37721 |
},
|
| 37722 |
{
|
| 37723 |
"name": "arcee-trinity-large-preview",
|
|
@@ -37740,9 +37740,9 @@
|
|
| 37740 |
"lb_language": 0.8326966666666666,
|
| 37741 |
"lb_if": 0.633125,
|
| 37742 |
"lb_data_analysis": 0.6989299999999999,
|
| 37743 |
-
"arena_elo":
|
| 37744 |
-
"arena_rank":
|
| 37745 |
-
"arena_votes":
|
| 37746 |
},
|
| 37747 |
{
|
| 37748 |
"name": "claude-sonnet-4-6",
|
|
@@ -37754,9 +37754,9 @@
|
|
| 37754 |
"lb_language": 0.7769333333333334,
|
| 37755 |
"lb_if": 0.639165,
|
| 37756 |
"lb_data_analysis": 0.7605666666666667,
|
| 37757 |
-
"arena_elo":
|
| 37758 |
-
"arena_rank":
|
| 37759 |
-
"arena_votes":
|
| 37760 |
},
|
| 37761 |
{
|
| 37762 |
"name": "gemini-3.1-pro-preview-high",
|
|
@@ -37769,6 +37769,17 @@
|
|
| 37769 |
"lb_if": 0.791,
|
| 37770 |
"lb_data_analysis": 0.7854133333333334
|
| 37771 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37772 |
{
|
| 37773 |
"name": "glm-5",
|
| 37774 |
"lb_name": "glm-5",
|
|
@@ -37779,9 +37790,9 @@
|
|
| 37779 |
"lb_language": 0.7752800000000001,
|
| 37780 |
"lb_if": 0.5532900000000001,
|
| 37781 |
"lb_data_analysis": 0.67896,
|
| 37782 |
-
"arena_elo":
|
| 37783 |
-
"arena_rank":
|
| 37784 |
-
"arena_votes":
|
| 37785 |
},
|
| 37786 |
{
|
| 37787 |
"name": "gpt-5.2-codex",
|
|
@@ -37816,6 +37827,42 @@
|
|
| 37816 |
"lb_if": 0.713415,
|
| 37817 |
"lb_data_analysis": 0.49679
|
| 37818 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37819 |
{
|
| 37820 |
"name": "kimi-k2.5",
|
| 37821 |
"lb_name": "kimi-k2.5-thinking",
|
|
@@ -37826,9 +37873,9 @@
|
|
| 37826 |
"lb_language": 0.77666,
|
| 37827 |
"lb_if": 0.574125,
|
| 37828 |
"lb_data_analysis": 0.6135766666666667,
|
| 37829 |
-
"arena_elo": 1451.
|
| 37830 |
"arena_rank": 19,
|
| 37831 |
-
"arena_votes":
|
| 37832 |
},
|
| 37833 |
{
|
| 37834 |
"name": "minimax-m2.5",
|
|
@@ -37840,752 +37887,800 @@
|
|
| 37840 |
"lb_language": 0.5510100000000001,
|
| 37841 |
"lb_if": 0.5723325,
|
| 37842 |
"lb_data_analysis": 0.49605000000000005,
|
| 37843 |
-
"arena_elo": 1401.
|
| 37844 |
-
"arena_rank":
|
| 37845 |
-
"arena_votes":
|
| 37846 |
},
|
| 37847 |
{
|
| 37848 |
"name": "claude-opus-4-6-thinking",
|
| 37849 |
"arena_name": "claude-opus-4-6-thinking",
|
| 37850 |
"arena_org": "Anthropic",
|
| 37851 |
-
"arena_elo":
|
| 37852 |
-
"arena_rank":
|
| 37853 |
-
"arena_votes":
|
| 37854 |
},
|
| 37855 |
{
|
| 37856 |
"name": "gemini-3.1-pro-preview",
|
| 37857 |
"arena_name": "gemini-3.1-pro-preview",
|
| 37858 |
"arena_org": "Google",
|
| 37859 |
-
"arena_elo": 1500.
|
| 37860 |
"arena_rank": 3,
|
| 37861 |
-
"arena_votes":
|
| 37862 |
},
|
| 37863 |
{
|
| 37864 |
"name": "grok-4.20-beta1",
|
| 37865 |
"arena_name": "grok-4.20-beta1",
|
| 37866 |
"arena_org": "xAI",
|
| 37867 |
-
"arena_elo":
|
| 37868 |
"arena_rank": 4,
|
| 37869 |
-
"arena_votes":
|
| 37870 |
},
|
| 37871 |
{
|
| 37872 |
"name": "gemini-3-pro",
|
| 37873 |
"arena_name": "gemini-3-pro",
|
| 37874 |
"arena_org": "Google",
|
| 37875 |
-
"arena_elo":
|
| 37876 |
"arena_rank": 5,
|
| 37877 |
-
"arena_votes":
|
| 37878 |
},
|
| 37879 |
{
|
| 37880 |
"name": "gpt-5.2-chat-latest-20260210",
|
| 37881 |
"arena_name": "gpt-5.2-chat-latest-20260210",
|
| 37882 |
"arena_org": "OpenAI",
|
| 37883 |
-
"arena_elo":
|
| 37884 |
-
"arena_rank":
|
| 37885 |
-
"arena_votes":
|
| 37886 |
},
|
| 37887 |
{
|
| 37888 |
"name": "gemini-3-flash",
|
| 37889 |
"arena_name": "gemini-3-flash",
|
| 37890 |
"arena_org": "Google",
|
| 37891 |
-
"arena_elo":
|
| 37892 |
-
"arena_rank":
|
| 37893 |
-
"arena_votes":
|
| 37894 |
},
|
| 37895 |
{
|
| 37896 |
"name": "grok-4.1-thinking",
|
| 37897 |
"arena_name": "grok-4.1-thinking",
|
| 37898 |
"arena_org": "xAI",
|
| 37899 |
-
"arena_elo": 1472.
|
| 37900 |
-
"arena_rank":
|
| 37901 |
-
"arena_votes":
|
| 37902 |
},
|
| 37903 |
{
|
| 37904 |
"name": "claude-opus-4-5-20251101-thinking-32k",
|
| 37905 |
"arena_name": "claude-opus-4-5-20251101-thinking-32k",
|
| 37906 |
"arena_org": "Anthropic",
|
| 37907 |
-
"arena_elo": 1470.
|
| 37908 |
-
"arena_rank":
|
| 37909 |
-
"arena_votes":
|
| 37910 |
},
|
| 37911 |
{
|
| 37912 |
"name": "dola-seed-2.0-preview",
|
| 37913 |
"arena_name": "dola-seed-2.0-preview",
|
| 37914 |
"arena_org": "Bytedance",
|
| 37915 |
-
"arena_elo":
|
| 37916 |
-
"arena_rank":
|
| 37917 |
-
"arena_votes":
|
| 37918 |
},
|
| 37919 |
{
|
| 37920 |
"name": "grok-4.1",
|
| 37921 |
"arena_name": "grok-4.1",
|
| 37922 |
"arena_org": "xAI",
|
| 37923 |
-
"arena_elo": 1462.
|
| 37924 |
-
"arena_rank":
|
| 37925 |
-
"arena_votes":
|
| 37926 |
},
|
| 37927 |
{
|
| 37928 |
"name": "gemini-3-flash (thinking-minimal)",
|
| 37929 |
"arena_name": "gemini-3-flash (thinking-minimal)",
|
| 37930 |
"arena_org": "Google",
|
| 37931 |
-
"arena_elo": 1461.
|
| 37932 |
-
"arena_rank":
|
| 37933 |
-
"arena_votes":
|
| 37934 |
},
|
| 37935 |
{
|
| 37936 |
-
"name": "gpt-5.
|
| 37937 |
-
"arena_name": "gpt-5.
|
| 37938 |
"arena_org": "OpenAI",
|
| 37939 |
-
"arena_elo":
|
| 37940 |
"arena_rank": 15,
|
| 37941 |
-
"arena_votes":
|
| 37942 |
},
|
| 37943 |
{
|
| 37944 |
-
"name": "
|
| 37945 |
-
"arena_name": "
|
| 37946 |
-
"arena_org": "
|
| 37947 |
-
"arena_elo":
|
| 37948 |
"arena_rank": 17,
|
| 37949 |
-
"arena_votes":
|
| 37950 |
},
|
| 37951 |
{
|
| 37952 |
"name": "ernie-5.0-0110",
|
| 37953 |
"arena_name": "ernie-5.0-0110",
|
| 37954 |
"arena_org": "Baidu",
|
| 37955 |
-
"arena_elo":
|
| 37956 |
-
"arena_rank":
|
| 37957 |
-
"arena_votes":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37958 |
},
|
| 37959 |
{
|
| 37960 |
"name": "claude-sonnet-4-5-20250929-thinking-32k",
|
| 37961 |
"arena_name": "claude-sonnet-4-5-20250929-thinking-32k",
|
| 37962 |
"arena_org": "Anthropic",
|
| 37963 |
-
"arena_elo": 1449.
|
| 37964 |
-
"arena_rank":
|
| 37965 |
-
"arena_votes":
|
| 37966 |
-
},
|
| 37967 |
-
{
|
| 37968 |
-
"name": "gemini-2.5-pro",
|
| 37969 |
-
"arena_name": "gemini-2.5-pro",
|
| 37970 |
-
"arena_org": "Google",
|
| 37971 |
-
"arena_elo": 1449.24,
|
| 37972 |
-
"arena_rank": 22,
|
| 37973 |
-
"arena_votes": 97296
|
| 37974 |
},
|
| 37975 |
{
|
| 37976 |
"name": "ernie-5.0-preview-1203",
|
| 37977 |
"arena_name": "ernie-5.0-preview-1203",
|
| 37978 |
"arena_org": "Baidu",
|
| 37979 |
-
"arena_elo": 1449.
|
| 37980 |
-
"arena_rank":
|
| 37981 |
-
"arena_votes":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37982 |
},
|
| 37983 |
{
|
| 37984 |
"name": "claude-opus-4-1-20250805-thinking-16k",
|
| 37985 |
"arena_name": "claude-opus-4-1-20250805-thinking-16k",
|
| 37986 |
"arena_org": "Anthropic",
|
| 37987 |
-
"arena_elo": 1448.
|
| 37988 |
-
"arena_rank":
|
| 37989 |
-
"arena_votes":
|
| 37990 |
},
|
| 37991 |
{
|
| 37992 |
"name": "claude-opus-4-1-20250805",
|
| 37993 |
"arena_name": "claude-opus-4-1-20250805",
|
| 37994 |
"arena_org": "Anthropic",
|
| 37995 |
-
"arena_elo": 1446.
|
| 37996 |
-
"arena_rank":
|
| 37997 |
-
"arena_votes":
|
| 37998 |
},
|
| 37999 |
{
|
| 38000 |
"name": "chatgpt-4o-latest-20250326",
|
| 38001 |
"arena_name": "chatgpt-4o-latest-20250326",
|
| 38002 |
"arena_org": "OpenAI",
|
| 38003 |
-
"arena_elo": 1442.
|
| 38004 |
-
"arena_rank":
|
| 38005 |
-
"arena_votes":
|
| 38006 |
},
|
| 38007 |
{
|
| 38008 |
"name": "gpt-5.2-high",
|
| 38009 |
"arena_name": "gpt-5.2-high",
|
| 38010 |
"arena_org": "OpenAI",
|
| 38011 |
-
"arena_elo":
|
| 38012 |
-
"arena_rank":
|
| 38013 |
-
"arena_votes":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38014 |
},
|
| 38015 |
{
|
| 38016 |
"name": "gpt-5.1",
|
| 38017 |
"arena_name": "gpt-5.1",
|
| 38018 |
"arena_org": "OpenAI",
|
| 38019 |
-
"arena_elo":
|
| 38020 |
-
"arena_rank":
|
| 38021 |
-
"arena_votes":
|
| 38022 |
},
|
| 38023 |
{
|
| 38024 |
-
"name": "
|
| 38025 |
-
"arena_name": "
|
| 38026 |
-
"arena_org": "
|
| 38027 |
-
"arena_elo":
|
| 38028 |
-
"arena_rank":
|
| 38029 |
-
"arena_votes":
|
| 38030 |
},
|
| 38031 |
{
|
| 38032 |
"name": "qwen3-max-preview",
|
| 38033 |
"arena_name": "qwen3-max-preview",
|
| 38034 |
"arena_org": "Alibaba",
|
| 38035 |
-
"arena_elo": 1434.
|
| 38036 |
-
"arena_rank":
|
| 38037 |
-
"arena_votes":
|
| 38038 |
},
|
| 38039 |
{
|
| 38040 |
"name": "kimi-k2.5-instant",
|
| 38041 |
"arena_name": "kimi-k2.5-instant",
|
| 38042 |
"arena_org": "Moonshot",
|
| 38043 |
-
"arena_elo":
|
| 38044 |
-
"arena_rank":
|
| 38045 |
-
"arena_votes":
|
| 38046 |
},
|
| 38047 |
{
|
| 38048 |
"name": "o3-2025-04-16",
|
| 38049 |
"arena_name": "o3-2025-04-16",
|
| 38050 |
"arena_org": "OpenAI",
|
| 38051 |
-
"arena_elo": 1432.
|
| 38052 |
-
"arena_rank":
|
| 38053 |
-
"arena_votes":
|
| 38054 |
},
|
| 38055 |
{
|
| 38056 |
"name": "kimi-k2-thinking-turbo",
|
| 38057 |
"arena_name": "kimi-k2-thinking-turbo",
|
| 38058 |
"arena_org": "Moonshot",
|
| 38059 |
-
"arena_elo":
|
| 38060 |
-
"arena_rank":
|
| 38061 |
-
"arena_votes":
|
| 38062 |
},
|
| 38063 |
{
|
| 38064 |
"name": "claude-opus-4-20250514-thinking-16k",
|
| 38065 |
"arena_name": "claude-opus-4-20250514-thinking-16k",
|
| 38066 |
"arena_org": "Anthropic",
|
| 38067 |
-
"arena_elo": 1423.
|
| 38068 |
-
"arena_rank":
|
| 38069 |
-
"arena_votes":
|
| 38070 |
},
|
| 38071 |
{
|
| 38072 |
"name": "deepseek-v3.2-exp-thinking",
|
| 38073 |
"arena_name": "deepseek-v3.2-exp-thinking",
|
| 38074 |
"arena_org": "DeepSeek",
|
| 38075 |
-
"arena_elo": 1423.
|
| 38076 |
-
"arena_rank":
|
| 38077 |
-
"arena_votes":
|
| 38078 |
},
|
| 38079 |
{
|
| 38080 |
"name": "grok-4-fast-chat",
|
| 38081 |
"arena_name": "grok-4-fast-chat",
|
| 38082 |
"arena_org": "xAI",
|
| 38083 |
-
"arena_elo": 1421.
|
| 38084 |
-
"arena_rank":
|
| 38085 |
-
"arena_votes":
|
| 38086 |
},
|
| 38087 |
{
|
| 38088 |
"name": "deepseek-v3.2-thinking",
|
| 38089 |
"arena_name": "deepseek-v3.2-thinking",
|
| 38090 |
"arena_org": "DeepSeek",
|
| 38091 |
-
"arena_elo":
|
| 38092 |
-
"arena_rank":
|
| 38093 |
-
"arena_votes":
|
| 38094 |
},
|
| 38095 |
{
|
| 38096 |
"name": "ernie-5.0-preview-1022",
|
| 38097 |
"arena_name": "ernie-5.0-preview-1022",
|
| 38098 |
"arena_org": "Baidu",
|
| 38099 |
-
"arena_elo": 1418.
|
| 38100 |
-
"arena_rank":
|
| 38101 |
-
"arena_votes":
|
| 38102 |
},
|
| 38103 |
{
|
| 38104 |
"name": "deepseek-v3.1",
|
| 38105 |
"arena_name": "deepseek-v3.1",
|
| 38106 |
"arena_org": "DeepSeek",
|
| 38107 |
-
"arena_elo": 1418.
|
| 38108 |
-
"arena_rank":
|
| 38109 |
-
"arena_votes":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38110 |
},
|
| 38111 |
{
|
| 38112 |
"name": "deepseek-v3.1-thinking",
|
| 38113 |
"arena_name": "deepseek-v3.1-thinking",
|
| 38114 |
"arena_org": "DeepSeek",
|
| 38115 |
-
"arena_elo": 1417.
|
| 38116 |
-
"arena_rank":
|
| 38117 |
-
"arena_votes":
|
| 38118 |
},
|
| 38119 |
{
|
| 38120 |
"name": "kimi-k2-0905-preview",
|
| 38121 |
"arena_name": "kimi-k2-0905-preview",
|
| 38122 |
"arena_org": "Moonshot",
|
| 38123 |
-
"arena_elo": 1417.
|
| 38124 |
-
"arena_rank":
|
| 38125 |
-
"arena_votes":
|
| 38126 |
},
|
| 38127 |
{
|
| 38128 |
"name": "kimi-k2-0711-preview",
|
| 38129 |
"arena_name": "kimi-k2-0711-preview",
|
| 38130 |
"arena_org": "Moonshot",
|
| 38131 |
-
"arena_elo": 1416.
|
| 38132 |
-
"arena_rank":
|
| 38133 |
-
"arena_votes":
|
| 38134 |
},
|
| 38135 |
{
|
| 38136 |
"name": "deepseek-v3.1-terminus-thinking",
|
| 38137 |
"arena_name": "deepseek-v3.1-terminus-thinking",
|
| 38138 |
"arena_org": "DeepSeek",
|
| 38139 |
-
"arena_elo": 1415.
|
| 38140 |
-
"arena_rank":
|
| 38141 |
-
"arena_votes":
|
| 38142 |
},
|
| 38143 |
{
|
| 38144 |
-
"name": "
|
| 38145 |
-
"arena_name": "
|
| 38146 |
-
"arena_org": "
|
| 38147 |
-
"arena_elo":
|
| 38148 |
-
"arena_rank":
|
| 38149 |
-
"arena_votes":
|
| 38150 |
},
|
| 38151 |
{
|
| 38152 |
"name": "mistral-large-3",
|
| 38153 |
"arena_name": "mistral-large-3",
|
| 38154 |
"arena_org": "Mistral",
|
| 38155 |
-
"arena_elo": 1414.
|
| 38156 |
-
"arena_rank":
|
| 38157 |
-
"arena_votes":
|
| 38158 |
},
|
| 38159 |
{
|
| 38160 |
-
"name": "
|
| 38161 |
-
"arena_name": "
|
| 38162 |
-
"arena_org": "
|
| 38163 |
-
"arena_elo":
|
| 38164 |
-
"arena_rank":
|
| 38165 |
-
"arena_votes":
|
| 38166 |
},
|
| 38167 |
{
|
| 38168 |
"name": "claude-opus-4-20250514",
|
| 38169 |
"arena_name": "claude-opus-4-20250514",
|
| 38170 |
"arena_org": "Anthropic",
|
| 38171 |
-
"arena_elo": 1412.
|
| 38172 |
-
"arena_rank":
|
| 38173 |
-
"arena_votes":
|
| 38174 |
-
},
|
| 38175 |
-
{
|
| 38176 |
-
"name": "mistral-medium-2508",
|
| 38177 |
-
"arena_name": "mistral-medium-2508",
|
| 38178 |
-
"arena_org": "Mistral",
|
| 38179 |
-
"arena_elo": 1411.39,
|
| 38180 |
-
"arena_rank": 61,
|
| 38181 |
-
"arena_votes": 65627
|
| 38182 |
},
|
| 38183 |
{
|
| 38184 |
"name": "grok-3-preview-02-24",
|
| 38185 |
"arena_name": "grok-3-preview-02-24",
|
| 38186 |
"arena_org": "xAI",
|
| 38187 |
-
"arena_elo": 1411.
|
| 38188 |
-
"arena_rank":
|
| 38189 |
-
"arena_votes":
|
| 38190 |
},
|
| 38191 |
{
|
| 38192 |
"name": "gemini-2.5-flash",
|
| 38193 |
"arena_name": "gemini-2.5-flash",
|
| 38194 |
"arena_org": "Google",
|
| 38195 |
-
"arena_elo": 1410.
|
| 38196 |
-
"arena_rank":
|
| 38197 |
-
"arena_votes":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38198 |
},
|
| 38199 |
{
|
| 38200 |
"name": "grok-4-fast-reasoning",
|
| 38201 |
"arena_name": "grok-4-fast-reasoning",
|
| 38202 |
"arena_org": "xAI",
|
| 38203 |
-
"arena_elo": 1403.
|
| 38204 |
-
"arena_rank":
|
| 38205 |
-
"arena_votes":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38206 |
},
|
| 38207 |
{
|
| 38208 |
"name": "qwen3-235b-a22b-no-thinking",
|
| 38209 |
"arena_name": "qwen3-235b-a22b-no-thinking",
|
| 38210 |
"arena_org": "Alibaba",
|
| 38211 |
-
"arena_elo": 1401.
|
| 38212 |
-
"arena_rank":
|
| 38213 |
-
"arena_votes":
|
| 38214 |
},
|
| 38215 |
{
|
| 38216 |
"name": "longcat-flash-chat",
|
| 38217 |
"arena_name": "longcat-flash-chat",
|
| 38218 |
"arena_org": "Meituan",
|
| 38219 |
-
"arena_elo":
|
| 38220 |
-
"arena_rank":
|
| 38221 |
"arena_votes": 11486
|
| 38222 |
},
|
| 38223 |
{
|
| 38224 |
"name": "claude-sonnet-4-20250514-thinking-32k",
|
| 38225 |
"arena_name": "claude-sonnet-4-20250514-thinking-32k",
|
| 38226 |
"arena_org": "Anthropic",
|
| 38227 |
-
"arena_elo": 1399.
|
| 38228 |
-
"arena_rank":
|
| 38229 |
-
"arena_votes":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38230 |
},
|
| 38231 |
{
|
| 38232 |
"name": "qwen3-vl-235b-a22b-thinking",
|
| 38233 |
"arena_name": "qwen3-vl-235b-a22b-thinking",
|
| 38234 |
"arena_org": "Alibaba",
|
| 38235 |
-
"arena_elo": 1395.
|
| 38236 |
-
"arena_rank":
|
| 38237 |
-
"arena_votes":
|
| 38238 |
},
|
| 38239 |
{
|
| 38240 |
"name": "amazon-nova-experimental-chat-12-10",
|
| 38241 |
"arena_name": "amazon-nova-experimental-chat-12-10",
|
| 38242 |
"arena_org": "Amazon",
|
| 38243 |
-
"arena_elo": 1394.
|
| 38244 |
-
"arena_rank":
|
| 38245 |
-
"arena_votes":
|
| 38246 |
},
|
| 38247 |
{
|
| 38248 |
"name": "hunyuan-vision-1.5-thinking",
|
| 38249 |
"arena_name": "hunyuan-vision-1.5-thinking",
|
| 38250 |
"arena_org": "Tencent",
|
| 38251 |
-
"arena_elo": 1393.
|
| 38252 |
-
"arena_rank":
|
| 38253 |
"arena_votes": 2216
|
| 38254 |
},
|
| 38255 |
{
|
| 38256 |
"name": "mai-1-preview",
|
| 38257 |
"arena_name": "mai-1-preview",
|
| 38258 |
"arena_org": "Microsoft AI",
|
| 38259 |
-
"arena_elo":
|
| 38260 |
-
"arena_rank":
|
| 38261 |
-
"arena_votes":
|
| 38262 |
-
},
|
| 38263 |
-
{
|
| 38264 |
-
"name": "o4-mini-2025-04-16",
|
| 38265 |
-
"arena_name": "o4-mini-2025-04-16",
|
| 38266 |
-
"arena_org": "OpenAI",
|
| 38267 |
-
"arena_elo": 1390.98,
|
| 38268 |
-
"arena_rank": 82,
|
| 38269 |
-
"arena_votes": 46375
|
| 38270 |
},
|
| 38271 |
{
|
| 38272 |
"name": "mimo-v2-flash (non-thinking)",
|
| 38273 |
"arena_name": "mimo-v2-flash (non-thinking)",
|
| 38274 |
"arena_org": "Xiaomi",
|
| 38275 |
-
"arena_elo":
|
| 38276 |
-
"arena_rank":
|
| 38277 |
-
"arena_votes":
|
| 38278 |
},
|
| 38279 |
{
|
| 38280 |
-
"name": "
|
| 38281 |
-
"arena_name": "
|
| 38282 |
-
"arena_org": "
|
| 38283 |
-
"arena_elo":
|
| 38284 |
-
"arena_rank":
|
| 38285 |
-
"arena_votes":
|
| 38286 |
},
|
| 38287 |
{
|
| 38288 |
"name": "claude-sonnet-4-20250514",
|
| 38289 |
"arena_name": "claude-sonnet-4-20250514",
|
| 38290 |
"arena_org": "Anthropic",
|
| 38291 |
-
"arena_elo": 1389.
|
| 38292 |
-
"arena_rank":
|
| 38293 |
-
"arena_votes":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38294 |
},
|
| 38295 |
{
|
| 38296 |
"name": "claude-3-7-sonnet-20250219-thinking-32k",
|
| 38297 |
"arena_name": "claude-3-7-sonnet-20250219-thinking-32k",
|
| 38298 |
"arena_org": "Anthropic",
|
| 38299 |
-
"arena_elo":
|
| 38300 |
-
"arena_rank":
|
| 38301 |
-
"arena_votes":
|
| 38302 |
},
|
| 38303 |
{
|
| 38304 |
"name": "mimo-v2-flash (thinking)",
|
| 38305 |
"arena_name": "mimo-v2-flash (thinking)",
|
| 38306 |
"arena_org": "Xiaomi",
|
| 38307 |
-
"arena_elo": 1386.
|
| 38308 |
-
"arena_rank":
|
| 38309 |
-
"arena_votes":
|
| 38310 |
},
|
| 38311 |
{
|
| 38312 |
"name": "hunyuan-t1-20250711",
|
| 38313 |
"arena_name": "hunyuan-t1-20250711",
|
| 38314 |
"arena_org": "Tencent",
|
| 38315 |
-
"arena_elo": 1386.
|
| 38316 |
-
"arena_rank":
|
| 38317 |
-
"arena_votes":
|
| 38318 |
},
|
| 38319 |
{
|
| 38320 |
"name": "minimax-m2.1-preview",
|
| 38321 |
"arena_name": "minimax-m2.1-preview",
|
| 38322 |
"arena_org": "MiniMax",
|
| 38323 |
-
"arena_elo": 1385.
|
| 38324 |
-
"arena_rank":
|
| 38325 |
-
"arena_votes":
|
| 38326 |
},
|
| 38327 |
{
|
| 38328 |
"name": "qwen3-30b-a3b-instruct-2507",
|
| 38329 |
"arena_name": "qwen3-30b-a3b-instruct-2507",
|
| 38330 |
"arena_org": "Alibaba",
|
| 38331 |
-
"arena_elo": 1383.
|
| 38332 |
-
"arena_rank":
|
| 38333 |
-
"arena_votes":
|
| 38334 |
},
|
| 38335 |
{
|
| 38336 |
"name": "hunyuan-turbos-20250416",
|
| 38337 |
"arena_name": "hunyuan-turbos-20250416",
|
| 38338 |
"arena_org": "Tencent",
|
| 38339 |
-
"arena_elo": 1382.
|
| 38340 |
-
"arena_rank":
|
| 38341 |
-
"arena_votes":
|
| 38342 |
},
|
| 38343 |
{
|
| 38344 |
"name": "gemini-2.5-flash-lite-preview-09-2025-no-thinking",
|
| 38345 |
"arena_name": "gemini-2.5-flash-lite-preview-09-2025-no-thinking",
|
| 38346 |
"arena_org": "Google",
|
| 38347 |
-
"arena_elo": 1379.
|
| 38348 |
-
"arena_rank":
|
| 38349 |
-
"arena_votes":
|
| 38350 |
-
},
|
| 38351 |
-
{
|
| 38352 |
-
"name": "trinity-large",
|
| 38353 |
-
"arena_name": "trinity-large",
|
| 38354 |
-
"arena_org": "Arcee AI",
|
| 38355 |
-
"arena_elo": 1375.1,
|
| 38356 |
-
"arena_rank": 99,
|
| 38357 |
-
"arena_votes": 2166
|
| 38358 |
},
|
| 38359 |
{
|
| 38360 |
"name": "gemini-2.5-flash-lite-preview-06-17-thinking",
|
| 38361 |
"arena_name": "gemini-2.5-flash-lite-preview-06-17-thinking",
|
| 38362 |
"arena_org": "Google",
|
| 38363 |
-
"arena_elo": 1374.
|
| 38364 |
-
"arena_rank":
|
| 38365 |
-
"arena_votes":
|
| 38366 |
},
|
| 38367 |
{
|
| 38368 |
-
"name": "
|
| 38369 |
-
"arena_name": "
|
| 38370 |
-
"arena_org": "
|
| 38371 |
-
"arena_elo":
|
| 38372 |
-
"arena_rank":
|
| 38373 |
-
"arena_votes":
|
| 38374 |
},
|
| 38375 |
{
|
| 38376 |
"name": "glm-4.7-flash",
|
| 38377 |
"arena_name": "glm-4.7-flash",
|
| 38378 |
"arena_org": "Z.ai",
|
| 38379 |
-
"arena_elo":
|
| 38380 |
-
"arena_rank":
|
| 38381 |
-
"arena_votes":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38382 |
},
|
| 38383 |
{
|
| 38384 |
"name": "amazon-nova-experimental-chat-11-10",
|
| 38385 |
"arena_name": "amazon-nova-experimental-chat-11-10",
|
| 38386 |
"arena_org": "Amazon",
|
| 38387 |
-
"arena_elo": 1365.
|
| 38388 |
-
"arena_rank":
|
| 38389 |
-
"arena_votes":
|
| 38390 |
},
|
| 38391 |
{
|
| 38392 |
"name": "o3-mini-high",
|
| 38393 |
"arena_name": "o3-mini-high",
|
| 38394 |
"arena_org": "OpenAI",
|
| 38395 |
-
"arena_elo": 1363.
|
| 38396 |
-
"arena_rank":
|
| 38397 |
"arena_votes": 18584
|
| 38398 |
},
|
| 38399 |
{
|
| 38400 |
"name": "grok-3-mini-high",
|
| 38401 |
"arena_name": "grok-3-mini-high",
|
| 38402 |
"arena_org": "xAI",
|
| 38403 |
-
"arena_elo": 1362.
|
| 38404 |
-
"arena_rank":
|
| 38405 |
-
"arena_votes":
|
| 38406 |
},
|
| 38407 |
{
|
| 38408 |
"name": "grok-3-mini-beta",
|
| 38409 |
"arena_name": "grok-3-mini-beta",
|
| 38410 |
"arena_org": "xAI",
|
| 38411 |
-
"arena_elo": 1356.
|
| 38412 |
-
"arena_rank":
|
| 38413 |
-
"arena_votes":
|
| 38414 |
},
|
| 38415 |
{
|
| 38416 |
"name": "intellect-3",
|
| 38417 |
"arena_name": "intellect-3",
|
| 38418 |
"arena_org": "Prime Intellect",
|
| 38419 |
-
"arena_elo": 1356.
|
| 38420 |
-
"arena_rank":
|
| 38421 |
-
"arena_votes":
|
| 38422 |
},
|
| 38423 |
{
|
| 38424 |
"name": "mistral-small-2506",
|
| 38425 |
"arena_name": "mistral-small-2506",
|
| 38426 |
"arena_org": "Mistral",
|
| 38427 |
-
"arena_elo": 1356.
|
| 38428 |
-
"arena_rank":
|
| 38429 |
-
"arena_votes":
|
| 38430 |
},
|
| 38431 |
{
|
| 38432 |
"name": "glm-4.5v",
|
| 38433 |
"arena_name": "glm-4.5v",
|
| 38434 |
"arena_org": "Z.ai",
|
| 38435 |
-
"arena_elo":
|
| 38436 |
-
"arena_rank":
|
| 38437 |
-
"arena_votes":
|
| 38438 |
},
|
| 38439 |
{
|
| 38440 |
"name": "amazon-nova-experimental-chat-10-20",
|
| 38441 |
"arena_name": "amazon-nova-experimental-chat-10-20",
|
| 38442 |
"arena_org": "Amazon",
|
| 38443 |
-
"arena_elo": 1350.
|
| 38444 |
-
"arena_rank":
|
| 38445 |
-
"arena_votes":
|
| 38446 |
},
|
| 38447 |
{
|
| 38448 |
"name": "hunyuan-turbos-20250226",
|
| 38449 |
"arena_name": "hunyuan-turbos-20250226",
|
| 38450 |
"arena_org": "Tencent",
|
| 38451 |
-
"arena_elo": 1348.
|
| 38452 |
-
"arena_rank":
|
| 38453 |
"arena_votes": 2226
|
| 38454 |
},
|
| 38455 |
{
|
| 38456 |
"name": "amazon-nova-experimental-chat-10-09",
|
| 38457 |
"arena_name": "amazon-nova-experimental-chat-10-09",
|
| 38458 |
"arena_org": "Amazon",
|
| 38459 |
-
"arena_elo": 1347.
|
| 38460 |
-
"arena_rank":
|
| 38461 |
-
"arena_votes":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38462 |
},
|
| 38463 |
{
|
| 38464 |
"name": "llama-3.1-nemotron-ultra-253b-v1",
|
| 38465 |
"arena_name": "llama-3.1-nemotron-ultra-253b-v1",
|
| 38466 |
"arena_org": "Nvidia",
|
| 38467 |
-
"arena_elo": 1347.
|
| 38468 |
-
"arena_rank":
|
| 38469 |
"arena_votes": 2546
|
| 38470 |
},
|
| 38471 |
-
{
|
| 38472 |
-
"name": "ling-flash-2.0",
|
| 38473 |
-
"arena_name": "ling-flash-2.0",
|
| 38474 |
-
"arena_org": "Ant Group",
|
| 38475 |
-
"arena_elo": 1346.92,
|
| 38476 |
-
"arena_rank": 130,
|
| 38477 |
-
"arena_votes": 6995
|
| 38478 |
-
},
|
| 38479 |
{
|
| 38480 |
"name": "step-3",
|
| 38481 |
"arena_name": "step-3",
|
| 38482 |
"arena_org": "StepFun",
|
| 38483 |
-
"arena_elo": 1346.
|
| 38484 |
-
"arena_rank":
|
| 38485 |
-
"arena_votes":
|
| 38486 |
},
|
| 38487 |
{
|
| 38488 |
"name": "qwen-plus-0125",
|
| 38489 |
"arena_name": "qwen-plus-0125",
|
| 38490 |
"arena_org": "Alibaba",
|
| 38491 |
-
"arena_elo": 1346.
|
| 38492 |
-
"arena_rank":
|
| 38493 |
"arena_votes": 5823
|
| 38494 |
},
|
| 38495 |
{
|
| 38496 |
"name": "glm-4-plus-0111",
|
| 38497 |
"arena_name": "glm-4-plus-0111",
|
| 38498 |
"arena_org": "Zhipu",
|
| 38499 |
-
"arena_elo": 1343.
|
| 38500 |
-
"arena_rank":
|
| 38501 |
"arena_votes": 5760
|
| 38502 |
},
|
| 38503 |
{
|
| 38504 |
"name": "nvidia-llama-3.3-nemotron-super-49b-v1.5",
|
| 38505 |
"arena_name": "nvidia-llama-3.3-nemotron-super-49b-v1.5",
|
| 38506 |
"arena_org": "Nvidia",
|
| 38507 |
-
"arena_elo": 1341.
|
| 38508 |
-
"arena_rank":
|
| 38509 |
-
"arena_votes":
|
| 38510 |
},
|
| 38511 |
{
|
| 38512 |
"name": "hunyuan-turbo-0110",
|
| 38513 |
"arena_name": "hunyuan-turbo-0110",
|
| 38514 |
"arena_org": "Tencent",
|
| 38515 |
-
"arena_elo": 1340.
|
| 38516 |
-
"arena_rank":
|
| 38517 |
"arena_votes": 2295
|
| 38518 |
},
|
| 38519 |
{
|
| 38520 |
"name": "nova-2-lite",
|
| 38521 |
"arena_name": "nova-2-lite",
|
| 38522 |
"arena_org": "Amazon",
|
| 38523 |
-
"arena_elo": 1337.
|
| 38524 |
-
"arena_rank":
|
| 38525 |
-
"arena_votes":
|
| 38526 |
-
},
|
| 38527 |
-
{
|
| 38528 |
-
"name": "llama-3.1-405b-instruct-bf16",
|
| 38529 |
-
"arena_name": "llama-3.1-405b-instruct-bf16",
|
| 38530 |
-
"arena_org": "Meta",
|
| 38531 |
-
"arena_elo": 1335.21,
|
| 38532 |
-
"arena_rank": 143,
|
| 38533 |
-
"arena_votes": 41392
|
| 38534 |
},
|
| 38535 |
{
|
| 38536 |
"name": "grok-2-2024-08-13",
|
| 38537 |
"arena_name": "grok-2-2024-08-13",
|
| 38538 |
"arena_org": "xAI",
|
| 38539 |
-
"arena_elo": 1335
|
| 38540 |
-
"arena_rank":
|
| 38541 |
"arena_votes": 63495
|
| 38542 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38543 |
{
|
| 38544 |
"name": "gemini-advanced-0514",
|
| 38545 |
"arena_name": "gemini-advanced-0514",
|
| 38546 |
"arena_org": "Google",
|
| 38547 |
-
"arena_elo": 1334.
|
| 38548 |
-
"arena_rank":
|
| 38549 |
"arena_votes": 50142
|
| 38550 |
},
|
| 38551 |
{
|
| 38552 |
"name": "step-2-16k-exp-202412",
|
| 38553 |
"arena_name": "step-2-16k-exp-202412",
|
| 38554 |
"arena_org": "StepFun",
|
| 38555 |
-
"arena_elo": 1334.
|
| 38556 |
-
"arena_rank":
|
| 38557 |
"arena_votes": 4829
|
| 38558 |
},
|
| 38559 |
{
|
| 38560 |
"name": "llama-3.1-405b-instruct-fp8",
|
| 38561 |
"arena_name": "llama-3.1-405b-instruct-fp8",
|
| 38562 |
"arena_org": "Meta",
|
| 38563 |
-
"arena_elo": 1333.
|
| 38564 |
-
"arena_rank":
|
| 38565 |
"arena_votes": 59655
|
| 38566 |
},
|
| 38567 |
{
|
| 38568 |
"name": "olmo-3.1-32b-instruct",
|
| 38569 |
"arena_name": "olmo-3.1-32b-instruct",
|
| 38570 |
"arena_org": "Ai2",
|
| 38571 |
-
"arena_elo": 1330.
|
| 38572 |
-
"arena_rank":
|
| 38573 |
-
"arena_votes":
|
| 38574 |
},
|
| 38575 |
{
|
| 38576 |
"name": "molmo-2-8b",
|
| 38577 |
"arena_name": "molmo-2-8b",
|
| 38578 |
"arena_org": "Ai2",
|
| 38579 |
-
"arena_elo":
|
| 38580 |
-
"arena_rank":
|
| 38581 |
-
"arena_votes":
|
| 38582 |
},
|
| 38583 |
{
|
| 38584 |
"name": "yi-lightning",
|
| 38585 |
"arena_name": "yi-lightning",
|
| 38586 |
"arena_org": "01 AI",
|
| 38587 |
-
"arena_elo": 1328.
|
| 38588 |
-
"arena_rank":
|
| 38589 |
"arena_votes": 27340,
|
| 38590 |
"aider_pass_rate": 0.496
|
| 38591 |
},
|
|
@@ -38593,24 +38688,24 @@
|
|
| 38593 |
"name": "llama-3.3-nemotron-49b-super-v1",
|
| 38594 |
"arena_name": "llama-3.3-nemotron-49b-super-v1",
|
| 38595 |
"arena_org": "Nvidia",
|
| 38596 |
-
"arena_elo": 1327.
|
| 38597 |
-
"arena_rank":
|
| 38598 |
"arena_votes": 2230
|
| 38599 |
},
|
| 38600 |
{
|
| 38601 |
"name": "hunyuan-large-2025-02-10",
|
| 38602 |
"arena_name": "hunyuan-large-2025-02-10",
|
| 38603 |
"arena_org": "Tencent",
|
| 38604 |
-
"arena_elo": 1326.
|
| 38605 |
-
"arena_rank":
|
| 38606 |
"arena_votes": 3738
|
| 38607 |
},
|
| 38608 |
{
|
| 38609 |
"name": "deepseek-v2.5-1210",
|
| 38610 |
"arena_name": "deepseek-v2.5-1210",
|
| 38611 |
"arena_org": "DeepSeek",
|
| 38612 |
-
"arena_elo": 1323.
|
| 38613 |
-
"arena_rank":
|
| 38614 |
"arena_votes": 6793,
|
| 38615 |
"aider_pass_rate": 0.586
|
| 38616 |
},
|
|
@@ -38618,8 +38713,8 @@
|
|
| 38618 |
"name": "gemini-1.5-pro-001",
|
| 38619 |
"arena_name": "gemini-1.5-pro-001",
|
| 38620 |
"arena_org": "Google",
|
| 38621 |
-
"arena_elo":
|
| 38622 |
-
"arena_rank":
|
| 38623 |
"arena_votes": 79132,
|
| 38624 |
"aider_pass_rate": 0.45899999999999996
|
| 38625 |
},
|
|
@@ -38627,88 +38722,88 @@
|
|
| 38627 |
"name": "llama-4-scout-17b-16e-instruct",
|
| 38628 |
"arena_name": "llama-4-scout-17b-16e-instruct",
|
| 38629 |
"arena_org": "Meta",
|
| 38630 |
-
"arena_elo": 1322.
|
| 38631 |
-
"arena_rank":
|
| 38632 |
-
"arena_votes":
|
| 38633 |
},
|
| 38634 |
{
|
| 38635 |
"name": "step-1o-turbo-202506",
|
| 38636 |
"arena_name": "step-1o-turbo-202506",
|
| 38637 |
"arena_org": "StepFun",
|
| 38638 |
-
"arena_elo": 1321.
|
| 38639 |
-
"arena_rank":
|
| 38640 |
-
"arena_votes":
|
| 38641 |
},
|
| 38642 |
{
|
| 38643 |
"name": "ring-flash-2.0",
|
| 38644 |
"arena_name": "ring-flash-2.0",
|
| 38645 |
"arena_org": "Ant Group",
|
| 38646 |
-
"arena_elo": 1320.
|
| 38647 |
-
"arena_rank":
|
| 38648 |
-
"arena_votes":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38649 |
},
|
| 38650 |
{
|
| 38651 |
"name": "glm-4-plus",
|
| 38652 |
"arena_name": "glm-4-plus",
|
| 38653 |
"arena_org": "Zhipu AI",
|
| 38654 |
-
"arena_elo": 1319.
|
| 38655 |
-
"arena_rank":
|
| 38656 |
"arena_votes": 26134
|
| 38657 |
},
|
| 38658 |
-
{
|
| 38659 |
-
"name": "gemma-3n-e4b-it",
|
| 38660 |
-
"arena_name": "gemma-3n-e4b-it",
|
| 38661 |
-
"arena_org": "Google",
|
| 38662 |
-
"arena_elo": 1319.29,
|
| 38663 |
-
"arena_rank": 167,
|
| 38664 |
-
"arena_votes": 23193
|
| 38665 |
-
},
|
| 38666 |
{
|
| 38667 |
"name": "qwen-max-0919",
|
| 38668 |
"arena_name": "qwen-max-0919",
|
| 38669 |
"arena_org": "Alibaba",
|
| 38670 |
-
"arena_elo":
|
| 38671 |
-
"arena_rank":
|
| 38672 |
"arena_votes": 16479
|
| 38673 |
},
|
| 38674 |
-
{
|
| 38675 |
-
"name": "gpt-oss-20b",
|
| 38676 |
-
"arena_name": "gpt-oss-20b",
|
| 38677 |
-
"arena_org": "OpenAI",
|
| 38678 |
-
"arena_elo": 1317.02,
|
| 38679 |
-
"arena_rank": 170,
|
| 38680 |
-
"arena_votes": 10758
|
| 38681 |
-
},
|
| 38682 |
{
|
| 38683 |
"name": "nvidia-nemotron-3-nano-30b-a3b-bf16",
|
| 38684 |
"arena_name": "nvidia-nemotron-3-nano-30b-a3b-bf16",
|
| 38685 |
"arena_org": "Nvidia",
|
| 38686 |
-
"arena_elo": 1317,
|
| 38687 |
-
"arena_rank":
|
| 38688 |
-
"arena_votes":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38689 |
},
|
| 38690 |
{
|
| 38691 |
"name": "qwen2.5-plus-1127",
|
| 38692 |
"arena_name": "qwen2.5-plus-1127",
|
| 38693 |
"arena_org": "Alibaba",
|
| 38694 |
-
"arena_elo": 1315.
|
| 38695 |
-
"arena_rank":
|
| 38696 |
"arena_votes": 10179
|
| 38697 |
},
|
| 38698 |
{
|
| 38699 |
"name": "athene-v2-chat",
|
| 38700 |
"arena_name": "athene-v2-chat",
|
| 38701 |
"arena_org": "NexusFlow",
|
| 38702 |
-
"arena_elo": 1314.
|
| 38703 |
-
"arena_rank":
|
| 38704 |
"arena_votes": 24746
|
| 38705 |
},
|
| 38706 |
{
|
| 38707 |
"name": "gpt-4-1106-preview",
|
| 38708 |
"arena_name": "gpt-4-1106-preview",
|
| 38709 |
"arena_org": "OpenAI",
|
| 38710 |
-
"arena_elo":
|
| 38711 |
-
"arena_rank":
|
| 38712 |
"arena_votes": 100107,
|
| 38713 |
"aider_pass_rate": 0.519
|
| 38714 |
},
|
|
@@ -38716,96 +38811,96 @@
|
|
| 38716 |
"name": "hunyuan-standard-2025-02-10",
|
| 38717 |
"arena_name": "hunyuan-standard-2025-02-10",
|
| 38718 |
"arena_org": "Tencent",
|
| 38719 |
-
"arena_elo": 1311.
|
| 38720 |
-
"arena_rank":
|
| 38721 |
"arena_votes": 3905
|
| 38722 |
},
|
| 38723 |
{
|
| 38724 |
"name": "mercury",
|
| 38725 |
"arena_name": "mercury",
|
| 38726 |
"arena_org": "Inception AI",
|
| 38727 |
-
"arena_elo": 1308.
|
| 38728 |
-
"arena_rank":
|
| 38729 |
-
"arena_votes":
|
| 38730 |
},
|
| 38731 |
{
|
| 38732 |
"name": "grok-2-mini-2024-08-13",
|
| 38733 |
"arena_name": "grok-2-mini-2024-08-13",
|
| 38734 |
"arena_org": "xAI",
|
| 38735 |
-
"arena_elo": 1307.
|
| 38736 |
-
"arena_rank":
|
| 38737 |
"arena_votes": 52574
|
| 38738 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38739 |
{
|
| 38740 |
"name": "athene-70b-0725",
|
| 38741 |
"arena_name": "athene-70b-0725",
|
| 38742 |
"arena_org": "NexusFlow",
|
| 38743 |
-
"arena_elo": 1305.
|
| 38744 |
-
"arena_rank":
|
| 38745 |
"arena_votes": 19622
|
| 38746 |
},
|
| 38747 |
-
{
|
| 38748 |
-
"name": "olmo-3-32b-think",
|
| 38749 |
-
"arena_name": "olmo-3-32b-think",
|
| 38750 |
-
"arena_org": "Ai2",
|
| 38751 |
-
"arena_elo": 1305.64,
|
| 38752 |
-
"arena_rank": 183,
|
| 38753 |
-
"arena_votes": 5868
|
| 38754 |
-
},
|
| 38755 |
{
|
| 38756 |
"name": "magistral-medium-2506",
|
| 38757 |
"arena_name": "magistral-medium-2506",
|
| 38758 |
"arena_org": "Mistral",
|
| 38759 |
-
"arena_elo": 1304.
|
| 38760 |
-
"arena_rank":
|
| 38761 |
-
"arena_votes":
|
| 38762 |
},
|
| 38763 |
{
|
| 38764 |
"name": "mistral-small-3.1-24b-instruct-2503",
|
| 38765 |
"arena_name": "mistral-small-3.1-24b-instruct-2503",
|
| 38766 |
"arena_org": "Mistral",
|
| 38767 |
-
"arena_elo": 1304.
|
| 38768 |
-
"arena_rank":
|
| 38769 |
-
"arena_votes":
|
| 38770 |
},
|
| 38771 |
{
|
| 38772 |
"name": "hunyuan-large-vision",
|
| 38773 |
"arena_name": "hunyuan-large-vision",
|
| 38774 |
"arena_org": "Tencent",
|
| 38775 |
-
"arena_elo": 1295.
|
| 38776 |
-
"arena_rank":
|
| 38777 |
-
"arena_votes":
|
| 38778 |
},
|
| 38779 |
{
|
| 38780 |
"name": "amazon-nova-pro-v1.0",
|
| 38781 |
"arena_name": "amazon-nova-pro-v1.0",
|
| 38782 |
"arena_org": "Amazon",
|
| 38783 |
-
"arena_elo": 1290.
|
| 38784 |
-
"arena_rank":
|
| 38785 |
"arena_votes": 24753
|
| 38786 |
},
|
| 38787 |
{
|
| 38788 |
"name": "reka-core-20240904",
|
| 38789 |
"arena_name": "reka-core-20240904",
|
| 38790 |
"arena_org": "Reka AI",
|
| 38791 |
-
"arena_elo": 1287.
|
| 38792 |
-
"arena_rank":
|
| 38793 |
"arena_votes": 7309
|
| 38794 |
},
|
| 38795 |
{
|
| 38796 |
"name": "ibm-granite-h-small",
|
| 38797 |
"arena_name": "ibm-granite-h-small",
|
| 38798 |
"arena_org": "IBM",
|
| 38799 |
-
"arena_elo":
|
| 38800 |
-
"arena_rank":
|
| 38801 |
-
"arena_votes":
|
| 38802 |
},
|
| 38803 |
{
|
| 38804 |
"name": "gpt-4-0314",
|
| 38805 |
"arena_name": "gpt-4-0314",
|
| 38806 |
"arena_org": "OpenAI",
|
| 38807 |
-
"arena_elo": 1286.
|
| 38808 |
-
"arena_rank":
|
| 38809 |
"arena_votes": 54167,
|
| 38810 |
"aider_pass_rate": 0.504
|
| 38811 |
},
|
|
@@ -38813,168 +38908,168 @@
|
|
| 38813 |
"name": "llama-3.1-nemotron-51b-instruct",
|
| 38814 |
"arena_name": "llama-3.1-nemotron-51b-instruct",
|
| 38815 |
"arena_org": "Nvidia",
|
| 38816 |
-
"arena_elo":
|
| 38817 |
-
"arena_rank":
|
| 38818 |
"arena_votes": 3749
|
| 38819 |
},
|
| 38820 |
{
|
| 38821 |
"name": "gemini-1.5-flash-001",
|
| 38822 |
"arena_name": "gemini-1.5-flash-001",
|
| 38823 |
"arena_org": "Google",
|
| 38824 |
-
"arena_elo": 1285.
|
| 38825 |
-
"arena_rank":
|
| 38826 |
"arena_votes": 62823
|
| 38827 |
},
|
| 38828 |
{
|
| 38829 |
"name": "olmo-3.1-32b-think",
|
| 38830 |
"arena_name": "olmo-3.1-32b-think",
|
| 38831 |
"arena_org": "Ai2",
|
| 38832 |
-
"arena_elo":
|
| 38833 |
-
"arena_rank":
|
| 38834 |
-
"arena_votes":
|
| 38835 |
},
|
| 38836 |
{
|
| 38837 |
"name": "nemotron-4-340b-instruct",
|
| 38838 |
"arena_name": "nemotron-4-340b-instruct",
|
| 38839 |
"arena_org": "Nvidia",
|
| 38840 |
-
"arena_elo": 1277.
|
| 38841 |
-
"arena_rank":
|
| 38842 |
"arena_votes": 19661
|
| 38843 |
},
|
| 38844 |
{
|
| 38845 |
"name": "llama-3-70b-instruct",
|
| 38846 |
"arena_name": "llama-3-70b-instruct",
|
| 38847 |
"arena_org": "Meta",
|
| 38848 |
-
"arena_elo": 1275.
|
| 38849 |
-
"arena_rank":
|
| 38850 |
"arena_votes": 156880
|
| 38851 |
},
|
| 38852 |
{
|
| 38853 |
"name": "mistral-small-24b-instruct-2501",
|
| 38854 |
"arena_name": "mistral-small-24b-instruct-2501",
|
| 38855 |
"arena_org": "Mistral",
|
| 38856 |
-
"arena_elo": 1273.
|
| 38857 |
-
"arena_rank":
|
| 38858 |
"arena_votes": 14677
|
| 38859 |
},
|
| 38860 |
{
|
| 38861 |
"name": "glm-4-0520",
|
| 38862 |
"arena_name": "glm-4-0520",
|
| 38863 |
"arena_org": "Zhipu AI",
|
| 38864 |
-
"arena_elo": 1273.
|
| 38865 |
-
"arena_rank":
|
| 38866 |
"arena_votes": 9788
|
| 38867 |
},
|
| 38868 |
{
|
| 38869 |
"name": "reka-flash-20240904",
|
| 38870 |
"arena_name": "reka-flash-20240904",
|
| 38871 |
"arena_org": "Reka AI",
|
| 38872 |
-
"arena_elo":
|
| 38873 |
-
"arena_rank":
|
| 38874 |
"arena_votes": 7537
|
| 38875 |
},
|
| 38876 |
{
|
| 38877 |
"name": "c4ai-aya-expanse-32b",
|
| 38878 |
"arena_name": "c4ai-aya-expanse-32b",
|
| 38879 |
"arena_org": "Cohere",
|
| 38880 |
-
"arena_elo":
|
| 38881 |
-
"arena_rank":
|
| 38882 |
"arena_votes": 27123
|
| 38883 |
},
|
| 38884 |
{
|
| 38885 |
"name": "amazon-nova-lite-v1.0",
|
| 38886 |
"arena_name": "amazon-nova-lite-v1.0",
|
| 38887 |
"arena_org": "Amazon",
|
| 38888 |
-
"arena_elo": 1260.
|
| 38889 |
-
"arena_rank":
|
| 38890 |
"arena_votes": 19376
|
| 38891 |
},
|
| 38892 |
{
|
| 38893 |
"name": "olmo-2-0325-32b-instruct",
|
| 38894 |
"arena_name": "olmo-2-0325-32b-instruct",
|
| 38895 |
"arena_org": "Ai2",
|
| 38896 |
-
"arena_elo": 1251.
|
| 38897 |
-
"arena_rank":
|
| 38898 |
"arena_votes": 3335
|
| 38899 |
},
|
| 38900 |
{
|
| 38901 |
"name": "amazon-nova-micro-v1.0",
|
| 38902 |
"arena_name": "amazon-nova-micro-v1.0",
|
| 38903 |
"arena_org": "Amazon",
|
| 38904 |
-
"arena_elo": 1240.
|
| 38905 |
-
"arena_rank":
|
| 38906 |
"arena_votes": 19355
|
| 38907 |
},
|
| 38908 |
{
|
| 38909 |
"name": "ministral-8b-2410",
|
| 38910 |
"arena_name": "ministral-8b-2410",
|
| 38911 |
"arena_org": "Mistral",
|
| 38912 |
-
"arena_elo":
|
| 38913 |
-
"arena_rank":
|
| 38914 |
"arena_votes": 4780
|
| 38915 |
},
|
| 38916 |
{
|
| 38917 |
"name": "gemini-pro-dev-api",
|
| 38918 |
"arena_name": "gemini-pro-dev-api",
|
| 38919 |
"arena_org": "Google",
|
| 38920 |
-
"arena_elo":
|
| 38921 |
-
"arena_rank":
|
| 38922 |
"arena_votes": 18352
|
| 38923 |
},
|
| 38924 |
{
|
| 38925 |
"name": "hunyuan-standard-256k",
|
| 38926 |
"arena_name": "hunyuan-standard-256k",
|
| 38927 |
"arena_org": "Tencent",
|
| 38928 |
-
"arena_elo": 1233.
|
| 38929 |
-
"arena_rank":
|
| 38930 |
"arena_votes": 2729
|
| 38931 |
},
|
| 38932 |
{
|
| 38933 |
"name": "reka-flash-21b-20240226-online",
|
| 38934 |
"arena_name": "reka-flash-21b-20240226-online",
|
| 38935 |
"arena_org": "Reka AI",
|
| 38936 |
-
"arena_elo":
|
| 38937 |
-
"arena_rank":
|
| 38938 |
"arena_votes": 15451
|
| 38939 |
},
|
| 38940 |
{
|
| 38941 |
"name": "reka-flash-21b-20240226",
|
| 38942 |
"arena_name": "reka-flash-21b-20240226",
|
| 38943 |
"arena_org": "Reka AI",
|
| 38944 |
-
"arena_elo": 1226.
|
| 38945 |
-
"arena_rank":
|
| 38946 |
"arena_votes": 24806
|
| 38947 |
},
|
| 38948 |
{
|
| 38949 |
"name": "c4ai-aya-expanse-8b",
|
| 38950 |
"arena_name": "c4ai-aya-expanse-8b",
|
| 38951 |
"arena_org": "Cohere",
|
| 38952 |
-
"arena_elo":
|
| 38953 |
-
"arena_rank":
|
| 38954 |
"arena_votes": 9827
|
| 38955 |
},
|
| 38956 |
{
|
| 38957 |
"name": "mistral-medium",
|
| 38958 |
"arena_name": "mistral-medium",
|
| 38959 |
"arena_org": "Mistral",
|
| 38960 |
-
"arena_elo": 1222.
|
| 38961 |
-
"arena_rank":
|
| 38962 |
"arena_votes": 34552
|
| 38963 |
},
|
| 38964 |
{
|
| 38965 |
"name": "gemini-pro",
|
| 38966 |
"arena_name": "gemini-pro",
|
| 38967 |
"arena_org": "Google",
|
| 38968 |
-
"arena_elo": 1221.
|
| 38969 |
-
"arena_rank":
|
| 38970 |
"arena_votes": 6390
|
| 38971 |
},
|
| 38972 |
{
|
| 38973 |
"name": "gpt-3.5-turbo-1106",
|
| 38974 |
"arena_name": "gpt-3.5-turbo-1106",
|
| 38975 |
"arena_org": "OpenAI",
|
| 38976 |
-
"arena_elo": 1202
|
| 38977 |
-
"arena_rank":
|
| 38978 |
"arena_votes": 16616,
|
| 38979 |
"aider_pass_rate": 0.455
|
| 38980 |
},
|
|
@@ -38982,280 +39077,280 @@
|
|
| 38982 |
"name": "dbrx-instruct-preview",
|
| 38983 |
"arena_name": "dbrx-instruct-preview",
|
| 38984 |
"arena_org": "Databricks",
|
| 38985 |
-
"arena_elo": 1194.
|
| 38986 |
-
"arena_rank":
|
| 38987 |
"arena_votes": 32196
|
| 38988 |
},
|
| 38989 |
{
|
| 38990 |
"name": "wizardlm-70b",
|
| 38991 |
"arena_name": "wizardlm-70b",
|
| 38992 |
"arena_org": "Microsoft",
|
| 38993 |
-
"arena_elo": 1184.
|
| 38994 |
-
"arena_rank":
|
| 38995 |
"arena_votes": 8214
|
| 38996 |
},
|
| 38997 |
{
|
| 38998 |
"name": "snowflake-arctic-instruct",
|
| 38999 |
"arena_name": "snowflake-arctic-instruct",
|
| 39000 |
"arena_org": "Snowflake",
|
| 39001 |
-
"arena_elo": 1179.
|
| 39002 |
-
"arena_rank":
|
| 39003 |
"arena_votes": 32836
|
| 39004 |
},
|
| 39005 |
{
|
| 39006 |
"name": "tulu-2-dpo-70b",
|
| 39007 |
"arena_name": "tulu-2-dpo-70b",
|
| 39008 |
"arena_org": "AllenAI/UW",
|
| 39009 |
-
"arena_elo":
|
| 39010 |
-
"arena_rank":
|
| 39011 |
"arena_votes": 6534
|
| 39012 |
},
|
| 39013 |
{
|
| 39014 |
"name": "vicuna-33b",
|
| 39015 |
"arena_name": "vicuna-33b",
|
| 39016 |
"arena_org": "LMSYS",
|
| 39017 |
-
"arena_elo": 1172.
|
| 39018 |
-
"arena_rank":
|
| 39019 |
"arena_votes": 22479
|
| 39020 |
},
|
| 39021 |
{
|
| 39022 |
"name": "llama-2-70b-chat",
|
| 39023 |
"arena_name": "llama-2-70b-chat",
|
| 39024 |
"arena_org": "Meta",
|
| 39025 |
-
"arena_elo": 1170.
|
| 39026 |
-
"arena_rank":
|
| 39027 |
"arena_votes": 38491
|
| 39028 |
},
|
| 39029 |
{
|
| 39030 |
"name": "llama2-70b-steerlm-chat",
|
| 39031 |
"arena_name": "llama2-70b-steerlm-chat",
|
| 39032 |
"arena_org": "Nvidia",
|
| 39033 |
-
"arena_elo":
|
| 39034 |
-
"arena_rank":
|
| 39035 |
"arena_votes": 3584
|
| 39036 |
},
|
| 39037 |
{
|
| 39038 |
"name": "dolphin-2.2.1-mistral-7b",
|
| 39039 |
"arena_name": "dolphin-2.2.1-mistral-7b",
|
| 39040 |
"arena_org": "Cognitive Computations",
|
| 39041 |
-
"arena_elo": 1151.
|
| 39042 |
-
"arena_rank":
|
| 39043 |
"arena_votes": 1679
|
| 39044 |
},
|
| 39045 |
{
|
| 39046 |
"name": "mpt-30b-chat",
|
| 39047 |
"arena_name": "mpt-30b-chat",
|
| 39048 |
"arena_org": "MosaicML",
|
| 39049 |
-
"arena_elo":
|
| 39050 |
-
"arena_rank":
|
| 39051 |
"arena_votes": 2571
|
| 39052 |
},
|
| 39053 |
{
|
| 39054 |
"name": "wizardlm-13b",
|
| 39055 |
"arena_name": "wizardlm-13b",
|
| 39056 |
"arena_org": "Microsoft",
|
| 39057 |
-
"arena_elo":
|
| 39058 |
-
"arena_rank":
|
| 39059 |
"arena_votes": 7046
|
| 39060 |
},
|
| 39061 |
{
|
| 39062 |
"name": "falcon-180b-chat",
|
| 39063 |
"arena_name": "falcon-180b-chat",
|
| 39064 |
"arena_org": "TII",
|
| 39065 |
-
"arena_elo": 1146.
|
| 39066 |
-
"arena_rank":
|
| 39067 |
"arena_votes": 1295
|
| 39068 |
},
|
| 39069 |
{
|
| 39070 |
"name": "phi-3-mini-4k-instruct-june-2024",
|
| 39071 |
"arena_name": "phi-3-mini-4k-instruct-june-2024",
|
| 39072 |
"arena_org": "Microsoft",
|
| 39073 |
-
"arena_elo": 1142.
|
| 39074 |
-
"arena_rank":
|
| 39075 |
"arena_votes": 12296
|
| 39076 |
},
|
| 39077 |
{
|
| 39078 |
"name": "llama-2-13b-chat",
|
| 39079 |
"arena_name": "llama-2-13b-chat",
|
| 39080 |
"arena_org": "Meta",
|
| 39081 |
-
"arena_elo": 1141.
|
| 39082 |
-
"arena_rank":
|
| 39083 |
"arena_votes": 19171
|
| 39084 |
},
|
| 39085 |
{
|
| 39086 |
"name": "vicuna-13b",
|
| 39087 |
"arena_name": "vicuna-13b",
|
| 39088 |
"arena_org": "LMSYS",
|
| 39089 |
-
"arena_elo": 1140.
|
| 39090 |
-
"arena_rank":
|
| 39091 |
"arena_votes": 19366
|
| 39092 |
},
|
| 39093 |
{
|
| 39094 |
"name": "qwen-14b-chat",
|
| 39095 |
"arena_name": "qwen-14b-chat",
|
| 39096 |
"arena_org": "Alibaba",
|
| 39097 |
-
"arena_elo": 1138.
|
| 39098 |
-
"arena_rank":
|
| 39099 |
"arena_votes": 4964
|
| 39100 |
},
|
| 39101 |
{
|
| 39102 |
"name": "palm-2",
|
| 39103 |
"arena_name": "palm-2",
|
| 39104 |
"arena_org": "Google",
|
| 39105 |
-
"arena_elo":
|
| 39106 |
-
"arena_rank":
|
| 39107 |
"arena_votes": 8554
|
| 39108 |
},
|
| 39109 |
{
|
| 39110 |
"name": "codellama-34b-instruct",
|
| 39111 |
"arena_name": "codellama-34b-instruct",
|
| 39112 |
"arena_org": "Meta",
|
| 39113 |
-
"arena_elo": 1136.
|
| 39114 |
-
"arena_rank":
|
| 39115 |
"arena_votes": 7363
|
| 39116 |
},
|
| 39117 |
{
|
| 39118 |
"name": "guanaco-33b",
|
| 39119 |
"arena_name": "guanaco-33b",
|
| 39120 |
"arena_org": "UW",
|
| 39121 |
-
"arena_elo":
|
| 39122 |
-
"arena_rank":
|
| 39123 |
"arena_votes": 2921
|
| 39124 |
},
|
| 39125 |
{
|
| 39126 |
"name": "stripedhyena-nous-7b",
|
| 39127 |
"arena_name": "stripedhyena-nous-7b",
|
| 39128 |
"arena_org": "Together AI",
|
| 39129 |
-
"arena_elo": 1120.
|
| 39130 |
-
"arena_rank":
|
| 39131 |
"arena_votes": 5184
|
| 39132 |
},
|
| 39133 |
{
|
| 39134 |
"name": "codellama-70b-instruct",
|
| 39135 |
"arena_name": "codellama-70b-instruct",
|
| 39136 |
"arena_org": "Meta",
|
| 39137 |
-
"arena_elo": 1118.
|
| 39138 |
-
"arena_rank":
|
| 39139 |
"arena_votes": 1143
|
| 39140 |
},
|
| 39141 |
{
|
| 39142 |
"name": "vicuna-7b",
|
| 39143 |
"arena_name": "vicuna-7b",
|
| 39144 |
"arena_org": "LMSYS",
|
| 39145 |
-
"arena_elo": 1114.
|
| 39146 |
-
"arena_rank":
|
| 39147 |
"arena_votes": 6923
|
| 39148 |
},
|
| 39149 |
{
|
| 39150 |
"name": "mistral-7b-instruct",
|
| 39151 |
"arena_name": "mistral-7b-instruct",
|
| 39152 |
"arena_org": "Mistral",
|
| 39153 |
-
"arena_elo": 1109.
|
| 39154 |
-
"arena_rank":
|
| 39155 |
"arena_votes": 8977
|
| 39156 |
},
|
| 39157 |
{
|
| 39158 |
"name": "llama-2-7b-chat",
|
| 39159 |
"arena_name": "llama-2-7b-chat",
|
| 39160 |
"arena_org": "Meta",
|
| 39161 |
-
"arena_elo":
|
| 39162 |
-
"arena_rank":
|
| 39163 |
"arena_votes": 14148
|
| 39164 |
},
|
| 39165 |
{
|
| 39166 |
"name": "olmo-7b-instruct",
|
| 39167 |
"arena_name": "olmo-7b-instruct",
|
| 39168 |
"arena_org": "Ai2",
|
| 39169 |
-
"arena_elo": 1074.
|
| 39170 |
-
"arena_rank":
|
| 39171 |
"arena_votes": 6329
|
| 39172 |
},
|
| 39173 |
{
|
| 39174 |
"name": "koala-13b",
|
| 39175 |
"arena_name": "koala-13b",
|
| 39176 |
"arena_org": "UC Berkeley",
|
| 39177 |
-
"arena_elo":
|
| 39178 |
-
"arena_rank":
|
| 39179 |
"arena_votes": 6964
|
| 39180 |
},
|
| 39181 |
{
|
| 39182 |
"name": "alpaca-13b",
|
| 39183 |
"arena_name": "alpaca-13b",
|
| 39184 |
"arena_org": "Stanford",
|
| 39185 |
-
"arena_elo":
|
| 39186 |
-
"arena_rank":
|
| 39187 |
"arena_votes": 5745
|
| 39188 |
},
|
| 39189 |
{
|
| 39190 |
"name": "gpt4all-13b-snoozy",
|
| 39191 |
"arena_name": "gpt4all-13b-snoozy",
|
| 39192 |
"arena_org": "Nomic AI",
|
| 39193 |
-
"arena_elo": 1065.
|
| 39194 |
-
"arena_rank":
|
| 39195 |
"arena_votes": 1743
|
| 39196 |
},
|
| 39197 |
{
|
| 39198 |
"name": "mpt-7b-chat",
|
| 39199 |
"arena_name": "mpt-7b-chat",
|
| 39200 |
"arena_org": "MosaicML",
|
| 39201 |
-
"arena_elo": 1061.
|
| 39202 |
-
"arena_rank":
|
| 39203 |
"arena_votes": 3925
|
| 39204 |
},
|
| 39205 |
{
|
| 39206 |
"name": "chatglm3-6b",
|
| 39207 |
"arena_name": "chatglm3-6b",
|
| 39208 |
"arena_org": "Tsinghua",
|
| 39209 |
-
"arena_elo": 1055.
|
| 39210 |
-
"arena_rank":
|
| 39211 |
"arena_votes": 4658
|
| 39212 |
},
|
| 39213 |
{
|
| 39214 |
"name": "RWKV-4-Raven-14B",
|
| 39215 |
"arena_name": "RWKV-4-Raven-14B",
|
| 39216 |
"arena_org": "RWKV",
|
| 39217 |
-
"arena_elo":
|
| 39218 |
-
"arena_rank":
|
| 39219 |
"arena_votes": 4845
|
| 39220 |
},
|
| 39221 |
{
|
| 39222 |
"name": "chatglm2-6b",
|
| 39223 |
"arena_name": "chatglm2-6b",
|
| 39224 |
"arena_org": "Tsinghua",
|
| 39225 |
-
"arena_elo":
|
| 39226 |
-
"arena_rank":
|
| 39227 |
"arena_votes": 2657
|
| 39228 |
},
|
| 39229 |
{
|
| 39230 |
"name": "oasst-pythia-12b",
|
| 39231 |
"arena_name": "oasst-pythia-12b",
|
| 39232 |
"arena_org": "OpenAssistant",
|
| 39233 |
-
"arena_elo":
|
| 39234 |
-
"arena_rank":
|
| 39235 |
"arena_votes": 6311
|
| 39236 |
},
|
| 39237 |
{
|
| 39238 |
"name": "chatglm-6b",
|
| 39239 |
"arena_name": "chatglm-6b",
|
| 39240 |
"arena_org": "Tsinghua",
|
| 39241 |
-
"arena_elo": 995.
|
| 39242 |
-
"arena_rank":
|
| 39243 |
"arena_votes": 4914
|
| 39244 |
},
|
| 39245 |
{
|
| 39246 |
"name": "fastchat-t5-3b",
|
| 39247 |
"arena_name": "fastchat-t5-3b",
|
| 39248 |
"arena_org": "LMSYS",
|
| 39249 |
-
"arena_elo":
|
| 39250 |
-
"arena_rank":
|
| 39251 |
"arena_votes": 4203
|
| 39252 |
},
|
| 39253 |
{
|
| 39254 |
"name": "stablelm-tuned-alpha-7b",
|
| 39255 |
"arena_name": "stablelm-tuned-alpha-7b",
|
| 39256 |
"arena_org": "Stability AI",
|
| 39257 |
-
"arena_elo": 952.
|
| 39258 |
-
"arena_rank":
|
| 39259 |
"arena_votes": 3287
|
| 39260 |
},
|
| 39261 |
{
|
|
|
|
| 7 |
"gpqa": 0.369,
|
| 8 |
"arc": 0.93,
|
| 9 |
"gsm8k": 0.87,
|
| 10 |
+
"arena_elo": 1288.48,
|
| 11 |
+
"arena_rank": 200,
|
| 12 |
"arena_votes": 8659
|
| 13 |
},
|
| 14 |
{
|
|
|
|
| 19 |
"gpqa": 0.323,
|
| 20 |
"arc": 0.857,
|
| 21 |
"gsm8k": 0.758,
|
| 22 |
+
"arena_elo": 1238.74,
|
| 23 |
+
"arena_rank": 232,
|
| 24 |
"arena_votes": 8854
|
| 25 |
},
|
| 26 |
{
|
|
|
|
| 82 |
"lb_language": 0.39707333333333333,
|
| 83 |
"lb_if": 0.6187925,
|
| 84 |
"lb_data_analysis": 0.5411900000000001,
|
| 85 |
+
"arena_elo": 1323.1,
|
| 86 |
+
"arena_rank": 165,
|
| 87 |
+
"arena_votes": 70951,
|
| 88 |
"aider_pass_rate": 0.617
|
| 89 |
},
|
| 90 |
{
|
|
|
|
| 106 |
"lb_language": 0.56937,
|
| 107 |
"lb_if": 0.7229999999999999,
|
| 108 |
"lb_data_analysis": 0.5411,
|
| 109 |
+
"arena_elo": 1342.05,
|
| 110 |
+
"arena_rank": 142,
|
| 111 |
"arena_votes": 82417,
|
| 112 |
"aider_pass_rate": 0.5710000000000001
|
| 113 |
},
|
|
|
|
| 132 |
"lb_language": 0.54477,
|
| 133 |
"lb_if": 0.69296,
|
| 134 |
"lb_data_analysis": 0.5618650000000001,
|
| 135 |
+
"arena_elo": 1371.91,
|
| 136 |
+
"arena_rank": 110,
|
| 137 |
+
"arena_votes": 89270,
|
| 138 |
"aider_pass_rate": 0.6920000000000001
|
| 139 |
},
|
| 140 |
{
|
|
|
|
| 157 |
"lb_language": 0.30073333333333335,
|
| 158 |
"lb_if": 0.6402924999999999,
|
| 159 |
"lb_data_analysis": 0.3731,
|
| 160 |
+
"arena_elo": 1260.71,
|
| 161 |
+
"arena_rank": 224,
|
| 162 |
"arena_votes": 117705,
|
| 163 |
"aider_pass_rate": 0.406
|
| 164 |
},
|
|
|
|
| 183 |
"lb_language": 0.53574,
|
| 184 |
"lb_if": 0.6388750000000001,
|
| 185 |
"lb_data_analysis": 0.5784,
|
| 186 |
+
"arena_elo": 1321.55,
|
| 187 |
+
"arena_rank": 170,
|
| 188 |
"arena_votes": 194904,
|
| 189 |
"aider_pass_rate": 0.534
|
| 190 |
},
|
|
|
|
| 209 |
"lb_language": 0.38083333333333336,
|
| 210 |
"lb_if": 0.6500425,
|
| 211 |
"lb_data_analysis": 0.38839999999999997,
|
| 212 |
+
"arena_elo": 1280.61,
|
| 213 |
+
"arena_rank": 209,
|
| 214 |
"arena_votes": 109289,
|
| 215 |
"aider_pass_rate": 0.436
|
| 216 |
},
|
|
|
|
| 228 |
"lb_language": 0.14644333333333334,
|
| 229 |
"lb_if": 0.571625,
|
| 230 |
"lb_data_analysis": 0.2354,
|
| 231 |
+
"arena_elo": 1226.58,
|
| 232 |
+
"arena_rank": 240,
|
| 233 |
"arena_votes": 54038
|
| 234 |
},
|
| 235 |
{
|
|
|
|
| 249 |
"lb_language": 0.5477066666666667,
|
| 250 |
"lb_if": 0.8050825,
|
| 251 |
"lb_data_analysis": 0.69625,
|
| 252 |
+
"arena_elo": 1397.59,
|
| 253 |
+
"arena_rank": 82,
|
| 254 |
"arena_votes": 18537
|
| 255 |
},
|
| 256 |
{
|
|
|
|
| 268 |
"lb_language": 0.3518266666666667,
|
| 269 |
"lb_if": 0.6915024999999999,
|
| 270 |
"lb_data_analysis": 0.4417,
|
| 271 |
+
"arena_elo": 1306.88,
|
| 272 |
+
"arena_rank": 188,
|
| 273 |
"arena_votes": 24574,
|
| 274 |
"aider_pass_rate": 0.5489999999999999
|
| 275 |
},
|
|
|
|
| 289 |
"lb_language": 0.47484666666666664,
|
| 290 |
"lb_if": 0.75246,
|
| 291 |
"lb_data_analysis": 0.6241,
|
| 292 |
+
"arena_elo": 1358.39,
|
| 293 |
+
"arena_rank": 121,
|
| 294 |
"arena_votes": 21788
|
| 295 |
},
|
| 296 |
{
|
|
|
|
| 385 |
"lb_language": 0.32621666666666665,
|
| 386 |
"lb_if": 0.5810025000000001,
|
| 387 |
"lb_data_analysis": 0.4481,
|
| 388 |
+
"arena_elo": 1287.89,
|
| 389 |
+
"arena_rank": 201,
|
| 390 |
"arena_votes": 75764
|
| 391 |
},
|
| 392 |
{
|
|
|
|
| 415 |
"lb_language": 0.25531,
|
| 416 |
"lb_if": 0.52621,
|
| 417 |
"lb_data_analysis": 0.2959,
|
| 418 |
+
"arena_elo": 1265.32,
|
| 419 |
+
"arena_rank": 220,
|
| 420 |
"arena_votes": 54615
|
| 421 |
},
|
| 422 |
{
|
|
|
|
| 452 |
"hf_math_lvl5": 0.3806646525679758,
|
| 453 |
"hf_musr": 0.45806250000000004,
|
| 454 |
"hf_avg": 43.409948245645786,
|
| 455 |
+
"arena_elo": 1293.3,
|
| 456 |
+
"arena_rank": 198,
|
| 457 |
"arena_votes": 55234,
|
| 458 |
"aider_pass_rate": 0.436
|
| 459 |
},
|
|
|
|
| 477 |
"hf_math_lvl5": 0.1729607250755287,
|
| 478 |
"hf_musr": 0.3845416666666666,
|
| 479 |
"hf_avg": 28.01011138792457,
|
| 480 |
+
"arena_elo": 1211.38,
|
| 481 |
+
"arena_rank": 250,
|
| 482 |
"arena_votes": 49605,
|
| 483 |
"aider_pass_rate": 0.263
|
| 484 |
},
|
|
|
|
| 509 |
"hf_math_lvl5": 0.17673716012084592,
|
| 510 |
"hf_musr": 0.3528541666666667,
|
| 511 |
"hf_avg": 24.204650807793456,
|
| 512 |
+
"arena_elo": 1166.35,
|
| 513 |
+
"arena_rank": 276,
|
| 514 |
"arena_votes": 7936
|
| 515 |
},
|
| 516 |
{
|
|
|
|
| 538 |
"hf_math_lvl5": 0.48338368580060426,
|
| 539 |
"hf_musr": 0.44612500000000005,
|
| 540 |
"hf_avg": 44.84747145129876,
|
| 541 |
+
"arena_elo": 1318.98,
|
| 542 |
+
"arena_rank": 174,
|
| 543 |
+
"arena_votes": 55436,
|
| 544 |
"aider_pass_rate": 0.42100000000000004
|
| 545 |
},
|
| 546 |
{
|
|
|
|
| 628 |
"lb_language": 0.29333666666666663,
|
| 629 |
"lb_if": 0.5838349999999999,
|
| 630 |
"lb_data_analysis": 0.4376,
|
| 631 |
+
"arena_elo": 1255.91,
|
| 632 |
+
"arena_rank": 227,
|
| 633 |
"arena_votes": 24126
|
| 634 |
},
|
| 635 |
{
|
|
|
|
| 729 |
"lb_language": 0.31805,
|
| 730 |
"lb_if": 0.6957099999999999,
|
| 731 |
"lb_data_analysis": 0.33599999999999997,
|
| 732 |
+
"arena_elo": 1298.6,
|
| 733 |
+
"arena_rank": 196,
|
| 734 |
"arena_votes": 7136
|
| 735 |
},
|
| 736 |
{
|
|
|
|
| 750 |
"lb_language": 0.2421633333333333,
|
| 751 |
"lb_if": 0.604665,
|
| 752 |
"lb_data_analysis": 0.3682,
|
| 753 |
+
"arena_elo": 1223.58,
|
| 754 |
+
"arena_rank": 242,
|
| 755 |
"arena_votes": 66191,
|
| 756 |
"aider_pass_rate": 0.414
|
| 757 |
},
|
|
|
|
| 773 |
"lb_language": 0.4956833333333333,
|
| 774 |
"lb_if": 0.717875,
|
| 775 |
"lb_data_analysis": 0.39039999999999997,
|
| 776 |
+
"arena_elo": 1274.69,
|
| 777 |
+
"arena_rank": 214,
|
| 778 |
"arena_votes": 88721,
|
| 779 |
"aider_pass_rate": 0.466
|
| 780 |
},
|
|
|
|
| 794 |
"lb_language": 0.45262,
|
| 795 |
"lb_if": 0.713875,
|
| 796 |
"lb_data_analysis": 0.4998,
|
| 797 |
+
"arena_elo": 1323.92,
|
| 798 |
+
"arena_rank": 163,
|
| 799 |
"arena_votes": 98130
|
| 800 |
},
|
| 801 |
{
|
|
|
|
| 815 |
"lb_language": 0.5393533333333334,
|
| 816 |
"lb_if": 0.7217100000000001,
|
| 817 |
"lb_data_analysis": 0.4661,
|
| 818 |
+
"arena_elo": 1345.52,
|
| 819 |
+
"arena_rank": 140,
|
| 820 |
"arena_votes": 112863,
|
| 821 |
"aider_pass_rate": 0.602
|
| 822 |
},
|
|
|
|
| 834 |
"lb_language": 0.4563466666666667,
|
| 835 |
"lb_if": 0.6858299999999999,
|
| 836 |
"lb_data_analysis": 0.6236999999999999,
|
| 837 |
+
"arena_elo": 1334.74,
|
| 838 |
+
"arena_rank": 152,
|
| 839 |
"arena_votes": 45498,
|
| 840 |
"aider_pass_rate": 0.5710000000000001
|
| 841 |
},
|
|
|
|
| 857 |
"lb_language": 0.29879333333333336,
|
| 858 |
"lb_if": 0.5679974999999999,
|
| 859 |
"lb_data_analysis": 0.55099,
|
| 860 |
+
"arena_elo": 1317.61,
|
| 861 |
+
"arena_rank": 176,
|
| 862 |
"arena_votes": 68794,
|
| 863 |
"aider_pass_rate": 0.406
|
| 864 |
},
|
|
|
|
| 880 |
"hf_math_lvl5": 0.37462235649546827,
|
| 881 |
"hf_musr": 0.41384375,
|
| 882 |
"hf_avg": 27.639223265636087,
|
| 883 |
+
"arena_elo": 1401.76,
|
| 884 |
+
"arena_rank": 78,
|
| 885 |
"arena_votes": 27822,
|
| 886 |
"aider_pass_rate": 0.654
|
| 887 |
},
|
|
|
|
| 892 |
"mmlu": 0.852,
|
| 893 |
"gpqa": 0.6,
|
| 894 |
"math": 0.9,
|
| 895 |
+
"arena_elo": 1336.75,
|
| 896 |
+
"arena_rank": 148,
|
| 897 |
"arena_votes": 51986,
|
| 898 |
"aider_pass_rate": 0.5
|
| 899 |
},
|
|
|
|
| 904 |
"mmlu": 0.908,
|
| 905 |
"gpqa": 0.733,
|
| 906 |
"mgsm": 0.908,
|
| 907 |
+
"arena_elo": 1388.05,
|
| 908 |
+
"arena_rank": 94,
|
| 909 |
"arena_votes": 31120,
|
| 910 |
"aider_pass_rate": 0.579
|
| 911 |
},
|
|
|
|
| 921 |
"mmlu": 0.869,
|
| 922 |
"math": 0.979,
|
| 923 |
"mgsm": 0.92,
|
| 924 |
+
"arena_elo": 1347.98,
|
| 925 |
+
"arena_rank": 132,
|
| 926 |
+
"arena_votes": 58415
|
| 927 |
},
|
| 928 |
{
|
| 929 |
"slug": "openai/o3",
|
|
|
|
| 997 |
"lb_language": 0.38114999999999993,
|
| 998 |
"lb_if": 0.7550025,
|
| 999 |
"lb_data_analysis": 0.4718,
|
| 1000 |
+
"arena_elo": 1302.52,
|
| 1001 |
+
"arena_rank": 195,
|
| 1002 |
"arena_votes": 39409
|
| 1003 |
},
|
| 1004 |
{
|
|
|
|
| 1044 |
"lb_language": 0.23245333333333332,
|
| 1045 |
"lb_if": 0.5869175,
|
| 1046 |
"lb_data_analysis": 0.48810000000000003,
|
| 1047 |
+
"arena_elo": 1270.39,
|
| 1048 |
+
"arena_rank": 218,
|
| 1049 |
"arena_votes": 5430,
|
| 1050 |
"aider_pass_rate": 0.594
|
| 1051 |
},
|
|
|
|
| 1096 |
"lb_language": 0.29213666666666666,
|
| 1097 |
"lb_if": 0.68271,
|
| 1098 |
"lb_data_analysis": 0.1636,
|
| 1099 |
+
"arena_elo": 1261.49,
|
| 1100 |
+
"arena_rank": 223,
|
| 1101 |
"arena_votes": 37325,
|
| 1102 |
"aider_pass_rate": 0.444
|
| 1103 |
},
|
|
|
|
| 1160 |
"lb_language": 0.21091000000000001,
|
| 1161 |
"lb_if": 0.35587499999999994,
|
| 1162 |
"lb_data_analysis": 0.25925,
|
| 1163 |
+
"arena_elo": 1156.81,
|
| 1164 |
+
"arena_rank": 278,
|
| 1165 |
"arena_votes": 3233
|
| 1166 |
},
|
| 1167 |
{
|
|
|
|
| 1263 |
"hf_math_lvl5": 0.277190332326284,
|
| 1264 |
"hf_musr": 0.4281979166666667,
|
| 1265 |
"hf_avg": 33.35799367075618,
|
| 1266 |
+
"arena_elo": 1213.08,
|
| 1267 |
+
"arena_rank": 248,
|
| 1268 |
"arena_votes": 24142
|
| 1269 |
},
|
| 1270 |
{
|
|
|
|
| 1386 |
"hf_math_lvl5": 0.06268882175226587,
|
| 1387 |
"hf_musr": 0.39784375,
|
| 1388 |
"hf_avg": 24.226662652803373,
|
| 1389 |
+
"arena_elo": 1183.46,
|
| 1390 |
+
"arena_rank": 262,
|
| 1391 |
"arena_votes": 15483
|
| 1392 |
},
|
| 1393 |
{
|
|
|
|
| 1673 |
"hf_math_lvl5": 0.03851963746223565,
|
| 1674 |
"hf_musr": 0.47709375000000004,
|
| 1675 |
"hf_avg": 14.343669671742774,
|
| 1676 |
+
"arena_elo": 1222.85,
|
| 1677 |
+
"arena_rank": 243,
|
| 1678 |
"arena_votes": 104636
|
| 1679 |
},
|
| 1680 |
{
|
|
|
|
| 6142 |
"lb_language": 0.07196666666666666,
|
| 6143 |
"lb_if": 0.5279175,
|
| 6144 |
"lb_data_analysis": 0.081,
|
| 6145 |
+
"arena_elo": 1126.5,
|
| 6146 |
+
"arena_rank": 299,
|
| 6147 |
"arena_votes": 1785
|
| 6148 |
},
|
| 6149 |
{
|
|
|
|
| 6164 |
"lb_language": 0.042846666666666665,
|
| 6165 |
"lb_if": 0.48317,
|
| 6166 |
"lb_data_analysis": 0.0762,
|
| 6167 |
+
"arena_elo": 1130.68,
|
| 6168 |
+
"arena_rank": 295,
|
| 6169 |
"arena_votes": 11116
|
| 6170 |
},
|
| 6171 |
{
|
|
|
|
| 6191 |
"hf_math_lvl5": 0.20468277945619334,
|
| 6192 |
"hf_musr": 0.4465208333333333,
|
| 6193 |
"hf_avg": 34.125963384670946,
|
| 6194 |
+
"arena_elo": 1212.38,
|
| 6195 |
+
"arena_rank": 249,
|
| 6196 |
"arena_votes": 4653
|
| 6197 |
},
|
| 6198 |
{
|
|
|
|
| 6290 |
"hf_math_lvl5": 0.0581570996978852,
|
| 6291 |
"hf_musr": 0.342125,
|
| 6292 |
"hf_avg": 15.02227766709556,
|
| 6293 |
+
"arena_elo": 1113.92,
|
| 6294 |
+
"arena_rank": 303,
|
| 6295 |
"arena_votes": 2201
|
| 6296 |
},
|
| 6297 |
{
|
|
|
|
| 10127 |
"hf_math_lvl5": 0.12235649546827794,
|
| 10128 |
"hf_musr": 0.4595416666666667,
|
| 10129 |
"hf_avg": 27.353190438571634,
|
| 10130 |
+
"arena_elo": 1164.33,
|
| 10131 |
+
"arena_rank": 277,
|
| 10132 |
"arena_votes": 3776
|
| 10133 |
},
|
| 10134 |
{
|
|
|
|
| 11176 |
"lb_language": 0.5148133333333332,
|
| 11177 |
"lb_if": 0.81829,
|
| 11178 |
"lb_data_analysis": 0.69529,
|
| 11179 |
+
"arena_elo": 1335.83,
|
| 11180 |
+
"arena_rank": 149,
|
| 11181 |
+
"arena_votes": 25985
|
| 11182 |
},
|
| 11183 |
{
|
| 11184 |
"hf_id": "Qwen/Qwen1.5-0.5B",
|
|
|
|
| 11272 |
"lb_language": 0.13224333333333332,
|
| 11273 |
"lb_if": 0.5526275,
|
| 11274 |
"lb_data_analysis": 0.20179999999999998,
|
| 11275 |
+
"arena_elo": 1233.68,
|
| 11276 |
+
"arena_rank": 235,
|
| 11277 |
"arena_votes": 26191,
|
| 11278 |
"aider_pass_rate": 0.308
|
| 11279 |
},
|
|
|
|
| 11300 |
"hf_math_lvl5": 0.15256797583081572,
|
| 11301 |
"hf_musr": 0.43997916666666664,
|
| 11302 |
"hf_avg": 23.566106475051374,
|
| 11303 |
+
"arena_elo": 1190.49,
|
| 11304 |
+
"arena_rank": 259,
|
| 11305 |
"arena_votes": 17841
|
| 11306 |
},
|
| 11307 |
{
|
|
|
|
| 11327 |
"hf_math_lvl5": 0.19561933534743203,
|
| 11328 |
"hf_musr": 0.4159791666666666,
|
| 11329 |
"hf_avg": 29.25746822860332,
|
| 11330 |
+
"arena_elo": 1203.52,
|
| 11331 |
+
"arena_rank": 252,
|
| 11332 |
"arena_votes": 21744
|
| 11333 |
},
|
| 11334 |
{
|
|
|
|
| 11361 |
"lb_language": 0.05798333333333333,
|
| 11362 |
"lb_if": 0.27749999999999997,
|
| 11363 |
"lb_data_analysis": 0.0469,
|
| 11364 |
+
"arena_elo": 1089.68,
|
| 11365 |
+
"arena_rank": 309,
|
| 11366 |
"arena_votes": 7598
|
| 11367 |
},
|
| 11368 |
{
|
|
|
|
| 11395 |
"lb_language": 0.061816666666666666,
|
| 11396 |
"lb_if": 0.4411675,
|
| 11397 |
"lb_data_analysis": 0.0435,
|
| 11398 |
+
"arena_elo": 1143.52,
|
| 11399 |
+
"arena_rank": 287,
|
| 11400 |
"arena_votes": 4735
|
| 11401 |
},
|
| 11402 |
{
|
|
|
|
| 16644 |
"hf_math_lvl5": 0.4501510574018127,
|
| 16645 |
"hf_musr": 0.4948333333333334,
|
| 16646 |
"hf_avg": 42.33178738532094,
|
| 16647 |
+
"arena_elo": 1286.35,
|
| 16648 |
+
"arena_rank": 205,
|
| 16649 |
"arena_votes": 2846
|
| 16650 |
},
|
| 16651 |
{
|
|
|
|
| 16695 |
"hf_math_lvl5": 0.19637462235649547,
|
| 16696 |
"hf_musr": 0.41746875,
|
| 16697 |
"hf_avg": 26.034998081672143,
|
| 16698 |
+
"arena_elo": 1220.4,
|
| 16699 |
+
"arena_rank": 247,
|
| 16700 |
"arena_votes": 2895
|
| 16701 |
},
|
| 16702 |
{
|
|
|
|
| 18637 |
"hf_math_lvl5": 0.08383685800604229,
|
| 18638 |
"hf_musr": 0.41201041666666666,
|
| 18639 |
"hf_avg": 20.83936104726783,
|
| 18640 |
+
"arena_elo": 1167.16,
|
| 18641 |
+
"arena_rank": 275,
|
| 18642 |
"arena_votes": 10224
|
| 18643 |
},
|
| 18644 |
{
|
|
|
|
| 19608 |
"hf_math_lvl5": 0.013595166163141994,
|
| 19609 |
"hf_musr": 0.37390625000000005,
|
| 19610 |
"hf_avg": 6.3704357034963754,
|
| 19611 |
+
"arena_elo": 979.528,
|
| 19612 |
+
"arena_rank": 321,
|
| 19613 |
"arena_votes": 3412
|
| 19614 |
},
|
| 19615 |
{
|
|
|
|
| 19759 |
"hf_math_lvl5": 0.09290030211480363,
|
| 19760 |
"hf_musr": 0.5058645833333334,
|
| 19761 |
"hf_avg": 27.310631874736753,
|
| 19762 |
+
"arena_elo": 1183.98,
|
| 19763 |
+
"arena_rank": 261,
|
| 19764 |
"arena_votes": 4933
|
| 19765 |
},
|
| 19766 |
{
|
|
|
|
| 21237 |
"hf_math_lvl5": 0.01812688821752266,
|
| 21238 |
"hf_musr": 0.33939583333333334,
|
| 21239 |
"hf_avg": 8.053373854341979,
|
| 21240 |
+
"arena_elo": 1113.84,
|
| 21241 |
+
"arena_rank": 304,
|
| 21242 |
"arena_votes": 10853
|
| 21243 |
},
|
| 21244 |
{
|
|
|
|
| 21259 |
"lb_language": 0.10647333333333332,
|
| 21260 |
"lb_if": 0.443375,
|
| 21261 |
"lb_data_analysis": 0.0726,
|
| 21262 |
+
"arena_elo": 1179.92,
|
| 21263 |
+
"arena_rank": 266,
|
| 21264 |
"arena_votes": 23893
|
| 21265 |
},
|
| 21266 |
{
|
|
|
|
| 21298 |
"hf_math_lvl5": 0.0007552870090634441,
|
| 21299 |
"hf_musr": 0.39288541666666665,
|
| 21300 |
"hf_avg": 17.046939294966545,
|
| 21301 |
+
"arena_elo": 1198.62,
|
| 21302 |
+
"arena_rank": 254,
|
| 21303 |
"arena_votes": 46618
|
| 21304 |
},
|
| 21305 |
{
|
|
|
|
| 21349 |
"hf_math_lvl5": 0.02039274924471299,
|
| 21350 |
"hf_musr": 0.334125,
|
| 21351 |
"hf_avg": 7.485804130315127,
|
| 21352 |
+
"arena_elo": 1091.3,
|
| 21353 |
+
"arena_rank": 308,
|
| 21354 |
"arena_votes": 4779
|
| 21355 |
},
|
| 21356 |
{
|
|
|
|
| 21376 |
"hf_math_lvl5": 0.02945619335347432,
|
| 21377 |
"hf_musr": 0.42742708333333335,
|
| 21378 |
"hf_avg": 13.067087110466217,
|
| 21379 |
+
"arena_elo": 1135.7,
|
| 21380 |
+
"arena_rank": 294,
|
| 21381 |
"arena_votes": 8925
|
| 21382 |
},
|
| 21383 |
{
|
|
|
|
| 22381 |
"hf_math_lvl5": 0.02039274924471299,
|
| 22382 |
"hf_musr": 0.34621875,
|
| 22383 |
"hf_avg": 9.39218439885523,
|
| 22384 |
+
"arena_elo": 971.576,
|
| 22385 |
+
"arena_rank": 322,
|
| 22386 |
"arena_votes": 2391
|
| 22387 |
},
|
| 22388 |
{
|
|
|
|
| 22696 |
"hf_math_lvl5": 0.09214501510574018,
|
| 22697 |
"hf_musr": 0.35148958333333336,
|
| 22698 |
"hf_avg": 18.396095114284222,
|
| 22699 |
+
"arena_elo": 1155.57,
|
| 22700 |
+
"arena_rank": 279,
|
| 22701 |
"arena_votes": 6837
|
| 22702 |
},
|
| 22703 |
{
|
|
|
|
| 22747 |
"hf_math_lvl5": 0.1419939577039275,
|
| 22748 |
"hf_musr": 0.3900625,
|
| 22749 |
"hf_avg": 24.027678753483297,
|
| 22750 |
+
"arena_elo": 1181.56,
|
| 22751 |
+
"arena_rank": 265,
|
| 22752 |
"arena_votes": 6643
|
| 22753 |
},
|
| 22754 |
{
|
|
|
|
| 22798 |
"hf_math_lvl5": 0.15256797583081572,
|
| 22799 |
"hf_musr": 0.3605416666666667,
|
| 22800 |
"hf_avg": 21.712212822028288,
|
| 22801 |
+
"arena_elo": 1178.99,
|
| 22802 |
+
"arena_rank": 268,
|
| 22803 |
"arena_votes": 3191
|
| 22804 |
},
|
| 22805 |
{
|
|
|
|
| 22849 |
"hf_math_lvl5": 0.21978851963746224,
|
| 22850 |
"hf_musr": 0.47070833333333334,
|
| 22851 |
"hf_avg": 30.6030430081627,
|
| 22852 |
+
"arena_elo": 1208.23,
|
| 22853 |
+
"arena_rank": 251,
|
| 22854 |
"arena_votes": 3092
|
| 22855 |
},
|
| 22856 |
{
|
|
|
|
| 23318 |
"hf_math_lvl5": 0.4078549848942598,
|
| 23319 |
"hf_musr": 0.4558229166666667,
|
| 23320 |
"hf_avg": 38.87959582082076,
|
| 23321 |
+
"arena_elo": 1191.07,
|
| 23322 |
+
"arena_rank": 258,
|
| 23323 |
"arena_votes": 9902
|
| 23324 |
},
|
| 23325 |
{
|
|
|
|
| 25538 |
"hf_math_lvl5": 0.0702416918429003,
|
| 25539 |
"hf_musr": 0.3328541666666667,
|
| 25540 |
"hf_avg": 14.443126333711135,
|
| 25541 |
+
"arena_elo": 1110.92,
|
| 25542 |
+
"arena_rank": 305,
|
| 25543 |
"arena_votes": 8045
|
| 25544 |
},
|
| 25545 |
{
|
|
|
|
| 25719 |
"lb_language": 0.13909000000000002,
|
| 25720 |
"lb_if": 0.5330400000000001,
|
| 25721 |
"lb_data_analysis": 0.2044,
|
| 25722 |
+
"arena_elo": 1197.61,
|
| 25723 |
+
"arena_rank": 255,
|
| 25724 |
"arena_votes": 25055
|
| 25725 |
},
|
| 25726 |
{
|
|
|
|
| 25741 |
"lb_language": 0.09153666666666667,
|
| 25742 |
"lb_if": 0.39083500000000004,
|
| 25743 |
"lb_data_analysis": 0.26030000000000003,
|
| 25744 |
+
"arena_elo": 1128.79,
|
| 25745 |
+
"arena_rank": 296,
|
| 25746 |
"arena_votes": 20691
|
| 25747 |
},
|
| 25748 |
{
|
|
|
|
| 25763 |
"lb_language": 0.08559,
|
| 25764 |
"lb_if": 0.363625,
|
| 25765 |
"lb_data_analysis": 0.2232,
|
| 25766 |
+
"arena_elo": 1128,
|
| 25767 |
+
"arena_rank": 297,
|
| 25768 |
"arena_votes": 20115
|
| 25769 |
},
|
| 25770 |
{
|
|
|
|
| 25823 |
"lb_language": 0.12944,
|
| 25824 |
"lb_if": 0.472,
|
| 25825 |
"lb_data_analysis": 0.2343,
|
| 25826 |
+
"arena_elo": 1170.83,
|
| 25827 |
+
"arena_rank": 273,
|
| 25828 |
"arena_votes": 17763
|
| 25829 |
},
|
| 25830 |
{
|
|
|
|
| 26072 |
"lb_language": 0.09055,
|
| 26073 |
"lb_if": 0.5165025,
|
| 26074 |
"lb_data_analysis": 0.059300000000000005,
|
| 26075 |
+
"arena_elo": 1149.25,
|
| 26076 |
+
"arena_rank": 284,
|
| 26077 |
"arena_votes": 19402
|
| 26078 |
},
|
| 26079 |
{
|
|
|
|
| 26197 |
"lb_language": 0.26477666666666666,
|
| 26198 |
"lb_if": 0.63167,
|
| 26199 |
"lb_data_analysis": 0.255,
|
| 26200 |
+
"arena_elo": 1229.12,
|
| 26201 |
+
"arena_rank": 239,
|
| 26202 |
"arena_votes": 51417
|
| 26203 |
},
|
| 26204 |
{
|
|
|
|
| 26231 |
"lb_language": 0.13761333333333334,
|
| 26232 |
"lb_if": 0.4480825,
|
| 26233 |
"lb_data_analysis": 0.1619,
|
| 26234 |
+
"arena_elo": 1196.76,
|
| 26235 |
+
"arena_rank": 256,
|
| 26236 |
"arena_votes": 73505
|
| 26237 |
},
|
| 26238 |
{
|
|
|
|
| 28559 |
"hf_math_lvl5": 0.07628398791540786,
|
| 28560 |
"hf_musr": 0.42543749999999997,
|
| 28561 |
"hf_avg": 22.70925524673515,
|
| 28562 |
+
"arena_elo": 1181.9,
|
| 28563 |
+
"arena_rank": 263,
|
| 28564 |
"arena_votes": 12636
|
| 28565 |
},
|
| 28566 |
{
|
|
|
|
| 28598 |
"hf_math_lvl5": 0.07250755287009064,
|
| 28599 |
"hf_musr": 0.4228645833333333,
|
| 28600 |
"hf_avg": 21.635827111564595,
|
| 28601 |
+
"arena_elo": 1181.82,
|
| 28602 |
+
"arena_rank": 264,
|
| 28603 |
"arena_votes": 7967
|
| 28604 |
},
|
| 28605 |
{
|
|
|
|
| 29752 |
"hf_math_lvl5": 0.07099697885196375,
|
| 29753 |
"hf_musr": 0.41232291666666665,
|
| 29754 |
"hf_avg": 22.3449346084354,
|
| 29755 |
+
"arena_elo": 1279.28,
|
| 29756 |
+
"arena_rank": 210,
|
| 29757 |
"arena_votes": 10069
|
| 29758 |
},
|
| 29759 |
{
|
|
|
|
| 33248 |
"lb_language": 0.11368333333333334,
|
| 33249 |
"lb_if": 0.52779,
|
| 33250 |
"lb_data_analysis": 0.1738,
|
| 33251 |
+
"arena_elo": 1174.74,
|
| 33252 |
+
"arena_rank": 270,
|
| 33253 |
"arena_votes": 5006
|
| 33254 |
},
|
| 33255 |
{
|
|
|
|
| 34302 |
"hf_math_lvl5": 0.05664652567975831,
|
| 34303 |
"hf_musr": 0.3899375,
|
| 34304 |
"hf_avg": 20.57236409322395,
|
| 34305 |
+
"arena_elo": 1151.97,
|
| 34306 |
+
"arena_rank": 281,
|
| 34307 |
"arena_votes": 4155
|
| 34308 |
},
|
| 34309 |
{
|
|
|
|
| 35718 |
"lb_language": 0.2793333333333334,
|
| 35719 |
"lb_if": 0.5561674999999999,
|
| 35720 |
"lb_data_analysis": 0.39766500000000005,
|
| 35721 |
+
"arena_elo": 1249.89,
|
| 35722 |
+
"arena_rank": 229,
|
| 35723 |
"arena_votes": 10141
|
| 35724 |
},
|
| 35725 |
{
|
|
|
|
| 35731 |
"lb_language": 0.23921666666666663,
|
| 35732 |
"lb_if": 0.7150825,
|
| 35733 |
"lb_data_analysis": 0.179,
|
| 35734 |
+
"arena_elo": 1261.56,
|
| 35735 |
+
"arena_rank": 222,
|
| 35736 |
"arena_votes": 77556,
|
| 35737 |
"aider_pass_rate": 0.218
|
| 35738 |
},
|
|
|
|
| 35746 |
"lb_language": 0.3086066666666667,
|
| 35747 |
"lb_if": 0.5761225,
|
| 35748 |
"lb_data_analysis": 0.492345,
|
| 35749 |
+
"arena_elo": 1276.12,
|
| 35750 |
+
"arena_rank": 212,
|
| 35751 |
"arena_votes": 9869
|
| 35752 |
},
|
| 35753 |
{
|
|
|
|
| 35759 |
"lb_language": 0.33044,
|
| 35760 |
"lb_if": 0.6718325,
|
| 35761 |
"lb_data_analysis": 0.3438,
|
| 35762 |
+
"arena_elo": 1263.93,
|
| 35763 |
+
"arena_rank": 221,
|
| 35764 |
"arena_votes": 15147
|
| 35765 |
},
|
| 35766 |
{
|
|
|
|
| 35865 |
"lb_language": 0.43553333333333333,
|
| 35866 |
"lb_if": 0.6392074999999999,
|
| 35867 |
"lb_data_analysis": 0.5308999999999999,
|
| 35868 |
+
"arena_elo": 1312.89,
|
| 35869 |
+
"arena_rank": 182,
|
| 35870 |
"arena_votes": 93439,
|
| 35871 |
"aider_pass_rate": 0.556
|
| 35872 |
},
|
|
|
|
| 35919 |
"lb_language": 0.28744,
|
| 35920 |
"lb_if": 0.6819175000000001,
|
| 35921 |
"lb_data_analysis": 0.3983,
|
| 35922 |
+
"arena_elo": 1242.06,
|
| 35923 |
+
"arena_rank": 230,
|
| 35924 |
"arena_votes": 62437
|
| 35925 |
},
|
| 35926 |
{
|
|
|
|
| 35932 |
"lb_language": 0.3979266666666667,
|
| 35933 |
"lb_if": 0.7184575000000001,
|
| 35934 |
"lb_data_analysis": 0.42910000000000004,
|
| 35935 |
+
"arena_elo": 1313.8,
|
| 35936 |
+
"arena_rank": 181,
|
| 35937 |
"arena_votes": 45460
|
| 35938 |
},
|
| 35939 |
{
|
|
|
|
| 35965 |
"lb_language": 0.11368333333333332,
|
| 35966 |
"lb_if": 0.5824975,
|
| 35967 |
"lb_data_analysis": 0.2347,
|
| 35968 |
+
"arena_elo": 1232.81,
|
| 35969 |
+
"arena_rank": 238,
|
| 35970 |
"arena_votes": 39296
|
| 35971 |
},
|
| 35972 |
{
|
|
|
|
| 35988 |
"lb_language": 0.07264333333333332,
|
| 35989 |
"lb_if": 0.3832075,
|
| 35990 |
"lb_data_analysis": 0.03,
|
| 35991 |
+
"arena_elo": 1171.24,
|
| 35992 |
+
"arena_rank": 272,
|
| 35993 |
"arena_votes": 16057
|
| 35994 |
},
|
| 35995 |
{
|
|
|
|
| 36044 |
"lb_language": 0.29534,
|
| 36045 |
"lb_if": 0.8454575,
|
| 36046 |
"lb_data_analysis": 0.39359999999999995,
|
| 36047 |
+
"arena_elo": 1309.58,
|
| 36048 |
+
"arena_rank": 185,
|
| 36049 |
"arena_votes": 34909
|
| 36050 |
},
|
| 36051 |
{
|
|
|
|
| 36057 |
"lb_language": 0.4740566666666666,
|
| 36058 |
"lb_if": 0.7774575,
|
| 36059 |
"lb_data_analysis": 0.5145,
|
| 36060 |
+
"arena_elo": 1351.1,
|
| 36061 |
+
"arena_rank": 129,
|
| 36062 |
"arena_votes": 55607,
|
| 36063 |
"aider_pass_rate": 0.496
|
| 36064 |
},
|
|
|
|
| 36212 |
"lb_language": 0.3427633333333333,
|
| 36213 |
"lb_if": 0.782835,
|
| 36214 |
"lb_data_analysis": 0.5621,
|
| 36215 |
+
"arena_elo": 1353.18,
|
| 36216 |
+
"arena_rank": 126,
|
| 36217 |
"arena_votes": 24951
|
| 36218 |
},
|
| 36219 |
{
|
|
|
|
| 36298 |
"lb_language": 0.40453333333333336,
|
| 36299 |
"lb_if": 0.67929,
|
| 36300 |
"lb_data_analysis": 0.541955,
|
| 36301 |
+
"arena_elo": 1304.91,
|
| 36302 |
+
"arena_rank": 191,
|
| 36303 |
"arena_votes": 28081,
|
| 36304 |
"aider_pass_rate": 0.466
|
| 36305 |
},
|
|
|
|
| 36404 |
"lb_language": 0.5836933333333333,
|
| 36405 |
"lb_if": 0.7534574999999999,
|
| 36406 |
"lb_data_analysis": 0.64271,
|
| 36407 |
+
"arena_elo": 1374.19,
|
| 36408 |
+
"arena_rank": 108,
|
| 36409 |
+
"arena_votes": 33189
|
| 36410 |
},
|
| 36411 |
{
|
| 36412 |
"name": "gpt-4.5-preview",
|
|
|
|
| 36449 |
"lb_language": 0.41314333333333336,
|
| 36450 |
"lb_if": 0.7490399999999999,
|
| 36451 |
"lb_data_analysis": 0.387965,
|
| 36452 |
+
"arena_elo": 1365.15,
|
| 36453 |
+
"arena_rank": 117,
|
| 36454 |
+
"arena_votes": 48420
|
| 36455 |
},
|
| 36456 |
{
|
| 36457 |
"name": "gemini-2.5-pro-exp-03-25",
|
|
|
|
| 36473 |
"lb_language": 0.4682266666666666,
|
| 36474 |
"lb_if": 0.8147075,
|
| 36475 |
"lb_data_analysis": 0.64019,
|
| 36476 |
+
"arena_elo": 1394.11,
|
| 36477 |
+
"arena_rank": 86,
|
| 36478 |
+
"arena_votes": 46409
|
| 36479 |
},
|
| 36480 |
{
|
| 36481 |
"name": "chatgpt-4o-latest-2025-03-27",
|
|
|
|
| 36519 |
"lb_language": 0.6319400000000001,
|
| 36520 |
"lb_if": 0.764915,
|
| 36521 |
"lb_data_analysis": 0.599645,
|
| 36522 |
+
"arena_elo": 1370.93,
|
| 36523 |
+
"arena_rank": 112,
|
| 36524 |
+
"arena_votes": 44242
|
| 36525 |
},
|
| 36526 |
{
|
| 36527 |
"name": "command-a-03-2025",
|
|
|
|
| 36533 |
"lb_language": 0.36696,
|
| 36534 |
"lb_if": 0.82904,
|
| 36535 |
"lb_data_analysis": 0.48457000000000006,
|
| 36536 |
+
"arena_elo": 1353.03,
|
| 36537 |
+
"arena_rank": 127,
|
| 36538 |
+
"arena_votes": 57059
|
| 36539 |
},
|
| 36540 |
{
|
| 36541 |
"name": "gemini-1.5-flash-8b-001",
|
|
|
|
| 36546 |
"lb_language": 0.22867666666666667,
|
| 36547 |
"lb_if": 0.6971649999999999,
|
| 36548 |
"lb_data_analysis": 0.4241,
|
| 36549 |
+
"arena_elo": 1258.54,
|
| 36550 |
+
"arena_rank": 226,
|
| 36551 |
"arena_votes": 35556
|
| 36552 |
},
|
| 36553 |
{
|
|
|
|
| 36559 |
"lb_language": 0.42386999999999997,
|
| 36560 |
"lb_if": 0.8578749999999999,
|
| 36561 |
"lb_data_analysis": 0.7332000000000001,
|
| 36562 |
+
"arena_elo": 1360.76,
|
| 36563 |
+
"arena_rank": 120,
|
| 36564 |
+
"arena_votes": 44666
|
| 36565 |
},
|
| 36566 |
{
|
| 36567 |
"name": "gemini-2.0-flash-lite-001",
|
|
|
|
| 36583 |
"lb_language": 0.3126966666666667,
|
| 36584 |
"lb_if": 0.7382925,
|
| 36585 |
"lb_data_analysis": 0.4284,
|
| 36586 |
+
"arena_elo": 1341.56,
|
| 36587 |
+
"arena_rank": 143,
|
| 36588 |
"arena_votes": 3829
|
| 36589 |
},
|
| 36590 |
{
|
|
|
|
| 36596 |
"lb_language": 0.15059333333333333,
|
| 36597 |
"lb_if": 0.6358325,
|
| 36598 |
"lb_data_analysis": 0.36950000000000005,
|
| 36599 |
+
"arena_elo": 1303.19,
|
| 36600 |
+
"arena_rank": 194,
|
| 36601 |
"arena_votes": 4177
|
| 36602 |
},
|
| 36603 |
{
|
|
|
|
| 36610 |
"lb_language": 0.6475866666666666,
|
| 36611 |
"lb_if": 0.72325,
|
| 36612 |
"lb_data_analysis": 0.600695,
|
| 36613 |
+
"arena_elo": 1444.19,
|
| 36614 |
+
"arena_rank": 28,
|
| 36615 |
"arena_votes": 14549
|
| 36616 |
},
|
| 36617 |
{
|
|
|
|
| 36623 |
"lb_language": 0.4964766666666667,
|
| 36624 |
"lb_if": 0.7574575,
|
| 36625 |
"lb_data_analysis": 0.6155,
|
| 36626 |
+
"arena_elo": 1327.43,
|
| 36627 |
+
"arena_rank": 160,
|
| 36628 |
+
"arena_votes": 40913
|
| 36629 |
},
|
| 36630 |
{
|
| 36631 |
"name": "grok-3-beta",
|
|
|
|
| 36659 |
"lb_language": 0.54551,
|
| 36660 |
"lb_if": 0.7704575,
|
| 36661 |
"lb_data_analysis": 0.66404,
|
| 36662 |
+
"arena_elo": 1413.31,
|
| 36663 |
+
"arena_rank": 63,
|
| 36664 |
+
"arena_votes": 51800
|
| 36665 |
},
|
| 36666 |
{
|
| 36667 |
"name": "gpt-4.1-mini-2025-04-14",
|
|
|
|
| 36673 |
"lb_language": 0.37996,
|
| 36674 |
"lb_if": 0.7030825,
|
| 36675 |
"lb_data_analysis": 0.6133799999999999,
|
| 36676 |
+
"arena_elo": 1381.67,
|
| 36677 |
+
"arena_rank": 103,
|
| 36678 |
+
"arena_votes": 40288
|
| 36679 |
},
|
| 36680 |
{
|
| 36681 |
"name": "gpt-4.1-nano-2025-04-14",
|
|
|
|
| 36687 |
"lb_language": 0.30958,
|
| 36688 |
"lb_if": 0.5753725,
|
| 36689 |
"lb_data_analysis": 0.498195,
|
| 36690 |
+
"arena_elo": 1321.62,
|
| 36691 |
+
"arena_rank": 169,
|
| 36692 |
"arena_votes": 6107
|
| 36693 |
},
|
| 36694 |
{
|
|
|
|
| 36778 |
"lb_language": 0.6482266666666666,
|
| 36779 |
"lb_if": 0.79954,
|
| 36780 |
"lb_data_analysis": 0.7153849999999999,
|
| 36781 |
+
"arena_elo": 1419.36,
|
| 36782 |
+
"arena_rank": 51,
|
| 36783 |
+
"arena_votes": 19153
|
| 36784 |
},
|
| 36785 |
{
|
| 36786 |
"name": "gemini-2.5-flash-preview-05-20",
|
|
|
|
| 36847 |
"lb_language": 0.44743666666666665,
|
| 36848 |
"lb_if": 0.7139575,
|
| 36849 |
"lb_data_analysis": 0.602025,
|
| 36850 |
+
"arena_elo": 1384.57,
|
| 36851 |
+
"arena_rank": 100,
|
| 36852 |
+
"arena_votes": 34358
|
| 36853 |
},
|
| 36854 |
{
|
| 36855 |
"name": "phi-4-reasoning-plus",
|
|
|
|
| 36883 |
"lb_language": 0.60609,
|
| 36884 |
"lb_if": 0.8772925,
|
| 36885 |
"lb_data_analysis": 0.68308,
|
| 36886 |
+
"arena_elo": 1374.68,
|
| 36887 |
+
"arena_rank": 107,
|
| 36888 |
+
"arena_votes": 27000
|
| 36889 |
},
|
| 36890 |
{
|
| 36891 |
"name": "qwen3-30b-a3b",
|
|
|
|
| 36897 |
"lb_language": 0.54465,
|
| 36898 |
"lb_if": 0.21108249999999998,
|
| 36899 |
"lb_data_analysis": 0.44922666666666666,
|
| 36900 |
+
"arena_elo": 1328.1,
|
| 36901 |
+
"arena_rank": 159,
|
| 36902 |
+
"arena_votes": 27260
|
| 36903 |
},
|
| 36904 |
{
|
| 36905 |
"name": "qwen3-32b",
|
|
|
|
| 36911 |
"lb_language": 0.5554233333333333,
|
| 36912 |
"lb_if": 0.1777075,
|
| 36913 |
"lb_data_analysis": 0.4654,
|
| 36914 |
+
"arena_elo": 1347.03,
|
| 36915 |
+
"arena_rank": 136,
|
| 36916 |
"arena_votes": 3932
|
| 36917 |
},
|
| 36918 |
{
|
|
|
|
| 36936 |
"lb_language": 0.7599833333333333,
|
| 36937 |
"lb_if": 0.2352075,
|
| 36938 |
"lb_data_analysis": 0.47005,
|
| 36939 |
+
"arena_elo": 1450.71,
|
| 36940 |
"arena_rank": 21,
|
| 36941 |
+
"arena_votes": 48736
|
| 36942 |
},
|
| 36943 |
{
|
| 36944 |
"name": "deepseek-v3.1-terminus",
|
|
|
|
| 36950 |
"lb_language": 0.63882,
|
| 36951 |
"lb_if": 0.8189575,
|
| 36952 |
"lb_data_analysis": 0.67298,
|
| 36953 |
+
"arena_elo": 1415.86,
|
| 36954 |
+
"arena_rank": 58,
|
| 36955 |
+
"arena_votes": 3744
|
| 36956 |
},
|
| 36957 |
{
|
| 36958 |
"name": "gemini-2.5-flash-06-05",
|
|
|
|
| 36997 |
"lb_language": 0.6534300000000001,
|
| 36998 |
"lb_if": 0.2767925,
|
| 36999 |
"lb_data_analysis": 0.6098266666666666,
|
| 37000 |
+
"arena_elo": 1404.39,
|
| 37001 |
+
"arena_rank": 72,
|
| 37002 |
+
"arena_votes": 32518
|
| 37003 |
},
|
| 37004 |
{
|
| 37005 |
"name": "gemini-2.5-pro-06-05",
|
|
|
|
| 37022 |
"lb_language": 0.6162266666666666,
|
| 37023 |
"lb_if": 0.8157925,
|
| 37024 |
"lb_data_analysis": 0.6628999999999999,
|
| 37025 |
+
"arena_elo": 1410.2,
|
| 37026 |
+
"arena_rank": 69,
|
| 37027 |
+
"arena_votes": 24595
|
| 37028 |
},
|
| 37029 |
{
|
| 37030 |
"name": "glm-4.5-air",
|
|
|
|
| 37036 |
"lb_language": 0.44289666666666666,
|
| 37037 |
"lb_if": 0.7883775000000001,
|
| 37038 |
"lb_data_analysis": 0.65962,
|
| 37039 |
+
"arena_elo": 1371.84,
|
| 37040 |
+
"arena_rank": 111,
|
| 37041 |
+
"arena_votes": 31132
|
| 37042 |
},
|
| 37043 |
{
|
| 37044 |
"name": "glm-4.6",
|
|
|
|
| 37050 |
"lb_language": 0.5898633333333333,
|
| 37051 |
"lb_if": 0.26192,
|
| 37052 |
"lb_data_analysis": 0.5194766666666667,
|
| 37053 |
+
"arena_elo": 1425.1,
|
| 37054 |
+
"arena_rank": 42,
|
| 37055 |
+
"arena_votes": 35102
|
| 37056 |
},
|
| 37057 |
{
|
| 37058 |
"name": "gpt-5",
|
|
|
|
| 37086 |
"lb_language": 0.8082699999999999,
|
| 37087 |
"lb_if": 0.8811249999999999,
|
| 37088 |
"lb_data_analysis": 0.716345,
|
| 37089 |
+
"arena_elo": 1433.87,
|
| 37090 |
+
"arena_rank": 36,
|
| 37091 |
+
"arena_votes": 32325
|
| 37092 |
},
|
| 37093 |
{
|
| 37094 |
"name": "gpt-5-low",
|
|
|
|
| 37122 |
"lb_language": 0.7552066666666667,
|
| 37123 |
"lb_if": 0.65271,
|
| 37124 |
"lb_data_analysis": 0.55195,
|
| 37125 |
+
"arena_elo": 1390,
|
| 37126 |
+
"arena_rank": 91,
|
| 37127 |
+
"arena_votes": 26935
|
| 37128 |
},
|
| 37129 |
{
|
| 37130 |
"name": "gpt-5-mini-low",
|
|
|
|
| 37180 |
"lb_language": 0.46841666666666665,
|
| 37181 |
"lb_if": 0.5569975,
|
| 37182 |
"lb_data_analysis": 0.4340566666666667,
|
| 37183 |
+
"arena_elo": 1337.45,
|
| 37184 |
+
"arena_rank": 147,
|
| 37185 |
+
"arena_votes": 8348
|
| 37186 |
},
|
| 37187 |
{
|
| 37188 |
"name": "gpt-5-nano-low",
|
|
|
|
| 37216 |
"lb_language": 0.62963,
|
| 37217 |
"lb_if": 0.7300425,
|
| 37218 |
"lb_data_analysis": 0.644815,
|
| 37219 |
+
"arena_elo": 1425.99,
|
| 37220 |
+
"arena_rank": 41,
|
| 37221 |
+
"arena_votes": 31590
|
| 37222 |
},
|
| 37223 |
{
|
| 37224 |
"name": "gpt-oss-120b",
|
|
|
|
| 37230 |
"lb_language": 0.48590666666666665,
|
| 37231 |
"lb_if": 0.5029175,
|
| 37232 |
"lb_data_analysis": 0.38804999999999995,
|
| 37233 |
+
"arena_elo": 1354.07,
|
| 37234 |
+
"arena_rank": 125,
|
| 37235 |
+
"arena_votes": 30747
|
| 37236 |
},
|
| 37237 |
{
|
| 37238 |
"name": "grok-4-0709",
|
|
|
|
| 37244 |
"lb_language": 0.76388,
|
| 37245 |
"lb_if": 0.29075,
|
| 37246 |
"lb_data_analysis": 0.6337666666666667,
|
| 37247 |
+
"arena_elo": 1409.36,
|
| 37248 |
+
"arena_rank": 70,
|
| 37249 |
+
"arena_votes": 41749
|
| 37250 |
},
|
| 37251 |
{
|
| 37252 |
"name": "grok-code-fast-1-0825",
|
|
|
|
| 37280 |
"lb_language": 0.6606966666666668,
|
| 37281 |
"lb_if": 0.2172075,
|
| 37282 |
"lb_data_analysis": 0.4471566666666667,
|
| 37283 |
+
"arena_elo": 1422.36,
|
| 37284 |
+
"arena_rank": 47,
|
| 37285 |
+
"arena_votes": 73293
|
| 37286 |
},
|
| 37287 |
{
|
| 37288 |
"name": "qwen3-235b-a22b-thinking-2507",
|
|
|
|
| 37294 |
"lb_language": 0.6952366666666666,
|
| 37295 |
"lb_if": 0.40641999999999995,
|
| 37296 |
"lb_data_analysis": 0.5218266666666667,
|
| 37297 |
+
"arena_elo": 1398.69,
|
| 37298 |
+
"arena_rank": 81,
|
| 37299 |
+
"arena_votes": 9177
|
| 37300 |
},
|
| 37301 |
{
|
| 37302 |
"name": "qwen3-coder-480b-a35b-instruct",
|
|
|
|
| 37308 |
"lb_language": 0.6426233333333333,
|
| 37309 |
"lb_if": 0.741625,
|
| 37310 |
"lb_data_analysis": 0.64683,
|
| 37311 |
+
"arena_elo": 1386.43,
|
| 37312 |
+
"arena_rank": 98,
|
| 37313 |
+
"arena_votes": 26393
|
| 37314 |
},
|
| 37315 |
{
|
| 37316 |
"name": "qwen3-max-2025-09-23",
|
|
|
|
| 37322 |
"lb_language": 0.7144733333333333,
|
| 37323 |
"lb_if": 0.76546,
|
| 37324 |
"lb_data_analysis": 0.6536649999999999,
|
| 37325 |
+
"arena_elo": 1424.61,
|
| 37326 |
+
"arena_rank": 43,
|
| 37327 |
+
"arena_votes": 9168
|
| 37328 |
},
|
| 37329 |
{
|
| 37330 |
"name": "qwen3-next-80b-a3b-instruct",
|
|
|
|
| 37336 |
"lb_language": 0.6633766666666666,
|
| 37337 |
"lb_if": 0.191875,
|
| 37338 |
"lb_data_analysis": 0.49784,
|
| 37339 |
+
"arena_elo": 1401.82,
|
| 37340 |
+
"arena_rank": 77,
|
| 37341 |
"arena_votes": 22670
|
| 37342 |
},
|
| 37343 |
{
|
|
|
|
| 37350 |
"lb_language": 0.5631166666666667,
|
| 37351 |
"lb_if": 0.41541999999999996,
|
| 37352 |
"lb_data_analysis": 0.5358333333333333,
|
| 37353 |
+
"arena_elo": 1368.83,
|
| 37354 |
+
"arena_rank": 113,
|
| 37355 |
"arena_votes": 13767
|
| 37356 |
},
|
| 37357 |
{
|
|
|
|
| 37364 |
"lb_language": 0.5704566666666667,
|
| 37365 |
"lb_if": 0.17754250000000002,
|
| 37366 |
"lb_data_analysis": 0.45124999999999993,
|
| 37367 |
+
"arena_elo": 1406.44,
|
| 37368 |
+
"arena_rank": 71,
|
| 37369 |
+
"arena_votes": 49326
|
| 37370 |
},
|
| 37371 |
{
|
| 37372 |
"name": "deepseek-v3.2-exp",
|
|
|
|
| 37378 |
"lb_language": 0.65596,
|
| 37379 |
"lb_if": 0.1932925,
|
| 37380 |
"lb_data_analysis": 0.4425866666666667,
|
| 37381 |
+
"arena_elo": 1423.79,
|
| 37382 |
+
"arena_rank": 44,
|
| 37383 |
+
"arena_votes": 11672
|
| 37384 |
},
|
| 37385 |
{
|
| 37386 |
"name": "minimax-m2",
|
|
|
|
| 37392 |
"lb_language": 0.47647666666666666,
|
| 37393 |
"lb_if": 0.810165,
|
| 37394 |
"lb_data_analysis": 0.6755800000000001,
|
| 37395 |
+
"arena_elo": 1346.88,
|
| 37396 |
+
"arena_rank": 137,
|
| 37397 |
+
"arena_votes": 6684
|
| 37398 |
},
|
| 37399 |
{
|
| 37400 |
"name": "kimi-k2",
|
|
|
|
| 37516 |
"lb_language": 0.7432699999999999,
|
| 37517 |
"lb_if": 0.28204,
|
| 37518 |
"lb_data_analysis": 0.52238,
|
| 37519 |
+
"arena_elo": 1430.45,
|
| 37520 |
+
"arena_rank": 39,
|
| 37521 |
+
"arena_votes": 33097
|
| 37522 |
},
|
| 37523 |
{
|
| 37524 |
"name": "claude-opus-4-5-20251101",
|
|
|
|
| 37530 |
"lb_language": 0.7709166666666668,
|
| 37531 |
"lb_if": 0.26591750000000003,
|
| 37532 |
"lb_data_analysis": 0.4561233333333334,
|
| 37533 |
+
"arena_elo": 1467.05,
|
| 37534 |
"arena_rank": 11,
|
| 37535 |
+
"arena_votes": 37462
|
| 37536 |
},
|
| 37537 |
{
|
| 37538 |
"name": "deepseek-v3.2",
|
|
|
|
| 37544 |
"lb_language": 0.6423933333333333,
|
| 37545 |
"lb_if": 0.230625,
|
| 37546 |
"lb_data_analysis": 0.45034,
|
| 37547 |
+
"arena_elo": 1420.9,
|
| 37548 |
+
"arena_rank": 49,
|
| 37549 |
+
"arena_votes": 32541
|
| 37550 |
},
|
| 37551 |
{
|
| 37552 |
"name": "deepseek-v3.2-speciale",
|
|
|
|
| 37602 |
"lb_language": 0.49737666666666663,
|
| 37603 |
"lb_if": 0.1706225,
|
| 37604 |
"lb_data_analysis": 0.46410666666666667,
|
| 37605 |
+
"arena_elo": 1377.87,
|
| 37606 |
+
"arena_rank": 105,
|
| 37607 |
+
"arena_votes": 2784
|
| 37608 |
},
|
| 37609 |
{
|
| 37610 |
"name": "gpt-5.1-2025-11-13-low",
|
|
|
|
| 37715 |
"lb_language": 0.6522633333333333,
|
| 37716 |
"lb_if": 0.3565825,
|
| 37717 |
"lb_data_analysis": 0.5517133333333334,
|
| 37718 |
+
"arena_elo": 1440.97,
|
| 37719 |
+
"arena_rank": 31,
|
| 37720 |
+
"arena_votes": 11936
|
| 37721 |
},
|
| 37722 |
{
|
| 37723 |
"name": "arcee-trinity-large-preview",
|
|
|
|
| 37740 |
"lb_language": 0.8326966666666666,
|
| 37741 |
"lb_if": 0.633125,
|
| 37742 |
"lb_data_analysis": 0.6989299999999999,
|
| 37743 |
+
"arena_elo": 1503.5,
|
| 37744 |
+
"arena_rank": 1,
|
| 37745 |
+
"arena_votes": 9170
|
| 37746 |
},
|
| 37747 |
{
|
| 37748 |
"name": "claude-sonnet-4-6",
|
|
|
|
| 37754 |
"lb_language": 0.7769333333333334,
|
| 37755 |
"lb_if": 0.639165,
|
| 37756 |
"lb_data_analysis": 0.7605666666666667,
|
| 37757 |
+
"arena_elo": 1456.86,
|
| 37758 |
+
"arena_rank": 16,
|
| 37759 |
+
"arena_votes": 5509
|
| 37760 |
},
|
| 37761 |
{
|
| 37762 |
"name": "gemini-3.1-pro-preview-high",
|
|
|
|
| 37769 |
"lb_if": 0.791,
|
| 37770 |
"lb_data_analysis": 0.7854133333333334
|
| 37771 |
},
|
| 37772 |
+
{
|
| 37773 |
+
"name": "gemini-3.1-flash-lite-preview-high",
|
| 37774 |
+
"lb_name": "gemini-3.1-flash-lite-preview-high",
|
| 37775 |
+
"lb_global": 0.6211417391304349,
|
| 37776 |
+
"lb_reasoning": 0.5965875,
|
| 37777 |
+
"lb_coding": 0.474096,
|
| 37778 |
+
"lb_math": 0.7355925,
|
| 37779 |
+
"lb_language": 0.7318266666666666,
|
| 37780 |
+
"lb_if": 0.6861675,
|
| 37781 |
+
"lb_data_analysis": 0.5489700000000001
|
| 37782 |
+
},
|
| 37783 |
{
|
| 37784 |
"name": "glm-5",
|
| 37785 |
"lb_name": "glm-5",
|
|
|
|
| 37790 |
"lb_language": 0.7752800000000001,
|
| 37791 |
"lb_if": 0.5532900000000001,
|
| 37792 |
"lb_data_analysis": 0.67896,
|
| 37793 |
+
"arena_elo": 1451.66,
|
| 37794 |
+
"arena_rank": 18,
|
| 37795 |
+
"arena_votes": 8095
|
| 37796 |
},
|
| 37797 |
{
|
| 37798 |
"name": "gpt-5.2-codex",
|
|
|
|
| 37827 |
"lb_if": 0.713415,
|
| 37828 |
"lb_data_analysis": 0.49679
|
| 37829 |
},
|
| 37830 |
+
{
|
| 37831 |
+
"name": "gpt-5.3-instant",
|
| 37832 |
+
"lb_name": "gpt-5.3-instant",
|
| 37833 |
+
"lb_global": 0.5982708695652174,
|
| 37834 |
+
"lb_reasoning": 0.631155,
|
| 37835 |
+
"lb_coding": 0.484514,
|
| 37836 |
+
"lb_math": 0.7240549999999999,
|
| 37837 |
+
"lb_language": 0.6999933333333334,
|
| 37838 |
+
"lb_if": 0.5940425,
|
| 37839 |
+
"lb_data_analysis": 0.4802233333333333
|
| 37840 |
+
},
|
| 37841 |
+
{
|
| 37842 |
+
"name": "gpt-5.4-high",
|
| 37843 |
+
"lb_name": "gpt-5.4-high",
|
| 37844 |
+
"lb_global": 0.7560426086956522,
|
| 37845 |
+
"lb_reasoning": 0.8565375,
|
| 37846 |
+
"lb_coding": 0.592738,
|
| 37847 |
+
"lb_math": 0.8998125,
|
| 37848 |
+
"lb_language": 0.8300899999999999,
|
| 37849 |
+
"lb_if": 0.6495425,
|
| 37850 |
+
"lb_data_analysis": 0.7704833333333333,
|
| 37851 |
+
"arena_elo": 1479.43,
|
| 37852 |
+
"arena_rank": 6,
|
| 37853 |
+
"arena_votes": 3503
|
| 37854 |
+
},
|
| 37855 |
+
{
|
| 37856 |
+
"name": "gpt-5.4-xhigh",
|
| 37857 |
+
"lb_name": "gpt-5.4-xhigh",
|
| 37858 |
+
"lb_global": 0.809063043478261,
|
| 37859 |
+
"lb_reasoning": 0.881155,
|
| 37860 |
+
"lb_coding": 0.730166,
|
| 37861 |
+
"lb_math": 0.9414800000000001,
|
| 37862 |
+
"lb_language": 0.8263366666666666,
|
| 37863 |
+
"lb_if": 0.7021674999999999,
|
| 37864 |
+
"lb_data_analysis": 0.7931333333333334
|
| 37865 |
+
},
|
| 37866 |
{
|
| 37867 |
"name": "kimi-k2.5",
|
| 37868 |
"lb_name": "kimi-k2.5-thinking",
|
|
|
|
| 37873 |
"lb_language": 0.77666,
|
| 37874 |
"lb_if": 0.574125,
|
| 37875 |
"lb_data_analysis": 0.6135766666666667,
|
| 37876 |
+
"arena_elo": 1451.33,
|
| 37877 |
"arena_rank": 19,
|
| 37878 |
+
"arena_votes": 12710
|
| 37879 |
},
|
| 37880 |
{
|
| 37881 |
"name": "minimax-m2.5",
|
|
|
|
| 37887 |
"lb_language": 0.5510100000000001,
|
| 37888 |
"lb_if": 0.5723325,
|
| 37889 |
"lb_data_analysis": 0.49605000000000005,
|
| 37890 |
+
"arena_elo": 1401.94,
|
| 37891 |
+
"arena_rank": 75,
|
| 37892 |
+
"arena_votes": 8017
|
| 37893 |
},
|
| 37894 |
{
|
| 37895 |
"name": "claude-opus-4-6-thinking",
|
| 37896 |
"arena_name": "claude-opus-4-6-thinking",
|
| 37897 |
"arena_org": "Anthropic",
|
| 37898 |
+
"arena_elo": 1502.2,
|
| 37899 |
+
"arena_rank": 2,
|
| 37900 |
+
"arena_votes": 8313
|
| 37901 |
},
|
| 37902 |
{
|
| 37903 |
"name": "gemini-3.1-pro-preview",
|
| 37904 |
"arena_name": "gemini-3.1-pro-preview",
|
| 37905 |
"arena_org": "Google",
|
| 37906 |
+
"arena_elo": 1500.28,
|
| 37907 |
"arena_rank": 3,
|
| 37908 |
+
"arena_votes": 4041
|
| 37909 |
},
|
| 37910 |
{
|
| 37911 |
"name": "grok-4.20-beta1",
|
| 37912 |
"arena_name": "grok-4.20-beta1",
|
| 37913 |
"arena_org": "xAI",
|
| 37914 |
+
"arena_elo": 1491.02,
|
| 37915 |
"arena_rank": 4,
|
| 37916 |
+
"arena_votes": 5280
|
| 37917 |
},
|
| 37918 |
{
|
| 37919 |
"name": "gemini-3-pro",
|
| 37920 |
"arena_name": "gemini-3-pro",
|
| 37921 |
"arena_org": "Google",
|
| 37922 |
+
"arena_elo": 1484.81,
|
| 37923 |
"arena_rank": 5,
|
| 37924 |
+
"arena_votes": 39923
|
| 37925 |
},
|
| 37926 |
{
|
| 37927 |
"name": "gpt-5.2-chat-latest-20260210",
|
| 37928 |
"arena_name": "gpt-5.2-chat-latest-20260210",
|
| 37929 |
"arena_org": "OpenAI",
|
| 37930 |
+
"arena_elo": 1478.51,
|
| 37931 |
+
"arena_rank": 7,
|
| 37932 |
+
"arena_votes": 5786
|
| 37933 |
},
|
| 37934 |
{
|
| 37935 |
"name": "gemini-3-flash",
|
| 37936 |
"arena_name": "gemini-3-flash",
|
| 37937 |
"arena_org": "Google",
|
| 37938 |
+
"arena_elo": 1472.72,
|
| 37939 |
+
"arena_rank": 8,
|
| 37940 |
+
"arena_votes": 30600
|
| 37941 |
},
|
| 37942 |
{
|
| 37943 |
"name": "grok-4.1-thinking",
|
| 37944 |
"arena_name": "grok-4.1-thinking",
|
| 37945 |
"arena_org": "xAI",
|
| 37946 |
+
"arena_elo": 1472.56,
|
| 37947 |
+
"arena_rank": 9,
|
| 37948 |
+
"arena_votes": 39309
|
| 37949 |
},
|
| 37950 |
{
|
| 37951 |
"name": "claude-opus-4-5-20251101-thinking-32k",
|
| 37952 |
"arena_name": "claude-opus-4-5-20251101-thinking-32k",
|
| 37953 |
"arena_org": "Anthropic",
|
| 37954 |
+
"arena_elo": 1470.41,
|
| 37955 |
+
"arena_rank": 10,
|
| 37956 |
+
"arena_votes": 32516
|
| 37957 |
},
|
| 37958 |
{
|
| 37959 |
"name": "dola-seed-2.0-preview",
|
| 37960 |
"arena_name": "dola-seed-2.0-preview",
|
| 37961 |
"arena_org": "Bytedance",
|
| 37962 |
+
"arena_elo": 1464.84,
|
| 37963 |
+
"arena_rank": 12,
|
| 37964 |
+
"arena_votes": 6712
|
| 37965 |
},
|
| 37966 |
{
|
| 37967 |
"name": "grok-4.1",
|
| 37968 |
"arena_name": "grok-4.1",
|
| 37969 |
"arena_org": "xAI",
|
| 37970 |
+
"arena_elo": 1462.35,
|
| 37971 |
+
"arena_rank": 13,
|
| 37972 |
+
"arena_votes": 43536
|
| 37973 |
},
|
| 37974 |
{
|
| 37975 |
"name": "gemini-3-flash (thinking-minimal)",
|
| 37976 |
"arena_name": "gemini-3-flash (thinking-minimal)",
|
| 37977 |
"arena_org": "Google",
|
| 37978 |
+
"arena_elo": 1461.84,
|
| 37979 |
+
"arena_rank": 14,
|
| 37980 |
+
"arena_votes": 22846
|
| 37981 |
},
|
| 37982 |
{
|
| 37983 |
+
"name": "gpt-5.4",
|
| 37984 |
+
"arena_name": "gpt-5.4",
|
| 37985 |
"arena_org": "OpenAI",
|
| 37986 |
+
"arena_elo": 1457.35,
|
| 37987 |
"arena_rank": 15,
|
| 37988 |
+
"arena_votes": 3417
|
| 37989 |
},
|
| 37990 |
{
|
| 37991 |
+
"name": "gpt-5.1-high",
|
| 37992 |
+
"arena_name": "gpt-5.1-high",
|
| 37993 |
+
"arena_org": "OpenAI",
|
| 37994 |
+
"arena_elo": 1455.32,
|
| 37995 |
"arena_rank": 17,
|
| 37996 |
+
"arena_votes": 36204
|
| 37997 |
},
|
| 37998 |
{
|
| 37999 |
"name": "ernie-5.0-0110",
|
| 38000 |
"arena_name": "ernie-5.0-0110",
|
| 38001 |
"arena_org": "Baidu",
|
| 38002 |
+
"arena_elo": 1450.91,
|
| 38003 |
+
"arena_rank": 20,
|
| 38004 |
+
"arena_votes": 15402
|
| 38005 |
+
},
|
| 38006 |
+
{
|
| 38007 |
+
"name": "qwen3.5-397b-a17b",
|
| 38008 |
+
"arena_name": "qwen3.5-397b-a17b",
|
| 38009 |
+
"arena_org": "Alibaba",
|
| 38010 |
+
"arena_elo": 1450.5,
|
| 38011 |
+
"arena_rank": 22,
|
| 38012 |
+
"arena_votes": 6836
|
| 38013 |
},
|
| 38014 |
{
|
| 38015 |
"name": "claude-sonnet-4-5-20250929-thinking-32k",
|
| 38016 |
"arena_name": "claude-sonnet-4-5-20250929-thinking-32k",
|
| 38017 |
"arena_org": "Anthropic",
|
| 38018 |
+
"arena_elo": 1449.91,
|
| 38019 |
+
"arena_rank": 23,
|
| 38020 |
+
"arena_votes": 50801
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38021 |
},
|
| 38022 |
{
|
| 38023 |
"name": "ernie-5.0-preview-1203",
|
| 38024 |
"arena_name": "ernie-5.0-preview-1203",
|
| 38025 |
"arena_org": "Baidu",
|
| 38026 |
+
"arena_elo": 1449.21,
|
| 38027 |
+
"arena_rank": 24,
|
| 38028 |
+
"arena_votes": 9712
|
| 38029 |
+
},
|
| 38030 |
+
{
|
| 38031 |
+
"name": "gemini-2.5-pro",
|
| 38032 |
+
"arena_name": "gemini-2.5-pro",
|
| 38033 |
+
"arena_org": "Google",
|
| 38034 |
+
"arena_elo": 1449.2,
|
| 38035 |
+
"arena_rank": 25,
|
| 38036 |
+
"arena_votes": 99103
|
| 38037 |
},
|
| 38038 |
{
|
| 38039 |
"name": "claude-opus-4-1-20250805-thinking-16k",
|
| 38040 |
"arena_name": "claude-opus-4-1-20250805-thinking-16k",
|
| 38041 |
"arena_org": "Anthropic",
|
| 38042 |
+
"arena_elo": 1448.47,
|
| 38043 |
+
"arena_rank": 26,
|
| 38044 |
+
"arena_votes": 49560
|
| 38045 |
},
|
| 38046 |
{
|
| 38047 |
"name": "claude-opus-4-1-20250805",
|
| 38048 |
"arena_name": "claude-opus-4-1-20250805",
|
| 38049 |
"arena_org": "Anthropic",
|
| 38050 |
+
"arena_elo": 1446.26,
|
| 38051 |
+
"arena_rank": 27,
|
| 38052 |
+
"arena_votes": 77173
|
| 38053 |
},
|
| 38054 |
{
|
| 38055 |
"name": "chatgpt-4o-latest-20250326",
|
| 38056 |
"arena_name": "chatgpt-4o-latest-20250326",
|
| 38057 |
"arena_org": "OpenAI",
|
| 38058 |
+
"arena_elo": 1442.85,
|
| 38059 |
+
"arena_rank": 29,
|
| 38060 |
+
"arena_votes": 82864
|
| 38061 |
},
|
| 38062 |
{
|
| 38063 |
"name": "gpt-5.2-high",
|
| 38064 |
"arena_name": "gpt-5.2-high",
|
| 38065 |
"arena_org": "OpenAI",
|
| 38066 |
+
"arena_elo": 1442.34,
|
| 38067 |
+
"arena_rank": 30,
|
| 38068 |
+
"arena_votes": 21161
|
| 38069 |
+
},
|
| 38070 |
+
{
|
| 38071 |
+
"name": "gpt-5.2",
|
| 38072 |
+
"arena_name": "gpt-5.2",
|
| 38073 |
+
"arena_org": "OpenAI",
|
| 38074 |
+
"arena_elo": 1438.54,
|
| 38075 |
+
"arena_rank": 32,
|
| 38076 |
+
"arena_votes": 18159
|
| 38077 |
},
|
| 38078 |
{
|
| 38079 |
"name": "gpt-5.1",
|
| 38080 |
"arena_name": "gpt-5.1",
|
| 38081 |
"arena_org": "OpenAI",
|
| 38082 |
+
"arena_elo": 1437.63,
|
| 38083 |
+
"arena_rank": 33,
|
| 38084 |
+
"arena_votes": 38793
|
| 38085 |
},
|
| 38086 |
{
|
| 38087 |
+
"name": "gemini-3.1-flash-lite-preview",
|
| 38088 |
+
"arena_name": "gemini-3.1-flash-lite-preview",
|
| 38089 |
+
"arena_org": "Google",
|
| 38090 |
+
"arena_elo": 1435.33,
|
| 38091 |
+
"arena_rank": 34,
|
| 38092 |
+
"arena_votes": 3829
|
| 38093 |
},
|
| 38094 |
{
|
| 38095 |
"name": "qwen3-max-preview",
|
| 38096 |
"arena_name": "qwen3-max-preview",
|
| 38097 |
"arena_org": "Alibaba",
|
| 38098 |
+
"arena_elo": 1434.25,
|
| 38099 |
+
"arena_rank": 35,
|
| 38100 |
+
"arena_votes": 27631
|
| 38101 |
},
|
| 38102 |
{
|
| 38103 |
"name": "kimi-k2.5-instant",
|
| 38104 |
"arena_name": "kimi-k2.5-instant",
|
| 38105 |
"arena_org": "Moonshot",
|
| 38106 |
+
"arena_elo": 1433.69,
|
| 38107 |
+
"arena_rank": 37,
|
| 38108 |
+
"arena_votes": 8823
|
| 38109 |
},
|
| 38110 |
{
|
| 38111 |
"name": "o3-2025-04-16",
|
| 38112 |
"arena_name": "o3-2025-04-16",
|
| 38113 |
"arena_org": "OpenAI",
|
| 38114 |
+
"arena_elo": 1432.14,
|
| 38115 |
+
"arena_rank": 38,
|
| 38116 |
+
"arena_votes": 60909
|
| 38117 |
},
|
| 38118 |
{
|
| 38119 |
"name": "kimi-k2-thinking-turbo",
|
| 38120 |
"arena_name": "kimi-k2-thinking-turbo",
|
| 38121 |
"arena_org": "Moonshot",
|
| 38122 |
+
"arena_elo": 1429.22,
|
| 38123 |
+
"arena_rank": 40,
|
| 38124 |
+
"arena_votes": 37861
|
| 38125 |
},
|
| 38126 |
{
|
| 38127 |
"name": "claude-opus-4-20250514-thinking-16k",
|
| 38128 |
"arena_name": "claude-opus-4-20250514-thinking-16k",
|
| 38129 |
"arena_org": "Anthropic",
|
| 38130 |
+
"arena_elo": 1423.51,
|
| 38131 |
+
"arena_rank": 45,
|
| 38132 |
+
"arena_votes": 37678
|
| 38133 |
},
|
| 38134 |
{
|
| 38135 |
"name": "deepseek-v3.2-exp-thinking",
|
| 38136 |
"arena_name": "deepseek-v3.2-exp-thinking",
|
| 38137 |
"arena_org": "DeepSeek",
|
| 38138 |
+
"arena_elo": 1423.37,
|
| 38139 |
+
"arena_rank": 46,
|
| 38140 |
+
"arena_votes": 8942
|
| 38141 |
},
|
| 38142 |
{
|
| 38143 |
"name": "grok-4-fast-chat",
|
| 38144 |
"arena_name": "grok-4-fast-chat",
|
| 38145 |
"arena_org": "xAI",
|
| 38146 |
+
"arena_elo": 1421.82,
|
| 38147 |
+
"arena_rank": 48,
|
| 38148 |
+
"arena_votes": 6964
|
| 38149 |
},
|
| 38150 |
{
|
| 38151 |
"name": "deepseek-v3.2-thinking",
|
| 38152 |
"arena_name": "deepseek-v3.2-thinking",
|
| 38153 |
"arena_org": "DeepSeek",
|
| 38154 |
+
"arena_elo": 1419.98,
|
| 38155 |
+
"arena_rank": 50,
|
| 38156 |
+
"arena_votes": 27370
|
| 38157 |
},
|
| 38158 |
{
|
| 38159 |
"name": "ernie-5.0-preview-1022",
|
| 38160 |
"arena_name": "ernie-5.0-preview-1022",
|
| 38161 |
"arena_org": "Baidu",
|
| 38162 |
+
"arena_elo": 1418.87,
|
| 38163 |
+
"arena_rank": 52,
|
| 38164 |
+
"arena_votes": 4555
|
| 38165 |
},
|
| 38166 |
{
|
| 38167 |
"name": "deepseek-v3.1",
|
| 38168 |
"arena_name": "deepseek-v3.1",
|
| 38169 |
"arena_org": "DeepSeek",
|
| 38170 |
+
"arena_elo": 1418.15,
|
| 38171 |
+
"arena_rank": 53,
|
| 38172 |
+
"arena_votes": 15192
|
| 38173 |
+
},
|
| 38174 |
+
{
|
| 38175 |
+
"name": "qwen3.5-122b-a10b",
|
| 38176 |
+
"arena_name": "qwen3.5-122b-a10b",
|
| 38177 |
+
"arena_org": "Alibaba",
|
| 38178 |
+
"arena_elo": 1418.02,
|
| 38179 |
+
"arena_rank": 54,
|
| 38180 |
+
"arena_votes": 3251
|
| 38181 |
},
|
| 38182 |
{
|
| 38183 |
"name": "deepseek-v3.1-thinking",
|
| 38184 |
"arena_name": "deepseek-v3.1-thinking",
|
| 38185 |
"arena_org": "DeepSeek",
|
| 38186 |
+
"arena_elo": 1417.13,
|
| 38187 |
+
"arena_rank": 55,
|
| 38188 |
+
"arena_votes": 11916
|
| 38189 |
},
|
| 38190 |
{
|
| 38191 |
"name": "kimi-k2-0905-preview",
|
| 38192 |
"arena_name": "kimi-k2-0905-preview",
|
| 38193 |
"arena_org": "Moonshot",
|
| 38194 |
+
"arena_elo": 1417.08,
|
| 38195 |
+
"arena_rank": 56,
|
| 38196 |
+
"arena_votes": 11907
|
| 38197 |
},
|
| 38198 |
{
|
| 38199 |
"name": "kimi-k2-0711-preview",
|
| 38200 |
"arena_name": "kimi-k2-0711-preview",
|
| 38201 |
"arena_org": "Moonshot",
|
| 38202 |
+
"arena_elo": 1416.59,
|
| 38203 |
+
"arena_rank": 57,
|
| 38204 |
+
"arena_votes": 28423
|
| 38205 |
},
|
| 38206 |
{
|
| 38207 |
"name": "deepseek-v3.1-terminus-thinking",
|
| 38208 |
"arena_name": "deepseek-v3.1-terminus-thinking",
|
| 38209 |
"arena_org": "DeepSeek",
|
| 38210 |
+
"arena_elo": 1415.64,
|
| 38211 |
+
"arena_rank": 59,
|
| 38212 |
+
"arena_votes": 3535
|
| 38213 |
},
|
| 38214 |
{
|
| 38215 |
+
"name": "qwen3-vl-235b-a22b-instruct",
|
| 38216 |
+
"arena_name": "qwen3-vl-235b-a22b-instruct",
|
| 38217 |
+
"arena_org": "Alibaba",
|
| 38218 |
+
"arena_elo": 1414.78,
|
| 38219 |
+
"arena_rank": 60,
|
| 38220 |
+
"arena_votes": 11596
|
| 38221 |
},
|
| 38222 |
{
|
| 38223 |
"name": "mistral-large-3",
|
| 38224 |
"arena_name": "mistral-large-3",
|
| 38225 |
"arena_org": "Mistral",
|
| 38226 |
+
"arena_elo": 1414.38,
|
| 38227 |
+
"arena_rank": 61,
|
| 38228 |
+
"arena_votes": 29075
|
| 38229 |
},
|
| 38230 |
{
|
| 38231 |
+
"name": "amazon-nova-experimental-chat-26-01-10",
|
| 38232 |
+
"arena_name": "amazon-nova-experimental-chat-26-01-10",
|
| 38233 |
+
"arena_org": "Amazon",
|
| 38234 |
+
"arena_elo": 1413.74,
|
| 38235 |
+
"arena_rank": 62,
|
| 38236 |
+
"arena_votes": 3387
|
| 38237 |
},
|
| 38238 |
{
|
| 38239 |
"name": "claude-opus-4-20250514",
|
| 38240 |
"arena_name": "claude-opus-4-20250514",
|
| 38241 |
"arena_org": "Anthropic",
|
| 38242 |
+
"arena_elo": 1412.74,
|
| 38243 |
+
"arena_rank": 64,
|
| 38244 |
+
"arena_votes": 45279
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38245 |
},
|
| 38246 |
{
|
| 38247 |
"name": "grok-3-preview-02-24",
|
| 38248 |
"arena_name": "grok-3-preview-02-24",
|
| 38249 |
"arena_org": "xAI",
|
| 38250 |
+
"arena_elo": 1411.22,
|
| 38251 |
+
"arena_rank": 65,
|
| 38252 |
+
"arena_votes": 33823
|
| 38253 |
},
|
| 38254 |
{
|
| 38255 |
"name": "gemini-2.5-flash",
|
| 38256 |
"arena_name": "gemini-2.5-flash",
|
| 38257 |
"arena_org": "Google",
|
| 38258 |
+
"arena_elo": 1410.57,
|
| 38259 |
+
"arena_rank": 66,
|
| 38260 |
+
"arena_votes": 98369
|
| 38261 |
+
},
|
| 38262 |
+
{
|
| 38263 |
+
"name": "qwen3.5-27b",
|
| 38264 |
+
"arena_name": "qwen3.5-27b",
|
| 38265 |
+
"arena_org": "Alibaba",
|
| 38266 |
+
"arena_elo": 1410.57,
|
| 38267 |
+
"arena_rank": 67,
|
| 38268 |
+
"arena_votes": 3368
|
| 38269 |
+
},
|
| 38270 |
+
{
|
| 38271 |
+
"name": "mistral-medium-2508",
|
| 38272 |
+
"arena_name": "mistral-medium-2508",
|
| 38273 |
+
"arena_org": "Mistral",
|
| 38274 |
+
"arena_elo": 1410.31,
|
| 38275 |
+
"arena_rank": 68,
|
| 38276 |
+
"arena_votes": 67631
|
| 38277 |
},
|
| 38278 |
{
|
| 38279 |
"name": "grok-4-fast-reasoning",
|
| 38280 |
"arena_name": "grok-4-fast-reasoning",
|
| 38281 |
"arena_org": "xAI",
|
| 38282 |
+
"arena_elo": 1403.45,
|
| 38283 |
+
"arena_rank": 73,
|
| 38284 |
+
"arena_votes": 18431
|
| 38285 |
+
},
|
| 38286 |
+
{
|
| 38287 |
+
"name": "qwen3.5-flash",
|
| 38288 |
+
"arena_name": "qwen3.5-flash",
|
| 38289 |
+
"arena_org": "Alibaba",
|
| 38290 |
+
"arena_elo": 1402,
|
| 38291 |
+
"arena_rank": 74,
|
| 38292 |
+
"arena_votes": 4330
|
| 38293 |
},
|
| 38294 |
{
|
| 38295 |
"name": "qwen3-235b-a22b-no-thinking",
|
| 38296 |
"arena_name": "qwen3-235b-a22b-no-thinking",
|
| 38297 |
"arena_org": "Alibaba",
|
| 38298 |
+
"arena_elo": 1401.92,
|
| 38299 |
+
"arena_rank": 76,
|
| 38300 |
+
"arena_votes": 39264
|
| 38301 |
},
|
| 38302 |
{
|
| 38303 |
"name": "longcat-flash-chat",
|
| 38304 |
"arena_name": "longcat-flash-chat",
|
| 38305 |
"arena_org": "Meituan",
|
| 38306 |
+
"arena_elo": 1400.09,
|
| 38307 |
+
"arena_rank": 79,
|
| 38308 |
"arena_votes": 11486
|
| 38309 |
},
|
| 38310 |
{
|
| 38311 |
"name": "claude-sonnet-4-20250514-thinking-32k",
|
| 38312 |
"arena_name": "claude-sonnet-4-20250514-thinking-32k",
|
| 38313 |
"arena_org": "Anthropic",
|
| 38314 |
+
"arena_elo": 1399.69,
|
| 38315 |
+
"arena_rank": 80,
|
| 38316 |
+
"arena_votes": 35948
|
| 38317 |
+
},
|
| 38318 |
+
{
|
| 38319 |
+
"name": "qwen3.5-35b-a3b",
|
| 38320 |
+
"arena_name": "qwen3.5-35b-a3b",
|
| 38321 |
+
"arena_org": "Alibaba",
|
| 38322 |
+
"arena_elo": 1395.41,
|
| 38323 |
+
"arena_rank": 83,
|
| 38324 |
+
"arena_votes": 3427
|
| 38325 |
},
|
| 38326 |
{
|
| 38327 |
"name": "qwen3-vl-235b-a22b-thinking",
|
| 38328 |
"arena_name": "qwen3-vl-235b-a22b-thinking",
|
| 38329 |
"arena_org": "Alibaba",
|
| 38330 |
+
"arena_elo": 1395.13,
|
| 38331 |
+
"arena_rank": 84,
|
| 38332 |
+
"arena_votes": 7919
|
| 38333 |
},
|
| 38334 |
{
|
| 38335 |
"name": "amazon-nova-experimental-chat-12-10",
|
| 38336 |
"arena_name": "amazon-nova-experimental-chat-12-10",
|
| 38337 |
"arena_org": "Amazon",
|
| 38338 |
+
"arena_elo": 1394.79,
|
| 38339 |
+
"arena_rank": 85,
|
| 38340 |
+
"arena_votes": 3696
|
| 38341 |
},
|
| 38342 |
{
|
| 38343 |
"name": "hunyuan-vision-1.5-thinking",
|
| 38344 |
"arena_name": "hunyuan-vision-1.5-thinking",
|
| 38345 |
"arena_org": "Tencent",
|
| 38346 |
+
"arena_elo": 1393.82,
|
| 38347 |
+
"arena_rank": 87,
|
| 38348 |
"arena_votes": 2216
|
| 38349 |
},
|
| 38350 |
{
|
| 38351 |
"name": "mai-1-preview",
|
| 38352 |
"arena_name": "mai-1-preview",
|
| 38353 |
"arena_org": "Microsoft AI",
|
| 38354 |
+
"arena_elo": 1391.92,
|
| 38355 |
+
"arena_rank": 88,
|
| 38356 |
+
"arena_votes": 18005
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38357 |
},
|
| 38358 |
{
|
| 38359 |
"name": "mimo-v2-flash (non-thinking)",
|
| 38360 |
"arena_name": "mimo-v2-flash (non-thinking)",
|
| 38361 |
"arena_org": "Xiaomi",
|
| 38362 |
+
"arena_elo": 1391.72,
|
| 38363 |
+
"arena_rank": 89,
|
| 38364 |
+
"arena_votes": 21546
|
| 38365 |
},
|
| 38366 |
{
|
| 38367 |
+
"name": "o4-mini-2025-04-16",
|
| 38368 |
+
"arena_name": "o4-mini-2025-04-16",
|
| 38369 |
+
"arena_org": "OpenAI",
|
| 38370 |
+
"arena_elo": 1390.66,
|
| 38371 |
+
"arena_rank": 90,
|
| 38372 |
+
"arena_votes": 46343
|
| 38373 |
},
|
| 38374 |
{
|
| 38375 |
"name": "claude-sonnet-4-20250514",
|
| 38376 |
"arena_name": "claude-sonnet-4-20250514",
|
| 38377 |
"arena_org": "Anthropic",
|
| 38378 |
+
"arena_elo": 1389.42,
|
| 38379 |
+
"arena_rank": 92,
|
| 38380 |
+
"arena_votes": 41340
|
| 38381 |
+
},
|
| 38382 |
+
{
|
| 38383 |
+
"name": "step-3.5-flash",
|
| 38384 |
+
"arena_name": "step-3.5-flash",
|
| 38385 |
+
"arena_org": "StepFun",
|
| 38386 |
+
"arena_elo": 1388.91,
|
| 38387 |
+
"arena_rank": 93,
|
| 38388 |
+
"arena_votes": 10401
|
| 38389 |
},
|
| 38390 |
{
|
| 38391 |
"name": "claude-3-7-sonnet-20250219-thinking-32k",
|
| 38392 |
"arena_name": "claude-3-7-sonnet-20250219-thinking-32k",
|
| 38393 |
"arena_org": "Anthropic",
|
| 38394 |
+
"arena_elo": 1387.87,
|
| 38395 |
+
"arena_rank": 95,
|
| 38396 |
+
"arena_votes": 39706
|
| 38397 |
},
|
| 38398 |
{
|
| 38399 |
"name": "mimo-v2-flash (thinking)",
|
| 38400 |
"arena_name": "mimo-v2-flash (thinking)",
|
| 38401 |
"arena_org": "Xiaomi",
|
| 38402 |
+
"arena_elo": 1386.76,
|
| 38403 |
+
"arena_rank": 96,
|
| 38404 |
+
"arena_votes": 10861
|
| 38405 |
},
|
| 38406 |
{
|
| 38407 |
"name": "hunyuan-t1-20250711",
|
| 38408 |
"arena_name": "hunyuan-t1-20250711",
|
| 38409 |
"arena_org": "Tencent",
|
| 38410 |
+
"arena_elo": 1386.64,
|
| 38411 |
+
"arena_rank": 97,
|
| 38412 |
+
"arena_votes": 4764
|
| 38413 |
},
|
| 38414 |
{
|
| 38415 |
"name": "minimax-m2.1-preview",
|
| 38416 |
"arena_name": "minimax-m2.1-preview",
|
| 38417 |
"arena_org": "MiniMax",
|
| 38418 |
+
"arena_elo": 1385.35,
|
| 38419 |
+
"arena_rank": 99,
|
| 38420 |
+
"arena_votes": 17078
|
| 38421 |
},
|
| 38422 |
{
|
| 38423 |
"name": "qwen3-30b-a3b-instruct-2507",
|
| 38424 |
"arena_name": "qwen3-30b-a3b-instruct-2507",
|
| 38425 |
"arena_org": "Alibaba",
|
| 38426 |
+
"arena_elo": 1383.69,
|
| 38427 |
+
"arena_rank": 101,
|
| 38428 |
+
"arena_votes": 23933
|
| 38429 |
},
|
| 38430 |
{
|
| 38431 |
"name": "hunyuan-turbos-20250416",
|
| 38432 |
"arena_name": "hunyuan-turbos-20250416",
|
| 38433 |
"arena_org": "Tencent",
|
| 38434 |
+
"arena_elo": 1382.65,
|
| 38435 |
+
"arena_rank": 102,
|
| 38436 |
+
"arena_votes": 10995
|
| 38437 |
},
|
| 38438 |
{
|
| 38439 |
"name": "gemini-2.5-flash-lite-preview-09-2025-no-thinking",
|
| 38440 |
"arena_name": "gemini-2.5-flash-lite-preview-09-2025-no-thinking",
|
| 38441 |
"arena_org": "Google",
|
| 38442 |
+
"arena_elo": 1379.69,
|
| 38443 |
+
"arena_rank": 104,
|
| 38444 |
+
"arena_votes": 46840
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38445 |
},
|
| 38446 |
{
|
| 38447 |
"name": "gemini-2.5-flash-lite-preview-06-17-thinking",
|
| 38448 |
"arena_name": "gemini-2.5-flash-lite-preview-06-17-thinking",
|
| 38449 |
"arena_org": "Google",
|
| 38450 |
+
"arena_elo": 1374.7,
|
| 38451 |
+
"arena_rank": 106,
|
| 38452 |
+
"arena_votes": 33641
|
| 38453 |
},
|
| 38454 |
{
|
| 38455 |
+
"name": "trinity-large",
|
| 38456 |
+
"arena_name": "trinity-large",
|
| 38457 |
+
"arena_org": "Arcee AI",
|
| 38458 |
+
"arena_elo": 1374.19,
|
| 38459 |
+
"arena_rank": 109,
|
| 38460 |
+
"arena_votes": 4191
|
| 38461 |
},
|
| 38462 |
{
|
| 38463 |
"name": "glm-4.7-flash",
|
| 38464 |
"arena_name": "glm-4.7-flash",
|
| 38465 |
"arena_org": "Z.ai",
|
| 38466 |
+
"arena_elo": 1366.99,
|
| 38467 |
+
"arena_rank": 114,
|
| 38468 |
+
"arena_votes": 11734
|
| 38469 |
+
},
|
| 38470 |
+
{
|
| 38471 |
+
"name": "minimax-m1",
|
| 38472 |
+
"arena_name": "minimax-m1",
|
| 38473 |
+
"arena_org": "MiniMax",
|
| 38474 |
+
"arena_elo": 1366.61,
|
| 38475 |
+
"arena_rank": 115,
|
| 38476 |
+
"arena_votes": 36424
|
| 38477 |
},
|
| 38478 |
{
|
| 38479 |
"name": "amazon-nova-experimental-chat-11-10",
|
| 38480 |
"arena_name": "amazon-nova-experimental-chat-11-10",
|
| 38481 |
"arena_org": "Amazon",
|
| 38482 |
+
"arena_elo": 1365.97,
|
| 38483 |
+
"arena_rank": 116,
|
| 38484 |
+
"arena_votes": 20934
|
| 38485 |
},
|
| 38486 |
{
|
| 38487 |
"name": "o3-mini-high",
|
| 38488 |
"arena_name": "o3-mini-high",
|
| 38489 |
"arena_org": "OpenAI",
|
| 38490 |
+
"arena_elo": 1363.85,
|
| 38491 |
+
"arena_rank": 118,
|
| 38492 |
"arena_votes": 18584
|
| 38493 |
},
|
| 38494 |
{
|
| 38495 |
"name": "grok-3-mini-high",
|
| 38496 |
"arena_name": "grok-3-mini-high",
|
| 38497 |
"arena_org": "xAI",
|
| 38498 |
+
"arena_elo": 1362.78,
|
| 38499 |
+
"arena_rank": 119,
|
| 38500 |
+
"arena_votes": 17400
|
| 38501 |
},
|
| 38502 |
{
|
| 38503 |
"name": "grok-3-mini-beta",
|
| 38504 |
"arena_name": "grok-3-mini-beta",
|
| 38505 |
"arena_org": "xAI",
|
| 38506 |
+
"arena_elo": 1356.88,
|
| 38507 |
+
"arena_rank": 122,
|
| 38508 |
+
"arena_votes": 23585
|
| 38509 |
},
|
| 38510 |
{
|
| 38511 |
"name": "intellect-3",
|
| 38512 |
"arena_name": "intellect-3",
|
| 38513 |
"arena_org": "Prime Intellect",
|
| 38514 |
+
"arena_elo": 1356.23,
|
| 38515 |
+
"arena_rank": 123,
|
| 38516 |
+
"arena_votes": 5285
|
| 38517 |
},
|
| 38518 |
{
|
| 38519 |
"name": "mistral-small-2506",
|
| 38520 |
"arena_name": "mistral-small-2506",
|
| 38521 |
"arena_org": "Mistral",
|
| 38522 |
+
"arena_elo": 1356.11,
|
| 38523 |
+
"arena_rank": 124,
|
| 38524 |
+
"arena_votes": 18219
|
| 38525 |
},
|
| 38526 |
{
|
| 38527 |
"name": "glm-4.5v",
|
| 38528 |
"arena_name": "glm-4.5v",
|
| 38529 |
"arena_org": "Z.ai",
|
| 38530 |
+
"arena_elo": 1352.95,
|
| 38531 |
+
"arena_rank": 128,
|
| 38532 |
+
"arena_votes": 4947
|
| 38533 |
},
|
| 38534 |
{
|
| 38535 |
"name": "amazon-nova-experimental-chat-10-20",
|
| 38536 |
"arena_name": "amazon-nova-experimental-chat-10-20",
|
| 38537 |
"arena_org": "Amazon",
|
| 38538 |
+
"arena_elo": 1350.71,
|
| 38539 |
+
"arena_rank": 130,
|
| 38540 |
+
"arena_votes": 11317
|
| 38541 |
},
|
| 38542 |
{
|
| 38543 |
"name": "hunyuan-turbos-20250226",
|
| 38544 |
"arena_name": "hunyuan-turbos-20250226",
|
| 38545 |
"arena_org": "Tencent",
|
| 38546 |
+
"arena_elo": 1348.74,
|
| 38547 |
+
"arena_rank": 131,
|
| 38548 |
"arena_votes": 2226
|
| 38549 |
},
|
| 38550 |
{
|
| 38551 |
"name": "amazon-nova-experimental-chat-10-09",
|
| 38552 |
"arena_name": "amazon-nova-experimental-chat-10-09",
|
| 38553 |
"arena_org": "Amazon",
|
| 38554 |
+
"arena_elo": 1347.31,
|
| 38555 |
+
"arena_rank": 133,
|
| 38556 |
+
"arena_votes": 2873
|
| 38557 |
+
},
|
| 38558 |
+
{
|
| 38559 |
+
"name": "ling-flash-2.0",
|
| 38560 |
+
"arena_name": "ling-flash-2.0",
|
| 38561 |
+
"arena_org": "Ant Group",
|
| 38562 |
+
"arena_elo": 1347.26,
|
| 38563 |
+
"arena_rank": 134,
|
| 38564 |
+
"arena_votes": 6988
|
| 38565 |
},
|
| 38566 |
{
|
| 38567 |
"name": "llama-3.1-nemotron-ultra-253b-v1",
|
| 38568 |
"arena_name": "llama-3.1-nemotron-ultra-253b-v1",
|
| 38569 |
"arena_org": "Nvidia",
|
| 38570 |
+
"arena_elo": 1347.18,
|
| 38571 |
+
"arena_rank": 135,
|
| 38572 |
"arena_votes": 2546
|
| 38573 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38574 |
{
|
| 38575 |
"name": "step-3",
|
| 38576 |
"arena_name": "step-3",
|
| 38577 |
"arena_org": "StepFun",
|
| 38578 |
+
"arena_elo": 1346.66,
|
| 38579 |
+
"arena_rank": 138,
|
| 38580 |
+
"arena_votes": 6565
|
| 38581 |
},
|
| 38582 |
{
|
| 38583 |
"name": "qwen-plus-0125",
|
| 38584 |
"arena_name": "qwen-plus-0125",
|
| 38585 |
"arena_org": "Alibaba",
|
| 38586 |
+
"arena_elo": 1346.13,
|
| 38587 |
+
"arena_rank": 139,
|
| 38588 |
"arena_votes": 5823
|
| 38589 |
},
|
| 38590 |
{
|
| 38591 |
"name": "glm-4-plus-0111",
|
| 38592 |
"arena_name": "glm-4-plus-0111",
|
| 38593 |
"arena_org": "Zhipu",
|
| 38594 |
+
"arena_elo": 1343.09,
|
| 38595 |
+
"arena_rank": 141,
|
| 38596 |
"arena_votes": 5760
|
| 38597 |
},
|
| 38598 |
{
|
| 38599 |
"name": "nvidia-llama-3.3-nemotron-super-49b-v1.5",
|
| 38600 |
"arena_name": "nvidia-llama-3.3-nemotron-super-49b-v1.5",
|
| 38601 |
"arena_org": "Nvidia",
|
| 38602 |
+
"arena_elo": 1341.09,
|
| 38603 |
+
"arena_rank": 144,
|
| 38604 |
+
"arena_votes": 3398
|
| 38605 |
},
|
| 38606 |
{
|
| 38607 |
"name": "hunyuan-turbo-0110",
|
| 38608 |
"arena_name": "hunyuan-turbo-0110",
|
| 38609 |
"arena_org": "Tencent",
|
| 38610 |
+
"arena_elo": 1340.39,
|
| 38611 |
+
"arena_rank": 145,
|
| 38612 |
"arena_votes": 2295
|
| 38613 |
},
|
| 38614 |
{
|
| 38615 |
"name": "nova-2-lite",
|
| 38616 |
"arena_name": "nova-2-lite",
|
| 38617 |
"arena_org": "Amazon",
|
| 38618 |
+
"arena_elo": 1337.58,
|
| 38619 |
+
"arena_rank": 146,
|
| 38620 |
+
"arena_votes": 12099
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38621 |
},
|
| 38622 |
{
|
| 38623 |
"name": "grok-2-2024-08-13",
|
| 38624 |
"arena_name": "grok-2-2024-08-13",
|
| 38625 |
"arena_org": "xAI",
|
| 38626 |
+
"arena_elo": 1335,
|
| 38627 |
+
"arena_rank": 150,
|
| 38628 |
"arena_votes": 63495
|
| 38629 |
},
|
| 38630 |
+
{
|
| 38631 |
+
"name": "llama-3.1-405b-instruct-bf16",
|
| 38632 |
+
"arena_name": "llama-3.1-405b-instruct-bf16",
|
| 38633 |
+
"arena_org": "Meta",
|
| 38634 |
+
"arena_elo": 1334.93,
|
| 38635 |
+
"arena_rank": 151,
|
| 38636 |
+
"arena_votes": 41392
|
| 38637 |
+
},
|
| 38638 |
{
|
| 38639 |
"name": "gemini-advanced-0514",
|
| 38640 |
"arena_name": "gemini-advanced-0514",
|
| 38641 |
"arena_org": "Google",
|
| 38642 |
+
"arena_elo": 1334.6,
|
| 38643 |
+
"arena_rank": 153,
|
| 38644 |
"arena_votes": 50142
|
| 38645 |
},
|
| 38646 |
{
|
| 38647 |
"name": "step-2-16k-exp-202412",
|
| 38648 |
"arena_name": "step-2-16k-exp-202412",
|
| 38649 |
"arena_org": "StepFun",
|
| 38650 |
+
"arena_elo": 1334.01,
|
| 38651 |
+
"arena_rank": 154,
|
| 38652 |
"arena_votes": 4829
|
| 38653 |
},
|
| 38654 |
{
|
| 38655 |
"name": "llama-3.1-405b-instruct-fp8",
|
| 38656 |
"arena_name": "llama-3.1-405b-instruct-fp8",
|
| 38657 |
"arena_org": "Meta",
|
| 38658 |
+
"arena_elo": 1333.11,
|
| 38659 |
+
"arena_rank": 155,
|
| 38660 |
"arena_votes": 59655
|
| 38661 |
},
|
| 38662 |
{
|
| 38663 |
"name": "olmo-3.1-32b-instruct",
|
| 38664 |
"arena_name": "olmo-3.1-32b-instruct",
|
| 38665 |
"arena_org": "Ai2",
|
| 38666 |
+
"arena_elo": 1330.62,
|
| 38667 |
+
"arena_rank": 156,
|
| 38668 |
+
"arena_votes": 12238
|
| 38669 |
},
|
| 38670 |
{
|
| 38671 |
"name": "molmo-2-8b",
|
| 38672 |
"arena_name": "molmo-2-8b",
|
| 38673 |
"arena_org": "Ai2",
|
| 38674 |
+
"arena_elo": 1328.67,
|
| 38675 |
+
"arena_rank": 157,
|
| 38676 |
+
"arena_votes": 812
|
| 38677 |
},
|
| 38678 |
{
|
| 38679 |
"name": "yi-lightning",
|
| 38680 |
"arena_name": "yi-lightning",
|
| 38681 |
"arena_org": "01 AI",
|
| 38682 |
+
"arena_elo": 1328.37,
|
| 38683 |
+
"arena_rank": 158,
|
| 38684 |
"arena_votes": 27340,
|
| 38685 |
"aider_pass_rate": 0.496
|
| 38686 |
},
|
|
|
|
| 38688 |
"name": "llama-3.3-nemotron-49b-super-v1",
|
| 38689 |
"arena_name": "llama-3.3-nemotron-49b-super-v1",
|
| 38690 |
"arena_org": "Nvidia",
|
| 38691 |
+
"arena_elo": 1327.08,
|
| 38692 |
+
"arena_rank": 161,
|
| 38693 |
"arena_votes": 2230
|
| 38694 |
},
|
| 38695 |
{
|
| 38696 |
"name": "hunyuan-large-2025-02-10",
|
| 38697 |
"arena_name": "hunyuan-large-2025-02-10",
|
| 38698 |
"arena_org": "Tencent",
|
| 38699 |
+
"arena_elo": 1326.31,
|
| 38700 |
+
"arena_rank": 162,
|
| 38701 |
"arena_votes": 3738
|
| 38702 |
},
|
| 38703 |
{
|
| 38704 |
"name": "deepseek-v2.5-1210",
|
| 38705 |
"arena_name": "deepseek-v2.5-1210",
|
| 38706 |
"arena_org": "DeepSeek",
|
| 38707 |
+
"arena_elo": 1323.15,
|
| 38708 |
+
"arena_rank": 164,
|
| 38709 |
"arena_votes": 6793,
|
| 38710 |
"aider_pass_rate": 0.586
|
| 38711 |
},
|
|
|
|
| 38713 |
"name": "gemini-1.5-pro-001",
|
| 38714 |
"arena_name": "gemini-1.5-pro-001",
|
| 38715 |
"arena_org": "Google",
|
| 38716 |
+
"arena_elo": 1322.88,
|
| 38717 |
+
"arena_rank": 166,
|
| 38718 |
"arena_votes": 79132,
|
| 38719 |
"aider_pass_rate": 0.45899999999999996
|
| 38720 |
},
|
|
|
|
| 38722 |
"name": "llama-4-scout-17b-16e-instruct",
|
| 38723 |
"arena_name": "llama-4-scout-17b-16e-instruct",
|
| 38724 |
"arena_org": "Meta",
|
| 38725 |
+
"arena_elo": 1322.18,
|
| 38726 |
+
"arena_rank": 167,
|
| 38727 |
+
"arena_votes": 31023
|
| 38728 |
},
|
| 38729 |
{
|
| 38730 |
"name": "step-1o-turbo-202506",
|
| 38731 |
"arena_name": "step-1o-turbo-202506",
|
| 38732 |
"arena_org": "StepFun",
|
| 38733 |
+
"arena_elo": 1321.94,
|
| 38734 |
+
"arena_rank": 168,
|
| 38735 |
+
"arena_votes": 9606
|
| 38736 |
},
|
| 38737 |
{
|
| 38738 |
"name": "ring-flash-2.0",
|
| 38739 |
"arena_name": "ring-flash-2.0",
|
| 38740 |
"arena_org": "Ant Group",
|
| 38741 |
+
"arena_elo": 1320.4,
|
| 38742 |
+
"arena_rank": 171,
|
| 38743 |
+
"arena_votes": 7147
|
| 38744 |
+
},
|
| 38745 |
+
{
|
| 38746 |
+
"name": "gemma-3n-e4b-it",
|
| 38747 |
+
"arena_name": "gemma-3n-e4b-it",
|
| 38748 |
+
"arena_org": "Google",
|
| 38749 |
+
"arena_elo": 1319.28,
|
| 38750 |
+
"arena_rank": 172,
|
| 38751 |
+
"arena_votes": 23170
|
| 38752 |
},
|
| 38753 |
{
|
| 38754 |
"name": "glm-4-plus",
|
| 38755 |
"arena_name": "glm-4-plus",
|
| 38756 |
"arena_org": "Zhipu AI",
|
| 38757 |
+
"arena_elo": 1319.23,
|
| 38758 |
+
"arena_rank": 173,
|
| 38759 |
"arena_votes": 26134
|
| 38760 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38761 |
{
|
| 38762 |
"name": "qwen-max-0919",
|
| 38763 |
"arena_name": "qwen-max-0919",
|
| 38764 |
"arena_org": "Alibaba",
|
| 38765 |
+
"arena_elo": 1317.9,
|
| 38766 |
+
"arena_rank": 175,
|
| 38767 |
"arena_votes": 16479
|
| 38768 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38769 |
{
|
| 38770 |
"name": "nvidia-nemotron-3-nano-30b-a3b-bf16",
|
| 38771 |
"arena_name": "nvidia-nemotron-3-nano-30b-a3b-bf16",
|
| 38772 |
"arena_org": "Nvidia",
|
| 38773 |
+
"arena_elo": 1317.23,
|
| 38774 |
+
"arena_rank": 177,
|
| 38775 |
+
"arena_votes": 15394
|
| 38776 |
+
},
|
| 38777 |
+
{
|
| 38778 |
+
"name": "gpt-oss-20b",
|
| 38779 |
+
"arena_name": "gpt-oss-20b",
|
| 38780 |
+
"arena_org": "OpenAI",
|
| 38781 |
+
"arena_elo": 1317.07,
|
| 38782 |
+
"arena_rank": 178,
|
| 38783 |
+
"arena_votes": 10751
|
| 38784 |
},
|
| 38785 |
{
|
| 38786 |
"name": "qwen2.5-plus-1127",
|
| 38787 |
"arena_name": "qwen2.5-plus-1127",
|
| 38788 |
"arena_org": "Alibaba",
|
| 38789 |
+
"arena_elo": 1315.3,
|
| 38790 |
+
"arena_rank": 179,
|
| 38791 |
"arena_votes": 10179
|
| 38792 |
},
|
| 38793 |
{
|
| 38794 |
"name": "athene-v2-chat",
|
| 38795 |
"arena_name": "athene-v2-chat",
|
| 38796 |
"arena_org": "NexusFlow",
|
| 38797 |
+
"arena_elo": 1314.36,
|
| 38798 |
+
"arena_rank": 180,
|
| 38799 |
"arena_votes": 24746
|
| 38800 |
},
|
| 38801 |
{
|
| 38802 |
"name": "gpt-4-1106-preview",
|
| 38803 |
"arena_name": "gpt-4-1106-preview",
|
| 38804 |
"arena_org": "OpenAI",
|
| 38805 |
+
"arena_elo": 1312.77,
|
| 38806 |
+
"arena_rank": 183,
|
| 38807 |
"arena_votes": 100107,
|
| 38808 |
"aider_pass_rate": 0.519
|
| 38809 |
},
|
|
|
|
| 38811 |
"name": "hunyuan-standard-2025-02-10",
|
| 38812 |
"arena_name": "hunyuan-standard-2025-02-10",
|
| 38813 |
"arena_org": "Tencent",
|
| 38814 |
+
"arena_elo": 1311.44,
|
| 38815 |
+
"arena_rank": 184,
|
| 38816 |
"arena_votes": 3905
|
| 38817 |
},
|
| 38818 |
{
|
| 38819 |
"name": "mercury",
|
| 38820 |
"arena_name": "mercury",
|
| 38821 |
"arena_org": "Inception AI",
|
| 38822 |
+
"arena_elo": 1308.22,
|
| 38823 |
+
"arena_rank": 186,
|
| 38824 |
+
"arena_votes": 1884
|
| 38825 |
},
|
| 38826 |
{
|
| 38827 |
"name": "grok-2-mini-2024-08-13",
|
| 38828 |
"arena_name": "grok-2-mini-2024-08-13",
|
| 38829 |
"arena_org": "xAI",
|
| 38830 |
+
"arena_elo": 1307.89,
|
| 38831 |
+
"arena_rank": 187,
|
| 38832 |
"arena_votes": 52574
|
| 38833 |
},
|
| 38834 |
+
{
|
| 38835 |
+
"name": "olmo-3-32b-think",
|
| 38836 |
+
"arena_name": "olmo-3-32b-think",
|
| 38837 |
+
"arena_org": "Ai2",
|
| 38838 |
+
"arena_elo": 1305.95,
|
| 38839 |
+
"arena_rank": 189,
|
| 38840 |
+
"arena_votes": 5863
|
| 38841 |
+
},
|
| 38842 |
{
|
| 38843 |
"name": "athene-70b-0725",
|
| 38844 |
"arena_name": "athene-70b-0725",
|
| 38845 |
"arena_org": "NexusFlow",
|
| 38846 |
+
"arena_elo": 1305.85,
|
| 38847 |
+
"arena_rank": 190,
|
| 38848 |
"arena_votes": 19622
|
| 38849 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38850 |
{
|
| 38851 |
"name": "magistral-medium-2506",
|
| 38852 |
"arena_name": "magistral-medium-2506",
|
| 38853 |
"arena_org": "Mistral",
|
| 38854 |
+
"arena_elo": 1304.55,
|
| 38855 |
+
"arena_rank": 192,
|
| 38856 |
+
"arena_votes": 11974
|
| 38857 |
},
|
| 38858 |
{
|
| 38859 |
"name": "mistral-small-3.1-24b-instruct-2503",
|
| 38860 |
"arena_name": "mistral-small-3.1-24b-instruct-2503",
|
| 38861 |
"arena_org": "Mistral",
|
| 38862 |
+
"arena_elo": 1304.27,
|
| 38863 |
+
"arena_rank": 193,
|
| 38864 |
+
"arena_votes": 33870
|
| 38865 |
},
|
| 38866 |
{
|
| 38867 |
"name": "hunyuan-large-vision",
|
| 38868 |
"arena_name": "hunyuan-large-vision",
|
| 38869 |
"arena_org": "Tencent",
|
| 38870 |
+
"arena_elo": 1295.76,
|
| 38871 |
+
"arena_rank": 197,
|
| 38872 |
+
"arena_votes": 5563
|
| 38873 |
},
|
| 38874 |
{
|
| 38875 |
"name": "amazon-nova-pro-v1.0",
|
| 38876 |
"arena_name": "amazon-nova-pro-v1.0",
|
| 38877 |
"arena_org": "Amazon",
|
| 38878 |
+
"arena_elo": 1290.05,
|
| 38879 |
+
"arena_rank": 199,
|
| 38880 |
"arena_votes": 24753
|
| 38881 |
},
|
| 38882 |
{
|
| 38883 |
"name": "reka-core-20240904",
|
| 38884 |
"arena_name": "reka-core-20240904",
|
| 38885 |
"arena_org": "Reka AI",
|
| 38886 |
+
"arena_elo": 1287.62,
|
| 38887 |
+
"arena_rank": 202,
|
| 38888 |
"arena_votes": 7309
|
| 38889 |
},
|
| 38890 |
{
|
| 38891 |
"name": "ibm-granite-h-small",
|
| 38892 |
"arena_name": "ibm-granite-h-small",
|
| 38893 |
"arena_org": "IBM",
|
| 38894 |
+
"arena_elo": 1286.66,
|
| 38895 |
+
"arena_rank": 203,
|
| 38896 |
+
"arena_votes": 5619
|
| 38897 |
},
|
| 38898 |
{
|
| 38899 |
"name": "gpt-4-0314",
|
| 38900 |
"arena_name": "gpt-4-0314",
|
| 38901 |
"arena_org": "OpenAI",
|
| 38902 |
+
"arena_elo": 1286.55,
|
| 38903 |
+
"arena_rank": 204,
|
| 38904 |
"arena_votes": 54167,
|
| 38905 |
"aider_pass_rate": 0.504
|
| 38906 |
},
|
|
|
|
| 38908 |
"name": "llama-3.1-nemotron-51b-instruct",
|
| 38909 |
"arena_name": "llama-3.1-nemotron-51b-instruct",
|
| 38910 |
"arena_org": "Nvidia",
|
| 38911 |
+
"arena_elo": 1285.97,
|
| 38912 |
+
"arena_rank": 206,
|
| 38913 |
"arena_votes": 3749
|
| 38914 |
},
|
| 38915 |
{
|
| 38916 |
"name": "gemini-1.5-flash-001",
|
| 38917 |
"arena_name": "gemini-1.5-flash-001",
|
| 38918 |
"arena_org": "Google",
|
| 38919 |
+
"arena_elo": 1285.44,
|
| 38920 |
+
"arena_rank": 207,
|
| 38921 |
"arena_votes": 62823
|
| 38922 |
},
|
| 38923 |
{
|
| 38924 |
"name": "olmo-3.1-32b-think",
|
| 38925 |
"arena_name": "olmo-3.1-32b-think",
|
| 38926 |
"arena_org": "Ai2",
|
| 38927 |
+
"arena_elo": 1284.9,
|
| 38928 |
+
"arena_rank": 208,
|
| 38929 |
+
"arena_votes": 8441
|
| 38930 |
},
|
| 38931 |
{
|
| 38932 |
"name": "nemotron-4-340b-instruct",
|
| 38933 |
"arena_name": "nemotron-4-340b-instruct",
|
| 38934 |
"arena_org": "Nvidia",
|
| 38935 |
+
"arena_elo": 1277.06,
|
| 38936 |
+
"arena_rank": 211,
|
| 38937 |
"arena_votes": 19661
|
| 38938 |
},
|
| 38939 |
{
|
| 38940 |
"name": "llama-3-70b-instruct",
|
| 38941 |
"arena_name": "llama-3-70b-instruct",
|
| 38942 |
"arena_org": "Meta",
|
| 38943 |
+
"arena_elo": 1275.7,
|
| 38944 |
+
"arena_rank": 213,
|
| 38945 |
"arena_votes": 156880
|
| 38946 |
},
|
| 38947 |
{
|
| 38948 |
"name": "mistral-small-24b-instruct-2501",
|
| 38949 |
"arena_name": "mistral-small-24b-instruct-2501",
|
| 38950 |
"arena_org": "Mistral",
|
| 38951 |
+
"arena_elo": 1273.69,
|
| 38952 |
+
"arena_rank": 215,
|
| 38953 |
"arena_votes": 14677
|
| 38954 |
},
|
| 38955 |
{
|
| 38956 |
"name": "glm-4-0520",
|
| 38957 |
"arena_name": "glm-4-0520",
|
| 38958 |
"arena_org": "Zhipu AI",
|
| 38959 |
+
"arena_elo": 1273.12,
|
| 38960 |
+
"arena_rank": 216,
|
| 38961 |
"arena_votes": 9788
|
| 38962 |
},
|
| 38963 |
{
|
| 38964 |
"name": "reka-flash-20240904",
|
| 38965 |
"arena_name": "reka-flash-20240904",
|
| 38966 |
"arena_org": "Reka AI",
|
| 38967 |
+
"arena_elo": 1271.86,
|
| 38968 |
+
"arena_rank": 217,
|
| 38969 |
"arena_votes": 7537
|
| 38970 |
},
|
| 38971 |
{
|
| 38972 |
"name": "c4ai-aya-expanse-32b",
|
| 38973 |
"arena_name": "c4ai-aya-expanse-32b",
|
| 38974 |
"arena_org": "Cohere",
|
| 38975 |
+
"arena_elo": 1266.85,
|
| 38976 |
+
"arena_rank": 219,
|
| 38977 |
"arena_votes": 27123
|
| 38978 |
},
|
| 38979 |
{
|
| 38980 |
"name": "amazon-nova-lite-v1.0",
|
| 38981 |
"arena_name": "amazon-nova-lite-v1.0",
|
| 38982 |
"arena_org": "Amazon",
|
| 38983 |
+
"arena_elo": 1260.52,
|
| 38984 |
+
"arena_rank": 225,
|
| 38985 |
"arena_votes": 19376
|
| 38986 |
},
|
| 38987 |
{
|
| 38988 |
"name": "olmo-2-0325-32b-instruct",
|
| 38989 |
"arena_name": "olmo-2-0325-32b-instruct",
|
| 38990 |
"arena_org": "Ai2",
|
| 38991 |
+
"arena_elo": 1251.78,
|
| 38992 |
+
"arena_rank": 228,
|
| 38993 |
"arena_votes": 3335
|
| 38994 |
},
|
| 38995 |
{
|
| 38996 |
"name": "amazon-nova-micro-v1.0",
|
| 38997 |
"arena_name": "amazon-nova-micro-v1.0",
|
| 38998 |
"arena_org": "Amazon",
|
| 38999 |
+
"arena_elo": 1240.73,
|
| 39000 |
+
"arena_rank": 231,
|
| 39001 |
"arena_votes": 19355
|
| 39002 |
},
|
| 39003 |
{
|
| 39004 |
"name": "ministral-8b-2410",
|
| 39005 |
"arena_name": "ministral-8b-2410",
|
| 39006 |
"arena_org": "Mistral",
|
| 39007 |
+
"arena_elo": 1236.86,
|
| 39008 |
+
"arena_rank": 233,
|
| 39009 |
"arena_votes": 4780
|
| 39010 |
},
|
| 39011 |
{
|
| 39012 |
"name": "gemini-pro-dev-api",
|
| 39013 |
"arena_name": "gemini-pro-dev-api",
|
| 39014 |
"arena_org": "Google",
|
| 39015 |
+
"arena_elo": 1234.71,
|
| 39016 |
+
"arena_rank": 234,
|
| 39017 |
"arena_votes": 18352
|
| 39018 |
},
|
| 39019 |
{
|
| 39020 |
"name": "hunyuan-standard-256k",
|
| 39021 |
"arena_name": "hunyuan-standard-256k",
|
| 39022 |
"arena_org": "Tencent",
|
| 39023 |
+
"arena_elo": 1233.25,
|
| 39024 |
+
"arena_rank": 236,
|
| 39025 |
"arena_votes": 2729
|
| 39026 |
},
|
| 39027 |
{
|
| 39028 |
"name": "reka-flash-21b-20240226-online",
|
| 39029 |
"arena_name": "reka-flash-21b-20240226-online",
|
| 39030 |
"arena_org": "Reka AI",
|
| 39031 |
+
"arena_elo": 1232.92,
|
| 39032 |
+
"arena_rank": 237,
|
| 39033 |
"arena_votes": 15451
|
| 39034 |
},
|
| 39035 |
{
|
| 39036 |
"name": "reka-flash-21b-20240226",
|
| 39037 |
"arena_name": "reka-flash-21b-20240226",
|
| 39038 |
"arena_org": "Reka AI",
|
| 39039 |
+
"arena_elo": 1226.17,
|
| 39040 |
+
"arena_rank": 241,
|
| 39041 |
"arena_votes": 24806
|
| 39042 |
},
|
| 39043 |
{
|
| 39044 |
"name": "c4ai-aya-expanse-8b",
|
| 39045 |
"arena_name": "c4ai-aya-expanse-8b",
|
| 39046 |
"arena_org": "Cohere",
|
| 39047 |
+
"arena_elo": 1222.84,
|
| 39048 |
+
"arena_rank": 244,
|
| 39049 |
"arena_votes": 9827
|
| 39050 |
},
|
| 39051 |
{
|
| 39052 |
"name": "mistral-medium",
|
| 39053 |
"arena_name": "mistral-medium",
|
| 39054 |
"arena_org": "Mistral",
|
| 39055 |
+
"arena_elo": 1222.6,
|
| 39056 |
+
"arena_rank": 245,
|
| 39057 |
"arena_votes": 34552
|
| 39058 |
},
|
| 39059 |
{
|
| 39060 |
"name": "gemini-pro",
|
| 39061 |
"arena_name": "gemini-pro",
|
| 39062 |
"arena_org": "Google",
|
| 39063 |
+
"arena_elo": 1221.39,
|
| 39064 |
+
"arena_rank": 246,
|
| 39065 |
"arena_votes": 6390
|
| 39066 |
},
|
| 39067 |
{
|
| 39068 |
"name": "gpt-3.5-turbo-1106",
|
| 39069 |
"arena_name": "gpt-3.5-turbo-1106",
|
| 39070 |
"arena_org": "OpenAI",
|
| 39071 |
+
"arena_elo": 1202,
|
| 39072 |
+
"arena_rank": 253,
|
| 39073 |
"arena_votes": 16616,
|
| 39074 |
"aider_pass_rate": 0.455
|
| 39075 |
},
|
|
|
|
| 39077 |
"name": "dbrx-instruct-preview",
|
| 39078 |
"arena_name": "dbrx-instruct-preview",
|
| 39079 |
"arena_org": "Databricks",
|
| 39080 |
+
"arena_elo": 1194.56,
|
| 39081 |
+
"arena_rank": 257,
|
| 39082 |
"arena_votes": 32196
|
| 39083 |
},
|
| 39084 |
{
|
| 39085 |
"name": "wizardlm-70b",
|
| 39086 |
"arena_name": "wizardlm-70b",
|
| 39087 |
"arena_org": "Microsoft",
|
| 39088 |
+
"arena_elo": 1184.28,
|
| 39089 |
+
"arena_rank": 260,
|
| 39090 |
"arena_votes": 8214
|
| 39091 |
},
|
| 39092 |
{
|
| 39093 |
"name": "snowflake-arctic-instruct",
|
| 39094 |
"arena_name": "snowflake-arctic-instruct",
|
| 39095 |
"arena_org": "Snowflake",
|
| 39096 |
+
"arena_elo": 1179.08,
|
| 39097 |
+
"arena_rank": 267,
|
| 39098 |
"arena_votes": 32836
|
| 39099 |
},
|
| 39100 |
{
|
| 39101 |
"name": "tulu-2-dpo-70b",
|
| 39102 |
"arena_name": "tulu-2-dpo-70b",
|
| 39103 |
"arena_org": "AllenAI/UW",
|
| 39104 |
+
"arena_elo": 1177.66,
|
| 39105 |
+
"arena_rank": 269,
|
| 39106 |
"arena_votes": 6534
|
| 39107 |
},
|
| 39108 |
{
|
| 39109 |
"name": "vicuna-33b",
|
| 39110 |
"arena_name": "vicuna-33b",
|
| 39111 |
"arena_org": "LMSYS",
|
| 39112 |
+
"arena_elo": 1172.41,
|
| 39113 |
+
"arena_rank": 271,
|
| 39114 |
"arena_votes": 22479
|
| 39115 |
},
|
| 39116 |
{
|
| 39117 |
"name": "llama-2-70b-chat",
|
| 39118 |
"arena_name": "llama-2-70b-chat",
|
| 39119 |
"arena_org": "Meta",
|
| 39120 |
+
"arena_elo": 1170.44,
|
| 39121 |
+
"arena_rank": 274,
|
| 39122 |
"arena_votes": 38491
|
| 39123 |
},
|
| 39124 |
{
|
| 39125 |
"name": "llama2-70b-steerlm-chat",
|
| 39126 |
"arena_name": "llama2-70b-steerlm-chat",
|
| 39127 |
"arena_org": "Nvidia",
|
| 39128 |
+
"arena_elo": 1154.95,
|
| 39129 |
+
"arena_rank": 280,
|
| 39130 |
"arena_votes": 3584
|
| 39131 |
},
|
| 39132 |
{
|
| 39133 |
"name": "dolphin-2.2.1-mistral-7b",
|
| 39134 |
"arena_name": "dolphin-2.2.1-mistral-7b",
|
| 39135 |
"arena_org": "Cognitive Computations",
|
| 39136 |
+
"arena_elo": 1151.59,
|
| 39137 |
+
"arena_rank": 282,
|
| 39138 |
"arena_votes": 1679
|
| 39139 |
},
|
| 39140 |
{
|
| 39141 |
"name": "mpt-30b-chat",
|
| 39142 |
"arena_name": "mpt-30b-chat",
|
| 39143 |
"arena_org": "MosaicML",
|
| 39144 |
+
"arena_elo": 1149.72,
|
| 39145 |
+
"arena_rank": 283,
|
| 39146 |
"arena_votes": 2571
|
| 39147 |
},
|
| 39148 |
{
|
| 39149 |
"name": "wizardlm-13b",
|
| 39150 |
"arena_name": "wizardlm-13b",
|
| 39151 |
"arena_org": "Microsoft",
|
| 39152 |
+
"arena_elo": 1148.75,
|
| 39153 |
+
"arena_rank": 285,
|
| 39154 |
"arena_votes": 7046
|
| 39155 |
},
|
| 39156 |
{
|
| 39157 |
"name": "falcon-180b-chat",
|
| 39158 |
"arena_name": "falcon-180b-chat",
|
| 39159 |
"arena_org": "TII",
|
| 39160 |
+
"arena_elo": 1146.58,
|
| 39161 |
+
"arena_rank": 286,
|
| 39162 |
"arena_votes": 1295
|
| 39163 |
},
|
| 39164 |
{
|
| 39165 |
"name": "phi-3-mini-4k-instruct-june-2024",
|
| 39166 |
"arena_name": "phi-3-mini-4k-instruct-june-2024",
|
| 39167 |
"arena_org": "Microsoft",
|
| 39168 |
+
"arena_elo": 1142.66,
|
| 39169 |
+
"arena_rank": 288,
|
| 39170 |
"arena_votes": 12296
|
| 39171 |
},
|
| 39172 |
{
|
| 39173 |
"name": "llama-2-13b-chat",
|
| 39174 |
"arena_name": "llama-2-13b-chat",
|
| 39175 |
"arena_org": "Meta",
|
| 39176 |
+
"arena_elo": 1141.1,
|
| 39177 |
+
"arena_rank": 289,
|
| 39178 |
"arena_votes": 19171
|
| 39179 |
},
|
| 39180 |
{
|
| 39181 |
"name": "vicuna-13b",
|
| 39182 |
"arena_name": "vicuna-13b",
|
| 39183 |
"arena_org": "LMSYS",
|
| 39184 |
+
"arena_elo": 1140.48,
|
| 39185 |
+
"arena_rank": 290,
|
| 39186 |
"arena_votes": 19366
|
| 39187 |
},
|
| 39188 |
{
|
| 39189 |
"name": "qwen-14b-chat",
|
| 39190 |
"arena_name": "qwen-14b-chat",
|
| 39191 |
"arena_org": "Alibaba",
|
| 39192 |
+
"arena_elo": 1138.13,
|
| 39193 |
+
"arena_rank": 291,
|
| 39194 |
"arena_votes": 4964
|
| 39195 |
},
|
| 39196 |
{
|
| 39197 |
"name": "palm-2",
|
| 39198 |
"arena_name": "palm-2",
|
| 39199 |
"arena_org": "Google",
|
| 39200 |
+
"arena_elo": 1136.74,
|
| 39201 |
+
"arena_rank": 292,
|
| 39202 |
"arena_votes": 8554
|
| 39203 |
},
|
| 39204 |
{
|
| 39205 |
"name": "codellama-34b-instruct",
|
| 39206 |
"arena_name": "codellama-34b-instruct",
|
| 39207 |
"arena_org": "Meta",
|
| 39208 |
+
"arena_elo": 1136.18,
|
| 39209 |
+
"arena_rank": 293,
|
| 39210 |
"arena_votes": 7363
|
| 39211 |
},
|
| 39212 |
{
|
| 39213 |
"name": "guanaco-33b",
|
| 39214 |
"arena_name": "guanaco-33b",
|
| 39215 |
"arena_org": "UW",
|
| 39216 |
+
"arena_elo": 1126.9,
|
| 39217 |
+
"arena_rank": 298,
|
| 39218 |
"arena_votes": 2921
|
| 39219 |
},
|
| 39220 |
{
|
| 39221 |
"name": "stripedhyena-nous-7b",
|
| 39222 |
"arena_name": "stripedhyena-nous-7b",
|
| 39223 |
"arena_org": "Together AI",
|
| 39224 |
+
"arena_elo": 1120.52,
|
| 39225 |
+
"arena_rank": 300,
|
| 39226 |
"arena_votes": 5184
|
| 39227 |
},
|
| 39228 |
{
|
| 39229 |
"name": "codellama-70b-instruct",
|
| 39230 |
"arena_name": "codellama-70b-instruct",
|
| 39231 |
"arena_org": "Meta",
|
| 39232 |
+
"arena_elo": 1118.64,
|
| 39233 |
+
"arena_rank": 301,
|
| 39234 |
"arena_votes": 1143
|
| 39235 |
},
|
| 39236 |
{
|
| 39237 |
"name": "vicuna-7b",
|
| 39238 |
"arena_name": "vicuna-7b",
|
| 39239 |
"arena_org": "LMSYS",
|
| 39240 |
+
"arena_elo": 1114.19,
|
| 39241 |
+
"arena_rank": 302,
|
| 39242 |
"arena_votes": 6923
|
| 39243 |
},
|
| 39244 |
{
|
| 39245 |
"name": "mistral-7b-instruct",
|
| 39246 |
"arena_name": "mistral-7b-instruct",
|
| 39247 |
"arena_org": "Mistral",
|
| 39248 |
+
"arena_elo": 1109.2,
|
| 39249 |
+
"arena_rank": 306,
|
| 39250 |
"arena_votes": 8977
|
| 39251 |
},
|
| 39252 |
{
|
| 39253 |
"name": "llama-2-7b-chat",
|
| 39254 |
"arena_name": "llama-2-7b-chat",
|
| 39255 |
"arena_org": "Meta",
|
| 39256 |
+
"arena_elo": 1107.78,
|
| 39257 |
+
"arena_rank": 307,
|
| 39258 |
"arena_votes": 14148
|
| 39259 |
},
|
| 39260 |
{
|
| 39261 |
"name": "olmo-7b-instruct",
|
| 39262 |
"arena_name": "olmo-7b-instruct",
|
| 39263 |
"arena_org": "Ai2",
|
| 39264 |
+
"arena_elo": 1074.23,
|
| 39265 |
+
"arena_rank": 310,
|
| 39266 |
"arena_votes": 6329
|
| 39267 |
},
|
| 39268 |
{
|
| 39269 |
"name": "koala-13b",
|
| 39270 |
"arena_name": "koala-13b",
|
| 39271 |
"arena_org": "UC Berkeley",
|
| 39272 |
+
"arena_elo": 1069.98,
|
| 39273 |
+
"arena_rank": 311,
|
| 39274 |
"arena_votes": 6964
|
| 39275 |
},
|
| 39276 |
{
|
| 39277 |
"name": "alpaca-13b",
|
| 39278 |
"arena_name": "alpaca-13b",
|
| 39279 |
"arena_org": "Stanford",
|
| 39280 |
+
"arena_elo": 1066.86,
|
| 39281 |
+
"arena_rank": 312,
|
| 39282 |
"arena_votes": 5745
|
| 39283 |
},
|
| 39284 |
{
|
| 39285 |
"name": "gpt4all-13b-snoozy",
|
| 39286 |
"arena_name": "gpt4all-13b-snoozy",
|
| 39287 |
"arena_org": "Nomic AI",
|
| 39288 |
+
"arena_elo": 1065.45,
|
| 39289 |
+
"arena_rank": 313,
|
| 39290 |
"arena_votes": 1743
|
| 39291 |
},
|
| 39292 |
{
|
| 39293 |
"name": "mpt-7b-chat",
|
| 39294 |
"arena_name": "mpt-7b-chat",
|
| 39295 |
"arena_org": "MosaicML",
|
| 39296 |
+
"arena_elo": 1061.33,
|
| 39297 |
+
"arena_rank": 314,
|
| 39298 |
"arena_votes": 3925
|
| 39299 |
},
|
| 39300 |
{
|
| 39301 |
"name": "chatglm3-6b",
|
| 39302 |
"arena_name": "chatglm3-6b",
|
| 39303 |
"arena_org": "Tsinghua",
|
| 39304 |
+
"arena_elo": 1055.59,
|
| 39305 |
+
"arena_rank": 315,
|
| 39306 |
"arena_votes": 4658
|
| 39307 |
},
|
| 39308 |
{
|
| 39309 |
"name": "RWKV-4-Raven-14B",
|
| 39310 |
"arena_name": "RWKV-4-Raven-14B",
|
| 39311 |
"arena_org": "RWKV",
|
| 39312 |
+
"arena_elo": 1040.82,
|
| 39313 |
+
"arena_rank": 316,
|
| 39314 |
"arena_votes": 4845
|
| 39315 |
},
|
| 39316 |
{
|
| 39317 |
"name": "chatglm2-6b",
|
| 39318 |
"arena_name": "chatglm2-6b",
|
| 39319 |
"arena_org": "Tsinghua",
|
| 39320 |
+
"arena_elo": 1023.73,
|
| 39321 |
+
"arena_rank": 317,
|
| 39322 |
"arena_votes": 2657
|
| 39323 |
},
|
| 39324 |
{
|
| 39325 |
"name": "oasst-pythia-12b",
|
| 39326 |
"arena_name": "oasst-pythia-12b",
|
| 39327 |
"arena_org": "OpenAssistant",
|
| 39328 |
+
"arena_elo": 1021.6,
|
| 39329 |
+
"arena_rank": 318,
|
| 39330 |
"arena_votes": 6311
|
| 39331 |
},
|
| 39332 |
{
|
| 39333 |
"name": "chatglm-6b",
|
| 39334 |
"arena_name": "chatglm-6b",
|
| 39335 |
"arena_org": "Tsinghua",
|
| 39336 |
+
"arena_elo": 995.12,
|
| 39337 |
+
"arena_rank": 319,
|
| 39338 |
"arena_votes": 4914
|
| 39339 |
},
|
| 39340 |
{
|
| 39341 |
"name": "fastchat-t5-3b",
|
| 39342 |
"arena_name": "fastchat-t5-3b",
|
| 39343 |
"arena_org": "LMSYS",
|
| 39344 |
+
"arena_elo": 990.805,
|
| 39345 |
+
"arena_rank": 320,
|
| 39346 |
"arena_votes": 4203
|
| 39347 |
},
|
| 39348 |
{
|
| 39349 |
"name": "stablelm-tuned-alpha-7b",
|
| 39350 |
"arena_name": "stablelm-tuned-alpha-7b",
|
| 39351 |
"arena_org": "Stability AI",
|
| 39352 |
+
"arena_elo": 952.127,
|
| 39353 |
+
"arena_rank": 323,
|
| 39354 |
"arena_votes": 3287
|
| 39355 |
},
|
| 39356 |
{
|