diff --git "a/data/benchmarks.json" "b/data/benchmarks.json" --- "a/data/benchmarks.json" +++ "b/data/benchmarks.json" @@ -19,8 +19,8 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1288.4, - "arena_rank": 210, + "arena_elo": 1288.33, + "arena_rank": 214, "arena_votes": 8662 }, { @@ -43,8 +43,8 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1238.57, - "arena_rank": 242, + "arena_elo": 1238.49, + "arena_rank": 246, "arena_votes": 8858 }, { @@ -158,9 +158,9 @@ "lb_language": 0.39707333333333333, "lb_if": 0.6187925, "lb_data_analysis": 0.5411900000000001, - "arena_elo": 1322.78, - "arena_rank": 175, - "arena_votes": 70248, + "arena_elo": 1322.62, + "arena_rank": 180, + "arena_votes": 70241, "aider_pass_rate": 0.617 }, { @@ -197,8 +197,8 @@ "lb_language": 0.56937, "lb_if": 0.7229999999999999, "lb_data_analysis": 0.5411, - "arena_elo": 1341.68, - "arena_rank": 153, + "arena_elo": 1341.53, + "arena_rank": 157, "arena_votes": 82419, "aider_pass_rate": 0.5710000000000001 }, @@ -239,9 +239,9 @@ "lb_language": 0.54477, "lb_if": 0.69296, "lb_data_analysis": 0.5618650000000001, - "arena_elo": 1371.72, - "arena_rank": 119, - "arena_votes": 88519, + "arena_elo": 1371.58, + "arena_rank": 123, + "arena_votes": 88515, "aider_pass_rate": 0.6920000000000001 }, { @@ -279,8 +279,8 @@ "lb_language": 0.30073333333333335, "lb_if": 0.6402924999999999, "lb_data_analysis": 0.3731, - "arena_elo": 1260.27, - "arena_rank": 234, + "arena_elo": 1260.1, + "arena_rank": 238, "arena_votes": 117701, "aider_pass_rate": 0.406 }, @@ -320,8 +320,8 @@ "lb_language": 0.53574, "lb_if": 0.6388750000000001, "lb_data_analysis": 0.5784, - "arena_elo": 1321.08, - "arena_rank": 179, + "arena_elo": 1320.92, + "arena_rank": 183, "arena_votes": 194909, "aider_pass_rate": 0.534 }, @@ -361,8 +361,8 @@ "lb_language": 0.38083333333333336, "lb_if": 0.6500425, "lb_data_analysis": 0.38839999999999997, - "arena_elo": 1280.21, - "arena_rank": 219, + "arena_elo": 1280.04, + "arena_rank": 223, "arena_votes": 109284, "aider_pass_rate": 0.436 }, @@ -394,8 +394,8 @@ "lb_language": 0.14644333333333334, "lb_if": 0.571625, "lb_data_analysis": 0.2354, - "arena_elo": 1226.11, - "arena_rank": 250, + "arena_elo": 1225.94, + "arena_rank": 254, "arena_votes": 54036 }, { @@ -431,7 +431,7 @@ "lb_if": 0.8050825, "lb_data_analysis": 0.69625, "arena_elo": 1397.55, - "arena_rank": 91, + "arena_rank": 95, "arena_votes": 18524 }, { @@ -464,8 +464,8 @@ "lb_language": 0.3518266666666667, "lb_if": 0.6915024999999999, "lb_data_analysis": 0.4417, - "arena_elo": 1306.76, - "arena_rank": 197, + "arena_elo": 1306.73, + "arena_rank": 201, "arena_votes": 24572, "aider_pass_rate": 0.5489999999999999 }, @@ -500,7 +500,7 @@ "lb_if": 0.75246, "lb_data_analysis": 0.6241, "arena_elo": 1358.25, - "arena_rank": 130, + "arena_rank": 134, "arena_votes": 21770 }, { @@ -659,8 +659,8 @@ "lb_language": 0.32621666666666665, "lb_if": 0.5810025000000001, "lb_data_analysis": 0.4481, - "arena_elo": 1287.74, - "arena_rank": 211, + "arena_elo": 1287.66, + "arena_rank": 215, "arena_votes": 75754 }, { @@ -703,8 +703,8 @@ "lb_language": 0.25531, "lb_if": 0.52621, "lb_data_analysis": 0.2959, - "arena_elo": 1265.21, - "arena_rank": 230, + "arena_elo": 1265.13, + "arena_rank": 234, "arena_votes": 54611 }, { @@ -775,8 +775,8 @@ "hf_math_lvl5": 0.3806646525679758, "hf_musr": 0.45806250000000004, "hf_avg": 43.409948245645786, - "arena_elo": 1292.97, - "arena_rank": 208, + "arena_elo": 1292.9, + "arena_rank": 212, "arena_votes": 55240, "aider_pass_rate": 0.436 }, @@ -824,8 +824,8 @@ "hf_math_lvl5": 0.1729607250755287, "hf_musr": 0.3845416666666666, "hf_avg": 28.01011138792457, - "arena_elo": 1211.15, - "arena_rank": 260, + "arena_elo": 1211.13, + "arena_rank": 264, "arena_votes": 49605, "aider_pass_rate": 0.263 }, @@ -884,8 +884,8 @@ "hf_math_lvl5": 0.17673716012084592, "hf_musr": 0.3528541666666667, "hf_avg": 24.204650807793456, - "arena_elo": 1165.97, - "arena_rank": 286, + "arena_elo": 1165.84, + "arena_rank": 290, "arena_votes": 7936 }, { @@ -940,9 +940,9 @@ "hf_math_lvl5": 0.48338368580060426, "hf_musr": 0.44612500000000005, "hf_avg": 44.84747145129876, - "arena_elo": 1318.24, - "arena_rank": 183, - "arena_votes": 54891, + "arena_elo": 1318.16, + "arena_rank": 187, + "arena_votes": 54886, "aider_pass_rate": 0.42100000000000004 }, { @@ -1065,8 +1065,8 @@ "lb_language": 0.29333666666666663, "lb_if": 0.5838349999999999, "lb_data_analysis": 0.4376, - "arena_elo": 1255.63, - "arena_rank": 237, + "arena_elo": 1255.56, + "arena_rank": 241, "arena_votes": 24126 }, { @@ -1241,8 +1241,8 @@ "lb_language": 0.31805, "lb_if": 0.6957099999999999, "lb_data_analysis": 0.33599999999999997, - "arena_elo": 1298.48, - "arena_rank": 206, + "arena_elo": 1298.49, + "arena_rank": 210, "arena_votes": 7140 }, { @@ -1277,8 +1277,8 @@ "lb_language": 0.2421633333333333, "lb_if": 0.604665, "lb_data_analysis": 0.3682, - "arena_elo": 1223.28, - "arena_rank": 252, + "arena_elo": 1223.07, + "arena_rank": 256, "arena_votes": 66207, "aider_pass_rate": 0.414 }, @@ -1315,8 +1315,8 @@ "lb_language": 0.4956833333333333, "lb_if": 0.717875, "lb_data_analysis": 0.39039999999999997, - "arena_elo": 1274.34, - "arena_rank": 224, + "arena_elo": 1274.14, + "arena_rank": 228, "arena_votes": 88723, "aider_pass_rate": 0.466 }, @@ -1350,8 +1350,8 @@ "lb_language": 0.45262, "lb_if": 0.713875, "lb_data_analysis": 0.4998, - "arena_elo": 1323.67, - "arena_rank": 173, + "arena_elo": 1323.58, + "arena_rank": 177, "arena_votes": 98114 }, { @@ -1386,8 +1386,8 @@ "lb_language": 0.5393533333333334, "lb_if": 0.7217100000000001, "lb_data_analysis": 0.4661, - "arena_elo": 1345.31, - "arena_rank": 150, + "arena_elo": 1345.24, + "arena_rank": 154, "arena_votes": 112881, "aider_pass_rate": 0.602 }, @@ -1420,8 +1420,8 @@ "lb_language": 0.4563466666666667, "lb_if": 0.6858299999999999, "lb_data_analysis": 0.6236999999999999, - "arena_elo": 1334.53, - "arena_rank": 161, + "arena_elo": 1334.45, + "arena_rank": 165, "arena_votes": 45499, "aider_pass_rate": 0.5710000000000001 }, @@ -1459,8 +1459,8 @@ "lb_language": 0.29879333333333336, "lb_if": 0.5679974999999999, "lb_data_analysis": 0.55099, - "arena_elo": 1317.33, - "arena_rank": 188, + "arena_elo": 1317.32, + "arena_rank": 192, "arena_votes": 68718, "aider_pass_rate": 0.406 }, @@ -1504,8 +1504,8 @@ "hf_math_lvl5": 0.37462235649546827, "hf_musr": 0.41384375, "hf_avg": 27.639223265636087, - "arena_elo": 1401.66, - "arena_rank": 84, + "arena_elo": 1401.61, + "arena_rank": 90, "arena_votes": 27807, "aider_pass_rate": 0.654 }, @@ -1528,8 +1528,8 @@ "arena_votes": "arena", "aider_pass_rate": "aider" }, - "arena_elo": 1336.64, - "arena_rank": 157, + "arena_elo": 1336.66, + "arena_rank": 161, "arena_votes": 51981, "aider_pass_rate": 0.5 }, @@ -1552,8 +1552,8 @@ "arena_votes": "arena", "aider_pass_rate": "aider" }, - "arena_elo": 1387.78, - "arena_rank": 102, + "arena_elo": 1387.74, + "arena_rank": 106, "arena_votes": 31122, "aider_pass_rate": 0.579 }, @@ -1583,9 +1583,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1347.52, - "arena_rank": 142, - "arena_votes": 57588 + "arena_elo": 1347.47, + "arena_rank": 146, + "arena_votes": 57581 }, { "slug": "openai/o3", @@ -1712,8 +1712,8 @@ "lb_language": 0.38114999999999993, "lb_if": 0.7550025, "lb_data_analysis": 0.4718, - "arena_elo": 1302.46, - "arena_rank": 205, + "arena_elo": 1302.44, + "arena_rank": 209, "arena_votes": 39406 }, { @@ -1788,8 +1788,8 @@ "lb_language": 0.23245333333333332, "lb_if": 0.5869175, "lb_data_analysis": 0.48810000000000003, - "arena_elo": 1270.12, - "arena_rank": 228, + "arena_elo": 1270.06, + "arena_rank": 232, "arena_votes": 5432, "aider_pass_rate": 0.594 }, @@ -1873,8 +1873,8 @@ "lb_language": 0.29213666666666666, "lb_if": 0.68271, "lb_data_analysis": 0.1636, - "arena_elo": 1261.08, - "arena_rank": 233, + "arena_elo": 1260.93, + "arena_rank": 237, "arena_votes": 37325, "aider_pass_rate": 0.444 }, @@ -1971,8 +1971,8 @@ "lb_language": 0.21091000000000001, "lb_if": 0.35587499999999994, "lb_data_analysis": 0.25925, - "arena_elo": 1156.1, - "arena_rank": 288, + "arena_elo": 1156.05, + "arena_rank": 292, "arena_votes": 3231 }, { @@ -2151,8 +2151,8 @@ "hf_math_lvl5": 0.277190332326284, "hf_musr": 0.4281979166666667, "hf_avg": 33.35799367075618, - "arena_elo": 1212.69, - "arena_rank": 258, + "arena_elo": 1212.58, + "arena_rank": 262, "arena_votes": 24146 }, { @@ -2379,8 +2379,8 @@ "hf_math_lvl5": 0.06268882175226587, "hf_musr": 0.39784375, "hf_avg": 24.226662652803373, - "arena_elo": 1183.04, - "arena_rank": 272, + "arena_elo": 1182.9, + "arena_rank": 276, "arena_votes": 15483 }, { @@ -2901,8 +2901,8 @@ "hf_math_lvl5": 0.03851963746223565, "hf_musr": 0.47709375000000004, "hf_avg": 14.343669671742774, - "arena_elo": 1222.5, - "arena_rank": 253, + "arena_elo": 1222.37, + "arena_rank": 257, "arena_votes": 104642 }, { @@ -11089,8 +11089,8 @@ "lb_language": 0.07196666666666666, "lb_if": 0.5279175, "lb_data_analysis": 0.081, - "arena_elo": 1126.04, - "arena_rank": 309, + "arena_elo": 1125.86, + "arena_rank": 313, "arena_votes": 1785 }, { @@ -11125,8 +11125,8 @@ "lb_language": 0.042846666666666665, "lb_if": 0.48317, "lb_data_analysis": 0.0762, - "arena_elo": 1130.16, - "arena_rank": 305, + "arena_elo": 1129.99, + "arena_rank": 309, "arena_votes": 11118 }, { @@ -11177,8 +11177,8 @@ "hf_math_lvl5": 0.20468277945619334, "hf_musr": 0.4465208333333333, "hf_avg": 34.125963384670946, - "arena_elo": 1211.92, - "arena_rank": 259, + "arena_elo": 1211.75, + "arena_rank": 263, "arena_votes": 4652 }, { @@ -11361,8 +11361,8 @@ "hf_math_lvl5": 0.0581570996978852, "hf_musr": 0.342125, "hf_avg": 15.02227766709556, - "arena_elo": 1113.53, - "arena_rank": 314, + "arena_elo": 1113.36, + "arena_rank": 318, "arena_votes": 2199 }, { @@ -18389,8 +18389,8 @@ "hf_math_lvl5": 0.12235649546827794, "hf_musr": 0.4595416666666667, "hf_avg": 27.353190438571634, - "arena_elo": 1163.88, - "arena_rank": 287, + "arena_elo": 1163.73, + "arena_rank": 291, "arena_votes": 3777 }, { @@ -20307,9 +20307,9 @@ "lb_language": 0.5148133333333332, "lb_if": 0.81829, "lb_data_analysis": 0.69529, - "arena_elo": 1335.53, - "arena_rank": 159, - "arena_votes": 25531 + "arena_elo": 1335.63, + "arena_rank": 163, + "arena_votes": 25526 }, { "hf_id": "Qwen/Qwen1.5-0.5B", @@ -20466,8 +20466,8 @@ "lb_language": 0.13224333333333332, "lb_if": 0.5526275, "lb_data_analysis": 0.20179999999999998, - "arena_elo": 1233.32, - "arena_rank": 245, + "arena_elo": 1233.18, + "arena_rank": 249, "arena_votes": 26195, "aider_pass_rate": 0.308 }, @@ -20519,8 +20519,8 @@ "hf_math_lvl5": 0.15256797583081572, "hf_musr": 0.43997916666666664, "hf_avg": 23.566106475051374, - "arena_elo": 1190.13, - "arena_rank": 269, + "arena_elo": 1189.97, + "arena_rank": 273, "arena_votes": 17839 }, { @@ -20571,8 +20571,8 @@ "hf_math_lvl5": 0.19561933534743203, "hf_musr": 0.4159791666666666, "hf_avg": 29.25746822860332, - "arena_elo": 1203.04, - "arena_rank": 262, + "arena_elo": 1202.86, + "arena_rank": 266, "arena_votes": 21741 }, { @@ -20629,8 +20629,8 @@ "lb_language": 0.05798333333333333, "lb_if": 0.27749999999999997, "lb_data_analysis": 0.0469, - "arena_elo": 1089.35, - "arena_rank": 319, + "arena_elo": 1089.14, + "arena_rank": 323, "arena_votes": 7597 }, { @@ -20687,8 +20687,8 @@ "lb_language": 0.061816666666666666, "lb_if": 0.4411675, "lb_data_analysis": 0.0435, - "arena_elo": 1142.97, - "arena_rank": 297, + "arena_elo": 1142.81, + "arena_rank": 301, "arena_votes": 4737 }, { @@ -30289,8 +30289,8 @@ "hf_math_lvl5": 0.4501510574018127, "hf_musr": 0.4948333333333334, "hf_avg": 42.33178738532094, - "arena_elo": 1286.02, - "arena_rank": 215, + "arena_elo": 1285.95, + "arena_rank": 219, "arena_votes": 2846 }, { @@ -30385,8 +30385,8 @@ "hf_math_lvl5": 0.19637462235649547, "hf_musr": 0.41746875, "hf_avg": 26.034998081672143, - "arena_elo": 1220.54, - "arena_rank": 257, + "arena_elo": 1220.48, + "arena_rank": 261, "arena_votes": 2896 }, { @@ -33947,8 +33947,8 @@ "hf_math_lvl5": 0.08383685800604229, "hf_musr": 0.41201041666666666, "hf_avg": 20.83936104726783, - "arena_elo": 1166.68, - "arena_rank": 285, + "arena_elo": 1166.52, + "arena_rank": 289, "arena_votes": 10224 }, { @@ -35729,8 +35729,8 @@ "hf_math_lvl5": 0.013595166163141994, "hf_musr": 0.37390625000000005, "hf_avg": 6.3704357034963754, - "arena_elo": 979.307, - "arena_rank": 331, + "arena_elo": 979.06, + "arena_rank": 335, "arena_votes": 3412 }, { @@ -35995,8 +35995,8 @@ "hf_math_lvl5": 0.09290030211480363, "hf_musr": 0.5058645833333334, "hf_avg": 27.310631874736753, - "arena_elo": 1183.57, - "arena_rank": 271, + "arena_elo": 1183.38, + "arena_rank": 275, "arena_votes": 4932 }, { @@ -38707,8 +38707,8 @@ "hf_math_lvl5": 0.01812688821752266, "hf_musr": 0.33939583333333334, "hf_avg": 8.053373854341979, - "arena_elo": 1113.73, - "arena_rank": 313, + "arena_elo": 1113.59, + "arena_rank": 317, "arena_votes": 10854 }, { @@ -38743,8 +38743,8 @@ "lb_language": 0.10647333333333332, "lb_if": 0.443375, "lb_data_analysis": 0.0726, - "arena_elo": 1179.85, - "arena_rank": 276, + "arena_elo": 1179.72, + "arena_rank": 280, "arena_votes": 23893 }, { @@ -38817,8 +38817,8 @@ "hf_math_lvl5": 0.0007552870090634441, "hf_musr": 0.39288541666666665, "hf_avg": 17.046939294966545, - "arena_elo": 1198.63, - "arena_rank": 264, + "arena_elo": 1198.58, + "arena_rank": 268, "arena_votes": 46616 }, { @@ -38913,8 +38913,8 @@ "hf_math_lvl5": 0.02039274924471299, "hf_musr": 0.334125, "hf_avg": 7.485804130315127, - "arena_elo": 1091.15, - "arena_rank": 318, + "arena_elo": 1091, + "arena_rank": 322, "arena_votes": 4780 }, { @@ -38965,8 +38965,8 @@ "hf_math_lvl5": 0.02945619335347432, "hf_musr": 0.42742708333333335, "hf_avg": 13.067087110466217, - "arena_elo": 1135.55, - "arena_rank": 304, + "arena_elo": 1135.41, + "arena_rank": 308, "arena_votes": 8925 }, { @@ -40809,8 +40809,8 @@ "hf_math_lvl5": 0.02039274924471299, "hf_musr": 0.34621875, "hf_avg": 9.39218439885523, - "arena_elo": 971.422, - "arena_rank": 332, + "arena_elo": 971.17, + "arena_rank": 336, "arena_votes": 2391 }, { @@ -41389,8 +41389,8 @@ "hf_math_lvl5": 0.09214501510574018, "hf_musr": 0.35148958333333336, "hf_avg": 18.396095114284222, - "arena_elo": 1155.26, - "arena_rank": 289, + "arena_elo": 1155.08, + "arena_rank": 293, "arena_votes": 6837 }, { @@ -41485,8 +41485,8 @@ "hf_math_lvl5": 0.1419939577039275, "hf_musr": 0.3900625, "hf_avg": 24.027678753483297, - "arena_elo": 1181.29, - "arena_rank": 275, + "arena_elo": 1181.1, + "arena_rank": 279, "arena_votes": 6638 }, { @@ -41581,8 +41581,8 @@ "hf_math_lvl5": 0.15256797583081572, "hf_musr": 0.3605416666666667, "hf_avg": 21.712212822028288, - "arena_elo": 1178.21, - "arena_rank": 278, + "arena_elo": 1178.07, + "arena_rank": 282, "arena_votes": 3188 }, { @@ -41677,8 +41677,8 @@ "hf_math_lvl5": 0.21978851963746224, "hf_musr": 0.47070833333333334, "hf_avg": 30.6030430081627, - "arena_elo": 1207.67, - "arena_rank": 261, + "arena_elo": 1207.52, + "arena_rank": 265, "arena_votes": 3090 }, { @@ -42539,8 +42539,8 @@ "hf_math_lvl5": 0.4078549848942598, "hf_musr": 0.4558229166666667, "hf_avg": 38.87959582082076, - "arena_elo": 1190.67, - "arena_rank": 268, + "arena_elo": 1190.58, + "arena_rank": 272, "arena_votes": 9901 }, { @@ -46597,8 +46597,8 @@ "hf_math_lvl5": 0.0702416918429003, "hf_musr": 0.3328541666666667, "hf_avg": 14.443126333711135, - "arena_elo": 1110.52, - "arena_rank": 315, + "arena_elo": 1110.39, + "arena_rank": 319, "arena_votes": 8045 }, { @@ -46897,8 +46897,8 @@ "lb_language": 0.13909000000000002, "lb_if": 0.5330400000000001, "lb_data_analysis": 0.2044, - "arena_elo": 1197.15, - "arena_rank": 265, + "arena_elo": 1196.99, + "arena_rank": 269, "arena_votes": 25055 }, { @@ -46933,8 +46933,8 @@ "lb_language": 0.09153666666666667, "lb_if": 0.39083500000000004, "lb_data_analysis": 0.26030000000000003, - "arena_elo": 1128.4, - "arena_rank": 306, + "arena_elo": 1128.21, + "arena_rank": 310, "arena_votes": 20685 }, { @@ -46969,8 +46969,8 @@ "lb_language": 0.08559, "lb_if": 0.363625, "lb_data_analysis": 0.2232, - "arena_elo": 1127.57, - "arena_rank": 307, + "arena_elo": 1127.42, + "arena_rank": 311, "arena_votes": 20118 }, { @@ -47061,8 +47061,8 @@ "lb_language": 0.12944, "lb_if": 0.472, "lb_data_analysis": 0.2343, - "arena_elo": 1170.29, - "arena_rank": 283, + "arena_elo": 1170.13, + "arena_rank": 287, "arena_votes": 17766 }, { @@ -47513,8 +47513,8 @@ "lb_language": 0.09055, "lb_if": 0.5165025, "lb_data_analysis": 0.059300000000000005, - "arena_elo": 1148.74, - "arena_rank": 294, + "arena_elo": 1148.57, + "arena_rank": 298, "arena_votes": 19402 }, { @@ -47731,8 +47731,8 @@ "lb_language": 0.26477666666666666, "lb_if": 0.63167, "lb_data_analysis": 0.255, - "arena_elo": 1228.63, - "arena_rank": 249, + "arena_elo": 1228.45, + "arena_rank": 253, "arena_votes": 51416 }, { @@ -47789,8 +47789,8 @@ "lb_language": 0.13761333333333334, "lb_if": 0.4480825, "lb_data_analysis": 0.1619, - "arena_elo": 1196.31, - "arena_rank": 266, + "arena_elo": 1196.14, + "arena_rank": 270, "arena_votes": 73503 }, { @@ -52059,8 +52059,8 @@ "hf_math_lvl5": 0.07628398791540786, "hf_musr": 0.42543749999999997, "hf_avg": 22.70925524673515, - "arena_elo": 1181.43, - "arena_rank": 273, + "arena_elo": 1181.25, + "arena_rank": 277, "arena_votes": 12637 }, { @@ -52133,8 +52133,8 @@ "hf_math_lvl5": 0.07250755287009064, "hf_musr": 0.4228645833333333, "hf_avg": 21.635827111564595, - "arena_elo": 1181.38, - "arena_rank": 274, + "arena_elo": 1181.19, + "arena_rank": 278, "arena_votes": 7968 }, { @@ -54251,8 +54251,8 @@ "hf_math_lvl5": 0.07099697885196375, "hf_musr": 0.41232291666666665, "hf_avg": 22.3449346084354, - "arena_elo": 1278.99, - "arena_rank": 220, + "arena_elo": 1278.92, + "arena_rank": 224, "arena_votes": 10072 }, { @@ -60655,8 +60655,8 @@ "lb_language": 0.11368333333333334, "lb_if": 0.52779, "lb_data_analysis": 0.1738, - "arena_elo": 1174.35, - "arena_rank": 280, + "arena_elo": 1174.16, + "arena_rank": 284, "arena_votes": 5006 }, { @@ -62583,8 +62583,8 @@ "hf_math_lvl5": 0.05664652567975831, "hf_musr": 0.3899375, "hf_avg": 20.57236409322395, - "arena_elo": 1151.5, - "arena_rank": 291, + "arena_elo": 1151.32, + "arena_rank": 295, "arena_votes": 4155 }, { @@ -65183,8 +65183,8 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1249.43, - "arena_rank": 239, + "arena_elo": 1249.27, + "arena_rank": 243, "arena_votes": 10140 }, { @@ -65211,8 +65211,8 @@ "arena_votes": "arena", "aider_pass_rate": "aider" }, - "arena_elo": 1261.12, - "arena_rank": 232, + "arena_elo": 1260.96, + "arena_rank": 236, "arena_votes": 77554, "aider_pass_rate": 0.218 }, @@ -65241,8 +65241,8 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1275.8, - "arena_rank": 222, + "arena_elo": 1275.67, + "arena_rank": 226, "arena_votes": 9866 }, { @@ -65268,8 +65268,8 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1263.7, - "arena_rank": 231, + "arena_elo": 1263.56, + "arena_rank": 235, "arena_votes": 15147 }, { @@ -65473,8 +65473,8 @@ "arena_votes": "arena", "aider_pass_rate": "aider" }, - "arena_elo": 1312.62, - "arena_rank": 192, + "arena_elo": 1312.51, + "arena_rank": 196, "arena_votes": 93439, "aider_pass_rate": 0.556 }, @@ -65577,8 +65577,8 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1241.58, - "arena_rank": 240, + "arena_elo": 1241.41, + "arena_rank": 244, "arena_votes": 62436 }, { @@ -65604,8 +65604,8 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1313.56, - "arena_rank": 191, + "arena_elo": 1313.48, + "arena_rank": 195, "arena_votes": 45459 }, { @@ -65669,8 +65669,8 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1232.4, - "arena_rank": 248, + "arena_elo": 1232.23, + "arena_rank": 252, "arena_votes": 39302 }, { @@ -65715,8 +65715,8 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1170.79, - "arena_rank": 282, + "arena_elo": 1170.66, + "arena_rank": 286, "arena_votes": 16056 }, { @@ -65824,8 +65824,8 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1309.25, - "arena_rank": 195, + "arena_elo": 1309.19, + "arena_rank": 199, "arena_votes": 34902 }, { @@ -65852,8 +65852,8 @@ "arena_votes": "arena", "aider_pass_rate": "aider" }, - "arena_elo": 1350.81, - "arena_rank": 138, + "arena_elo": 1350.73, + "arena_rank": 143, "arena_votes": 55606, "aider_pass_rate": 0.496 }, @@ -66149,8 +66149,8 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1352.87, - "arena_rank": 137, + "arena_elo": 1352.83, + "arena_rank": 141, "arena_votes": 24955 }, { @@ -66316,8 +66316,8 @@ "arena_votes": "arena", "aider_pass_rate": "aider" }, - "arena_elo": 1304.84, - "arena_rank": 201, + "arena_elo": 1304.78, + "arena_rank": 205, "arena_votes": 28073, "aider_pass_rate": 0.466 }, @@ -66519,9 +66519,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1374.2, - "arena_rank": 117, - "arena_votes": 32712 + "arena_elo": 1374.26, + "arena_rank": 120, + "arena_votes": 32707 }, { "name": "gpt-4.5-preview", @@ -66607,9 +66607,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1365.08, - "arena_rank": 124, - "arena_votes": 47859 + "arena_elo": 1365.11, + "arena_rank": 128, + "arena_votes": 47855 }, { "name": "gemini-2.5-pro-exp-03-25", @@ -66655,9 +66655,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1394.55, - "arena_rank": 95, - "arena_votes": 45807 + "arena_elo": 1394.63, + "arena_rank": 99, + "arena_votes": 45802 }, { "name": "chatgpt-4o-latest-2025-03-27", @@ -66745,9 +66745,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1370.67, - "arena_rank": 120, - "arena_votes": 43405 + "arena_elo": 1370.52, + "arena_rank": 124, + "arena_votes": 43398 }, { "name": "command-a-03-2025", @@ -66774,9 +66774,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1353.46, - "arena_rank": 135, - "arena_votes": 56679 + "arena_elo": 1353.48, + "arena_rank": 139, + "arena_votes": 56677 }, { "name": "gemini-1.5-flash-8b-001", @@ -66801,8 +66801,8 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1258.3, - "arena_rank": 236, + "arena_elo": 1258.23, + "arena_rank": 240, "arena_votes": 35558 }, { @@ -66828,9 +66828,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1359.97, - "arena_rank": 129, - "arena_votes": 43919 + "arena_elo": 1359.9, + "arena_rank": 133, + "arena_votes": 43918 }, { "name": "gemini-2.0-flash-lite-001", @@ -66877,7 +66877,7 @@ "arena_votes": "arena" }, "arena_elo": 1341.43, - "arena_rank": 154, + "arena_rank": 158, "arena_votes": 3829 }, { @@ -66903,8 +66903,8 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1302.83, - "arena_rank": 203, + "arena_elo": 1302.81, + "arena_rank": 208, "arena_votes": 4171 }, { @@ -66932,8 +66932,8 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1444.25, - "arena_rank": 33, + "arena_elo": 1444.26, + "arena_rank": 35, "arena_votes": 14547 }, { @@ -66959,9 +66959,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1326.87, - "arena_rank": 170, - "arena_votes": 40218 + "arena_elo": 1326.79, + "arena_rank": 174, + "arena_votes": 40211 }, { "name": "grok-3-beta", @@ -67030,9 +67030,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1413.17, - "arena_rank": 69, - "arena_votes": 51429 + "arena_elo": 1413.13, + "arena_rank": 73, + "arena_votes": 51424 }, { "name": "gpt-4.1-mini-2025-04-14", @@ -67059,9 +67059,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1382.26, - "arena_rank": 111, - "arena_votes": 39559 + "arena_elo": 1382.22, + "arena_rank": 115, + "arena_votes": 39550 }, { "name": "gpt-4.1-nano-2025-04-14", @@ -67088,8 +67088,8 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1321.55, - "arena_rank": 178, + "arena_elo": 1321.53, + "arena_rank": 182, "arena_votes": 6103 }, { @@ -67264,8 +67264,8 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1421.68, - "arena_rank": 56, + "arena_elo": 1421.72, + "arena_rank": 60, "arena_votes": 18593 }, { @@ -67398,9 +67398,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1386.5, - "arena_rank": 107, - "arena_votes": 33451 + "arena_elo": 1386.46, + "arena_rank": 111, + "arena_votes": 33444 }, { "name": "phi-4-reasoning-plus", @@ -67469,9 +67469,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1374.31, - "arena_rank": 115, - "arena_votes": 26433 + "arena_elo": 1374.37, + "arena_rank": 119, + "arena_votes": 26431 }, { "name": "qwen3-30b-a3b", @@ -67498,9 +67498,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1327.52, - "arena_rank": 168, - "arena_votes": 26650 + "arena_elo": 1327.6, + "arena_rank": 172, + "arena_votes": 26653 }, { "name": "qwen3-32b", @@ -67527,8 +67527,8 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1347.01, - "arena_rank": 145, + "arena_elo": 1347.07, + "arena_rank": 148, "arena_votes": 3926 }, { @@ -67577,9 +67577,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1452.68, + "arena_elo": 1452.67, "arena_rank": 25, - "arena_votes": 54590 + "arena_votes": 56199 }, { "name": "deepseek-v3.1-terminus", @@ -67606,8 +67606,8 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1415.65, - "arena_rank": 67, + "arena_elo": 1415.82, + "arena_rank": 70, "arena_votes": 3724 }, { @@ -67698,9 +67698,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1405.11, - "arena_rank": 80, - "arena_votes": 33171 + "arena_elo": 1405.12, + "arena_rank": 82, + "arena_votes": 33165 }, { "name": "gemini-2.5-pro-06-05", @@ -67748,9 +67748,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1410.99, - "arena_rank": 72, - "arena_votes": 24516 + "arena_elo": 1411.13, + "arena_rank": 76, + "arena_votes": 24513 }, { "name": "glm-4.5-air", @@ -67777,9 +67777,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1372.63, - "arena_rank": 118, - "arena_votes": 31391 + "arena_elo": 1372.78, + "arena_rank": 122, + "arena_votes": 31390 }, { "name": "glm-4.6", @@ -67806,9 +67806,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1425.86, - "arena_rank": 48, - "arena_votes": 35981 + "arena_elo": 1425.94, + "arena_rank": 52, + "arena_votes": 35979 }, { "name": "gpt-5", @@ -67877,9 +67877,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1433.72, - "arena_rank": 41, - "arena_votes": 32259 + "arena_elo": 1433.56, + "arena_rank": 45, + "arena_votes": 32246 }, { "name": "gpt-5-low", @@ -67948,8 +67948,8 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1389.67, - "arena_rank": 100, + "arena_elo": 1389.6, + "arena_rank": 104, "arena_votes": 27266 }, { @@ -68061,9 +68061,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1336.52, - "arena_rank": 158, - "arena_votes": 8323 + "arena_elo": 1336.54, + "arena_rank": 162, + "arena_votes": 8319 }, { "name": "gpt-5-nano-low", @@ -68132,9 +68132,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1426.18, - "arena_rank": 47, - "arena_votes": 31854 + "arena_elo": 1426.16, + "arena_rank": 51, + "arena_votes": 31860 }, { "name": "gpt-oss-120b", @@ -68161,9 +68161,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1353.78, - "arena_rank": 134, - "arena_votes": 30921 + "arena_elo": 1353.86, + "arena_rank": 138, + "arena_votes": 30920 }, { "name": "grok-4-0709", @@ -68190,9 +68190,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1410.29, - "arena_rank": 75, - "arena_votes": 41777 + "arena_elo": 1410.27, + "arena_rank": 78, + "arena_votes": 41766 }, { "name": "grok-code-fast-1-0825", @@ -68261,9 +68261,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1422.16, - "arena_rank": 54, - "arena_votes": 78407 + "arena_elo": 1422.26, + "arena_rank": 59, + "arena_votes": 79772 }, { "name": "qwen3-235b-a22b-thinking-2507", @@ -68290,9 +68290,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1399.63, - "arena_rank": 89, - "arena_votes": 9060 + "arena_elo": 1399.73, + "arena_rank": 93, + "arena_votes": 9059 }, { "name": "qwen3-coder-480b-a35b-instruct", @@ -68319,9 +68319,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1387.3, - "arena_rank": 103, - "arena_votes": 25977 + "arena_elo": 1387.24, + "arena_rank": 109, + "arena_votes": 25975 }, { "name": "qwen3-max-2025-09-23", @@ -68348,9 +68348,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1424.48, - "arena_rank": 50, - "arena_votes": 9242 + "arena_elo": 1424.61, + "arena_rank": 54, + "arena_votes": 9239 }, { "name": "qwen3-next-80b-a3b-instruct", @@ -68377,9 +68377,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1401.6, - "arena_rank": 85, - "arena_votes": 23100 + "arena_elo": 1401.72, + "arena_rank": 89, + "arena_votes": 23103 }, { "name": "qwen3-next-80b-a3b", @@ -68406,9 +68406,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1369.01, - "arena_rank": 121, - "arena_votes": 13843 + "arena_elo": 1368.94, + "arena_rank": 125, + "arena_votes": 13836 }, { "name": "claude-haiku-4-5-20251001", @@ -68435,9 +68435,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1407.14, - "arena_rank": 76, - "arena_votes": 55274 + "arena_elo": 1407.38, + "arena_rank": 80, + "arena_votes": 56977 }, { "name": "deepseek-v3.2-exp", @@ -68464,9 +68464,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1422.85, - "arena_rank": 53, - "arena_votes": 12031 + "arena_elo": 1422.91, + "arena_rank": 57, + "arena_votes": 12028 }, { "name": "minimax-m2", @@ -68493,9 +68493,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1346.61, - "arena_rank": 147, - "arena_votes": 6919 + "arena_elo": 1346.6, + "arena_rank": 151, + "arena_votes": 6920 }, { "name": "kimi-k2", @@ -68732,9 +68732,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1431.05, - "arena_rank": 44, - "arena_votes": 38588 + "arena_elo": 1431.31, + "arena_rank": 47, + "arena_votes": 40189 }, { "name": "claude-opus-4-5-20251101", @@ -68761,9 +68761,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1468.43, - "arena_rank": 12, - "arena_votes": 43078 + "arena_elo": 1468.26, + "arena_rank": 13, + "arena_votes": 44715 }, { "name": "deepseek-v3.2", @@ -68790,9 +68790,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1423.93, - "arena_rank": 52, - "arena_votes": 37482 + "arena_elo": 1424.13, + "arena_rank": 55, + "arena_votes": 38958 }, { "name": "deepseek-v3.2-speciale", @@ -68903,8 +68903,8 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1377.98, - "arena_rank": 113, + "arena_elo": 1378.02, + "arena_rank": 117, "arena_votes": 2823 }, { @@ -69121,8 +69121,8 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1442.9, - "arena_rank": 35, + "arena_elo": 1443.02, + "arena_rank": 38, "arena_votes": 12201 }, { @@ -69171,9 +69171,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1499.5, + "arena_elo": 1498.99, "arena_rank": 2, - "arena_votes": 13553 + "arena_votes": 14934 }, { "name": "claude-sonnet-4-6", @@ -69200,9 +69200,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1462.64, - "arena_rank": 17, - "arena_votes": 11064 + "arena_elo": 1462.41, + "arena_rank": 18, + "arena_votes": 11082 }, { "name": "gemini-3.1-pro-preview-high", @@ -69246,6 +69246,27 @@ "lb_data_analysis": "livebench" } }, + { + "name": "gemma-4-31b-it", + "lb_name": "gemma-4-31b-it", + "lb_global": 0.6237817391304348, + "lb_reasoning": 0.5941825000000001, + "lb_coding": 0.48133599999999993, + "lb_math": 0.739365, + "lb_language": 0.7133766666666667, + "lb_if": 0.6757924999999999, + "lb_data_analysis": 0.5876033333333334, + "sources": { + "lb_name": "livebench", + "lb_global": "livebench", + "lb_reasoning": "livebench", + "lb_coding": "livebench", + "lb_math": "livebench", + "lb_language": "livebench", + "lb_if": "livebench", + "lb_data_analysis": "livebench" + } + }, { "name": "glm-5", "lb_name": "glm-5", @@ -69271,9 +69292,30 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1455.62, - "arena_rank": 20, - "arena_votes": 11101 + "arena_elo": 1455.9, + "arena_rank": 21, + "arena_votes": 12177 + }, + { + "name": "glm-5v-turbo", + "lb_name": "glm-5v-turbo", + "lb_global": 0.4877795652173914, + "lb_reasoning": 0.5610575, + "lb_coding": 0.31559000000000004, + "lb_math": 0.7041124999999999, + "lb_language": 0.6227533333333334, + "lb_if": 0.272, + "lb_data_analysis": 0.5413466666666666, + "sources": { + "lb_name": "livebench", + "lb_global": "livebench", + "lb_reasoning": "livebench", + "lb_coding": "livebench", + "lb_math": "livebench", + "lb_language": "livebench", + "lb_if": "livebench", + "lb_data_analysis": "livebench" + } }, { "name": "gpt-5.2-codex", @@ -69384,9 +69426,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1484.03, + "arena_elo": 1484.42, "arena_rank": 6, - "arena_votes": 5570 + "arena_votes": 7160 }, { "name": "gpt-5.4-xhigh", @@ -69455,9 +69497,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1453.81, + "arena_elo": 1455.46, "arena_rank": 22, - "arena_votes": 2860 + "arena_votes": 4514 }, { "name": "gpt-5.4-mini-low", @@ -69568,9 +69610,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1400.58, - "arena_rank": 88, - "arena_votes": 2215 + "arena_elo": 1404.12, + "arena_rank": 85, + "arena_votes": 3905 }, { "name": "gpt-5.4-nano-low", @@ -69681,9 +69723,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1483.48, + "arena_elo": 1481.2, "arena_rank": 7, - "arena_votes": 5702 + "arena_votes": 7344 }, { "name": "kimi-k2.5", @@ -69710,9 +69752,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1453.54, - "arena_rank": 23, - "arena_votes": 17271 + "arena_elo": 1452.98, + "arena_rank": 24, + "arena_votes": 17818 }, { "name": "minimax-m2.5", @@ -69739,9 +69781,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1405.61, - "arena_rank": 78, - "arena_votes": 13000 + "arena_elo": 1406.48, + "arena_rank": 81, + "arena_votes": 14615 }, { "name": "mimo-v2-pro", @@ -69768,9 +69810,9 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1445.77, - "arena_rank": 32, - "arena_votes": 4794 + "arena_elo": 1444.23, + "arena_rank": 36, + "arena_votes": 6342 }, { "name": "minimax-m2.7", @@ -69797,17 +69839,17 @@ "arena_rank": "arena", "arena_votes": "arena" }, - "arena_elo": 1406.06, - "arena_rank": 77, - "arena_votes": 3259 + "arena_elo": 1403.18, + "arena_rank": 86, + "arena_votes": 4666 }, { "name": "claude-opus-4-6-thinking", "arena_name": "claude-opus-4-6-thinking", "arena_org": "Anthropic", - "arena_elo": 1504.08, + "arena_elo": 1503.81, "arena_rank": 1, - "arena_votes": 12730, + "arena_votes": 13979, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -69820,9 +69862,9 @@ "name": "gemini-3.1-pro-preview", "arena_name": "gemini-3.1-pro-preview", "arena_org": "Google", - "arena_elo": 1493.17, + "arena_elo": 1494.17, "arena_rank": 3, - "arena_votes": 15809, + "arena_votes": 17559, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -69835,9 +69877,9 @@ "name": "grok-4.20-beta1", "arena_name": "grok-4.20-beta1", "arena_org": "xAI", - "arena_elo": 1491.37, + "arena_elo": 1491.07, "arena_rank": 4, - "arena_votes": 7378, + "arena_votes": 7380, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -69850,9 +69892,9 @@ "name": "gemini-3-pro", "arena_name": "gemini-3-pro", "arena_org": "Google", - "arena_elo": 1486.39, + "arena_elo": 1486.36, "arena_rank": 5, - "arena_votes": 41631, + "arena_votes": 41632, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -69865,9 +69907,9 @@ "name": "gpt-5.2-chat-latest-20260210", "arena_name": "gpt-5.2-chat-latest-20260210", "arena_org": "OpenAI", - "arena_elo": 1480.18, + "arena_elo": 1478.45, "arena_rank": 8, - "arena_votes": 11405, + "arena_votes": 13083, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -69880,9 +69922,9 @@ "name": "gemini-3-flash", "arena_name": "gemini-3-flash", "arena_org": "Google", - "arena_elo": 1474.42, + "arena_elo": 1474.41, "arena_rank": 9, - "arena_votes": 30962, + "arena_votes": 30966, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -69892,12 +69934,12 @@ } }, { - "name": "claude-opus-4-5-20251101-thinking-32k", - "arena_name": "claude-opus-4-5-20251101-thinking-32k", - "arena_org": "Anthropic", - "arena_elo": 1473.8, + "name": "grok-4.20-multi-agent-beta-0309", + "arena_name": "grok-4.20-multi-agent-beta-0309", + "arena_org": "xAI", + "arena_elo": 1474.07, "arena_rank": 10, - "arena_votes": 37448, + "arena_votes": 7815, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -69907,12 +69949,12 @@ } }, { - "name": "grok-4.1-thinking", - "arena_name": "grok-4.1-thinking", - "arena_org": "xAI", - "arena_elo": 1471.44, + "name": "claude-opus-4-5-20251101-thinking-32k", + "arena_name": "claude-opus-4-5-20251101-thinking-32k", + "arena_org": "Anthropic", + "arena_elo": 1473.64, "arena_rank": 11, - "arena_votes": 44840, + "arena_votes": 37467, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -69922,12 +69964,12 @@ } }, { - "name": "gpt-5.4", - "arena_name": "gpt-5.4", - "arena_org": "OpenAI", - "arena_elo": 1465.6, - "arena_rank": 13, - "arena_votes": 5618, + "name": "grok-4.1-thinking", + "arena_name": "grok-4.1-thinking", + "arena_org": "xAI", + "arena_elo": 1471.06, + "arena_rank": 12, + "arena_votes": 45399, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -69940,9 +69982,9 @@ "name": "qwen3.5-max-preview", "arena_name": "qwen3.5-max-preview", "arena_org": "Alibaba", - "arena_elo": 1464.81, + "arena_elo": 1466.93, "arena_rank": 14, - "arena_votes": 4504, + "arena_votes": 5899, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -69952,12 +69994,12 @@ } }, { - "name": "gpt-5.3-chat-latest", - "arena_name": "gpt-5.3-chat-latest", - "arena_org": "OpenAI", - "arena_elo": 1464.48, + "name": "dola-seed-2.0-pro", + "arena_name": "dola-seed-2.0-pro", + "arena_org": "Bytedance", + "arena_elo": 1465.07, "arena_rank": 15, - "arena_votes": 10137, + "arena_votes": 2986, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -69967,12 +70009,12 @@ } }, { - "name": "gemini-3-flash (thinking-minimal)", - "arena_name": "gemini-3-flash (thinking-minimal)", - "arena_org": "Google", - "arena_elo": 1463.44, + "name": "gpt-5.4", + "arena_name": "gpt-5.4", + "arena_org": "OpenAI", + "arena_elo": 1464.14, "arena_rank": 16, - "arena_votes": 28681, + "arena_votes": 7261, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -69982,12 +70024,12 @@ } }, { - "name": "dola-seed-2.0-preview", - "arena_name": "dola-seed-2.0-preview", - "arena_org": "Bytedance", - "arena_elo": 1462.55, - "arena_rank": 18, - "arena_votes": 11780, + "name": "gemini-3-flash (thinking-minimal)", + "arena_name": "gemini-3-flash (thinking-minimal)", + "arena_org": "Google", + "arena_elo": 1463.33, + "arena_rank": 17, + "arena_votes": 30448, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70000,9 +70042,24 @@ "name": "grok-4.1", "arena_name": "grok-4.1", "arena_org": "xAI", - "arena_elo": 1460.82, + "arena_elo": 1461.49, "arena_rank": 19, - "arena_votes": 48755, + "arena_votes": 49287, + "sources": { + "arena_name": "arena", + "arena_org": "arena", + "arena_elo": "arena", + "arena_rank": "arena", + "arena_votes": "arena" + } + }, + { + "name": "gpt-5.3-chat-latest", + "arena_name": "gpt-5.3-chat-latest", + "arena_org": "OpenAI", + "arena_elo": 1460.5, + "arena_rank": 20, + "arena_votes": 11781, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70015,9 +70072,9 @@ "name": "gpt-5.1-high", "arena_name": "gpt-5.1-high", "arena_org": "OpenAI", - "arena_elo": 1454.84, - "arena_rank": 21, - "arena_votes": 41164, + "arena_elo": 1454.83, + "arena_rank": 23, + "arena_votes": 41170, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70030,9 +70087,9 @@ "name": "claude-sonnet-4-5-20250929-thinking-32k", "arena_name": "claude-sonnet-4-5-20250929-thinking-32k", "arena_org": "Anthropic", - "arena_elo": 1452.97, - "arena_rank": 24, - "arena_votes": 56697, + "arena_elo": 1452.47, + "arena_rank": 26, + "arena_votes": 58247, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70042,12 +70099,12 @@ } }, { - "name": "ernie-5.0-0110", - "arena_name": "ernie-5.0-0110", - "arena_org": "Baidu", - "arena_elo": 1451.78, - "arena_rank": 26, - "arena_votes": 19567, + "name": "gemma-4-31b", + "arena_name": "gemma-4-31b", + "arena_org": "Google", + "arena_elo": 1452.08, + "arena_rank": 27, + "arena_votes": 4679, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70057,12 +70114,12 @@ } }, { - "name": "qwen3.5-397b-a17b", - "arena_name": "qwen3.5-397b-a17b", - "arena_org": "Alibaba", - "arena_elo": 1450.4, - "arena_rank": 27, - "arena_votes": 11497, + "name": "ernie-5.0-0110", + "arena_name": "ernie-5.0-0110", + "arena_org": "Baidu", + "arena_elo": 1450.31, + "arena_rank": 28, + "arena_votes": 20836, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70075,9 +70132,24 @@ "name": "ernie-5.0-preview-1203", "arena_name": "ernie-5.0-preview-1203", "arena_org": "Baidu", - "arena_elo": 1449.72, - "arena_rank": 28, - "arena_votes": 9826, + "arena_elo": 1449.68, + "arena_rank": 29, + "arena_votes": 9824, + "sources": { + "arena_name": "arena", + "arena_org": "arena", + "arena_elo": "arena", + "arena_rank": "arena", + "arena_votes": "arena" + } + }, + { + "name": "qwen3.5-397b-a17b", + "arena_name": "qwen3.5-397b-a17b", + "arena_org": "Alibaba", + "arena_elo": 1449.45, + "arena_rank": 30, + "arena_votes": 12994, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70090,9 +70162,9 @@ "name": "claude-opus-4-1-20250805-thinking-16k", "arena_name": "claude-opus-4-1-20250805-thinking-16k", "arena_org": "Anthropic", - "arena_elo": 1448.66, - "arena_rank": 29, - "arena_votes": 50189, + "arena_elo": 1448.62, + "arena_rank": 31, + "arena_votes": 50174, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70105,9 +70177,9 @@ "name": "gemini-2.5-pro", "arena_name": "gemini-2.5-pro", "arena_org": "Google", - "arena_elo": 1448.14, - "arena_rank": 30, - "arena_votes": 103891, + "arena_elo": 1448.05, + "arena_rank": 32, + "arena_votes": 105423, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70120,9 +70192,24 @@ "name": "claude-opus-4-1-20250805", "arena_name": "claude-opus-4-1-20250805", "arena_org": "Anthropic", - "arena_elo": 1446.85, - "arena_rank": 31, - "arena_votes": 77953, + "arena_elo": 1446.86, + "arena_rank": 33, + "arena_votes": 77943, + "sources": { + "arena_name": "arena", + "arena_org": "arena", + "arena_elo": "arena", + "arena_rank": "arena", + "arena_votes": "arena" + } + }, + { + "name": "longcat-flash-chat-2602-exp", + "arena_name": "longcat-flash-chat-2602-exp", + "arena_org": "Meituan", + "arena_elo": 1444.92, + "arena_rank": 34, + "arena_votes": 3897, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70135,8 +70222,8 @@ "name": "chatgpt-4o-latest-20250326", "arena_name": "chatgpt-4o-latest-20250326", "arena_org": "OpenAI", - "arena_elo": 1443.03, - "arena_rank": 34, + "arena_elo": 1443.1, + "arena_rank": 37, "arena_votes": 83051, "sources": { "arena_name": "arena", @@ -70150,9 +70237,24 @@ "name": "gpt-5.2-high", "arena_name": "gpt-5.2-high", "arena_org": "OpenAI", - "arena_elo": 1441.65, - "arena_rank": 36, - "arena_votes": 26402, + "arena_elo": 1441.51, + "arena_rank": 39, + "arena_votes": 28004, + "sources": { + "arena_name": "arena", + "arena_org": "arena", + "arena_elo": "arena", + "arena_rank": "arena", + "arena_votes": "arena" + } + }, + { + "name": "gemma-4-26b-a4b", + "arena_name": "gemma-4-26b-a4b", + "arena_org": "Google", + "arena_elo": 1440.64, + "arena_rank": 40, + "arena_votes": 4548, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70165,9 +70267,9 @@ "name": "gpt-5.2", "arena_name": "gpt-5.2", "arena_org": "OpenAI", - "arena_elo": 1440.15, - "arena_rank": 37, - "arena_votes": 23341, + "arena_elo": 1440.27, + "arena_rank": 41, + "arena_votes": 24946, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70180,9 +70282,9 @@ "name": "gpt-5.1", "arena_name": "gpt-5.1", "arena_org": "OpenAI", - "arena_elo": 1438.57, - "arena_rank": 38, - "arena_votes": 43858, + "arena_elo": 1438.55, + "arena_rank": 42, + "arena_votes": 43869, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70195,9 +70297,9 @@ "name": "gemini-3.1-flash-lite-preview", "arena_name": "gemini-3.1-flash-lite-preview", "arena_org": "Google", - "arena_elo": 1437.6, - "arena_rank": 39, - "arena_votes": 11945, + "arena_elo": 1438, + "arena_rank": 43, + "arena_votes": 13569, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70210,9 +70312,9 @@ "name": "qwen3-max-preview", "arena_name": "qwen3-max-preview", "arena_org": "Alibaba", - "arena_elo": 1434.92, - "arena_rank": 40, - "arena_votes": 27958, + "arena_elo": 1435.05, + "arena_rank": 44, + "arena_votes": 27955, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70225,8 +70327,8 @@ "name": "kimi-k2.5-instant", "arena_name": "kimi-k2.5-instant", "arena_org": "Moonshot", - "arena_elo": 1433.18, - "arena_rank": 42, + "arena_elo": 1433.17, + "arena_rank": 46, "arena_votes": 8232, "sources": { "arena_name": "arena", @@ -70240,9 +70342,9 @@ "name": "o3-2025-04-16", "arena_name": "o3-2025-04-16", "arena_org": "OpenAI", - "arena_elo": 1431.43, - "arena_rank": 43, - "arena_votes": 60188, + "arena_elo": 1431.29, + "arena_rank": 48, + "arena_votes": 60179, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70255,9 +70357,9 @@ "name": "kimi-k2-thinking-turbo", "arena_name": "kimi-k2-thinking-turbo", "arena_org": "Moonshot", - "arena_elo": 1429.59, - "arena_rank": 45, - "arena_votes": 42603, + "arena_elo": 1429.83, + "arena_rank": 49, + "arena_votes": 43994, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70270,9 +70372,9 @@ "name": "amazon-nova-experimental-chat-26-02-10", "arena_name": "amazon-nova-experimental-chat-26-02-10", "arena_org": "Amazon", - "arena_elo": 1429.28, - "arena_rank": 46, - "arena_votes": 3448, + "arena_elo": 1428.58, + "arena_rank": 50, + "arena_votes": 3461, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70285,9 +70387,9 @@ "name": "deepseek-v3.2-exp-thinking", "arena_name": "deepseek-v3.2-exp-thinking", "arena_org": "DeepSeek", - "arena_elo": 1424.86, - "arena_rank": 49, - "arena_votes": 9148, + "arena_elo": 1424.89, + "arena_rank": 53, + "arena_votes": 9147, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70300,9 +70402,9 @@ "name": "claude-opus-4-20250514-thinking-16k", "arena_name": "claude-opus-4-20250514-thinking-16k", "arena_org": "Anthropic", - "arena_elo": 1424, - "arena_rank": 51, - "arena_votes": 37201, + "arena_elo": 1423.91, + "arena_rank": 56, + "arena_votes": 37192, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70315,9 +70417,9 @@ "name": "deepseek-v3.2-thinking", "arena_name": "deepseek-v3.2-thinking", "arena_org": "DeepSeek", - "arena_elo": 1421.84, - "arena_rank": 55, - "arena_votes": 32014, + "arena_elo": 1422.66, + "arena_rank": 58, + "arena_votes": 33449, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70330,9 +70432,9 @@ "name": "grok-4-fast-chat", "arena_name": "grok-4-fast-chat", "arena_org": "xAI", - "arena_elo": 1420.88, - "arena_rank": 57, - "arena_votes": 6869, + "arena_elo": 1420.81, + "arena_rank": 61, + "arena_votes": 6870, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70345,9 +70447,9 @@ "name": "ernie-5.0-preview-1022", "arena_name": "ernie-5.0-preview-1022", "arena_org": "Baidu", - "arena_elo": 1419.17, - "arena_rank": 58, - "arena_votes": 4764, + "arena_elo": 1419.07, + "arena_rank": 62, + "arena_votes": 4765, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70360,24 +70462,9 @@ "name": "deepseek-v3.1", "arena_name": "deepseek-v3.1", "arena_org": "DeepSeek", - "arena_elo": 1417.89, - "arena_rank": 59, - "arena_votes": 15080, - "sources": { - "arena_name": "arena", - "arena_org": "arena", - "arena_elo": "arena", - "arena_rank": "arena", - "arena_votes": "arena" - } - }, - { - "name": "qwen3.5-122b-a10b", - "arena_name": "qwen3.5-122b-a10b", - "arena_org": "Alibaba", - "arena_elo": 1417.82, - "arena_rank": 60, - "arena_votes": 8124, + "arena_elo": 1418.02, + "arena_rank": 63, + "arena_votes": 15082, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70390,9 +70477,9 @@ "name": "kimi-k2-0905-preview", "arena_name": "kimi-k2-0905-preview", "arena_org": "Moonshot", - "arena_elo": 1417.67, - "arena_rank": 61, - "arena_votes": 11873, + "arena_elo": 1417.75, + "arena_rank": 64, + "arena_votes": 11867, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70405,9 +70492,9 @@ "name": "kimi-k2-0711-preview", "arena_name": "kimi-k2-0711-preview", "arena_org": "Moonshot", - "arena_elo": 1417.15, - "arena_rank": 62, - "arena_votes": 27871, + "arena_elo": 1417.12, + "arena_rank": 65, + "arena_votes": 27875, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70420,8 +70507,8 @@ "name": "deepseek-v3.1-thinking", "arena_name": "deepseek-v3.1-thinking", "arena_org": "DeepSeek", - "arena_elo": 1416.76, - "arena_rank": 63, + "arena_elo": 1417.02, + "arena_rank": 66, "arena_votes": 11822, "sources": { "arena_name": "arena", @@ -70435,9 +70522,24 @@ "name": "deepseek-v3.1-terminus-thinking", "arena_name": "deepseek-v3.1-terminus-thinking", "arena_org": "DeepSeek", - "arena_elo": 1416.07, - "arena_rank": 64, - "arena_votes": 3492, + "arena_elo": 1416.42, + "arena_rank": 67, + "arena_votes": 3487, + "sources": { + "arena_name": "arena", + "arena_org": "arena", + "arena_elo": "arena", + "arena_rank": "arena", + "arena_votes": "arena" + } + }, + { + "name": "qwen3.5-122b-a10b", + "arena_name": "qwen3.5-122b-a10b", + "arena_org": "Alibaba", + "arena_elo": 1416.03, + "arena_rank": 68, + "arena_votes": 9686, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70450,9 +70552,9 @@ "name": "qwen3-vl-235b-a22b-instruct", "arena_name": "qwen3-vl-235b-a22b-instruct", "arena_org": "Alibaba", - "arena_elo": 1415.72, - "arena_rank": 65, - "arena_votes": 11614, + "arena_elo": 1415.86, + "arena_rank": 69, + "arena_votes": 11611, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70465,9 +70567,9 @@ "name": "mistral-large-3", "arena_name": "mistral-large-3", "arena_org": "Mistral", - "arena_elo": 1415.67, - "arena_rank": 66, - "arena_votes": 34238, + "arena_elo": 1415.64, + "arena_rank": 71, + "arena_votes": 35825, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70480,9 +70582,9 @@ "name": "amazon-nova-experimental-chat-26-01-10", "arena_name": "amazon-nova-experimental-chat-26-01-10", "arena_org": "Amazon", - "arena_elo": 1414.96, - "arena_rank": 68, - "arena_votes": 3440, + "arena_elo": 1415.04, + "arena_rank": 72, + "arena_votes": 3442, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70495,9 +70597,9 @@ "name": "claude-opus-4-20250514", "arena_name": "claude-opus-4-20250514", "arena_org": "Anthropic", - "arena_elo": 1412.34, - "arena_rank": 70, - "arena_votes": 44573, + "arena_elo": 1412.26, + "arena_rank": 74, + "arena_votes": 44565, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70510,9 +70612,9 @@ "name": "grok-3-preview-02-24", "arena_name": "grok-3-preview-02-24", "arena_org": "xAI", - "arena_elo": 1411.68, - "arena_rank": 71, - "arena_votes": 33055, + "arena_elo": 1411.74, + "arena_rank": 75, + "arena_votes": 33052, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70525,9 +70627,9 @@ "name": "gemini-2.5-flash", "arena_name": "gemini-2.5-flash", "arena_org": "Google", - "arena_elo": 1410.72, - "arena_rank": 73, - "arena_votes": 103268, + "arena_elo": 1410.89, + "arena_rank": 77, + "arena_votes": 104876, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70540,24 +70642,9 @@ "name": "mistral-medium-2508", "arena_name": "mistral-medium-2508", "arena_org": "Mistral", - "arena_elo": 1410.51, - "arena_rank": 74, - "arena_votes": 73290, - "sources": { - "arena_name": "arena", - "arena_org": "arena", - "arena_elo": "arena", - "arena_rank": "arena", - "arena_votes": "arena" - } - }, - { - "name": "qwen3.5-27b", - "arena_name": "qwen3.5-27b", - "arena_org": "Alibaba", - "arena_elo": 1405.26, + "arena_elo": 1410.23, "arena_rank": 79, - "arena_votes": 8055, + "arena_votes": 74890, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70570,9 +70657,9 @@ "name": "grok-4-fast-reasoning", "arena_name": "grok-4-fast-reasoning", "arena_org": "xAI", - "arena_elo": 1404.71, - "arena_rank": 81, - "arena_votes": 18901, + "arena_elo": 1404.72, + "arena_rank": 83, + "arena_votes": 18898, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70582,12 +70669,12 @@ } }, { - "name": "qwen3-235b-a22b-no-thinking", - "arena_name": "qwen3-235b-a22b-no-thinking", + "name": "qwen3.5-27b", + "arena_name": "qwen3.5-27b", "arena_org": "Alibaba", - "arena_elo": 1402.66, - "arena_rank": 82, - "arena_votes": 38481, + "arena_elo": 1404.35, + "arena_rank": 84, + "arena_votes": 9564, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70597,12 +70684,12 @@ } }, { - "name": "qwen3.5-35b-a3b", - "arena_name": "qwen3.5-35b-a3b", + "name": "qwen3-235b-a22b-no-thinking", + "arena_name": "qwen3-235b-a22b-no-thinking", "arena_org": "Alibaba", - "arena_elo": 1402.2, - "arena_rank": 83, - "arena_votes": 8393, + "arena_elo": 1402.75, + "arena_rank": 87, + "arena_votes": 38471, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70615,9 +70702,9 @@ "name": "qwen3.5-flash", "arena_name": "qwen3.5-flash", "arena_org": "Alibaba", - "arena_elo": 1400.74, - "arena_rank": 86, - "arena_votes": 8897, + "arena_elo": 1401.86, + "arena_rank": 88, + "arena_votes": 10307, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70630,8 +70717,8 @@ "name": "longcat-flash-chat", "arena_name": "longcat-flash-chat", "arena_org": "Meituan", - "arena_elo": 1400.74, - "arena_rank": 87, + "arena_elo": 1401.06, + "arena_rank": 91, "arena_votes": 11478, "sources": { "arena_name": "arena", @@ -70641,13 +70728,28 @@ "arena_votes": "arena" } }, + { + "name": "qwen3.5-35b-a3b", + "arena_name": "qwen3.5-35b-a3b", + "arena_org": "Alibaba", + "arena_elo": 1400.03, + "arena_rank": 92, + "arena_votes": 9959, + "sources": { + "arena_name": "arena", + "arena_org": "arena", + "arena_elo": "arena", + "arena_rank": "arena", + "arena_votes": "arena" + } + }, { "name": "claude-sonnet-4-20250514-thinking-32k", "arena_name": "claude-sonnet-4-20250514-thinking-32k", "arena_org": "Anthropic", - "arena_elo": 1398.73, - "arena_rank": 90, - "arena_votes": 35436, + "arena_elo": 1398.54, + "arena_rank": 94, + "arena_votes": 35423, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70660,8 +70762,8 @@ "name": "hunyuan-vision-1.5-thinking", "arena_name": "hunyuan-vision-1.5-thinking", "arena_org": "Tencent", - "arena_elo": 1396.86, - "arena_rank": 92, + "arena_elo": 1396.6, + "arena_rank": 96, "arena_votes": 2228, "sources": { "arena_name": "arena", @@ -70675,9 +70777,9 @@ "name": "amazon-nova-experimental-chat-12-10", "arena_name": "amazon-nova-experimental-chat-12-10", "arena_org": "Amazon", - "arena_elo": 1395.74, - "arena_rank": 93, - "arena_votes": 3705, + "arena_elo": 1395.88, + "arena_rank": 97, + "arena_votes": 3706, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70690,9 +70792,9 @@ "name": "qwen3-vl-235b-a22b-thinking", "arena_name": "qwen3-vl-235b-a22b-thinking", "arena_org": "Alibaba", - "arena_elo": 1395.56, - "arena_rank": 94, - "arena_votes": 8027, + "arena_elo": 1395.58, + "arena_rank": 98, + "arena_votes": 8021, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70705,9 +70807,9 @@ "name": "mai-1-preview", "arena_name": "mai-1-preview", "arena_org": "Microsoft AI", - "arena_elo": 1392.76, - "arena_rank": 96, - "arena_votes": 18027, + "arena_elo": 1392.77, + "arena_rank": 100, + "arena_votes": 18020, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70720,9 +70822,9 @@ "name": "mimo-v2-flash (non-thinking)", "arena_name": "mimo-v2-flash (non-thinking)", "arena_org": "Xiaomi", - "arena_elo": 1391.9, - "arena_rank": 97, - "arena_votes": 26410, + "arena_elo": 1392.69, + "arena_rank": 101, + "arena_votes": 27959, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70735,9 +70837,9 @@ "name": "step-3.5-flash", "arena_name": "step-3.5-flash", "arena_org": "StepFun", - "arena_elo": 1390.82, - "arena_rank": 98, - "arena_votes": 14862, + "arena_elo": 1392.29, + "arena_rank": 102, + "arena_votes": 16327, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70750,9 +70852,9 @@ "name": "o4-mini-2025-04-16", "arena_name": "o4-mini-2025-04-16", "arena_org": "OpenAI", - "arena_elo": 1390.05, - "arena_rank": 99, - "arena_votes": 45775, + "arena_elo": 1389.98, + "arena_rank": 103, + "arena_votes": 45771, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70765,9 +70867,9 @@ "name": "claude-sonnet-4-20250514", "arena_name": "claude-sonnet-4-20250514", "arena_org": "Anthropic", - "arena_elo": 1388.89, - "arena_rank": 101, - "arena_votes": 40668, + "arena_elo": 1388.73, + "arena_rank": 105, + "arena_votes": 40664, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70780,9 +70882,9 @@ "name": "mimo-v2-flash (thinking)", "arena_name": "mimo-v2-flash (thinking)", "arena_org": "Xiaomi", - "arena_elo": 1387.29, - "arena_rank": 104, - "arena_votes": 11012, + "arena_elo": 1387.36, + "arena_rank": 107, + "arena_votes": 11014, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70795,9 +70897,9 @@ "name": "hunyuan-t1-20250711", "arena_name": "hunyuan-t1-20250711", "arena_org": "Tencent", - "arena_elo": 1387.19, - "arena_rank": 105, - "arena_votes": 4721, + "arena_elo": 1387.24, + "arena_rank": 108, + "arena_votes": 4720, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70810,9 +70912,9 @@ "name": "claude-3-7-sonnet-20250219-thinking-32k", "arena_name": "claude-3-7-sonnet-20250219-thinking-32k", "arena_org": "Anthropic", - "arena_elo": 1386.7, - "arena_rank": 106, - "arena_votes": 39002, + "arena_elo": 1386.47, + "arena_rank": 110, + "arena_votes": 38993, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70825,9 +70927,9 @@ "name": "minimax-m2.1-preview", "arena_name": "minimax-m2.1-preview", "arena_org": "MiniMax", - "arena_elo": 1386.24, - "arena_rank": 108, - "arena_votes": 17228, + "arena_elo": 1386.26, + "arena_rank": 112, + "arena_votes": 17225, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70840,9 +70942,9 @@ "name": "qwen3-30b-a3b-instruct-2507", "arena_name": "qwen3-30b-a3b-instruct-2507", "arena_org": "Alibaba", - "arena_elo": 1383.12, - "arena_rank": 109, - "arena_votes": 23958, + "arena_elo": 1383.11, + "arena_rank": 113, + "arena_votes": 23947, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70856,8 +70958,8 @@ "arena_name": "hunyuan-turbos-20250416", "arena_org": "Tencent", "arena_elo": 1382.88, - "arena_rank": 110, - "arena_votes": 10779, + "arena_rank": 114, + "arena_votes": 10774, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70870,9 +70972,9 @@ "name": "gemini-2.5-flash-lite-preview-09-2025-no-thinking", "arena_name": "gemini-2.5-flash-lite-preview-09-2025-no-thinking", "arena_org": "Google", - "arena_elo": 1379.94, - "arena_rank": 112, - "arena_votes": 47598, + "arena_elo": 1380, + "arena_rank": 116, + "arena_votes": 47594, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70885,9 +70987,9 @@ "name": "trinity-large", "arena_name": "trinity-large", "arena_org": "Arcee AI", - "arena_elo": 1376.01, - "arena_rank": 114, - "arena_votes": 9281, + "arena_elo": 1375.26, + "arena_rank": 118, + "arena_votes": 11005, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70900,9 +71002,9 @@ "name": "gemini-2.5-flash-lite-preview-06-17-thinking", "arena_name": "gemini-2.5-flash-lite-preview-06-17-thinking", "arena_org": "Google", - "arena_elo": 1374.29, - "arena_rank": 116, - "arena_votes": 33189, + "arena_elo": 1374.26, + "arena_rank": 121, + "arena_votes": 33181, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70915,9 +71017,9 @@ "name": "glm-4.7-flash", "arena_name": "glm-4.7-flash", "arena_org": "Z.ai", - "arena_elo": 1368.7, - "arena_rank": 122, - "arena_votes": 11829, + "arena_elo": 1368.59, + "arena_rank": 126, + "arena_votes": 11830, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70930,9 +71032,9 @@ "name": "amazon-nova-experimental-chat-11-10", "arena_name": "amazon-nova-experimental-chat-11-10", "arena_org": "Amazon", - "arena_elo": 1368.1, - "arena_rank": 123, - "arena_votes": 25506, + "arena_elo": 1367.87, + "arena_rank": 127, + "arena_votes": 25671, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70945,9 +71047,9 @@ "name": "nvidia-nemotron-3-super-120b-a12b", "arena_name": "nvidia-nemotron-3-super-120b-a12b", "arena_org": "Nvidia", - "arena_elo": 1364.59, - "arena_rank": 125, - "arena_votes": 3626, + "arena_elo": 1364.61, + "arena_rank": 129, + "arena_votes": 3624, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70960,8 +71062,8 @@ "name": "o3-mini-high", "arena_name": "o3-mini-high", "arena_org": "OpenAI", - "arena_elo": 1363.32, - "arena_rank": 126, + "arena_elo": 1363.28, + "arena_rank": 130, "arena_votes": 18589, "sources": { "arena_name": "arena", @@ -70975,9 +71077,9 @@ "name": "minimax-m1", "arena_name": "minimax-m1", "arena_org": "MiniMax", - "arena_elo": 1363.14, - "arena_rank": 127, - "arena_votes": 35529, + "arena_elo": 1363.22, + "arena_rank": 131, + "arena_votes": 35520, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -70990,9 +71092,9 @@ "name": "grok-3-mini-high", "arena_name": "grok-3-mini-high", "arena_org": "xAI", - "arena_elo": 1362.9, - "arena_rank": 128, - "arena_votes": 17077, + "arena_elo": 1362.88, + "arena_rank": 132, + "arena_votes": 17076, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71005,9 +71107,9 @@ "name": "grok-3-mini-beta", "arena_name": "grok-3-mini-beta", "arena_org": "xAI", - "arena_elo": 1357.56, - "arena_rank": 131, - "arena_votes": 22883, + "arena_elo": 1357.5, + "arena_rank": 135, + "arena_votes": 22878, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71020,9 +71122,9 @@ "name": "mistral-small-2506", "arena_name": "mistral-small-2506", "arena_org": "Mistral", - "arena_elo": 1356.85, - "arena_rank": 132, - "arena_votes": 17852, + "arena_elo": 1357.01, + "arena_rank": 136, + "arena_votes": 17848, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71035,9 +71137,9 @@ "name": "intellect-3", "arena_name": "intellect-3", "arena_org": "Prime Intellect", - "arena_elo": 1356.44, - "arena_rank": 133, - "arena_votes": 5365, + "arena_elo": 1356.4, + "arena_rank": 137, + "arena_votes": 5363, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71050,9 +71152,9 @@ "name": "glm-4.5v", "arena_name": "glm-4.5v", "arena_org": "Z.ai", - "arena_elo": 1353.28, - "arena_rank": 136, - "arena_votes": 4979, + "arena_elo": 1353.43, + "arena_rank": 140, + "arena_votes": 4976, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71065,9 +71167,9 @@ "name": "amazon-nova-experimental-chat-10-20", "arena_name": "amazon-nova-experimental-chat-10-20", "arena_org": "Amazon", - "arena_elo": 1350.8, - "arena_rank": 139, - "arena_votes": 11543, + "arena_elo": 1350.9, + "arena_rank": 142, + "arena_votes": 11544, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71081,7 +71183,7 @@ "arena_name": "hunyuan-turbos-20250226", "arena_org": "Tencent", "arena_elo": 1348.54, - "arena_rank": 140, + "arena_rank": 144, "arena_votes": 2220, "sources": { "arena_name": "arena", @@ -71092,12 +71194,12 @@ } }, { - "name": "mercury-2", - "arena_name": "mercury-2", - "arena_org": "Inception AI", - "arena_elo": 1347.55, - "arena_rank": 141, - "arena_votes": 3136, + "name": "step-3", + "arena_name": "step-3", + "arena_org": "StepFun", + "arena_elo": 1347.56, + "arena_rank": 145, + "arena_votes": 6582, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71107,12 +71209,12 @@ } }, { - "name": "step-3", - "arena_name": "step-3", - "arena_org": "StepFun", - "arena_elo": 1347.48, - "arena_rank": 143, - "arena_votes": 6581, + "name": "amazon-nova-experimental-chat-10-09", + "arena_name": "amazon-nova-experimental-chat-10-09", + "arena_org": "Amazon", + "arena_elo": 1347.14, + "arena_rank": 147, + "arena_votes": 2859, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71122,12 +71224,12 @@ } }, { - "name": "amazon-nova-experimental-chat-10-09", - "arena_name": "amazon-nova-experimental-chat-10-09", - "arena_org": "Amazon", - "arena_elo": 1347.02, - "arena_rank": 144, - "arena_votes": 2858, + "name": "mercury-2", + "arena_name": "mercury-2", + "arena_org": "Inception AI", + "arena_elo": 1347.05, + "arena_rank": 149, + "arena_votes": 3136, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71140,8 +71242,8 @@ "name": "llama-3.1-nemotron-ultra-253b-v1", "arena_name": "llama-3.1-nemotron-ultra-253b-v1", "arena_org": "Nvidia", - "arena_elo": 1346.9, - "arena_rank": 146, + "arena_elo": 1346.89, + "arena_rank": 150, "arena_votes": 2549, "sources": { "arena_name": "arena", @@ -71155,9 +71257,9 @@ "name": "ling-flash-2.0", "arena_name": "ling-flash-2.0", "arena_org": "Ant Group", - "arena_elo": 1346.2, - "arena_rank": 148, - "arena_votes": 7087, + "arena_elo": 1346.26, + "arena_rank": 152, + "arena_votes": 7085, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71170,8 +71272,8 @@ "name": "qwen-plus-0125", "arena_name": "qwen-plus-0125", "arena_org": "Alibaba", - "arena_elo": 1345.98, - "arena_rank": 149, + "arena_elo": 1346, + "arena_rank": 153, "arena_votes": 5819, "sources": { "arena_name": "arena", @@ -71185,9 +71287,9 @@ "name": "nvidia-llama-3.3-nemotron-super-49b-v1.5", "arena_name": "nvidia-llama-3.3-nemotron-super-49b-v1.5", "arena_org": "Nvidia", - "arena_elo": 1342.64, - "arena_rank": 151, - "arena_votes": 3369, + "arena_elo": 1342.78, + "arena_rank": 155, + "arena_votes": 3368, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71200,8 +71302,8 @@ "name": "glm-4-plus-0111", "arena_name": "glm-4-plus-0111", "arena_org": "Zhipu", - "arena_elo": 1342.57, - "arena_rank": 152, + "arena_elo": 1342.55, + "arena_rank": 156, "arena_votes": 5760, "sources": { "arena_name": "arena", @@ -71216,7 +71318,7 @@ "arena_name": "hunyuan-turbo-0110", "arena_org": "Tencent", "arena_elo": 1340.3, - "arena_rank": 155, + "arena_rank": 159, "arena_votes": 2290, "sources": { "arena_name": "arena", @@ -71230,9 +71332,9 @@ "name": "nova-2-lite", "arena_name": "nova-2-lite", "arena_org": "Amazon", - "arena_elo": 1337.46, - "arena_rank": 156, - "arena_votes": 12310, + "arena_elo": 1337.9, + "arena_rank": 160, + "arena_votes": 12312, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71245,8 +71347,8 @@ "name": "grok-2-2024-08-13", "arena_name": "grok-2-2024-08-13", "arena_org": "xAI", - "arena_elo": 1334.88, - "arena_rank": 160, + "arena_elo": 1334.85, + "arena_rank": 164, "arena_votes": 63498, "sources": { "arena_name": "arena", @@ -71260,8 +71362,8 @@ "name": "llama-3.1-405b-instruct-bf16", "arena_name": "llama-3.1-405b-instruct-bf16", "arena_org": "Meta", - "arena_elo": 1334.53, - "arena_rank": 162, + "arena_elo": 1334.4, + "arena_rank": 166, "arena_votes": 41375, "sources": { "arena_name": "arena", @@ -71275,8 +71377,8 @@ "name": "gemini-advanced-0514", "arena_name": "gemini-advanced-0514", "arena_org": "Google", - "arena_elo": 1334.38, - "arena_rank": 163, + "arena_elo": 1334.28, + "arena_rank": 167, "arena_votes": 50148, "sources": { "arena_name": "arena", @@ -71290,8 +71392,8 @@ "name": "step-2-16k-exp-202412", "arena_name": "step-2-16k-exp-202412", "arena_org": "StepFun", - "arena_elo": 1333.91, - "arena_rank": 164, + "arena_elo": 1333.95, + "arena_rank": 168, "arena_votes": 4833, "sources": { "arena_name": "arena", @@ -71305,8 +71407,8 @@ "name": "llama-3.1-405b-instruct-fp8", "arena_name": "llama-3.1-405b-instruct-fp8", "arena_org": "Meta", - "arena_elo": 1332.73, - "arena_rank": 165, + "arena_elo": 1332.6, + "arena_rank": 169, "arena_votes": 59656, "sources": { "arena_name": "arena", @@ -71320,9 +71422,9 @@ "name": "olmo-3.1-32b-instruct", "arena_name": "olmo-3.1-32b-instruct", "arena_org": "Ai2", - "arena_elo": 1330.76, - "arena_rank": 166, - "arena_votes": 12281, + "arena_elo": 1330.9, + "arena_rank": 170, + "arena_votes": 12282, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71335,8 +71437,8 @@ "name": "yi-lightning", "arena_name": "yi-lightning", "arena_org": "01 AI", - "arena_elo": 1328.23, - "arena_rank": 167, + "arena_elo": 1328.21, + "arena_rank": 171, "arena_votes": 27332, "sources": { "arena_name": "arena", @@ -71352,8 +71454,8 @@ "name": "llama-3.3-nemotron-49b-super-v1", "arena_name": "llama-3.3-nemotron-49b-super-v1", "arena_org": "Nvidia", - "arena_elo": 1327.37, - "arena_rank": 169, + "arena_elo": 1327.41, + "arena_rank": 173, "arena_votes": 2218, "sources": { "arena_name": "arena", @@ -71367,9 +71469,9 @@ "name": "molmo-2-8b", "arena_name": "molmo-2-8b", "arena_org": "Ai2", - "arena_elo": 1326.42, - "arena_rank": 171, - "arena_votes": 806, + "arena_elo": 1326.75, + "arena_rank": 175, + "arena_votes": 805, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71382,8 +71484,8 @@ "name": "hunyuan-large-2025-02-10", "arena_name": "hunyuan-large-2025-02-10", "arena_org": "Tencent", - "arena_elo": 1325.81, - "arena_rank": 172, + "arena_elo": 1325.78, + "arena_rank": 176, "arena_votes": 3738, "sources": { "arena_name": "arena", @@ -71398,7 +71500,7 @@ "arena_name": "deepseek-v2.5-1210", "arena_org": "DeepSeek", "arena_elo": 1323.18, - "arena_rank": 174, + "arena_rank": 178, "arena_votes": 6795, "sources": { "arena_name": "arena", @@ -71414,8 +71516,8 @@ "name": "gemini-1.5-pro-001", "arena_name": "gemini-1.5-pro-001", "arena_org": "Google", - "arena_elo": 1322.7, - "arena_rank": 176, + "arena_elo": 1322.63, + "arena_rank": 179, "arena_votes": 79138, "sources": { "arena_name": "arena", @@ -71431,9 +71533,9 @@ "name": "llama-4-scout-17b-16e-instruct", "arena_name": "llama-4-scout-17b-16e-instruct", "arena_org": "Meta", - "arena_elo": 1322.09, - "arena_rank": 177, - "arena_votes": 30517, + "arena_elo": 1322.03, + "arena_rank": 181, + "arena_votes": 30505, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71446,9 +71548,9 @@ "name": "ring-flash-2.0", "arena_name": "ring-flash-2.0", "arena_org": "Ant Group", - "arena_elo": 1320.82, - "arena_rank": 180, - "arena_votes": 7222, + "arena_elo": 1320.92, + "arena_rank": 184, + "arena_votes": 7223, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71461,9 +71563,9 @@ "name": "step-1o-turbo-202506", "arena_name": "step-1o-turbo-202506", "arena_org": "StepFun", - "arena_elo": 1320.22, - "arena_rank": 181, - "arena_votes": 9114, + "arena_elo": 1320.44, + "arena_rank": 185, + "arena_votes": 9110, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71476,8 +71578,8 @@ "name": "glm-4-plus", "arena_name": "glm-4-plus", "arena_org": "Zhipu AI", - "arena_elo": 1319.03, - "arena_rank": 182, + "arena_elo": 1319.01, + "arena_rank": 186, "arena_votes": 26126, "sources": { "arena_name": "arena", @@ -71491,9 +71593,9 @@ "name": "gpt-oss-20b", "arena_name": "gpt-oss-20b", "arena_org": "OpenAI", - "arena_elo": 1318.2, - "arena_rank": 184, - "arena_votes": 10706, + "arena_elo": 1318.16, + "arena_rank": 188, + "arena_votes": 10704, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71506,9 +71608,9 @@ "name": "gemma-3n-e4b-it", "arena_name": "gemma-3n-e4b-it", "arena_org": "Google", - "arena_elo": 1318.08, - "arena_rank": 185, - "arena_votes": 22730, + "arena_elo": 1318.07, + "arena_rank": 189, + "arena_votes": 22728, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71521,9 +71623,9 @@ "name": "nvidia-nemotron-3-nano-30b-a3b-bf16", "arena_name": "nvidia-nemotron-3-nano-30b-a3b-bf16", "arena_org": "Nvidia", - "arena_elo": 1317.76, - "arena_rank": 186, - "arena_votes": 15606, + "arena_elo": 1317.92, + "arena_rank": 190, + "arena_votes": 15607, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71536,8 +71638,8 @@ "name": "qwen-max-0919", "arena_name": "qwen-max-0919", "arena_org": "Alibaba", - "arena_elo": 1317.63, - "arena_rank": 187, + "arena_elo": 1317.59, + "arena_rank": 191, "arena_votes": 16478, "sources": { "arena_name": "arena", @@ -71551,8 +71653,8 @@ "name": "qwen2.5-plus-1127", "arena_name": "qwen2.5-plus-1127", "arena_org": "Alibaba", - "arena_elo": 1314.99, - "arena_rank": 189, + "arena_elo": 1315.01, + "arena_rank": 193, "arena_votes": 10187, "sources": { "arena_name": "arena", @@ -71567,7 +71669,7 @@ "arena_name": "athene-v2-chat", "arena_org": "NexusFlow", "arena_elo": 1314.17, - "arena_rank": 190, + "arena_rank": 194, "arena_votes": 24739, "sources": { "arena_name": "arena", @@ -71581,8 +71683,8 @@ "name": "gpt-4-1106-preview", "arena_name": "gpt-4-1106-preview", "arena_org": "OpenAI", - "arena_elo": 1312.31, - "arena_rank": 193, + "arena_elo": 1312.18, + "arena_rank": 197, "arena_votes": 100105, "sources": { "arena_name": "arena", @@ -71598,8 +71700,8 @@ "name": "hunyuan-standard-2025-02-10", "arena_name": "hunyuan-standard-2025-02-10", "arena_org": "Tencent", - "arena_elo": 1310.82, - "arena_rank": 194, + "arena_elo": 1310.79, + "arena_rank": 198, "arena_votes": 3904, "sources": { "arena_name": "arena", @@ -71613,8 +71715,8 @@ "name": "grok-2-mini-2024-08-13", "arena_name": "grok-2-mini-2024-08-13", "arena_org": "xAI", - "arena_elo": 1307.77, - "arena_rank": 196, + "arena_elo": 1307.76, + "arena_rank": 200, "arena_votes": 52567, "sources": { "arena_name": "arena", @@ -71628,8 +71730,8 @@ "name": "mercury", "arena_name": "mercury", "arena_org": "Inception AI", - "arena_elo": 1306.31, - "arena_rank": 198, + "arena_elo": 1306.25, + "arena_rank": 202, "arena_votes": 1982, "sources": { "arena_name": "arena", @@ -71643,8 +71745,8 @@ "name": "athene-70b-0725", "arena_name": "athene-70b-0725", "arena_org": "NexusFlow", - "arena_elo": 1305.62, - "arena_rank": 199, + "arena_elo": 1305.56, + "arena_rank": 203, "arena_votes": 19621, "sources": { "arena_name": "arena", @@ -71658,9 +71760,9 @@ "name": "olmo-3-32b-think", "arena_name": "olmo-3-32b-think", "arena_org": "Ai2", - "arena_elo": 1305.41, - "arena_rank": 200, - "arena_votes": 5999, + "arena_elo": 1305.54, + "arena_rank": 204, + "arena_votes": 5995, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71673,9 +71775,9 @@ "name": "magistral-medium-2506", "arena_name": "magistral-medium-2506", "arena_org": "Mistral", - "arena_elo": 1303.34, - "arena_rank": 202, - "arena_votes": 11728, + "arena_elo": 1303.18, + "arena_rank": 206, + "arena_votes": 11723, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71688,9 +71790,9 @@ "name": "mistral-small-3.1-24b-instruct-2503", "arena_name": "mistral-small-3.1-24b-instruct-2503", "arena_org": "Mistral", - "arena_elo": 1302.78, - "arena_rank": 204, - "arena_votes": 33485, + "arena_elo": 1302.85, + "arena_rank": 207, + "arena_votes": 33474, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71703,9 +71805,9 @@ "name": "hunyuan-large-vision", "arena_name": "hunyuan-large-vision", "arena_org": "Tencent", - "arena_elo": 1293.85, - "arena_rank": 207, - "arena_votes": 5401, + "arena_elo": 1293.83, + "arena_rank": 211, + "arena_votes": 5399, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71718,8 +71820,8 @@ "name": "amazon-nova-pro-v1.0", "arena_name": "amazon-nova-pro-v1.0", "arena_org": "Amazon", - "arena_elo": 1289.72, - "arena_rank": 209, + "arena_elo": 1289.69, + "arena_rank": 213, "arena_votes": 24745, "sources": { "arena_name": "arena", @@ -71733,8 +71835,8 @@ "name": "reka-core-20240904", "arena_name": "reka-core-20240904", "arena_org": "Reka AI", - "arena_elo": 1287.4, - "arena_rank": 212, + "arena_elo": 1287.33, + "arena_rank": 216, "arena_votes": 7312, "sources": { "arena_name": "arena", @@ -71748,9 +71850,9 @@ "name": "ibm-granite-h-small", "arena_name": "ibm-granite-h-small", "arena_org": "IBM", - "arena_elo": 1286.98, - "arena_rank": 213, - "arena_votes": 5748, + "arena_elo": 1286.93, + "arena_rank": 217, + "arena_votes": 5747, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71763,8 +71865,8 @@ "name": "gpt-4-0314", "arena_name": "gpt-4-0314", "arena_org": "OpenAI", - "arena_elo": 1286.2, - "arena_rank": 214, + "arena_elo": 1286, + "arena_rank": 218, "arena_votes": 54173, "sources": { "arena_name": "arena", @@ -71777,12 +71879,12 @@ "aider_pass_rate": 0.504 }, { - "name": "llama-3.1-nemotron-51b-instruct", - "arena_name": "llama-3.1-nemotron-51b-instruct", - "arena_org": "Nvidia", - "arena_elo": 1285.6, - "arena_rank": 216, - "arena_votes": 3749, + "name": "olmo-3.1-32b-think", + "arena_name": "olmo-3.1-32b-think", + "arena_org": "Ai2", + "arena_elo": 1285.68, + "arena_rank": 220, + "arena_votes": 8550, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71792,12 +71894,12 @@ } }, { - "name": "olmo-3.1-32b-think", - "arena_name": "olmo-3.1-32b-think", - "arena_org": "Ai2", - "arena_elo": 1285.52, - "arena_rank": 217, - "arena_votes": 8544, + "name": "llama-3.1-nemotron-51b-instruct", + "arena_name": "llama-3.1-nemotron-51b-instruct", + "arena_org": "Nvidia", + "arena_elo": 1285.46, + "arena_rank": 221, + "arena_votes": 3749, "sources": { "arena_name": "arena", "arena_org": "arena", @@ -71810,8 +71912,8 @@ "name": "gemini-1.5-flash-001", "arena_name": "gemini-1.5-flash-001", "arena_org": "Google", - "arena_elo": 1285.27, - "arena_rank": 218, + "arena_elo": 1285.21, + "arena_rank": 222, "arena_votes": 62833, "sources": { "arena_name": "arena", @@ -71825,8 +71927,8 @@ "name": "nemotron-4-340b-instruct", "arena_name": "nemotron-4-340b-instruct", "arena_org": "Nvidia", - "arena_elo": 1276.58, - "arena_rank": 221, + "arena_elo": 1276.43, + "arena_rank": 225, "arena_votes": 19659, "sources": { "arena_name": "arena", @@ -71840,8 +71942,8 @@ "name": "llama-3-70b-instruct", "arena_name": "llama-3-70b-instruct", "arena_org": "Meta", - "arena_elo": 1275.42, - "arena_rank": 223, + "arena_elo": 1275.29, + "arena_rank": 227, "arena_votes": 156876, "sources": { "arena_name": "arena", @@ -71855,8 +71957,8 @@ "name": "mistral-small-24b-instruct-2501", "arena_name": "mistral-small-24b-instruct-2501", "arena_org": "Mistral", - "arena_elo": 1273.63, - "arena_rank": 225, + "arena_elo": 1273.57, + "arena_rank": 229, "arena_votes": 14681, "sources": { "arena_name": "arena", @@ -71870,8 +71972,8 @@ "name": "glm-4-0520", "arena_name": "glm-4-0520", "arena_org": "Zhipu AI", - "arena_elo": 1272.78, - "arena_rank": 226, + "arena_elo": 1272.67, + "arena_rank": 230, "arena_votes": 9788, "sources": { "arena_name": "arena", @@ -71885,8 +71987,8 @@ "name": "reka-flash-20240904", "arena_name": "reka-flash-20240904", "arena_org": "Reka AI", - "arena_elo": 1271.42, - "arena_rank": 227, + "arena_elo": 1271.29, + "arena_rank": 231, "arena_votes": 7536, "sources": { "arena_name": "arena", @@ -71900,8 +72002,8 @@ "name": "c4ai-aya-expanse-32b", "arena_name": "c4ai-aya-expanse-32b", "arena_org": "Cohere", - "arena_elo": 1266.48, - "arena_rank": 229, + "arena_elo": 1266.39, + "arena_rank": 233, "arena_votes": 27124, "sources": { "arena_name": "arena", @@ -71915,8 +72017,8 @@ "name": "amazon-nova-lite-v1.0", "arena_name": "amazon-nova-lite-v1.0", "arena_org": "Amazon", - "arena_elo": 1260.12, - "arena_rank": 235, + "arena_elo": 1260.08, + "arena_rank": 239, "arena_votes": 19372, "sources": { "arena_name": "arena", @@ -71930,8 +72032,8 @@ "name": "olmo-2-0325-32b-instruct", "arena_name": "olmo-2-0325-32b-instruct", "arena_org": "Ai2", - "arena_elo": 1251.41, - "arena_rank": 238, + "arena_elo": 1251.32, + "arena_rank": 242, "arena_votes": 3334, "sources": { "arena_name": "arena", @@ -71945,8 +72047,8 @@ "name": "amazon-nova-micro-v1.0", "arena_name": "amazon-nova-micro-v1.0", "arena_org": "Amazon", - "arena_elo": 1240.42, - "arena_rank": 241, + "arena_elo": 1240.38, + "arena_rank": 245, "arena_votes": 19364, "sources": { "arena_name": "arena", @@ -71960,8 +72062,8 @@ "name": "ministral-8b-2410", "arena_name": "ministral-8b-2410", "arena_org": "Mistral", - "arena_elo": 1236.78, - "arena_rank": 243, + "arena_elo": 1236.71, + "arena_rank": 247, "arena_votes": 4781, "sources": { "arena_name": "arena", @@ -71975,8 +72077,8 @@ "name": "gemini-pro-dev-api", "arena_name": "gemini-pro-dev-api", "arena_org": "Google", - "arena_elo": 1234.47, - "arena_rank": 244, + "arena_elo": 1234.3, + "arena_rank": 248, "arena_votes": 18354, "sources": { "arena_name": "arena", @@ -71990,8 +72092,8 @@ "name": "hunyuan-standard-256k", "arena_name": "hunyuan-standard-256k", "arena_org": "Tencent", - "arena_elo": 1233.1, - "arena_rank": 246, + "arena_elo": 1233.09, + "arena_rank": 250, "arena_votes": 2728, "sources": { "arena_name": "arena", @@ -72005,8 +72107,8 @@ "name": "reka-flash-21b-20240226-online", "arena_name": "reka-flash-21b-20240226-online", "arena_org": "Reka AI", - "arena_elo": 1232.49, - "arena_rank": 247, + "arena_elo": 1232.32, + "arena_rank": 251, "arena_votes": 15450, "sources": { "arena_name": "arena", @@ -72020,8 +72122,8 @@ "name": "reka-flash-21b-20240226", "arena_name": "reka-flash-21b-20240226", "arena_org": "Reka AI", - "arena_elo": 1225.73, - "arena_rank": 251, + "arena_elo": 1225.57, + "arena_rank": 255, "arena_votes": 24806, "sources": { "arena_name": "arena", @@ -72035,8 +72137,8 @@ "name": "c4ai-aya-expanse-8b", "arena_name": "c4ai-aya-expanse-8b", "arena_org": "Cohere", - "arena_elo": 1222.42, - "arena_rank": 254, + "arena_elo": 1222.35, + "arena_rank": 258, "arena_votes": 9818, "sources": { "arena_name": "arena", @@ -72050,8 +72152,8 @@ "name": "mistral-medium", "arena_name": "mistral-medium", "arena_org": "Mistral", - "arena_elo": 1222.16, - "arena_rank": 255, + "arena_elo": 1221.99, + "arena_rank": 259, "arena_votes": 34550, "sources": { "arena_name": "arena", @@ -72065,8 +72167,8 @@ "name": "gemini-pro", "arena_name": "gemini-pro", "arena_org": "Google", - "arena_elo": 1221.17, - "arena_rank": 256, + "arena_elo": 1220.99, + "arena_rank": 260, "arena_votes": 6390, "sources": { "arena_name": "arena", @@ -72080,8 +72182,8 @@ "name": "gpt-3.5-turbo-1106", "arena_name": "gpt-3.5-turbo-1106", "arena_org": "OpenAI", - "arena_elo": 1201.7, - "arena_rank": 263, + "arena_elo": 1201.47, + "arena_rank": 267, "arena_votes": 16619, "sources": { "arena_name": "arena", @@ -72097,8 +72199,8 @@ "name": "dbrx-instruct-preview", "arena_name": "dbrx-instruct-preview", "arena_org": "Databricks", - "arena_elo": 1194.24, - "arena_rank": 267, + "arena_elo": 1194.06, + "arena_rank": 271, "arena_votes": 32191, "sources": { "arena_name": "arena", @@ -72112,8 +72214,8 @@ "name": "wizardlm-70b", "arena_name": "wizardlm-70b", "arena_org": "Microsoft", - "arena_elo": 1183.82, - "arena_rank": 270, + "arena_elo": 1183.65, + "arena_rank": 274, "arena_votes": 8214, "sources": { "arena_name": "arena", @@ -72127,8 +72229,8 @@ "name": "snowflake-arctic-instruct", "arena_name": "snowflake-arctic-instruct", "arena_org": "Snowflake", - "arena_elo": 1178.66, - "arena_rank": 277, + "arena_elo": 1178.47, + "arena_rank": 281, "arena_votes": 32832, "sources": { "arena_name": "arena", @@ -72142,8 +72244,8 @@ "name": "tulu-2-dpo-70b", "arena_name": "tulu-2-dpo-70b", "arena_org": "AllenAI/UW", - "arena_elo": 1177.16, - "arena_rank": 279, + "arena_elo": 1177, + "arena_rank": 283, "arena_votes": 6535, "sources": { "arena_name": "arena", @@ -72157,8 +72259,8 @@ "name": "vicuna-33b", "arena_name": "vicuna-33b", "arena_org": "LMSYS", - "arena_elo": 1171.93, - "arena_rank": 281, + "arena_elo": 1171.75, + "arena_rank": 285, "arena_votes": 22479, "sources": { "arena_name": "arena", @@ -72172,8 +72274,8 @@ "name": "llama-2-70b-chat", "arena_name": "llama-2-70b-chat", "arena_org": "Meta", - "arena_elo": 1169.94, - "arena_rank": 284, + "arena_elo": 1169.79, + "arena_rank": 288, "arena_votes": 38492, "sources": { "arena_name": "arena", @@ -72187,8 +72289,8 @@ "name": "llama2-70b-steerlm-chat", "arena_name": "llama2-70b-steerlm-chat", "arena_org": "Nvidia", - "arena_elo": 1154.45, - "arena_rank": 290, + "arena_elo": 1154.28, + "arena_rank": 294, "arena_votes": 3585, "sources": { "arena_name": "arena", @@ -72202,8 +72304,8 @@ "name": "dolphin-2.2.1-mistral-7b", "arena_name": "dolphin-2.2.1-mistral-7b", "arena_org": "Cognitive Computations", - "arena_elo": 1151.15, - "arena_rank": 292, + "arena_elo": 1150.97, + "arena_rank": 296, "arena_votes": 1679, "sources": { "arena_name": "arena", @@ -72217,8 +72319,8 @@ "name": "mpt-30b-chat", "arena_name": "mpt-30b-chat", "arena_org": "MosaicML", - "arena_elo": 1149.26, - "arena_rank": 293, + "arena_elo": 1149.06, + "arena_rank": 297, "arena_votes": 2572, "sources": { "arena_name": "arena", @@ -72232,8 +72334,8 @@ "name": "wizardlm-13b", "arena_name": "wizardlm-13b", "arena_org": "Microsoft", - "arena_elo": 1148.35, - "arena_rank": 295, + "arena_elo": 1148.17, + "arena_rank": 299, "arena_votes": 7044, "sources": { "arena_name": "arena", @@ -72247,8 +72349,8 @@ "name": "falcon-180b-chat", "arena_name": "falcon-180b-chat", "arena_org": "TII", - "arena_elo": 1146.22, - "arena_rank": 296, + "arena_elo": 1146.01, + "arena_rank": 300, "arena_votes": 1295, "sources": { "arena_name": "arena", @@ -72262,8 +72364,8 @@ "name": "phi-3-mini-4k-instruct-june-2024", "arena_name": "phi-3-mini-4k-instruct-june-2024", "arena_org": "Microsoft", - "arena_elo": 1142.27, - "arena_rank": 298, + "arena_elo": 1142.12, + "arena_rank": 302, "arena_votes": 12297, "sources": { "arena_name": "arena", @@ -72277,8 +72379,8 @@ "name": "llama-2-13b-chat", "arena_name": "llama-2-13b-chat", "arena_org": "Meta", - "arena_elo": 1140.65, - "arena_rank": 299, + "arena_elo": 1140.5, + "arena_rank": 303, "arena_votes": 19174, "sources": { "arena_name": "arena", @@ -72292,8 +72394,8 @@ "name": "vicuna-13b", "arena_name": "vicuna-13b", "arena_org": "LMSYS", - "arena_elo": 1140.07, - "arena_rank": 300, + "arena_elo": 1139.88, + "arena_rank": 304, "arena_votes": 19367, "sources": { "arena_name": "arena", @@ -72307,8 +72409,8 @@ "name": "qwen-14b-chat", "arena_name": "qwen-14b-chat", "arena_org": "Alibaba", - "arena_elo": 1137.74, - "arena_rank": 301, + "arena_elo": 1137.54, + "arena_rank": 305, "arena_votes": 4964, "sources": { "arena_name": "arena", @@ -72322,8 +72424,8 @@ "name": "palm-2", "arena_name": "palm-2", "arena_org": "Google", - "arena_elo": 1136.5, - "arena_rank": 302, + "arena_elo": 1136.28, + "arena_rank": 306, "arena_votes": 8554, "sources": { "arena_name": "arena", @@ -72337,8 +72439,8 @@ "name": "codellama-34b-instruct", "arena_name": "codellama-34b-instruct", "arena_org": "Meta", - "arena_elo": 1135.71, - "arena_rank": 303, + "arena_elo": 1135.53, + "arena_rank": 307, "arena_votes": 7366, "sources": { "arena_name": "arena", @@ -72352,8 +72454,8 @@ "name": "guanaco-33b", "arena_name": "guanaco-33b", "arena_org": "UW", - "arena_elo": 1126.45, - "arena_rank": 308, + "arena_elo": 1126.26, + "arena_rank": 312, "arena_votes": 2921, "sources": { "arena_name": "arena", @@ -72367,8 +72469,8 @@ "name": "stripedhyena-nous-7b", "arena_name": "stripedhyena-nous-7b", "arena_org": "Together AI", - "arena_elo": 1120.14, - "arena_rank": 310, + "arena_elo": 1119.94, + "arena_rank": 314, "arena_votes": 5182, "sources": { "arena_name": "arena", @@ -72382,8 +72484,8 @@ "name": "codellama-70b-instruct", "arena_name": "codellama-70b-instruct", "arena_org": "Meta", - "arena_elo": 1118.24, - "arena_rank": 311, + "arena_elo": 1118.09, + "arena_rank": 315, "arena_votes": 1143, "sources": { "arena_name": "arena", @@ -72397,8 +72499,8 @@ "name": "vicuna-7b", "arena_name": "vicuna-7b", "arena_org": "LMSYS", - "arena_elo": 1113.79, - "arena_rank": 312, + "arena_elo": 1113.59, + "arena_rank": 316, "arena_votes": 6923, "sources": { "arena_name": "arena", @@ -72412,8 +72514,8 @@ "name": "mistral-7b-instruct", "arena_name": "mistral-7b-instruct", "arena_org": "Mistral", - "arena_elo": 1108.77, - "arena_rank": 316, + "arena_elo": 1108.57, + "arena_rank": 320, "arena_votes": 8977, "sources": { "arena_name": "arena", @@ -72427,8 +72529,8 @@ "name": "llama-2-7b-chat", "arena_name": "llama-2-7b-chat", "arena_org": "Meta", - "arena_elo": 1107.28, - "arena_rank": 317, + "arena_elo": 1107.13, + "arena_rank": 321, "arena_votes": 14148, "sources": { "arena_name": "arena", @@ -72442,8 +72544,8 @@ "name": "olmo-7b-instruct", "arena_name": "olmo-7b-instruct", "arena_org": "Ai2", - "arena_elo": 1073.67, - "arena_rank": 320, + "arena_elo": 1073.53, + "arena_rank": 324, "arena_votes": 6328, "sources": { "arena_name": "arena", @@ -72457,8 +72559,8 @@ "name": "koala-13b", "arena_name": "koala-13b", "arena_org": "UC Berkeley", - "arena_elo": 1069.55, - "arena_rank": 321, + "arena_elo": 1069.36, + "arena_rank": 325, "arena_votes": 6965, "sources": { "arena_name": "arena", @@ -72472,8 +72574,8 @@ "name": "alpaca-13b", "arena_name": "alpaca-13b", "arena_org": "Stanford", - "arena_elo": 1066.69, - "arena_rank": 322, + "arena_elo": 1066.43, + "arena_rank": 326, "arena_votes": 5745, "sources": { "arena_name": "arena", @@ -72487,8 +72589,8 @@ "name": "gpt4all-13b-snoozy", "arena_name": "gpt4all-13b-snoozy", "arena_org": "Nomic AI", - "arena_elo": 1065.16, - "arena_rank": 323, + "arena_elo": 1064.94, + "arena_rank": 327, "arena_votes": 1743, "sources": { "arena_name": "arena", @@ -72502,8 +72604,8 @@ "name": "mpt-7b-chat", "arena_name": "mpt-7b-chat", "arena_org": "MosaicML", - "arena_elo": 1061, - "arena_rank": 324, + "arena_elo": 1060.78, + "arena_rank": 328, "arena_votes": 3924, "sources": { "arena_name": "arena", @@ -72517,8 +72619,8 @@ "name": "chatglm3-6b", "arena_name": "chatglm3-6b", "arena_org": "Tsinghua", - "arena_elo": 1055.18, - "arena_rank": 325, + "arena_elo": 1054.98, + "arena_rank": 329, "arena_votes": 4658, "sources": { "arena_name": "arena", @@ -72532,8 +72634,8 @@ "name": "RWKV-4-Raven-14B", "arena_name": "RWKV-4-Raven-14B", "arena_org": "RWKV", - "arena_elo": 1040.46, - "arena_rank": 326, + "arena_elo": 1040.25, + "arena_rank": 330, "arena_votes": 4845, "sources": { "arena_name": "arena", @@ -72547,8 +72649,8 @@ "name": "chatglm2-6b", "arena_name": "chatglm2-6b", "arena_org": "Tsinghua", - "arena_elo": 1023.29, - "arena_rank": 327, + "arena_elo": 1023.09, + "arena_rank": 331, "arena_votes": 2658, "sources": { "arena_name": "arena", @@ -72562,8 +72664,8 @@ "name": "oasst-pythia-12b", "arena_name": "oasst-pythia-12b", "arena_org": "OpenAssistant", - "arena_elo": 1021.23, - "arena_rank": 328, + "arena_elo": 1021.01, + "arena_rank": 332, "arena_votes": 6310, "sources": { "arena_name": "arena", @@ -72577,8 +72679,8 @@ "name": "chatglm-6b", "arena_name": "chatglm-6b", "arena_org": "Tsinghua", - "arena_elo": 994.677, - "arena_rank": 329, + "arena_elo": 994.492, + "arena_rank": 333, "arena_votes": 4914, "sources": { "arena_name": "arena", @@ -72592,8 +72694,8 @@ "name": "fastchat-t5-3b", "arena_name": "fastchat-t5-3b", "arena_org": "LMSYS", - "arena_elo": 990.473, - "arena_rank": 330, + "arena_elo": 990.269, + "arena_rank": 334, "arena_votes": 4203, "sources": { "arena_name": "arena", @@ -72607,8 +72709,8 @@ "name": "stablelm-tuned-alpha-7b", "arena_name": "stablelm-tuned-alpha-7b", "arena_org": "Stability AI", - "arena_elo": 951.724, - "arena_rank": 333, + "arena_elo": 951.532, + "arena_rank": 337, "arena_votes": 3287, "sources": { "arena_name": "arena",