j_yoon.song committed on
Commit
ac0e84a
·
1 Parent(s): 055c28e

add models

Browse files
src/data/open/length_data.json CHANGED
@@ -1,4 +1,72 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "Claude 4.1 Opus (20250805) (think)": {
3
  "Overall": {
4
  "Min": -10,
@@ -475,6 +543,74 @@
475
  "Med Resp": -3.0
476
  }
477
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
  "Solar Pro Preview (top_p:0.95, temp: 0.7)": {
479
  "Overall": {
480
  "Min": 1,
@@ -1087,6 +1223,74 @@
1087
  "Med Resp": 2282.5
1088
  }
1089
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1090
  "GLM-4.5 FP8 (think)": {
1091
  "Overall": {
1092
  "Min": 75,
@@ -1223,6 +1427,74 @@
1223
  "Med Resp": 1208.5
1224
  }
1225
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1226
  "gpt-oss-120B (Reasoning: medium)": {
1227
  "Overall": {
1228
  "Min": 43,
@@ -1563,6 +1835,74 @@
1563
  "Med Resp": 1728.5
1564
  }
1565
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1566
  "Claude 4 Opus (20250514) (think)": {
1567
  "Overall": {
1568
  "Min": -10,
@@ -1971,6 +2311,142 @@
1971
  "Med Resp": -3.0
1972
  }
1973
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1974
  "gpt-oss-20B (Reasoning: medium)": {
1975
  "Overall": {
1976
  "Min": 32,
@@ -2107,74 +2583,6 @@
2107
  "Med Resp": -3.0
2108
  }
2109
  },
2110
- "Grok-4": {
2111
- "Overall": {
2112
- "Min": -10,
2113
- "Max": -2,
2114
- "Med": -2.0,
2115
- "Med Resp": -1.0
2116
- },
2117
- "Content Generation": {
2118
- "Min": -2,
2119
- "Max": -2,
2120
- "Med": -2.0,
2121
- "Med Resp": -1.0
2122
- },
2123
- "Editing": {
2124
- "Min": -2,
2125
- "Max": -2,
2126
- "Med": -2.0,
2127
- "Med Resp": -1.0
2128
- },
2129
- "Data Analysis": {
2130
- "Min": -2,
2131
- "Max": -2,
2132
- "Med": -2.0,
2133
- "Med Resp": -1.0
2134
- },
2135
- "Reasoning": {
2136
- "Min": -2,
2137
- "Max": -2,
2138
- "Med": -2.0,
2139
- "Med Resp": -1.0
2140
- },
2141
- "Hallucination": {
2142
- "Min": -2,
2143
- "Max": -2,
2144
- "Med": -2.0,
2145
- "Med Resp": -1.0
2146
- },
2147
- "Safety": {
2148
- "Min": -2,
2149
- "Max": -2,
2150
- "Med": -2.0,
2151
- "Med Resp": -1.0
2152
- },
2153
- "Repetition": {
2154
- "Min": -2,
2155
- "Max": -2,
2156
- "Med": -2.0,
2157
- "Med Resp": -1.0
2158
- },
2159
- "Summarization": {
2160
- "Min": -2,
2161
- "Max": -2,
2162
- "Med": -2.0,
2163
- "Med Resp": -1.0
2164
- },
2165
- "Translation": {
2166
- "Min": -2,
2167
- "Max": -2,
2168
- "Med": -2.0,
2169
- "Med Resp": -1.0
2170
- },
2171
- "Multi-Turn": {
2172
- "Min": -10,
2173
- "Max": -4,
2174
- "Med": -6.0,
2175
- "Med Resp": -3.0
2176
- }
2177
- },
2178
  "Apriel 1.5 15B Thinker": {
2179
  "Overall": {
2180
  "Min": 118,
 
1
  {
2
+ "Olmo 3 32B Think": {
3
+ "Overall": {
4
+ "Min": 210,
5
+ "Max": 65454,
6
+ "Med": 3360.5,
7
+ "Med Resp": 473.0
8
+ },
9
+ "Content Generation": {
10
+ "Min": 683,
11
+ "Max": 65300,
12
+ "Med": 3224.0,
13
+ "Med Resp": 606.0
14
+ },
15
+ "Editing": {
16
+ "Min": 580,
17
+ "Max": 14539,
18
+ "Med": 2859.0,
19
+ "Med Resp": 419.5
20
+ },
21
+ "Data Analysis": {
22
+ "Min": 369,
23
+ "Max": 23205,
24
+ "Med": 2624.0,
25
+ "Med Resp": 332.0
26
+ },
27
+ "Reasoning": {
28
+ "Min": 779,
29
+ "Max": 27491,
30
+ "Med": 3911.5,
31
+ "Med Resp": 467.5
32
+ },
33
+ "Hallucination": {
34
+ "Min": 254,
35
+ "Max": 10751,
36
+ "Med": 2472.0,
37
+ "Med Resp": 768.0
38
+ },
39
+ "Safety": {
40
+ "Min": 210,
41
+ "Max": 7162,
42
+ "Med": 2025.0,
43
+ "Med Resp": 646.0
44
+ },
45
+ "Repetition": {
46
+ "Min": 1336,
47
+ "Max": 65454,
48
+ "Med": 5205.5,
49
+ "Med Resp": 575.5
50
+ },
51
+ "Summarization": {
52
+ "Min": 380,
53
+ "Max": 17517,
54
+ "Med": 2254.0,
55
+ "Med Resp": 248.0
56
+ },
57
+ "Translation": {
58
+ "Min": 846,
59
+ "Max": 15667,
60
+ "Med": 4546.5,
61
+ "Med Resp": 349.5
62
+ },
63
+ "Multi-Turn": {
64
+ "Min": 1010,
65
+ "Max": 24077,
66
+ "Med": 6999.5,
67
+ "Med Resp": 1764.5
68
+ }
69
+ },
70
  "Claude 4.1 Opus (20250805) (think)": {
71
  "Overall": {
72
  "Min": -10,
 
543
  "Med Resp": -3.0
544
  }
545
  },
546
+ "Gemini 3 Pro Preview (Thinking Level: High)": {
547
+ "Overall": {
548
+ "Min": 0,
549
+ "Max": 18460,
550
+ "Med": 1930.5,
551
+ "Med Resp": 378.0
552
+ },
553
+ "Content Generation": {
554
+ "Min": 0,
555
+ "Max": 12404,
556
+ "Med": 1967.5,
557
+ "Med Resp": 570.5
558
+ },
559
+ "Editing": {
560
+ "Min": 433,
561
+ "Max": 7372,
562
+ "Med": 1684.0,
563
+ "Med Resp": 322.0
564
+ },
565
+ "Data Analysis": {
566
+ "Min": 386,
567
+ "Max": 10791,
568
+ "Med": 1357.0,
569
+ "Med Resp": 172.0
570
+ },
571
+ "Reasoning": {
572
+ "Min": 0,
573
+ "Max": 18460,
574
+ "Med": 1821.0,
575
+ "Med Resp": 502.5
576
+ },
577
+ "Hallucination": {
578
+ "Min": 524,
579
+ "Max": 6228,
580
+ "Med": 1833.0,
581
+ "Med Resp": 468.0
582
+ },
583
+ "Safety": {
584
+ "Min": 832,
585
+ "Max": 6324,
586
+ "Med": 1802.0,
587
+ "Med Resp": 291.0
588
+ },
589
+ "Repetition": {
590
+ "Min": 516,
591
+ "Max": 5086,
592
+ "Med": 1910.0,
593
+ "Med Resp": 314.0
594
+ },
595
+ "Summarization": {
596
+ "Min": 663,
597
+ "Max": 3857,
598
+ "Med": 1295.5,
599
+ "Med Resp": 184.5
600
+ },
601
+ "Translation": {
602
+ "Min": 964,
603
+ "Max": 9535,
604
+ "Med": 2286.5,
605
+ "Med Resp": 357.0
606
+ },
607
+ "Multi-Turn": {
608
+ "Min": 608,
609
+ "Max": 10590,
610
+ "Med": 4601.5,
611
+ "Med Resp": 1377.0
612
+ }
613
+ },
614
  "Solar Pro Preview (top_p:0.95, temp: 0.7)": {
615
  "Overall": {
616
  "Min": 1,
 
1223
  "Med Resp": 2282.5
1224
  }
1225
  },
1226
+ "Claude 4.5 Opus (think)": {
1227
+ "Overall": {
1228
+ "Min": -10,
1229
+ "Max": -2,
1230
+ "Med": -2.0,
1231
+ "Med Resp": -1.0
1232
+ },
1233
+ "Content Generation": {
1234
+ "Min": -2,
1235
+ "Max": -2,
1236
+ "Med": -2.0,
1237
+ "Med Resp": -1.0
1238
+ },
1239
+ "Editing": {
1240
+ "Min": -2,
1241
+ "Max": -2,
1242
+ "Med": -2.0,
1243
+ "Med Resp": -1.0
1244
+ },
1245
+ "Data Analysis": {
1246
+ "Min": -2,
1247
+ "Max": -2,
1248
+ "Med": -2.0,
1249
+ "Med Resp": -1.0
1250
+ },
1251
+ "Reasoning": {
1252
+ "Min": -2,
1253
+ "Max": -2,
1254
+ "Med": -2.0,
1255
+ "Med Resp": -1.0
1256
+ },
1257
+ "Hallucination": {
1258
+ "Min": -2,
1259
+ "Max": -2,
1260
+ "Med": -2.0,
1261
+ "Med Resp": -1.0
1262
+ },
1263
+ "Safety": {
1264
+ "Min": -2,
1265
+ "Max": -2,
1266
+ "Med": -2.0,
1267
+ "Med Resp": -1.0
1268
+ },
1269
+ "Repetition": {
1270
+ "Min": -2,
1271
+ "Max": -2,
1272
+ "Med": -2.0,
1273
+ "Med Resp": -1.0
1274
+ },
1275
+ "Summarization": {
1276
+ "Min": -2,
1277
+ "Max": -2,
1278
+ "Med": -2.0,
1279
+ "Med Resp": -1.0
1280
+ },
1281
+ "Translation": {
1282
+ "Min": -2,
1283
+ "Max": -2,
1284
+ "Med": -2.0,
1285
+ "Med Resp": -1.0
1286
+ },
1287
+ "Multi-Turn": {
1288
+ "Min": -10,
1289
+ "Max": -4,
1290
+ "Med": -6.0,
1291
+ "Med Resp": -3.0
1292
+ }
1293
+ },
1294
  "GLM-4.5 FP8 (think)": {
1295
  "Overall": {
1296
  "Min": 75,
 
1427
  "Med Resp": 1208.5
1428
  }
1429
  },
1430
+ "MiniMax-M2 (230B A10B)": {
1431
+ "Overall": {
1432
+ "Min": 64,
1433
+ "Max": 28729,
1434
+ "Med": 1142.0,
1435
+ "Med Resp": 325.0
1436
+ },
1437
+ "Content Generation": {
1438
+ "Min": 116,
1439
+ "Max": 16249,
1440
+ "Med": 1235.5,
1441
+ "Med Resp": 501.5
1442
+ },
1443
+ "Editing": {
1444
+ "Min": 111,
1445
+ "Max": 11557,
1446
+ "Med": 858.0,
1447
+ "Med Resp": 201.0
1448
+ },
1449
+ "Data Analysis": {
1450
+ "Min": 76,
1451
+ "Max": 18529,
1452
+ "Med": 834.0,
1453
+ "Med Resp": 170.0
1454
+ },
1455
+ "Reasoning": {
1456
+ "Min": 118,
1457
+ "Max": 18596,
1458
+ "Med": 1674.0,
1459
+ "Med Resp": 418.5
1460
+ },
1461
+ "Hallucination": {
1462
+ "Min": 92,
1463
+ "Max": 8617,
1464
+ "Med": 1130.0,
1465
+ "Med Resp": 436.0
1466
+ },
1467
+ "Safety": {
1468
+ "Min": 64,
1469
+ "Max": 5803,
1470
+ "Med": 563.0,
1471
+ "Med Resp": 176.0
1472
+ },
1473
+ "Repetition": {
1474
+ "Min": 175,
1475
+ "Max": 14147,
1476
+ "Med": 1054.5,
1477
+ "Med Resp": 259.0
1478
+ },
1479
+ "Summarization": {
1480
+ "Min": 135,
1481
+ "Max": 15849,
1482
+ "Med": 716.0,
1483
+ "Med Resp": 197.5
1484
+ },
1485
+ "Translation": {
1486
+ "Min": 216,
1487
+ "Max": 22260,
1488
+ "Med": 1133.0,
1489
+ "Med Resp": 297.5
1490
+ },
1491
+ "Multi-Turn": {
1492
+ "Min": 303,
1493
+ "Max": 28729,
1494
+ "Med": 3732.0,
1495
+ "Med Resp": 1424.0
1496
+ }
1497
+ },
1498
  "gpt-oss-120B (Reasoning: medium)": {
1499
  "Overall": {
1500
  "Min": 43,
 
1835
  "Med Resp": 1728.5
1836
  }
1837
  },
1838
+ "Grok-4": {
1839
+ "Overall": {
1840
+ "Min": -10,
1841
+ "Max": -2,
1842
+ "Med": -2.0,
1843
+ "Med Resp": -1.0
1844
+ },
1845
+ "Content Generation": {
1846
+ "Min": -2,
1847
+ "Max": -2,
1848
+ "Med": -2.0,
1849
+ "Med Resp": -1.0
1850
+ },
1851
+ "Editing": {
1852
+ "Min": -2,
1853
+ "Max": -2,
1854
+ "Med": -2.0,
1855
+ "Med Resp": -1.0
1856
+ },
1857
+ "Data Analysis": {
1858
+ "Min": -2,
1859
+ "Max": -2,
1860
+ "Med": -2.0,
1861
+ "Med Resp": -1.0
1862
+ },
1863
+ "Reasoning": {
1864
+ "Min": -2,
1865
+ "Max": -2,
1866
+ "Med": -2.0,
1867
+ "Med Resp": -1.0
1868
+ },
1869
+ "Hallucination": {
1870
+ "Min": -2,
1871
+ "Max": -2,
1872
+ "Med": -2.0,
1873
+ "Med Resp": -1.0
1874
+ },
1875
+ "Safety": {
1876
+ "Min": -2,
1877
+ "Max": -2,
1878
+ "Med": -2.0,
1879
+ "Med Resp": -1.0
1880
+ },
1881
+ "Repetition": {
1882
+ "Min": -2,
1883
+ "Max": -2,
1884
+ "Med": -2.0,
1885
+ "Med Resp": -1.0
1886
+ },
1887
+ "Summarization": {
1888
+ "Min": -2,
1889
+ "Max": -2,
1890
+ "Med": -2.0,
1891
+ "Med Resp": -1.0
1892
+ },
1893
+ "Translation": {
1894
+ "Min": -2,
1895
+ "Max": -2,
1896
+ "Med": -2.0,
1897
+ "Med Resp": -1.0
1898
+ },
1899
+ "Multi-Turn": {
1900
+ "Min": -10,
1901
+ "Max": -4,
1902
+ "Med": -6.0,
1903
+ "Med Resp": -3.0
1904
+ }
1905
+ },
1906
  "Claude 4 Opus (20250514) (think)": {
1907
  "Overall": {
1908
  "Min": -10,
 
2311
  "Med Resp": -3.0
2312
  }
2313
  },
2314
+ "GPT-5.1 (Reasoning: medium, verbosity: medium)": {
2315
+ "Overall": {
2316
+ "Min": -10,
2317
+ "Max": -2,
2318
+ "Med": -2.0,
2319
+ "Med Resp": -1.0
2320
+ },
2321
+ "Content Generation": {
2322
+ "Min": -2,
2323
+ "Max": -2,
2324
+ "Med": -2.0,
2325
+ "Med Resp": -1.0
2326
+ },
2327
+ "Editing": {
2328
+ "Min": -2,
2329
+ "Max": -2,
2330
+ "Med": -2.0,
2331
+ "Med Resp": -1.0
2332
+ },
2333
+ "Data Analysis": {
2334
+ "Min": -2,
2335
+ "Max": -2,
2336
+ "Med": -2.0,
2337
+ "Med Resp": -1.0
2338
+ },
2339
+ "Reasoning": {
2340
+ "Min": -2,
2341
+ "Max": -2,
2342
+ "Med": -2.0,
2343
+ "Med Resp": -1.0
2344
+ },
2345
+ "Hallucination": {
2346
+ "Min": -2,
2347
+ "Max": -2,
2348
+ "Med": -2.0,
2349
+ "Med Resp": -1.0
2350
+ },
2351
+ "Safety": {
2352
+ "Min": -2,
2353
+ "Max": -2,
2354
+ "Med": -2.0,
2355
+ "Med Resp": -1.0
2356
+ },
2357
+ "Repetition": {
2358
+ "Min": -2,
2359
+ "Max": -2,
2360
+ "Med": -2.0,
2361
+ "Med Resp": -1.0
2362
+ },
2363
+ "Summarization": {
2364
+ "Min": -2,
2365
+ "Max": -2,
2366
+ "Med": -2.0,
2367
+ "Med Resp": -1.0
2368
+ },
2369
+ "Translation": {
2370
+ "Min": -2,
2371
+ "Max": -2,
2372
+ "Med": -2.0,
2373
+ "Med Resp": -1.0
2374
+ },
2375
+ "Multi-Turn": {
2376
+ "Min": -10,
2377
+ "Max": -4,
2378
+ "Med": -6.0,
2379
+ "Med Resp": -3.0
2380
+ }
2381
+ },
2382
+ "KAT Dev 72B Exp": {
2383
+ "Overall": {
2384
+ "Min": 6,
2385
+ "Max": 65602,
2386
+ "Med": 397.0,
2387
+ "Med Resp": 397.0
2388
+ },
2389
+ "Content Generation": {
2390
+ "Min": 26,
2391
+ "Max": 65466,
2392
+ "Med": 554.5,
2393
+ "Med Resp": 554.5
2394
+ },
2395
+ "Editing": {
2396
+ "Min": 13,
2397
+ "Max": 65363,
2398
+ "Med": 223.0,
2399
+ "Med Resp": 223.0
2400
+ },
2401
+ "Data Analysis": {
2402
+ "Min": 21,
2403
+ "Max": 15350,
2404
+ "Med": 289.0,
2405
+ "Med Resp": 289.0
2406
+ },
2407
+ "Reasoning": {
2408
+ "Min": 10,
2409
+ "Max": 65442,
2410
+ "Med": 487.5,
2411
+ "Med Resp": 487.5
2412
+ },
2413
+ "Hallucination": {
2414
+ "Min": 24,
2415
+ "Max": 65455,
2416
+ "Med": 402.0,
2417
+ "Med Resp": 402.0
2418
+ },
2419
+ "Safety": {
2420
+ "Min": 17,
2421
+ "Max": 65474,
2422
+ "Med": 345.0,
2423
+ "Med Resp": 345.0
2424
+ },
2425
+ "Repetition": {
2426
+ "Min": 96,
2427
+ "Max": 65602,
2428
+ "Med": 405.0,
2429
+ "Med Resp": 405.0
2430
+ },
2431
+ "Summarization": {
2432
+ "Min": 39,
2433
+ "Max": 65376,
2434
+ "Med": 292.0,
2435
+ "Med Resp": 292.0
2436
+ },
2437
+ "Translation": {
2438
+ "Min": 10,
2439
+ "Max": 65331,
2440
+ "Med": 339.0,
2441
+ "Med Resp": 339.0
2442
+ },
2443
+ "Multi-Turn": {
2444
+ "Min": 6,
2445
+ "Max": 65466,
2446
+ "Med": 1083.5,
2447
+ "Med Resp": 1083.5
2448
+ }
2449
+ },
2450
  "gpt-oss-20B (Reasoning: medium)": {
2451
  "Overall": {
2452
  "Min": 32,
 
2583
  "Med Resp": -3.0
2584
  }
2585
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2586
  "Apriel 1.5 15B Thinker": {
2587
  "Overall": {
2588
  "Min": 118,
src/data/open/stats.csv CHANGED
@@ -1,9 +1,12 @@
1
  "Model Name" "Link" "Comment" "Group" "Med. Len." "Med. Resp. Len." "Time to First Answer Token" "End-to-End Response Time" "Speed" "Parameter Size (B)" "Type" "Model Type" "Think" "Overall" "Content Generation" "Editing" "Data Analysis" "Reasoning" "Hallucination" "Safety" "Repetition" "Summarization" "Translation" "Multi-Turn"
2
  "GPT-5 (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "70.73" "71.0" "74.38" "76.49" "79.75" "64.94" "56.2" "82.86" "80.16" "69.38" "54.36"
3
  "o3-pro (Reasoning: medium)" "https://platform.openai.com/docs/models/o3-pro" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "66.47" "72.5" "70.31" "75.7" "83.88" "64.37" "33.88" "74.29" "65.48" "64.33" "48.32"
 
 
4
  "Claude 4 Opus (20250514) (think)" "https://www.anthropic.com/claude/opus" "version: 20250514" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "63.29" "60.75" "59.69" "73.31" "69.83" "78.74" "53.72" "55.71" "65.48" "65.45" "48.99"
5
  "Claude 4.1 Opus (20250805) (think)" "https://www.anthropic.com/claude/opus" "version: 20250805" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "63.24" "61.25" "60.0" "78.49" "72.73" "77.01" "56.2" "57.14" "61.9" "62.64" "46.98"
6
  "GPT-5 mini (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5-mini" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "62.56" "68.0" "62.5" "74.9" "76.86" "55.17" "47.93" "44.29" "74.6" "56.18" "45.3"
 
7
  "Claude 4 Sonnet (20250514) (think)" "https://www.anthropic.com/claude/sonnet" "version: 20250514" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "61.8" "58.0" "58.44" "76.49" "67.77" "79.31" "57.02" "44.29" "65.08" "62.92" "44.97"
8
  "o3" "https://platform.openai.com/docs/models/o3" "" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "60.91" "68.75" "60.0" "73.31" "79.34" "54.02" "34.71" "64.29" "60.71" "55.06" "46.98"
9
  "Gemini 2.5 Pro" "https://deepmind.google/models/gemini/pro/" "" "Gemini" "" "" "" "" "" "" "Proprietary" "Think" "On" "59.34" "54.0" "60.94" "78.88" "73.14" "63.22" "17.36" "52.86" "67.86" "53.93" "52.68"
@@ -41,6 +44,8 @@ top-p: 0.95" "DeepSeek" "408.0" "408.0" "0.211452841758728" "23.47111320495605"
41
  top-p: 0.95" "Qwen" "1113.0" "390.0" "27.26490248867746" "39.635579228401184" "37.74973909656839" "32.8" "Open" "Hybrid" "On" "44.44" "52.25" "41.56" "68.92" "66.53" "35.06" "19.83" "25.71" "46.43" "30.9" "32.89"
42
  "Qwen3 30B A3B Instruct 2507" "https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507" "temperature: 0.7
43
  top-p: 0.8" "Qwen" "441.5" "441.5" "7.902002811431885" "19.310550212860107" "42.44958664990833" "30.0" "Open" "Instruct" "Off" "42.79" "45.0" "35.0" "56.18" "66.12" "51.15" "33.06" "24.29" "46.83" "28.09" "35.57"
 
 
44
  "A.X 4.0" "https://huggingface.co/skt/A.X-4.0" "" "SKT" "412.5" "412.5" "0.6553128957748413" "7.924791574478149" "57.95526130360478" "71.9" "Open" "Instruct" "Off" "41.59" "56.0" "43.75" "43.43" "42.56" "40.23" "15.7" "24.29" "53.97" "33.43" "32.21"
45
  "gpt-oss-20B (Reasoning: medium)" "https://huggingface.co/openai/gpt-oss-20b" "Reasoning: medium
46
  temperature: 1.0
@@ -53,6 +58,11 @@ top-p: 0.95" "Alibaba" "1147.0" "408.0" "45.23295979184195" "52.38741266727448"
53
  top-p: 0.95" "mistralai" "369.0" "369.0" "3.2450859546661377" "13.907460689544678" "36.382163796915904" "24.0" "Open" "Instruct" "Off" "39.09" "43.0" "44.69" "43.43" "51.65" "25.86" "22.31" "25.71" "51.98" "31.18" "30.2"
54
  "K2-Think" "https://huggingface.co/LLM360/K2-Think" "temperature: 1.0
55
  top-p: 0.95" "LLM360" "1835.0" "486.0" "24.29692639716904" "43.2994556427002" "42.72123101353567" "32.8" "Open" "Think" "On" "35.06" "35.5" "36.56" "56.18" "47.11" "35.06" "14.05" "12.86" "49.21" "21.63" "23.15"
 
 
 
 
 
56
  "EXAONE 4.0 32B (think)" "https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B" "temperature: 0.6
57
  top-p: 0.95" "Exaone" "1274.5" "503.0" "40.64476558326666" "52.11687910556793" "51.19312170664125" "32.0" "Open" "Hybrid" "On" "33.82" "34.25" "29.38" "56.97" "57.44" "24.71" "27.27" "17.14" "38.49" "18.54" "25.5"
58
  "Apriel 1.5 15B Thinker" "https://huggingface.co/ServiceNow-AI/Apriel-1.5-15b-Thinker" "temperature: 0.6
 
1
  "Model Name" "Link" "Comment" "Group" "Med. Len." "Med. Resp. Len." "Time to First Answer Token" "End-to-End Response Time" "Speed" "Parameter Size (B)" "Type" "Model Type" "Think" "Overall" "Content Generation" "Editing" "Data Analysis" "Reasoning" "Hallucination" "Safety" "Repetition" "Summarization" "Translation" "Multi-Turn"
2
  "GPT-5 (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "70.73" "71.0" "74.38" "76.49" "79.75" "64.94" "56.2" "82.86" "80.16" "69.38" "54.36"
3
  "o3-pro (Reasoning: medium)" "https://platform.openai.com/docs/models/o3-pro" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "66.47" "72.5" "70.31" "75.7" "83.88" "64.37" "33.88" "74.29" "65.48" "64.33" "48.32"
4
+ "GPT-5.1 (Reasoning: medium, verbosity: medium)" "https://platform.openai.com/docs/models/gpt-5.1" "Reasoning: medium, verbosity: medium" "GPT" "" "" "" "11.673096776008606" "" "" "Proprietary" "Think" "On" "64.57" "67.0" "70.0" "72.51" "82.64" "65.52" "52.07" "51.43" "67.06" "59.55" "45.64"
5
+ "Claude 4.5 Opus (think)" "https://www.anthropic.com/claude/opus" "" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "63.41" "63.5" "62.5" "73.71" "77.69" "82.76" "52.89" "58.57" "63.49" "56.74" "45.97"
6
  "Claude 4 Opus (20250514) (think)" "https://www.anthropic.com/claude/opus" "version: 20250514" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "63.29" "60.75" "59.69" "73.31" "69.83" "78.74" "53.72" "55.71" "65.48" "65.45" "48.99"
7
  "Claude 4.1 Opus (20250805) (think)" "https://www.anthropic.com/claude/opus" "version: 20250805" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "63.24" "61.25" "60.0" "78.49" "72.73" "77.01" "56.2" "57.14" "61.9" "62.64" "46.98"
8
  "GPT-5 mini (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5-mini" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "62.56" "68.0" "62.5" "74.9" "76.86" "55.17" "47.93" "44.29" "74.6" "56.18" "45.3"
9
+ "Gemini 3 Pro Preview (Thinking Level: High)" "" "" "Gemini" "1930.5" "378.0" "" "27.89457416534424" "" "" "Proprietary" "Think" "On" "62.48" "59.5" "64.38" "76.49" "78.93" "70.69" "39.67" "65.71" "61.51" "58.15" "48.99"
10
  "Claude 4 Sonnet (20250514) (think)" "https://www.anthropic.com/claude/sonnet" "version: 20250514" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "61.8" "58.0" "58.44" "76.49" "67.77" "79.31" "57.02" "44.29" "65.08" "62.92" "44.97"
11
  "o3" "https://platform.openai.com/docs/models/o3" "" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "60.91" "68.75" "60.0" "73.31" "79.34" "54.02" "34.71" "64.29" "60.71" "55.06" "46.98"
12
  "Gemini 2.5 Pro" "https://deepmind.google/models/gemini/pro/" "" "Gemini" "" "" "" "" "" "" "Proprietary" "Think" "On" "59.34" "54.0" "60.94" "78.88" "73.14" "63.22" "17.36" "52.86" "67.86" "53.93" "52.68"
 
44
  top-p: 0.95" "Qwen" "1113.0" "390.0" "27.26490248867746" "39.635579228401184" "37.74973909656839" "32.8" "Open" "Hybrid" "On" "44.44" "52.25" "41.56" "68.92" "66.53" "35.06" "19.83" "25.71" "46.43" "30.9" "32.89"
45
  "Qwen3 30B A3B Instruct 2507" "https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507" "temperature: 0.7
46
  top-p: 0.8" "Qwen" "441.5" "441.5" "7.902002811431885" "19.310550212860107" "42.44958664990833" "30.0" "Open" "Instruct" "Off" "42.79" "45.0" "35.0" "56.18" "66.12" "51.15" "33.06" "24.29" "46.83" "28.09" "35.57"
47
+ "MiniMax-M2 (230B A10B)" "https://huggingface.co/MiniMaxAI/MiniMax-M2" "temperature: 1.0
48
+ top-p: 0.95" "MiniMaxAI" "1142.0" "325.0" "" "" "" "230.0" "Open" "Think" "On" "42.43" "48.75" "35.62" "53.39" "57.02" "43.1" "44.63" "28.57" "49.21" "30.06" "31.21"
49
  "A.X 4.0" "https://huggingface.co/skt/A.X-4.0" "" "SKT" "412.5" "412.5" "0.6553128957748413" "7.924791574478149" "57.95526130360478" "71.9" "Open" "Instruct" "Off" "41.59" "56.0" "43.75" "43.43" "42.56" "40.23" "15.7" "24.29" "53.97" "33.43" "32.21"
50
  "gpt-oss-20B (Reasoning: medium)" "https://huggingface.co/openai/gpt-oss-20b" "Reasoning: medium
51
  temperature: 1.0
 
58
  top-p: 0.95" "mistralai" "369.0" "369.0" "3.2450859546661377" "13.907460689544678" "36.382163796915904" "24.0" "Open" "Instruct" "Off" "39.09" "43.0" "44.69" "43.43" "51.65" "25.86" "22.31" "25.71" "51.98" "31.18" "30.2"
59
  "K2-Think" "https://huggingface.co/LLM360/K2-Think" "temperature: 1.0
60
  top-p: 0.95" "LLM360" "1835.0" "486.0" "24.29692639716904" "43.2994556427002" "42.72123101353567" "32.8" "Open" "Think" "On" "35.06" "35.5" "36.56" "56.18" "47.11" "35.06" "14.05" "12.86" "49.21" "21.63" "23.15"
61
+ "KAT Dev 72B Exp" "https://huggingface.co/Kwaipilot/KAT-Dev-72B-Exp" "temperature: 0.6
62
+ top-p: 0.95" "KAT" "397.0" "397.0" "0.0622165203094482" "8.492375493049622" "50.601864763867184" "72.0" "Open" "Instruct" "Off" "33.94" "29.25" "44.06" "46.22" "46.69" "25.86" "18.18" "20.0" "42.86" "25.56" "25.5"
63
+ "Olmo 3 32B Think" "https://huggingface.co/allenai/Olmo-3-32B-Think" "temperature: 1
64
+ top-p: 0.95
65
+ top-k: 50" "allenai" "3360.5" "473.0" "60.18788400716624" "77.51256728172302" "44.30514641537086" "32.0" "Open" "Think" "On" "33.94" "35.25" "30.94" "57.37" "66.53" "33.33" "28.93" "24.29" "34.52" "11.8" "19.8"
66
  "EXAONE 4.0 32B (think)" "https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B" "temperature: 0.6
67
  top-p: 0.95" "Exaone" "1274.5" "503.0" "40.64476558326666" "52.11687910556793" "51.19312170664125" "32.0" "Open" "Hybrid" "On" "33.82" "34.25" "29.38" "56.97" "57.44" "24.71" "27.27" "17.14" "38.49" "18.54" "25.5"
68
  "Apriel 1.5 15B Thinker" "https://huggingface.co/ServiceNow-AI/Apriel-1.5-15b-Thinker" "temperature: 0.6
src/data/open/stats_lang.csv CHANGED
@@ -1,9 +1,12 @@
1
  "Model Name" "Link" "Comment" "Group" "Med. Len." "Med. Resp. Len." "Time to First Answer Token" "End-to-End Response Time" "Speed" "Parameter Size (B)" "Type" "Model Type" "Think" "Overall" "KO" "EN" "JA" "ZH" "PL" "DE" "PT" "ES" "FR" "IT" "RU" "VI"
2
  "GPT-5 (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "70.73" "64.72" "65.83" "71.69" "67.68" "72.78" "71.27" "73.74" "75.68" "72.83" "77.05" "70.79" "75.61"
3
  "o3-pro (Reasoning: medium)" "https://platform.openai.com/docs/models/o3-pro" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "66.47" "63.61" "63.61" "69.28" "65.24" "63.89" "64.09" "68.16" "69.19" "70.11" "72.13" "62.36" "71.95"
 
 
4
  "Claude 4 Opus (20250514) (think)" "https://www.anthropic.com/claude/opus" "version: 20250514" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "63.29" "57.5" "62.5" "64.46" "62.8" "59.44" "65.19" "65.92" "60.54" "65.22" "65.57" "65.17" "72.56"
5
  "Claude 4.1 Opus (20250805) (think)" "https://www.anthropic.com/claude/opus" "version: 20250805" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "63.24" "58.33" "61.39" "60.84" "64.02" "61.67" "66.85" "68.16" "61.08" "65.76" "66.67" "65.73" "65.24"
6
  "GPT-5 mini (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5-mini" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "62.56" "57.5" "56.39" "62.65" "62.2" "63.89" "60.22" "66.48" "67.03" "70.11" "67.76" "66.29" "60.98"
 
7
  "Claude 4 Sonnet (20250514) (think)" "https://www.anthropic.com/claude/sonnet" "version: 20250514" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "61.8" "54.17" "59.17" "63.86" "64.63" "59.44" "61.33" "64.8" "62.16" "65.22" "67.21" "66.29" "64.02"
8
  "o3" "https://platform.openai.com/docs/models/o3" "" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "60.91" "57.5" "59.17" "61.45" "58.54" "61.11" "64.09" "60.89" "62.16" "63.59" "65.03" "54.49" "68.29"
9
  "Gemini 2.5 Pro" "https://deepmind.google/models/gemini/pro/" "" "Gemini" "" "" "" "" "" "" "Proprietary" "Think" "On" "59.34" "53.61" "57.78" "59.04" "57.93" "57.22" "56.91" "60.89" "63.24" "67.93" "62.3" "61.24" "60.98"
@@ -41,6 +44,8 @@ top-p: 0.95" "DeepSeek" "408.0" "408.0" "0.211452841758728" "23.47111320495605"
41
  top-p: 0.95" "Qwen" "1113.0" "390.0" "27.26490248867746" "39.635579228401184" "37.74973909656839" "32.8" "Open" "Hybrid" "On" "44.44" "38.89" "41.67" "48.8" "50.0" "38.33" "46.41" "44.69" "44.86" "44.57" "50.82" "46.07" "47.56"
42
  "Qwen3 30B A3B Instruct 2507" "https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507" "temperature: 0.7
43
  top-p: 0.8" "Qwen" "441.5" "441.5" "7.902002811431885" "19.310550212860107" "42.44958664990833" "30.0" "Open" "Instruct" "Off" "42.79" "34.44" "43.89" "40.96" "48.78" "38.89" "41.99" "46.93" "44.32" "42.93" "48.09" "43.26" "46.95"
 
 
44
  "A.X 4.0" "https://huggingface.co/skt/A.X-4.0" "" "SKT" "412.5" "412.5" "0.6553128957748413" "7.924791574478149" "57.95526130360478" "71.9" "Open" "Instruct" "Off" "41.59" "38.89" "41.11" "43.98" "49.39" "36.11" "45.86" "43.58" "44.32" "39.67" "43.17" "39.89" "36.59"
45
  "gpt-oss-20B (Reasoning: medium)" "https://huggingface.co/openai/gpt-oss-20b" "Reasoning: medium
46
  temperature: 1.0
@@ -53,6 +58,11 @@ top-p: 0.95" "Alibaba" "1147.0" "408.0" "45.23295979184195" "52.38741266727448"
53
  top-p: 0.95" "mistralai" "369.0" "369.0" "3.2450859546661377" "13.907460689544678" "36.382163796915904" "24.0" "Open" "Instruct" "Off" "39.09" "31.39" "40.0" "36.75" "42.07" "34.44" "44.2" "41.9" "42.16" "45.65" "40.98" "37.64" "38.41"
54
  "K2-Think" "https://huggingface.co/LLM360/K2-Think" "temperature: 1.0
55
  top-p: 0.95" "LLM360" "1835.0" "486.0" "24.29692639716904" "43.2994556427002" "42.72123101353567" "32.8" "Open" "Think" "On" "35.06" "29.17" "36.11" "30.12" "44.51" "26.67" "33.15" "38.55" "37.84" "41.85" "37.7" "33.71" "36.59"
 
 
 
 
 
56
  "EXAONE 4.0 32B (think)" "https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B" "temperature: 0.6
57
  top-p: 0.95" "Exaone" "1274.5" "503.0" "40.64476558326666" "52.11687910556793" "51.19312170664125" "32.0" "Open" "Hybrid" "On" "33.82" "33.61" "38.33" "28.92" "35.98" "26.11" "35.91" "34.08" "38.92" "35.33" "33.88" "28.09" "31.71"
58
  "Apriel 1.5 15B Thinker" "https://huggingface.co/ServiceNow-AI/Apriel-1.5-15b-Thinker" "temperature: 0.6
 
1
  "Model Name" "Link" "Comment" "Group" "Med. Len." "Med. Resp. Len." "Time to First Answer Token" "End-to-End Response Time" "Speed" "Parameter Size (B)" "Type" "Model Type" "Think" "Overall" "KO" "EN" "JA" "ZH" "PL" "DE" "PT" "ES" "FR" "IT" "RU" "VI"
2
  "GPT-5 (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "70.73" "64.72" "65.83" "71.69" "67.68" "72.78" "71.27" "73.74" "75.68" "72.83" "77.05" "70.79" "75.61"
3
  "o3-pro (Reasoning: medium)" "https://platform.openai.com/docs/models/o3-pro" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "66.47" "63.61" "63.61" "69.28" "65.24" "63.89" "64.09" "68.16" "69.19" "70.11" "72.13" "62.36" "71.95"
4
+ "GPT-5.1 (Reasoning: medium, verbosity: medium)" "https://platform.openai.com/docs/models/gpt-5.1" "Reasoning: medium, verbosity: medium" "GPT" "" "" "" "11.673096776008606" "" "" "Proprietary" "Think" "On" "64.57" "57.78" "62.5" "65.06" "62.8" "65.56" "60.22" "65.36" "68.11" "74.46" "70.49" "67.42" "63.41"
5
+ "Claude 4.5 Opus (think)" "https://www.anthropic.com/claude/opus" "" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "63.41" "59.44" "60.28" "66.27" "64.02" "66.67" "65.19" "63.69" "62.16" "63.59" "64.48" "65.73" "67.07"
6
  "Claude 4 Opus (20250514) (think)" "https://www.anthropic.com/claude/opus" "version: 20250514" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "63.29" "57.5" "62.5" "64.46" "62.8" "59.44" "65.19" "65.92" "60.54" "65.22" "65.57" "65.17" "72.56"
7
  "Claude 4.1 Opus (20250805) (think)" "https://www.anthropic.com/claude/opus" "version: 20250805" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "63.24" "58.33" "61.39" "60.84" "64.02" "61.67" "66.85" "68.16" "61.08" "65.76" "66.67" "65.73" "65.24"
8
  "GPT-5 mini (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5-mini" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "62.56" "57.5" "56.39" "62.65" "62.2" "63.89" "60.22" "66.48" "67.03" "70.11" "67.76" "66.29" "60.98"
9
+ "Gemini 3 Pro Preview (Thinking Level: High)" "" "" "Gemini" "1930.5" "378.0" "" "27.89457416534424" "" "" "Proprietary" "Think" "On" "62.48" "59.44" "60.56" "60.24" "62.2" "61.67" "65.19" "63.13" "64.32" "65.76" "65.57" "64.04" "62.2"
10
  "Claude 4 Sonnet (20250514) (think)" "https://www.anthropic.com/claude/sonnet" "version: 20250514" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "61.8" "54.17" "59.17" "63.86" "64.63" "59.44" "61.33" "64.8" "62.16" "65.22" "67.21" "66.29" "64.02"
11
  "o3" "https://platform.openai.com/docs/models/o3" "" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "60.91" "57.5" "59.17" "61.45" "58.54" "61.11" "64.09" "60.89" "62.16" "63.59" "65.03" "54.49" "68.29"
12
  "Gemini 2.5 Pro" "https://deepmind.google/models/gemini/pro/" "" "Gemini" "" "" "" "" "" "" "Proprietary" "Think" "On" "59.34" "53.61" "57.78" "59.04" "57.93" "57.22" "56.91" "60.89" "63.24" "67.93" "62.3" "61.24" "60.98"
 
44
  top-p: 0.95" "Qwen" "1113.0" "390.0" "27.26490248867746" "39.635579228401184" "37.74973909656839" "32.8" "Open" "Hybrid" "On" "44.44" "38.89" "41.67" "48.8" "50.0" "38.33" "46.41" "44.69" "44.86" "44.57" "50.82" "46.07" "47.56"
45
  "Qwen3 30B A3B Instruct 2507" "https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507" "temperature: 0.7
46
  top-p: 0.8" "Qwen" "441.5" "441.5" "7.902002811431885" "19.310550212860107" "42.44958664990833" "30.0" "Open" "Instruct" "Off" "42.79" "34.44" "43.89" "40.96" "48.78" "38.89" "41.99" "46.93" "44.32" "42.93" "48.09" "43.26" "46.95"
47
+ "MiniMax-M2 (230B A10B)" "https://huggingface.co/MiniMaxAI/MiniMax-M2" "temperature: 1.0
48
+ top-p: 0.95" "MiniMaxAI" "1142.0" "325.0" "" "" "" "230.0" "Open" "Think" "On" "42.43" "31.94" "46.11" "37.35" "45.73" "38.33" "45.3" "45.25" "48.65" "41.3" "46.45" "42.7" "46.95"
49
  "A.X 4.0" "https://huggingface.co/skt/A.X-4.0" "" "SKT" "412.5" "412.5" "0.6553128957748413" "7.924791574478149" "57.95526130360478" "71.9" "Open" "Instruct" "Off" "41.59" "38.89" "41.11" "43.98" "49.39" "36.11" "45.86" "43.58" "44.32" "39.67" "43.17" "39.89" "36.59"
50
  "gpt-oss-20B (Reasoning: medium)" "https://huggingface.co/openai/gpt-oss-20b" "Reasoning: medium
51
  temperature: 1.0
 
58
  top-p: 0.95" "mistralai" "369.0" "369.0" "3.2450859546661377" "13.907460689544678" "36.382163796915904" "24.0" "Open" "Instruct" "Off" "39.09" "31.39" "40.0" "36.75" "42.07" "34.44" "44.2" "41.9" "42.16" "45.65" "40.98" "37.64" "38.41"
59
  "K2-Think" "https://huggingface.co/LLM360/K2-Think" "temperature: 1.0
60
  top-p: 0.95" "LLM360" "1835.0" "486.0" "24.29692639716904" "43.2994556427002" "42.72123101353567" "32.8" "Open" "Think" "On" "35.06" "29.17" "36.11" "30.12" "44.51" "26.67" "33.15" "38.55" "37.84" "41.85" "37.7" "33.71" "36.59"
61
+ "KAT Dev 72B Exp" "https://huggingface.co/Kwaipilot/KAT-Dev-72B-Exp" "temperature: 0.6
62
+ top-p: 0.95" "KAT" "397.0" "397.0" "0.0622165203094482" "8.492375493049622" "50.601864763867184" "72.0" "Open" "Instruct" "Off" "33.94" "25.0" "32.22" "31.93" "37.2" "34.44" "33.15" "43.02" "37.84" "36.96" "37.7" "30.34" "38.41"
63
+ "Olmo 3 32B Think" "https://huggingface.co/allenai/Olmo-3-32B-Think" "temperature: 1
64
+ top-p: 0.95
65
+ top-k: 50" "allenai" "3360.5" "473.0" "60.18788400716624" "77.51256728172302" "44.30514641537086" "32.0" "Open" "Think" "On" "33.94" "30.56" "41.39" "30.12" "31.1" "25.0" "34.25" "35.75" "33.51" "36.41" "37.16" "31.46" "35.98"
66
  "EXAONE 4.0 32B (think)" "https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B" "temperature: 0.6
67
  top-p: 0.95" "Exaone" "1274.5" "503.0" "40.64476558326666" "52.11687910556793" "51.19312170664125" "32.0" "Open" "Hybrid" "On" "33.82" "33.61" "38.33" "28.92" "35.98" "26.11" "35.91" "34.08" "38.92" "35.33" "33.88" "28.09" "31.71"
68
  "Apriel 1.5 15B Thinker" "https://huggingface.co/ServiceNow-AI/Apriel-1.5-15b-Thinker" "temperature: 0.6
src/data/open/time_data.json CHANGED
@@ -1,4 +1,194 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "Claude 4.1 Opus (20250805) (think)": {
3
  "NUM_GPUS": 0,
4
  "Overall": {
@@ -1329,6 +1519,240 @@
1329
  }
1330
  }
1331
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1332
  "Solar Pro Preview (top_p:0.95, temp: 0.7)": {
1333
  "NUM_GPUS": 1,
1334
  "Overall": {
@@ -3039,37 +3463,227 @@
3039
  }
3040
  }
3041
  },
3042
- "GLM-4.5 FP8 (think)": {
3043
- "NUM_GPUS": 8,
3044
  "Overall": {
3045
  "Time to Answer": {
3046
- "Min": 0.11270952224731445,
3047
- "Max": 1084.7877391024863,
3048
- "Med": 25.261904125875603
3049
  },
3050
  "Latency": {
3051
- "Min": 2.203545331954956,
3052
- "Max": 2499.599281311035,
3053
- "Med": 62.74959444999695
3054
  },
3055
  "Speed": {
3056
- "Min": 19.420678190531984,
3057
- "Max": 38.97772164575481,
3058
- "Med": 23.293980879127712
3059
  }
3060
  },
3061
  "Content Generation": {
3062
  "Time to Answer": {
3063
- "Min": 3.702089722433614,
3064
- "Max": 278.2958468033817,
3065
- "Med": 21.031848154986903
3066
  },
3067
  "Latency": {
3068
- "Min": 14.542505025863647,
3069
- "Max": 357.45922803878784,
3070
- "Med": 52.71355986595154
3071
- },
3072
- "Speed": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3073
  "Min": 19.45536289987673,
3074
  "Max": 27.213499913336133,
3075
  "Med": 23.29757774645036
@@ -3419,105 +4033,295 @@
3419
  }
3420
  }
3421
  },
3422
- "gpt-oss-120B (Reasoning: medium)": {
3423
- "NUM_GPUS": 8,
3424
  "Overall": {
3425
  "Time to Answer": {
3426
  "Min": 0,
3427
- "Max": 101.66583281847353,
3428
- "Med": 7.694922740481965
3429
  },
3430
  "Latency": {
3431
  "Min": 0,
3432
- "Max": 108.71509218215942,
3433
- "Med": 12.121336698532104
3434
  },
3435
  "Speed": {
3436
  "Min": -1.0,
3437
- "Max": 295.4744570001622,
3438
- "Med": 103.31935460342275
3439
  }
3440
  },
3441
  "Content Generation": {
3442
  "Time to Answer": {
3443
- "Min": 1.4268165264489516,
3444
- "Max": 76.09343232158227,
3445
- "Med": 5.27987206336147
3446
  },
3447
  "Latency": {
3448
- "Min": 2.564422369003296,
3449
- "Max": 77.78296256065369,
3450
- "Med": 12.131241917610168
3451
  },
3452
  "Speed": {
3453
- "Min": 39.854399049254106,
3454
- "Max": 164.11560898062044,
3455
- "Med": 102.26319280893972
3456
  }
3457
  },
3458
  "Editing": {
3459
  "Time to Answer": {
3460
  "Min": 0,
3461
- "Max": 34.97314937730854,
3462
- "Med": 7.72154927398273
3463
  },
3464
  "Latency": {
3465
  "Min": 0,
3466
- "Max": 38.421292781829834,
3467
- "Med": 10.624043703079224
3468
  },
3469
  "Speed": {
3470
  "Min": -1.0,
3471
- "Max": 295.4744570001622,
3472
- "Med": 97.16836666526689
3473
  }
3474
  },
3475
  "Data Analysis": {
3476
  "Time to Answer": {
3477
- "Min": 1.5500787364112005,
3478
- "Max": 47.44580010794223,
3479
- "Med": 7.938084126425333
3480
  },
3481
  "Latency": {
3482
- "Min": 2.21130108833313,
3483
- "Max": 48.52851939201355,
3484
- "Med": 10.561246871948242
3485
  },
3486
  "Speed": {
3487
- "Min": 33.67196833472543,
3488
- "Max": 218.4742163852268,
3489
- "Med": 111.7668417486227
3490
  }
3491
  },
3492
  "Reasoning": {
3493
  "Time to Answer": {
3494
- "Min": 1.313997881221957,
3495
- "Max": 71.11774356968237,
3496
- "Med": 9.761283050834567
3497
  },
3498
  "Latency": {
3499
- "Min": 2.4778506755828857,
3500
- "Max": 77.51551747322083,
3501
- "Med": 14.6117924451828
3502
  },
3503
  "Speed": {
3504
- "Min": 52.34248336770566,
3505
- "Max": 162.3515087876845,
3506
- "Med": 121.07198234072999
3507
  }
3508
  },
3509
  "Hallucination": {
3510
  "Time to Answer": {
3511
- "Min": 2.2739883409985806,
3512
- "Max": 25.163233752980585,
3513
- "Med": 7.324965414201975
3514
  },
3515
  "Latency": {
3516
- "Min": 3.2160396575927734,
3517
- "Max": 41.42578959465027,
3518
- "Med": 13.071247458457947
3519
- },
3520
- "Speed": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3521
  "Min": 36.912669174553955,
3522
  "Max": 207.27878278068064,
3523
  "Med": 124.86484051787805
@@ -4369,7 +5173,7 @@
4369
  }
4370
  }
4371
  },
4372
- "Claude 4 Opus (20250514) (think)": {
4373
  "NUM_GPUS": 0,
4374
  "Overall": {
4375
  "Time to Answer": {
@@ -4559,7 +5363,7 @@
4559
  }
4560
  }
4561
  },
4562
- "Gemini 2.5 Pro": {
4563
  "NUM_GPUS": 0,
4564
  "Overall": {
4565
  "Time to Answer": {
@@ -4749,71 +5553,261 @@
4749
  }
4750
  }
4751
  },
4752
- "Tongyi DeepResearch 30B A3B": {
4753
- "NUM_GPUS": 4,
4754
  "Overall": {
4755
  "Time to Answer": {
4756
- "Min": 1.4505500793457031,
4757
- "Max": 244.41708384257143,
4758
- "Med": 45.23295979184195
4759
  },
4760
  "Latency": {
4761
- "Min": 9.191470384597778,
4762
- "Max": 749.16233086586,
4763
- "Med": 52.387412667274475
4764
  },
4765
  "Speed": {
4766
- "Min": 18.069738498345682,
4767
- "Max": 122.37478932044478,
4768
- "Med": 62.676624491545525
4769
  }
4770
  },
4771
  "Content Generation": {
4772
  "Time to Answer": {
4773
- "Min": 4.802471643031882,
4774
- "Max": 203.55154156596308,
4775
- "Med": 41.43268650270611
4776
  },
4777
  "Latency": {
4778
- "Min": 22.65742540359497,
4779
- "Max": 738.4437143802643,
4780
- "Med": 51.50689494609833
4781
  },
4782
  "Speed": {
4783
- "Min": 23.70791793357093,
4784
- "Max": 118.23891726695051,
4785
- "Med": 61.95675692618596
4786
  }
4787
  },
4788
  "Editing": {
4789
  "Time to Answer": {
4790
- "Min": 9.566574335098267,
4791
- "Max": 106.69052745386706,
4792
- "Med": 43.39687190468506
4793
  },
4794
  "Latency": {
4795
- "Min": 11.078340530395508,
4796
- "Max": 716.9541938304901,
4797
- "Med": 48.262219190597534
4798
  },
4799
  "Speed": {
4800
- "Min": 21.45800243159038,
4801
- "Max": 110.85390192747607,
4802
- "Med": 60.728454906690686
4803
  }
4804
  },
4805
  "Data Analysis": {
4806
  "Time to Answer": {
4807
- "Min": 11.874105177737047,
4808
- "Max": 231.70031813596688,
4809
- "Med": 45.77187383012706
4810
  },
4811
  "Latency": {
4812
- "Min": 22.660792589187622,
4813
- "Max": 716.5539243221283,
4814
- "Med": 51.057066202163696
4815
- },
4816
- "Speed": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4817
  "Min": 18.806653004549222,
4818
  "Max": 113.17314697322944,
4819
  "Med": 61.908515815844005
@@ -5509,387 +6503,621 @@
5509
  }
5510
  }
5511
  },
5512
- "gpt-oss-20B (Reasoning: medium)": {
5513
- "NUM_GPUS": 4,
5514
  "Overall": {
5515
  "Time to Answer": {
5516
- "Min": 2.0265204472169556,
5517
- "Max": 129.33762687935325,
5518
- "Med": 26.04652036871504
 
 
 
 
5519
  },
5520
  "Latency": {
5521
- "Min": 7.263976097106934,
5522
- "Max": 138.10640954971313,
5523
- "Med": 29.767700791358948
5524
  },
5525
  "Speed": {
5526
- "Min": 5.317348253806318,
5527
- "Max": 369.6802851223203,
5528
- "Med": 108.53633696847938
5529
  }
5530
  },
5531
  "Content Generation": {
5532
  "Time to Answer": {
5533
- "Min": 2.919738582967988,
5534
- "Max": 69.87850048414344,
5535
- "Med": 25.258961631303542
 
 
 
 
5536
  },
5537
  "Latency": {
5538
- "Min": 7.263976097106934,
5539
- "Max": 73.72067332267761,
5540
- "Med": 29.7125141620636
5541
  },
5542
  "Speed": {
5543
- "Min": 13.604860508942371,
5544
- "Max": 224.07024522186745,
5545
- "Med": 124.14591263963385
5546
  }
5547
  },
5548
  "Editing": {
5549
  "Time to Answer": {
5550
- "Min": 2.0265204472169556,
5551
- "Max": 49.25446497234138,
5552
- "Med": 25.30023380826225
 
 
 
 
5553
  },
5554
  "Latency": {
5555
- "Min": 7.319023847579956,
5556
- "Max": 56.10624122619629,
5557
- "Med": 27.072497606277466
5558
  },
5559
  "Speed": {
5560
- "Min": 12.140381961027476,
5561
- "Max": 250.484990902761,
5562
- "Med": 104.93834609385715
5563
  }
5564
  },
5565
  "Data Analysis": {
5566
  "Time to Answer": {
5567
- "Min": 3.9151465271321517,
5568
- "Max": 75.99197716704057,
5569
- "Med": 26.328562295325447
 
 
 
 
5570
  },
5571
  "Latency": {
5572
- "Min": 7.286376476287842,
5573
- "Max": 84.5227108001709,
5574
- "Med": 28.301609992980957
5575
  },
5576
  "Speed": {
5577
- "Min": 11.70440500661996,
5578
- "Max": 306.1703155318347,
5579
- "Med": 113.21800589706349
5580
  }
5581
  },
5582
  "Reasoning": {
5583
  "Time to Answer": {
5584
- "Min": 8.324975468895651,
5585
- "Max": 129.33762687935325,
5586
- "Med": 27.582642474460418
 
 
 
 
5587
  },
5588
  "Latency": {
5589
- "Min": 18.050434589385986,
5590
- "Max": 138.10640954971313,
5591
- "Med": 32.32542634010315
5592
  },
5593
  "Speed": {
5594
- "Min": 17.82180729362148,
5595
- "Max": 207.91746863187615,
5596
- "Med": 106.44936231341633
5597
  }
5598
  },
5599
  "Hallucination": {
5600
  "Time to Answer": {
5601
- "Min": 5.195440284614488,
5602
- "Max": 61.22046760794428,
5603
- "Med": 25.105323415675343
 
 
 
 
5604
  },
5605
  "Latency": {
5606
- "Min": 7.313647270202637,
5607
- "Max": 63.91348838806152,
5608
- "Med": 32.319284319877625
5609
  },
5610
  "Speed": {
5611
- "Min": 18.438031290688347,
5612
- "Max": 226.50662559152707,
5613
- "Med": 110.66899398987842
5614
  }
5615
  },
5616
  "Safety": {
5617
  "Time to Answer": {
5618
- "Min": 4.932410193462053,
5619
- "Max": 47.165975079516905,
5620
- "Med": 23.45146352177868
 
 
 
 
5621
  },
5622
  "Latency": {
5623
- "Min": 7.2830750942230225,
5624
- "Max": 53.09182548522949,
5625
- "Med": 24.52879786491394
5626
  },
5627
  "Speed": {
5628
- "Min": 5.317348253806318,
5629
- "Max": 250.7582211510182,
5630
- "Med": 90.91284402754488
5631
  }
5632
  },
5633
  "Repetition": {
5634
  "Time to Answer": {
5635
- "Min": 2.557051893849964,
5636
- "Max": 120.32689256267814,
5637
- "Med": 28.832852398544777
 
 
 
 
5638
  },
5639
  "Latency": {
5640
- "Min": 20.129476308822632,
5641
- "Max": 125.98315095901489,
5642
- "Med": 33.87077188491821
5643
  },
5644
  "Speed": {
5645
- "Min": 27.080736058951963,
5646
- "Max": 266.27309790215196,
5647
- "Med": 120.19876605631327
5648
  }
5649
  },
5650
  "Summarization": {
5651
  "Time to Answer": {
5652
- "Min": 2.4394841513682888,
5653
- "Max": 113.86724343465069,
5654
- "Med": 23.029374821644574
 
 
 
 
5655
  },
5656
  "Latency": {
5657
- "Min": 13.429885149002075,
5658
- "Max": 115.32083773612976,
5659
- "Med": 25.605836629867554
5660
  },
5661
  "Speed": {
5662
- "Min": 10.53590514256035,
5663
- "Max": 304.7862966713593,
5664
- "Med": 101.49503613110383
5665
  }
5666
  },
5667
  "Translation": {
5668
  "Time to Answer": {
5669
- "Min": 4.240170876932201,
5670
- "Max": 72.23964902074354,
5671
- "Med": 26.80100677708995
 
 
 
 
5672
  },
5673
  "Latency": {
5674
- "Min": 17.525670528411865,
5675
- "Max": 115.61775875091553,
5676
- "Med": 30.075977206230164
5677
  },
5678
  "Speed": {
5679
- "Min": 22.395176057735203,
5680
- "Max": 369.6802851223203,
5681
- "Med": 122.92398147980118
5682
  }
5683
  },
5684
  "Multi-Turn": {
5685
  "Time to Answer": {
5686
- "Min": 9.995788375397986,
5687
- "Max": 91.21509669950703,
5688
- "Med": 39.29442425858453
 
 
 
 
5689
  },
5690
  "Latency": {
5691
- "Min": 16.48517942428589,
5692
- "Max": 112.05223345756531,
5693
- "Med": 66.7337509393692
5694
  },
5695
  "Speed": {
5696
- "Min": 21.53344632125304,
5697
- "Max": 273.4009542241851,
5698
- "Med": 87.97283614240237
5699
  }
5700
  }
5701
  },
5702
- "o3-pro (Reasoning: medium)": {
5703
- "NUM_GPUS": 0,
5704
  "Overall": {
5705
  "Time to Answer": {
5706
  "Min": 0,
5707
- "Max": 0,
5708
- "Med": 0.0
5709
  },
5710
  "Latency": {
5711
  "Min": 0,
5712
- "Max": 0,
5713
- "Med": 0.0
5714
  },
5715
  "Speed": {
5716
- "Min": 0.0,
5717
- "Max": 0.0,
5718
- "Med": 0.0
5719
  }
5720
  },
5721
  "Content Generation": {
5722
  "Time to Answer": {
5723
- "Min": 0,
5724
- "Max": 0,
5725
- "Med": 0.0
5726
  },
5727
  "Latency": {
5728
- "Min": 0,
5729
- "Max": 0,
5730
- "Med": 0.0
5731
  },
5732
  "Speed": {
5733
- "Min": 0.0,
5734
- "Max": 0.0,
5735
- "Med": 0.0
5736
  }
5737
  },
5738
  "Editing": {
5739
  "Time to Answer": {
5740
- "Min": 0,
5741
- "Max": 0,
5742
- "Med": 0.0
5743
  },
5744
  "Latency": {
5745
- "Min": 0,
5746
- "Max": 0,
5747
- "Med": 0.0
5748
  },
5749
  "Speed": {
5750
- "Min": 0.0,
5751
- "Max": 0.0,
5752
- "Med": 0.0
5753
  }
5754
  },
5755
  "Data Analysis": {
5756
  "Time to Answer": {
5757
- "Min": 0,
5758
- "Max": 0,
5759
- "Med": 0.0
5760
  },
5761
  "Latency": {
5762
- "Min": 0,
5763
- "Max": 0,
5764
- "Med": 0.0
5765
  },
5766
  "Speed": {
5767
- "Min": 0.0,
5768
- "Max": 0.0,
5769
- "Med": 0.0
5770
  }
5771
  },
5772
  "Reasoning": {
5773
  "Time to Answer": {
5774
- "Min": 0,
5775
- "Max": 0,
5776
- "Med": 0.0
5777
  },
5778
  "Latency": {
5779
- "Min": 0,
5780
- "Max": 0,
5781
- "Med": 0.0
5782
  },
5783
  "Speed": {
5784
- "Min": 0.0,
5785
- "Max": 0.0,
5786
- "Med": 0.0
5787
  }
5788
  },
5789
  "Hallucination": {
5790
  "Time to Answer": {
5791
- "Min": 0,
5792
- "Max": 0,
5793
- "Med": 0.0
5794
  },
5795
  "Latency": {
5796
- "Min": 0,
5797
- "Max": 0,
5798
- "Med": 0.0
5799
  },
5800
  "Speed": {
5801
- "Min": 0.0,
5802
- "Max": 0.0,
5803
- "Med": 0.0
5804
  }
5805
  },
5806
  "Safety": {
5807
  "Time to Answer": {
5808
- "Min": 0,
5809
- "Max": 0,
5810
- "Med": 0.0
5811
  },
5812
  "Latency": {
5813
- "Min": 0,
5814
- "Max": 0,
5815
- "Med": 0.0
5816
  },
5817
  "Speed": {
5818
- "Min": 0.0,
5819
- "Max": 0.0,
5820
- "Med": 0.0
5821
  }
5822
  },
5823
  "Repetition": {
5824
  "Time to Answer": {
5825
- "Min": 0,
5826
- "Max": 0,
5827
- "Med": 0.0
5828
  },
5829
  "Latency": {
5830
- "Min": 0,
5831
- "Max": 0,
5832
- "Med": 0.0
5833
  },
5834
  "Speed": {
5835
- "Min": 0.0,
5836
- "Max": 0.0,
5837
- "Med": 0.0
5838
  }
5839
  },
5840
  "Summarization": {
5841
  "Time to Answer": {
5842
- "Min": 0,
5843
- "Max": 0,
5844
- "Med": 0.0
5845
  },
5846
  "Latency": {
5847
- "Min": 0,
5848
- "Max": 0,
5849
- "Med": 0.0
5850
  },
5851
  "Speed": {
5852
- "Min": 0.0,
5853
- "Max": 0.0,
5854
- "Med": 0.0
5855
  }
5856
  },
5857
  "Translation": {
5858
  "Time to Answer": {
5859
- "Min": 0,
5860
- "Max": 0,
5861
- "Med": 0.0
5862
  },
5863
  "Latency": {
5864
- "Min": 0,
5865
- "Max": 0,
5866
- "Med": 0.0
5867
  },
5868
  "Speed": {
5869
- "Min": 0.0,
5870
- "Max": 0.0,
5871
- "Med": 0.0
5872
  }
5873
  },
5874
  "Multi-Turn": {
5875
  "Time to Answer": {
5876
  "Min": 0,
5877
- "Max": 0,
5878
- "Med": 0.0
5879
  },
5880
  "Latency": {
5881
  "Min": 0,
5882
- "Max": 0,
5883
- "Med": 0.0
5884
  },
5885
  "Speed": {
5886
- "Min": 0.0,
5887
- "Max": 0.0,
5888
- "Med": 0.0
5889
  }
5890
  }
5891
  },
5892
- "Grok-4": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5893
  "NUM_GPUS": 0,
5894
  "Overall": {
5895
  "Time to Answer": {
 
1
  {
2
+ "Olmo 3 32B Think": {
3
+ "NUM_GPUS": 4,
4
+ "Overall": {
5
+ "Time to Answer": {
6
+ "Min": 0.04750800132751465,
7
+ "Max": 662.4548862211922,
8
+ "Med": 60.18788400716624
9
+ },
10
+ "Latency": {
11
+ "Min": 4.962059259414673,
12
+ "Max": 1685.2101354599,
13
+ "Med": 77.51256728172302
14
+ },
15
+ "Speed": {
16
+ "Min": 27.866160798473338,
17
+ "Max": 61.32207413470597,
18
+ "Med": 44.30514641537086
19
+ }
20
+ },
21
+ "Content Generation": {
22
+ "Time to Answer": {
23
+ "Min": 9.247790738980477,
24
+ "Max": 535.0928019830272,
25
+ "Med": 54.36686619573619
26
+ },
27
+ "Latency": {
28
+ "Min": 15.104989528656006,
29
+ "Max": 1151.913678407669,
30
+ "Med": 73.72976446151733
31
+ },
32
+ "Speed": {
33
+ "Min": 28.739949330273706,
34
+ "Max": 56.69105335090586,
35
+ "Med": 44.640501961119014
36
+ }
37
+ },
38
+ "Editing": {
39
+ "Time to Answer": {
40
+ "Min": 9.188008042039543,
41
+ "Max": 332.63859022997735,
42
+ "Med": 53.86499203972291
43
+ },
44
+ "Latency": {
45
+ "Min": 12.425836563110352,
46
+ "Max": 373.80425238609314,
47
+ "Med": 64.4289436340332
48
+ },
49
+ "Speed": {
50
+ "Min": 29.70057087539234,
51
+ "Max": 56.003630745295354,
52
+ "Med": 44.42532373669283
53
+ }
54
+ },
55
+ "Data Analysis": {
56
+ "Time to Answer": {
57
+ "Min": 6.513707979240609,
58
+ "Max": 662.4548862211922,
59
+ "Med": 50.387367917383386
60
+ },
61
+ "Latency": {
62
+ "Min": 8.310109853744507,
63
+ "Max": 688.9015896320343,
64
+ "Med": 60.394060373306274
65
+ },
66
+ "Speed": {
67
+ "Min": 27.866160798473338,
68
+ "Max": 55.563448472039894,
69
+ "Med": 44.31595511727765
70
+ }
71
+ },
72
+ "Reasoning": {
73
+ "Time to Answer": {
74
+ "Min": 11.777561432543752,
75
+ "Max": 650.9476703300404,
76
+ "Med": 77.94728694034356
77
+ },
78
+ "Latency": {
79
+ "Min": 16.024362087249756,
80
+ "Max": 668.085782289505,
81
+ "Med": 88.89124500751495
82
+ },
83
+ "Speed": {
84
+ "Min": 31.092492474628955,
85
+ "Max": 50.6252779439028,
86
+ "Med": 44.27561038703696
87
+ }
88
+ },
89
+ "Hallucination": {
90
+ "Time to Answer": {
91
+ "Min": 0.04750800132751465,
92
+ "Max": 244.72053700062895,
93
+ "Med": 31.597100689212525
94
+ },
95
+ "Latency": {
96
+ "Min": 5.460567951202393,
97
+ "Max": 327.6873710155487,
98
+ "Med": 55.2690349817276
99
+ },
100
+ "Speed": {
101
+ "Min": 30.190577882159456,
102
+ "Max": 51.843879801237385,
103
+ "Med": 44.90390378879441
104
+ }
105
+ },
106
+ "Safety": {
107
+ "Time to Answer": {
108
+ "Min": 4.24024046375638,
109
+ "Max": 170.15584615909734,
110
+ "Med": 21.908013919514374
111
+ },
112
+ "Latency": {
113
+ "Min": 4.962059259414673,
114
+ "Max": 181.9493372440338,
115
+ "Med": 47.42558240890503
116
+ },
117
+ "Speed": {
118
+ "Min": 29.473423162196262,
119
+ "Max": 49.371541840187,
120
+ "Med": 43.69696811113183
121
+ }
122
+ },
123
+ "Repetition": {
124
+ "Time to Answer": {
125
+ "Min": 22.753700505019047,
126
+ "Max": 561.4907359476722,
127
+ "Med": 99.96764908013014
128
+ },
129
+ "Latency": {
130
+ "Min": 29.67628502845764,
131
+ "Max": 1685.2101354599,
132
+ "Med": 124.29333961009979
133
+ },
134
+ "Speed": {
135
+ "Min": 29.66526343233663,
136
+ "Max": 51.682686076605144,
137
+ "Med": 43.84949112639627
138
+ }
139
+ },
140
+ "Summarization": {
141
+ "Time to Answer": {
142
+ "Min": 6.697763084475674,
143
+ "Max": 486.0299537912613,
144
+ "Med": 42.976535539723244
145
+ },
146
+ "Latency": {
147
+ "Min": 8.786500215530396,
148
+ "Max": 489.2409255504608,
149
+ "Med": 51.56357514858246
150
+ },
151
+ "Speed": {
152
+ "Min": 29.2644856978122,
153
+ "Max": 48.69414303312388,
154
+ "Med": 43.39629490720476
155
+ }
156
+ },
157
+ "Translation": {
158
+ "Time to Answer": {
159
+ "Min": 15.65578042784481,
160
+ "Max": 361.4257761741054,
161
+ "Med": 94.50096548687068
162
+ },
163
+ "Latency": {
164
+ "Min": 18.458808422088623,
165
+ "Max": 368.57612133026123,
166
+ "Med": 104.97938454151154
167
+ },
168
+ "Speed": {
169
+ "Min": 29.43716166031538,
170
+ "Max": 53.272603690387285,
171
+ "Med": 43.679275761958166
172
+ }
173
+ },
174
+ "Multi-Turn": {
175
+ "Time to Answer": {
176
+ "Min": 16.31281149502611,
177
+ "Max": 455.0286171197091,
178
+ "Med": 98.93747010243024
179
+ },
180
+ "Latency": {
181
+ "Min": 22.590834856033325,
182
+ "Max": 506.03700613975525,
183
+ "Med": 158.81773710250854
184
+ },
185
+ "Speed": {
186
+ "Min": 36.85509319068589,
187
+ "Max": 61.32207413470597,
188
+ "Med": 44.533473375170736
189
+ }
190
+ }
191
+ },
192
  "Claude 4.1 Opus (20250805) (think)": {
193
  "NUM_GPUS": 0,
194
  "Overall": {
 
1519
  }
1520
  }
1521
  },
1522
+ "Gemini 3 Pro Preview (Thinking Level: High)": {
1523
+ "NUM_GPUS": -1,
1524
+ "Overall": {
1525
+ "Time to Answer": {
1526
+ "Min": [
1527
+ 0
1528
+ ],
1529
+ "Max": [
1530
+ 0
1531
+ ],
1532
+ "Med": 0.0
1533
+ },
1534
+ "Latency": {
1535
+ "Min": 0,
1536
+ "Max": 169.1725790500641,
1537
+ "Med": 27.89457416534424
1538
+ },
1539
+ "Speed": {
1540
+ "Min": 0,
1541
+ "Max": 0,
1542
+ "Med": 0.0
1543
+ }
1544
+ },
1545
+ "Content Generation": {
1546
+ "Time to Answer": {
1547
+ "Min": [
1548
+ 0
1549
+ ],
1550
+ "Max": [
1551
+ 0
1552
+ ],
1553
+ "Med": 0.0
1554
+ },
1555
+ "Latency": {
1556
+ "Min": 0,
1557
+ "Max": 168.15567064285278,
1558
+ "Med": 30.950587153434753
1559
+ },
1560
+ "Speed": {
1561
+ "Min": 0,
1562
+ "Max": 0,
1563
+ "Med": 0.0
1564
+ }
1565
+ },
1566
+ "Editing": {
1567
+ "Time to Answer": {
1568
+ "Min": [
1569
+ 0
1570
+ ],
1571
+ "Max": [
1572
+ 0
1573
+ ],
1574
+ "Med": 0.0
1575
+ },
1576
+ "Latency": {
1577
+ "Min": 5.864927530288696,
1578
+ "Max": 109.41859698295593,
1579
+ "Med": 23.469240069389343
1580
+ },
1581
+ "Speed": {
1582
+ "Min": 0,
1583
+ "Max": 0,
1584
+ "Med": 0.0
1585
+ }
1586
+ },
1587
+ "Data Analysis": {
1588
+ "Time to Answer": {
1589
+ "Min": [
1590
+ 0
1591
+ ],
1592
+ "Max": [
1593
+ 0
1594
+ ],
1595
+ "Med": 0.0
1596
+ },
1597
+ "Latency": {
1598
+ "Min": 6.848255395889282,
1599
+ "Max": 151.04712963104248,
1600
+ "Med": 20.09416127204895
1601
+ },
1602
+ "Speed": {
1603
+ "Min": 0,
1604
+ "Max": 0,
1605
+ "Med": 0.0
1606
+ }
1607
+ },
1608
+ "Reasoning": {
1609
+ "Time to Answer": {
1610
+ "Min": [
1611
+ 0
1612
+ ],
1613
+ "Max": [
1614
+ 0
1615
+ ],
1616
+ "Med": 0.0
1617
+ },
1618
+ "Latency": {
1619
+ "Min": 0,
1620
+ "Max": 165.32855772972107,
1621
+ "Med": 26.79689347743988
1622
+ },
1623
+ "Speed": {
1624
+ "Min": 0,
1625
+ "Max": 0,
1626
+ "Med": 0.0
1627
+ }
1628
+ },
1629
+ "Hallucination": {
1630
+ "Time to Answer": {
1631
+ "Min": [
1632
+ 0
1633
+ ],
1634
+ "Max": [
1635
+ 0
1636
+ ],
1637
+ "Med": 0.0
1638
+ },
1639
+ "Latency": {
1640
+ "Min": 9.33104419708252,
1641
+ "Max": 90.23524713516235,
1642
+ "Med": 27.72087299823761
1643
+ },
1644
+ "Speed": {
1645
+ "Min": 0,
1646
+ "Max": 0,
1647
+ "Med": 0.0
1648
+ }
1649
+ },
1650
+ "Safety": {
1651
+ "Time to Answer": {
1652
+ "Min": [
1653
+ 0
1654
+ ],
1655
+ "Max": [
1656
+ 0
1657
+ ],
1658
+ "Med": 0.0
1659
+ },
1660
+ "Latency": {
1661
+ "Min": 13.609748363494873,
1662
+ "Max": 98.26702857017517,
1663
+ "Med": 28.671757698059082
1664
+ },
1665
+ "Speed": {
1666
+ "Min": 0,
1667
+ "Max": 0,
1668
+ "Med": 0.0
1669
+ }
1670
+ },
1671
+ "Repetition": {
1672
+ "Time to Answer": {
1673
+ "Min": [
1674
+ 0
1675
+ ],
1676
+ "Max": [
1677
+ 0
1678
+ ],
1679
+ "Med": 0.0
1680
+ },
1681
+ "Latency": {
1682
+ "Min": 9.140820264816284,
1683
+ "Max": 76.10930681228638,
1684
+ "Med": 25.359631299972534
1685
+ },
1686
+ "Speed": {
1687
+ "Min": 0,
1688
+ "Max": 0,
1689
+ "Med": 0.0
1690
+ }
1691
+ },
1692
+ "Summarization": {
1693
+ "Time to Answer": {
1694
+ "Min": [
1695
+ 0
1696
+ ],
1697
+ "Max": [
1698
+ 0
1699
+ ],
1700
+ "Med": 0.0
1701
+ },
1702
+ "Latency": {
1703
+ "Min": 8.343881130218506,
1704
+ "Max": 52.00087642669678,
1705
+ "Med": 18.741631627082825
1706
+ },
1707
+ "Speed": {
1708
+ "Min": 0,
1709
+ "Max": 0,
1710
+ "Med": 0.0
1711
+ }
1712
+ },
1713
+ "Translation": {
1714
+ "Time to Answer": {
1715
+ "Min": [
1716
+ 0
1717
+ ],
1718
+ "Max": [
1719
+ 0
1720
+ ],
1721
+ "Med": 0.0
1722
+ },
1723
+ "Latency": {
1724
+ "Min": 12.577407121658325,
1725
+ "Max": 103.21936011314392,
1726
+ "Med": 30.767643094062805
1727
+ },
1728
+ "Speed": {
1729
+ "Min": 0,
1730
+ "Max": 0,
1731
+ "Med": 0.0
1732
+ }
1733
+ },
1734
+ "Multi-Turn": {
1735
+ "Time to Answer": {
1736
+ "Min": [
1737
+ 0
1738
+ ],
1739
+ "Max": [
1740
+ 0
1741
+ ],
1742
+ "Med": 0.0
1743
+ },
1744
+ "Latency": {
1745
+ "Min": 12.984463930130005,
1746
+ "Max": 169.1725790500641,
1747
+ "Med": 65.30046927928925
1748
+ },
1749
+ "Speed": {
1750
+ "Min": 0,
1751
+ "Max": 0,
1752
+ "Med": 0.0
1753
+ }
1754
+ }
1755
+ },
1756
  "Solar Pro Preview (top_p:0.95, temp: 0.7)": {
1757
  "NUM_GPUS": 1,
1758
  "Overall": {
 
3463
  }
3464
  }
3465
  },
3466
+ "Claude 4.5 Opus (think)": {
3467
+ "NUM_GPUS": 0,
3468
  "Overall": {
3469
  "Time to Answer": {
3470
+ "Min": 0,
3471
+ "Max": 0,
3472
+ "Med": 0.0
3473
  },
3474
  "Latency": {
3475
+ "Min": 0,
3476
+ "Max": 0,
3477
+ "Med": 0.0
3478
  },
3479
  "Speed": {
3480
+ "Min": -1.0,
3481
+ "Max": -1.0,
3482
+ "Med": -1.0
3483
  }
3484
  },
3485
  "Content Generation": {
3486
  "Time to Answer": {
3487
+ "Min": 0,
3488
+ "Max": 0,
3489
+ "Med": 0.0
3490
  },
3491
  "Latency": {
3492
+ "Min": 0,
3493
+ "Max": 0,
3494
+ "Med": 0.0
3495
+ },
3496
+ "Speed": {
3497
+ "Min": -1.0,
3498
+ "Max": -1.0,
3499
+ "Med": -1.0
3500
+ }
3501
+ },
3502
+ "Editing": {
3503
+ "Time to Answer": {
3504
+ "Min": 0,
3505
+ "Max": 0,
3506
+ "Med": 0.0
3507
+ },
3508
+ "Latency": {
3509
+ "Min": 0,
3510
+ "Max": 0,
3511
+ "Med": 0.0
3512
+ },
3513
+ "Speed": {
3514
+ "Min": -1.0,
3515
+ "Max": -1.0,
3516
+ "Med": -1.0
3517
+ }
3518
+ },
3519
+ "Data Analysis": {
3520
+ "Time to Answer": {
3521
+ "Min": 0,
3522
+ "Max": 0,
3523
+ "Med": 0.0
3524
+ },
3525
+ "Latency": {
3526
+ "Min": 0,
3527
+ "Max": 0,
3528
+ "Med": 0.0
3529
+ },
3530
+ "Speed": {
3531
+ "Min": -1.0,
3532
+ "Max": -1.0,
3533
+ "Med": -1.0
3534
+ }
3535
+ },
3536
+ "Reasoning": {
3537
+ "Time to Answer": {
3538
+ "Min": 0,
3539
+ "Max": 0,
3540
+ "Med": 0.0
3541
+ },
3542
+ "Latency": {
3543
+ "Min": 0,
3544
+ "Max": 0,
3545
+ "Med": 0.0
3546
+ },
3547
+ "Speed": {
3548
+ "Min": -1.0,
3549
+ "Max": -1.0,
3550
+ "Med": -1.0
3551
+ }
3552
+ },
3553
+ "Hallucination": {
3554
+ "Time to Answer": {
3555
+ "Min": 0,
3556
+ "Max": 0,
3557
+ "Med": 0.0
3558
+ },
3559
+ "Latency": {
3560
+ "Min": 0,
3561
+ "Max": 0,
3562
+ "Med": 0.0
3563
+ },
3564
+ "Speed": {
3565
+ "Min": -1.0,
3566
+ "Max": -1.0,
3567
+ "Med": -1.0
3568
+ }
3569
+ },
3570
+ "Safety": {
3571
+ "Time to Answer": {
3572
+ "Min": 0,
3573
+ "Max": 0,
3574
+ "Med": 0.0
3575
+ },
3576
+ "Latency": {
3577
+ "Min": 0,
3578
+ "Max": 0,
3579
+ "Med": 0.0
3580
+ },
3581
+ "Speed": {
3582
+ "Min": -1.0,
3583
+ "Max": -1.0,
3584
+ "Med": -1.0
3585
+ }
3586
+ },
3587
+ "Repetition": {
3588
+ "Time to Answer": {
3589
+ "Min": 0,
3590
+ "Max": 0,
3591
+ "Med": 0.0
3592
+ },
3593
+ "Latency": {
3594
+ "Min": 0,
3595
+ "Max": 0,
3596
+ "Med": 0.0
3597
+ },
3598
+ "Speed": {
3599
+ "Min": -1.0,
3600
+ "Max": -1.0,
3601
+ "Med": -1.0
3602
+ }
3603
+ },
3604
+ "Summarization": {
3605
+ "Time to Answer": {
3606
+ "Min": 0,
3607
+ "Max": 0,
3608
+ "Med": 0.0
3609
+ },
3610
+ "Latency": {
3611
+ "Min": 0,
3612
+ "Max": 0,
3613
+ "Med": 0.0
3614
+ },
3615
+ "Speed": {
3616
+ "Min": -1.0,
3617
+ "Max": -1.0,
3618
+ "Med": -1.0
3619
+ }
3620
+ },
3621
+ "Translation": {
3622
+ "Time to Answer": {
3623
+ "Min": 0,
3624
+ "Max": 0,
3625
+ "Med": 0.0
3626
+ },
3627
+ "Latency": {
3628
+ "Min": 0,
3629
+ "Max": 0,
3630
+ "Med": 0.0
3631
+ },
3632
+ "Speed": {
3633
+ "Min": -1.0,
3634
+ "Max": -1.0,
3635
+ "Med": -1.0
3636
+ }
3637
+ },
3638
+ "Multi-Turn": {
3639
+ "Time to Answer": {
3640
+ "Min": 0,
3641
+ "Max": 0,
3642
+ "Med": 0.0
3643
+ },
3644
+ "Latency": {
3645
+ "Min": 0,
3646
+ "Max": 0,
3647
+ "Med": 0.0
3648
+ },
3649
+ "Speed": {
3650
+ "Min": -1.0,
3651
+ "Max": -1.0,
3652
+ "Med": -1.0
3653
+ }
3654
+ }
3655
+ },
3656
+ "GLM-4.5 FP8 (think)": {
3657
+ "NUM_GPUS": 8,
3658
+ "Overall": {
3659
+ "Time to Answer": {
3660
+ "Min": 0.11270952224731445,
3661
+ "Max": 1084.7877391024863,
3662
+ "Med": 25.261904125875603
3663
+ },
3664
+ "Latency": {
3665
+ "Min": 2.203545331954956,
3666
+ "Max": 2499.599281311035,
3667
+ "Med": 62.74959444999695
3668
+ },
3669
+ "Speed": {
3670
+ "Min": 19.420678190531984,
3671
+ "Max": 38.97772164575481,
3672
+ "Med": 23.293980879127712
3673
+ }
3674
+ },
3675
+ "Content Generation": {
3676
+ "Time to Answer": {
3677
+ "Min": 3.702089722433614,
3678
+ "Max": 278.2958468033817,
3679
+ "Med": 21.031848154986903
3680
+ },
3681
+ "Latency": {
3682
+ "Min": 14.542505025863647,
3683
+ "Max": 357.45922803878784,
3684
+ "Med": 52.71355986595154
3685
+ },
3686
+ "Speed": {
3687
  "Min": 19.45536289987673,
3688
  "Max": 27.213499913336133,
3689
  "Med": 23.29757774645036
 
4033
  }
4034
  }
4035
  },
4036
+ "MiniMax-M2 (230B A10B)": {
4037
+ "NUM_GPUS": -1,
4038
  "Overall": {
4039
  "Time to Answer": {
4040
  "Min": 0,
4041
+ "Max": 0,
4042
+ "Med": 0.0
4043
  },
4044
  "Latency": {
4045
  "Min": 0,
4046
+ "Max": 0,
4047
+ "Med": 0.0
4048
  },
4049
  "Speed": {
4050
  "Min": -1.0,
4051
+ "Max": -1.0,
4052
+ "Med": -1.0
4053
  }
4054
  },
4055
  "Content Generation": {
4056
  "Time to Answer": {
4057
+ "Min": 0,
4058
+ "Max": 0,
4059
+ "Med": 0.0
4060
  },
4061
  "Latency": {
4062
+ "Min": 0,
4063
+ "Max": 0,
4064
+ "Med": 0.0
4065
  },
4066
  "Speed": {
4067
+ "Min": -1.0,
4068
+ "Max": -1.0,
4069
+ "Med": -1.0
4070
  }
4071
  },
4072
  "Editing": {
4073
  "Time to Answer": {
4074
  "Min": 0,
4075
+ "Max": 0,
4076
+ "Med": 0.0
4077
  },
4078
  "Latency": {
4079
  "Min": 0,
4080
+ "Max": 0,
4081
+ "Med": 0.0
4082
  },
4083
  "Speed": {
4084
  "Min": -1.0,
4085
+ "Max": -1.0,
4086
+ "Med": -1.0
4087
  }
4088
  },
4089
  "Data Analysis": {
4090
  "Time to Answer": {
4091
+ "Min": 0,
4092
+ "Max": 0,
4093
+ "Med": 0.0
4094
  },
4095
  "Latency": {
4096
+ "Min": 0,
4097
+ "Max": 0,
4098
+ "Med": 0.0
4099
  },
4100
  "Speed": {
4101
+ "Min": -1.0,
4102
+ "Max": -1.0,
4103
+ "Med": -1.0
4104
  }
4105
  },
4106
  "Reasoning": {
4107
  "Time to Answer": {
4108
+ "Min": 0,
4109
+ "Max": 0,
4110
+ "Med": 0.0
4111
  },
4112
  "Latency": {
4113
+ "Min": 0,
4114
+ "Max": 0,
4115
+ "Med": 0.0
4116
  },
4117
  "Speed": {
4118
+ "Min": -1.0,
4119
+ "Max": -1.0,
4120
+ "Med": -1.0
4121
  }
4122
  },
4123
  "Hallucination": {
4124
  "Time to Answer": {
4125
+ "Min": 0,
4126
+ "Max": 0,
4127
+ "Med": 0.0
4128
  },
4129
  "Latency": {
4130
+ "Min": 0,
4131
+ "Max": 0,
4132
+ "Med": 0.0
4133
+ },
4134
+ "Speed": {
4135
+ "Min": -1.0,
4136
+ "Max": -1.0,
4137
+ "Med": -1.0
4138
+ }
4139
+ },
4140
+ "Safety": {
4141
+ "Time to Answer": {
4142
+ "Min": 0,
4143
+ "Max": 0,
4144
+ "Med": 0.0
4145
+ },
4146
+ "Latency": {
4147
+ "Min": 0,
4148
+ "Max": 0,
4149
+ "Med": 0.0
4150
+ },
4151
+ "Speed": {
4152
+ "Min": -1.0,
4153
+ "Max": -1.0,
4154
+ "Med": -1.0
4155
+ }
4156
+ },
4157
+ "Repetition": {
4158
+ "Time to Answer": {
4159
+ "Min": 0,
4160
+ "Max": 0,
4161
+ "Med": 0.0
4162
+ },
4163
+ "Latency": {
4164
+ "Min": 0,
4165
+ "Max": 0,
4166
+ "Med": 0.0
4167
+ },
4168
+ "Speed": {
4169
+ "Min": -1.0,
4170
+ "Max": -1.0,
4171
+ "Med": -1.0
4172
+ }
4173
+ },
4174
+ "Summarization": {
4175
+ "Time to Answer": {
4176
+ "Min": 0,
4177
+ "Max": 0,
4178
+ "Med": 0.0
4179
+ },
4180
+ "Latency": {
4181
+ "Min": 0,
4182
+ "Max": 0,
4183
+ "Med": 0.0
4184
+ },
4185
+ "Speed": {
4186
+ "Min": -1.0,
4187
+ "Max": -1.0,
4188
+ "Med": -1.0
4189
+ }
4190
+ },
4191
+ "Translation": {
4192
+ "Time to Answer": {
4193
+ "Min": 0,
4194
+ "Max": 0,
4195
+ "Med": 0.0
4196
+ },
4197
+ "Latency": {
4198
+ "Min": 0,
4199
+ "Max": 0,
4200
+ "Med": 0.0
4201
+ },
4202
+ "Speed": {
4203
+ "Min": -1.0,
4204
+ "Max": -1.0,
4205
+ "Med": -1.0
4206
+ }
4207
+ },
4208
+ "Multi-Turn": {
4209
+ "Time to Answer": {
4210
+ "Min": 0,
4211
+ "Max": 0,
4212
+ "Med": 0.0
4213
+ },
4214
+ "Latency": {
4215
+ "Min": 0,
4216
+ "Max": 0,
4217
+ "Med": 0.0
4218
+ },
4219
+ "Speed": {
4220
+ "Min": -1.0,
4221
+ "Max": -1.0,
4222
+ "Med": -1.0
4223
+ }
4224
+ }
4225
+ },
4226
+ "gpt-oss-120B (Reasoning: medium)": {
4227
+ "NUM_GPUS": 8,
4228
+ "Overall": {
4229
+ "Time to Answer": {
4230
+ "Min": 0,
4231
+ "Max": 101.66583281847353,
4232
+ "Med": 7.694922740481965
4233
+ },
4234
+ "Latency": {
4235
+ "Min": 0,
4236
+ "Max": 108.71509218215942,
4237
+ "Med": 12.121336698532104
4238
+ },
4239
+ "Speed": {
4240
+ "Min": -1.0,
4241
+ "Max": 295.4744570001622,
4242
+ "Med": 103.31935460342275
4243
+ }
4244
+ },
4245
+ "Content Generation": {
4246
+ "Time to Answer": {
4247
+ "Min": 1.4268165264489516,
4248
+ "Max": 76.09343232158227,
4249
+ "Med": 5.27987206336147
4250
+ },
4251
+ "Latency": {
4252
+ "Min": 2.564422369003296,
4253
+ "Max": 77.78296256065369,
4254
+ "Med": 12.131241917610168
4255
+ },
4256
+ "Speed": {
4257
+ "Min": 39.854399049254106,
4258
+ "Max": 164.11560898062044,
4259
+ "Med": 102.26319280893972
4260
+ }
4261
+ },
4262
+ "Editing": {
4263
+ "Time to Answer": {
4264
+ "Min": 0,
4265
+ "Max": 34.97314937730854,
4266
+ "Med": 7.72154927398273
4267
+ },
4268
+ "Latency": {
4269
+ "Min": 0,
4270
+ "Max": 38.421292781829834,
4271
+ "Med": 10.624043703079224
4272
+ },
4273
+ "Speed": {
4274
+ "Min": -1.0,
4275
+ "Max": 295.4744570001622,
4276
+ "Med": 97.16836666526689
4277
+ }
4278
+ },
4279
+ "Data Analysis": {
4280
+ "Time to Answer": {
4281
+ "Min": 1.5500787364112005,
4282
+ "Max": 47.44580010794223,
4283
+ "Med": 7.938084126425333
4284
+ },
4285
+ "Latency": {
4286
+ "Min": 2.21130108833313,
4287
+ "Max": 48.52851939201355,
4288
+ "Med": 10.561246871948242
4289
+ },
4290
+ "Speed": {
4291
+ "Min": 33.67196833472543,
4292
+ "Max": 218.4742163852268,
4293
+ "Med": 111.7668417486227
4294
+ }
4295
+ },
4296
+ "Reasoning": {
4297
+ "Time to Answer": {
4298
+ "Min": 1.313997881221957,
4299
+ "Max": 71.11774356968237,
4300
+ "Med": 9.761283050834567
4301
+ },
4302
+ "Latency": {
4303
+ "Min": 2.4778506755828857,
4304
+ "Max": 77.51551747322083,
4305
+ "Med": 14.6117924451828
4306
+ },
4307
+ "Speed": {
4308
+ "Min": 52.34248336770566,
4309
+ "Max": 162.3515087876845,
4310
+ "Med": 121.07198234072999
4311
+ }
4312
+ },
4313
+ "Hallucination": {
4314
+ "Time to Answer": {
4315
+ "Min": 2.2739883409985806,
4316
+ "Max": 25.163233752980585,
4317
+ "Med": 7.324965414201975
4318
+ },
4319
+ "Latency": {
4320
+ "Min": 3.2160396575927734,
4321
+ "Max": 41.42578959465027,
4322
+ "Med": 13.071247458457947
4323
+ },
4324
+ "Speed": {
4325
  "Min": 36.912669174553955,
4326
  "Max": 207.27878278068064,
4327
  "Med": 124.86484051787805
 
5173
  }
5174
  }
5175
  },
5176
+ "Grok-4": {
5177
  "NUM_GPUS": 0,
5178
  "Overall": {
5179
  "Time to Answer": {
 
5363
  }
5364
  }
5365
  },
5366
+ "Claude 4 Opus (20250514) (think)": {
5367
  "NUM_GPUS": 0,
5368
  "Overall": {
5369
  "Time to Answer": {
 
5553
  }
5554
  }
5555
  },
5556
+ "Gemini 2.5 Pro": {
5557
+ "NUM_GPUS": 0,
5558
  "Overall": {
5559
  "Time to Answer": {
5560
+ "Min": 0,
5561
+ "Max": 0,
5562
+ "Med": 0.0
5563
  },
5564
  "Latency": {
5565
+ "Min": 0,
5566
+ "Max": 0,
5567
+ "Med": 0.0
5568
  },
5569
  "Speed": {
5570
+ "Min": 0.0,
5571
+ "Max": 0.0,
5572
+ "Med": 0.0
5573
  }
5574
  },
5575
  "Content Generation": {
5576
  "Time to Answer": {
5577
+ "Min": 0,
5578
+ "Max": 0,
5579
+ "Med": 0.0
5580
  },
5581
  "Latency": {
5582
+ "Min": 0,
5583
+ "Max": 0,
5584
+ "Med": 0.0
5585
  },
5586
  "Speed": {
5587
+ "Min": 0.0,
5588
+ "Max": 0.0,
5589
+ "Med": 0.0
5590
  }
5591
  },
5592
  "Editing": {
5593
  "Time to Answer": {
5594
+ "Min": 0,
5595
+ "Max": 0,
5596
+ "Med": 0.0
5597
  },
5598
  "Latency": {
5599
+ "Min": 0,
5600
+ "Max": 0,
5601
+ "Med": 0.0
5602
  },
5603
  "Speed": {
5604
+ "Min": 0.0,
5605
+ "Max": 0.0,
5606
+ "Med": 0.0
5607
  }
5608
  },
5609
  "Data Analysis": {
5610
  "Time to Answer": {
5611
+ "Min": 0,
5612
+ "Max": 0,
5613
+ "Med": 0.0
5614
  },
5615
  "Latency": {
5616
+ "Min": 0,
5617
+ "Max": 0,
5618
+ "Med": 0.0
5619
+ },
5620
+ "Speed": {
5621
+ "Min": 0.0,
5622
+ "Max": 0.0,
5623
+ "Med": 0.0
5624
+ }
5625
+ },
5626
+ "Reasoning": {
5627
+ "Time to Answer": {
5628
+ "Min": 0,
5629
+ "Max": 0,
5630
+ "Med": 0.0
5631
+ },
5632
+ "Latency": {
5633
+ "Min": 0,
5634
+ "Max": 0,
5635
+ "Med": 0.0
5636
+ },
5637
+ "Speed": {
5638
+ "Min": 0.0,
5639
+ "Max": 0.0,
5640
+ "Med": 0.0
5641
+ }
5642
+ },
5643
+ "Hallucination": {
5644
+ "Time to Answer": {
5645
+ "Min": 0,
5646
+ "Max": 0,
5647
+ "Med": 0.0
5648
+ },
5649
+ "Latency": {
5650
+ "Min": 0,
5651
+ "Max": 0,
5652
+ "Med": 0.0
5653
+ },
5654
+ "Speed": {
5655
+ "Min": 0.0,
5656
+ "Max": 0.0,
5657
+ "Med": 0.0
5658
+ }
5659
+ },
5660
+ "Safety": {
5661
+ "Time to Answer": {
5662
+ "Min": 0,
5663
+ "Max": 0,
5664
+ "Med": 0.0
5665
+ },
5666
+ "Latency": {
5667
+ "Min": 0,
5668
+ "Max": 0,
5669
+ "Med": 0.0
5670
+ },
5671
+ "Speed": {
5672
+ "Min": 0.0,
5673
+ "Max": 0.0,
5674
+ "Med": 0.0
5675
+ }
5676
+ },
5677
+ "Repetition": {
5678
+ "Time to Answer": {
5679
+ "Min": 0,
5680
+ "Max": 0,
5681
+ "Med": 0.0
5682
+ },
5683
+ "Latency": {
5684
+ "Min": 0,
5685
+ "Max": 0,
5686
+ "Med": 0.0
5687
+ },
5688
+ "Speed": {
5689
+ "Min": 0.0,
5690
+ "Max": 0.0,
5691
+ "Med": 0.0
5692
+ }
5693
+ },
5694
+ "Summarization": {
5695
+ "Time to Answer": {
5696
+ "Min": 0,
5697
+ "Max": 0,
5698
+ "Med": 0.0
5699
+ },
5700
+ "Latency": {
5701
+ "Min": 0,
5702
+ "Max": 0,
5703
+ "Med": 0.0
5704
+ },
5705
+ "Speed": {
5706
+ "Min": 0.0,
5707
+ "Max": 0.0,
5708
+ "Med": 0.0
5709
+ }
5710
+ },
5711
+ "Translation": {
5712
+ "Time to Answer": {
5713
+ "Min": 0,
5714
+ "Max": 0,
5715
+ "Med": 0.0
5716
+ },
5717
+ "Latency": {
5718
+ "Min": 0,
5719
+ "Max": 0,
5720
+ "Med": 0.0
5721
+ },
5722
+ "Speed": {
5723
+ "Min": 0.0,
5724
+ "Max": 0.0,
5725
+ "Med": 0.0
5726
+ }
5727
+ },
5728
+ "Multi-Turn": {
5729
+ "Time to Answer": {
5730
+ "Min": 0,
5731
+ "Max": 0,
5732
+ "Med": 0.0
5733
+ },
5734
+ "Latency": {
5735
+ "Min": 0,
5736
+ "Max": 0,
5737
+ "Med": 0.0
5738
+ },
5739
+ "Speed": {
5740
+ "Min": 0.0,
5741
+ "Max": 0.0,
5742
+ "Med": 0.0
5743
+ }
5744
+ }
5745
+ },
5746
+ "Tongyi DeepResearch 30B A3B": {
5747
+ "NUM_GPUS": 4,
5748
+ "Overall": {
5749
+ "Time to Answer": {
5750
+ "Min": 1.4505500793457031,
5751
+ "Max": 244.41708384257143,
5752
+ "Med": 45.23295979184195
5753
+ },
5754
+ "Latency": {
5755
+ "Min": 9.191470384597778,
5756
+ "Max": 749.16233086586,
5757
+ "Med": 52.387412667274475
5758
+ },
5759
+ "Speed": {
5760
+ "Min": 18.069738498345682,
5761
+ "Max": 122.37478932044478,
5762
+ "Med": 62.676624491545525
5763
+ }
5764
+ },
5765
+ "Content Generation": {
5766
+ "Time to Answer": {
5767
+ "Min": 4.802471643031882,
5768
+ "Max": 203.55154156596308,
5769
+ "Med": 41.43268650270611
5770
+ },
5771
+ "Latency": {
5772
+ "Min": 22.65742540359497,
5773
+ "Max": 738.4437143802643,
5774
+ "Med": 51.50689494609833
5775
+ },
5776
+ "Speed": {
5777
+ "Min": 23.70791793357093,
5778
+ "Max": 118.23891726695051,
5779
+ "Med": 61.95675692618596
5780
+ }
5781
+ },
5782
+ "Editing": {
5783
+ "Time to Answer": {
5784
+ "Min": 9.566574335098267,
5785
+ "Max": 106.69052745386706,
5786
+ "Med": 43.39687190468506
5787
+ },
5788
+ "Latency": {
5789
+ "Min": 11.078340530395508,
5790
+ "Max": 716.9541938304901,
5791
+ "Med": 48.262219190597534
5792
+ },
5793
+ "Speed": {
5794
+ "Min": 21.45800243159038,
5795
+ "Max": 110.85390192747607,
5796
+ "Med": 60.728454906690686
5797
+ }
5798
+ },
5799
+ "Data Analysis": {
5800
+ "Time to Answer": {
5801
+ "Min": 11.874105177737047,
5802
+ "Max": 231.70031813596688,
5803
+ "Med": 45.77187383012706
5804
+ },
5805
+ "Latency": {
5806
+ "Min": 22.660792589187622,
5807
+ "Max": 716.5539243221283,
5808
+ "Med": 51.057066202163696
5809
+ },
5810
+ "Speed": {
5811
  "Min": 18.806653004549222,
5812
  "Max": 113.17314697322944,
5813
  "Med": 61.908515815844005
 
6503
  }
6504
  }
6505
  },
6506
+ "GPT-5.1 (Reasoning: medium, verbosity: medium)": {
6507
+ "NUM_GPUS": -1,
6508
  "Overall": {
6509
  "Time to Answer": {
6510
+ "Min": [
6511
+ 0
6512
+ ],
6513
+ "Max": [
6514
+ 0
6515
+ ],
6516
+ "Med": 0.0
6517
  },
6518
  "Latency": {
6519
+ "Min": 1.4775474071502686,
6520
+ "Max": 747.6701903343201,
6521
+ "Med": 11.673096776008606
6522
  },
6523
  "Speed": {
6524
+ "Min": 0,
6525
+ "Max": 0,
6526
+ "Med": 0.0
6527
  }
6528
  },
6529
  "Content Generation": {
6530
  "Time to Answer": {
6531
+ "Min": [
6532
+ 0
6533
+ ],
6534
+ "Max": [
6535
+ 0
6536
+ ],
6537
+ "Med": 0.0
6538
  },
6539
  "Latency": {
6540
+ "Min": 3.002990961074829,
6541
+ "Max": 421.94651198387146,
6542
+ "Med": 16.867193579673767
6543
  },
6544
  "Speed": {
6545
+ "Min": 0,
6546
+ "Max": 0,
6547
+ "Med": 0.0
6548
  }
6549
  },
6550
  "Editing": {
6551
  "Time to Answer": {
6552
+ "Min": [
6553
+ 0
6554
+ ],
6555
+ "Max": [
6556
+ 0
6557
+ ],
6558
+ "Med": 0.0
6559
  },
6560
  "Latency": {
6561
+ "Min": 1.4775474071502686,
6562
+ "Max": 82.63027286529541,
6563
+ "Med": 5.669041872024536
6564
  },
6565
  "Speed": {
6566
+ "Min": 0,
6567
+ "Max": 0,
6568
+ "Med": 0.0
6569
  }
6570
  },
6571
  "Data Analysis": {
6572
  "Time to Answer": {
6573
+ "Min": [
6574
+ 0
6575
+ ],
6576
+ "Max": [
6577
+ 0
6578
+ ],
6579
+ "Med": 0.0
6580
  },
6581
  "Latency": {
6582
+ "Min": 1.664919137954712,
6583
+ "Max": 358.5435652732849,
6584
+ "Med": 7.0718772411346436
6585
  },
6586
  "Speed": {
6587
+ "Min": 0,
6588
+ "Max": 0,
6589
+ "Med": 0.0
6590
  }
6591
  },
6592
  "Reasoning": {
6593
  "Time to Answer": {
6594
+ "Min": [
6595
+ 0
6596
+ ],
6597
+ "Max": [
6598
+ 0
6599
+ ],
6600
+ "Med": 0.0
6601
  },
6602
  "Latency": {
6603
+ "Min": 1.82639479637146,
6604
+ "Max": 747.6701903343201,
6605
+ "Med": 12.99689531326294
6606
  },
6607
  "Speed": {
6608
+ "Min": 0,
6609
+ "Max": 0,
6610
+ "Med": 0.0
6611
  }
6612
  },
6613
  "Hallucination": {
6614
  "Time to Answer": {
6615
+ "Min": [
6616
+ 0
6617
+ ],
6618
+ "Max": [
6619
+ 0
6620
+ ],
6621
+ "Med": 0.0
6622
  },
6623
  "Latency": {
6624
+ "Min": 2.273186445236206,
6625
+ "Max": 115.95099306106567,
6626
+ "Med": 22.67124307155609
6627
  },
6628
  "Speed": {
6629
+ "Min": 0,
6630
+ "Max": 0,
6631
+ "Med": 0.0
6632
  }
6633
  },
6634
  "Safety": {
6635
  "Time to Answer": {
6636
+ "Min": [
6637
+ 0
6638
+ ],
6639
+ "Max": [
6640
+ 0
6641
+ ],
6642
+ "Med": 0.0
6643
  },
6644
  "Latency": {
6645
+ "Min": 3.3134090900421143,
6646
+ "Max": 140.77250027656555,
6647
+ "Med": 18.410767793655396
6648
  },
6649
  "Speed": {
6650
+ "Min": 0,
6651
+ "Max": 0,
6652
+ "Med": 0.0
6653
  }
6654
  },
6655
  "Repetition": {
6656
  "Time to Answer": {
6657
+ "Min": [
6658
+ 0
6659
+ ],
6660
+ "Max": [
6661
+ 0
6662
+ ],
6663
+ "Med": 0.0
6664
  },
6665
  "Latency": {
6666
+ "Min": 2.3753366470336914,
6667
+ "Max": 428.47876358032227,
6668
+ "Med": 19.905622720718384
6669
  },
6670
  "Speed": {
6671
+ "Min": 0,
6672
+ "Max": 0,
6673
+ "Med": 0.0
6674
  }
6675
  },
6676
  "Summarization": {
6677
  "Time to Answer": {
6678
+ "Min": [
6679
+ 0
6680
+ ],
6681
+ "Max": [
6682
+ 0
6683
+ ],
6684
+ "Med": 0.0
6685
  },
6686
  "Latency": {
6687
+ "Min": 2.2187492847442627,
6688
+ "Max": 126.85083556175232,
6689
+ "Med": 5.20970344543457
6690
  },
6691
  "Speed": {
6692
+ "Min": 0,
6693
+ "Max": 0,
6694
+ "Med": 0.0
6695
  }
6696
  },
6697
  "Translation": {
6698
  "Time to Answer": {
6699
+ "Min": [
6700
+ 0
6701
+ ],
6702
+ "Max": [
6703
+ 0
6704
+ ],
6705
+ "Med": 0.0
6706
  },
6707
  "Latency": {
6708
+ "Min": 2.0158095359802246,
6709
+ "Max": 64.36819744110107,
6710
+ "Med": 9.735138773918152
6711
  },
6712
  "Speed": {
6713
+ "Min": 0,
6714
+ "Max": 0,
6715
+ "Med": 0.0
6716
  }
6717
  },
6718
  "Multi-Turn": {
6719
  "Time to Answer": {
6720
+ "Min": [
6721
+ 0
6722
+ ],
6723
+ "Max": [
6724
+ 0
6725
+ ],
6726
+ "Med": 0.0
6727
  },
6728
  "Latency": {
6729
+ "Min": 4.493000507354736,
6730
+ "Max": 501.9931924343109,
6731
+ "Med": 38.35947251319885
6732
  },
6733
  "Speed": {
6734
+ "Min": 0,
6735
+ "Max": 0,
6736
+ "Med": 0.0
6737
  }
6738
  }
6739
  },
6740
+ "KAT Dev 72B Exp": {
6741
+ "NUM_GPUS": 8,
6742
  "Overall": {
6743
  "Time to Answer": {
6744
  "Min": 0,
6745
+ "Max": 1.8852267265319824,
6746
+ "Med": 0.06221652030944824
6747
  },
6748
  "Latency": {
6749
  "Min": 0,
6750
+ "Max": 1739.6013979911804,
6751
+ "Med": 8.492375493049622
6752
  },
6753
  "Speed": {
6754
+ "Min": 11.841053492664015,
6755
+ "Max": 179.6668529421545,
6756
+ "Med": 50.601864763867184
6757
  }
6758
  },
6759
  "Content Generation": {
6760
  "Time to Answer": {
6761
+ "Min": 0.05644536018371582,
6762
+ "Max": 1.8852267265319824,
6763
+ "Med": 0.06070876121520996
6764
  },
6765
  "Latency": {
6766
+ "Min": 0.5495977401733398,
6767
+ "Max": 1734.7112760543823,
6768
+ "Med": 11.42176365852356
6769
  },
6770
  "Speed": {
6771
+ "Min": 33.34421066358906,
6772
+ "Max": 61.99760541627945,
6773
+ "Med": 51.61603722996896
6774
  }
6775
  },
6776
  "Editing": {
6777
  "Time to Answer": {
6778
+ "Min": 0.056221723556518555,
6779
+ "Max": 0.21474575996398926,
6780
+ "Med": 0.06082558631896973
6781
  },
6782
  "Latency": {
6783
+ "Min": 0.2646908760070801,
6784
+ "Max": 1595.0533018112183,
6785
+ "Med": 4.54656982421875
6786
  },
6787
  "Speed": {
6788
+ "Min": 33.0970793748843,
6789
+ "Max": 67.8011225008837,
6790
+ "Med": 49.877649602198524
6791
  }
6792
  },
6793
  "Data Analysis": {
6794
  "Time to Answer": {
6795
+ "Min": 0.057355642318725586,
6796
+ "Max": 1.3980965614318848,
6797
+ "Med": 0.10868549346923828
6798
  },
6799
  "Latency": {
6800
+ "Min": 0.4547910690307617,
6801
+ "Max": 343.81701016426086,
6802
+ "Med": 6.172606706619263
6803
  },
6804
  "Speed": {
6805
+ "Min": 33.23552955310353,
6806
+ "Max": 61.68261581762401,
6807
+ "Med": 49.023444483063024
6808
  }
6809
  },
6810
  "Reasoning": {
6811
  "Time to Answer": {
6812
+ "Min": 0.05635571479797363,
6813
+ "Max": 0.11922383308410645,
6814
+ "Med": 0.06026041507720947
6815
  },
6816
  "Latency": {
6817
+ "Min": 0.3007025718688965,
6818
+ "Max": 1650.0614280700684,
6819
+ "Med": 10.36361300945282
6820
  },
6821
  "Speed": {
6822
+ "Min": 35.11266626219754,
6823
+ "Max": 61.64151659547226,
6824
+ "Med": 48.08432696922458
6825
  }
6826
  },
6827
  "Hallucination": {
6828
  "Time to Answer": {
6829
+ "Min": 0.05710172653198242,
6830
+ "Max": 0.2662782669067383,
6831
+ "Med": 0.06034708023071289
6832
  },
6833
  "Latency": {
6834
+ "Min": 0.4679543972015381,
6835
+ "Max": 1617.9812409877777,
6836
+ "Med": 8.860660910606384
6837
  },
6838
  "Speed": {
6839
+ "Min": 34.20033736500205,
6840
+ "Max": 63.47312348668281,
6841
+ "Med": 49.06857210020506
6842
  }
6843
  },
6844
  "Safety": {
6845
  "Time to Answer": {
6846
+ "Min": 0.05749773979187012,
6847
+ "Max": 0.1197052001953125,
6848
+ "Med": 0.06013894081115723
6849
  },
6850
  "Latency": {
6851
+ "Min": 0.4509849548339844,
6852
+ "Max": 1738.2601640224457,
6853
+ "Med": 7.764802932739258
6854
  },
6855
  "Speed": {
6856
+ "Min": 31.161307774126723,
6857
+ "Max": 62.96165283098918,
6858
+ "Med": 46.6376538248078
6859
  }
6860
  },
6861
  "Repetition": {
6862
  "Time to Answer": {
6863
+ "Min": 0.05688214302062988,
6864
+ "Max": 0.11974930763244629,
6865
+ "Med": 0.05986011028289795
6866
  },
6867
  "Latency": {
6868
+ "Min": 1.6353342533111572,
6869
+ "Max": 1736.6408779621124,
6870
+ "Med": 9.356394052505493
6871
  },
6872
  "Speed": {
6873
+ "Min": 33.02125276478502,
6874
+ "Max": 61.19217468044336,
6875
+ "Med": 46.27108150127529
6876
  }
6877
  },
6878
  "Summarization": {
6879
  "Time to Answer": {
6880
+ "Min": 0.05743670463562012,
6881
+ "Max": 0.4117448329925537,
6882
+ "Med": 0.1098024845123291
6883
  },
6884
  "Latency": {
6885
+ "Min": 0.989130973815918,
6886
+ "Max": 1565.9191603660583,
6887
+ "Med": 6.263204216957092
6888
  },
6889
  "Speed": {
6890
+ "Min": 33.11188589504452,
6891
+ "Max": 57.96248782433984,
6892
+ "Med": 49.54728638934513
6893
  }
6894
  },
6895
  "Translation": {
6896
  "Time to Answer": {
6897
+ "Min": 0.05727648735046387,
6898
+ "Max": 0.3603339195251465,
6899
+ "Med": 0.06304633617401123
6900
  },
6901
  "Latency": {
6902
+ "Min": 0.2665116786956787,
6903
+ "Max": 1739.6013979911804,
6904
+ "Med": 7.2887866497039795
6905
  },
6906
  "Speed": {
6907
+ "Min": 33.701689123361014,
6908
+ "Max": 61.86641373663972,
6909
+ "Med": 50.14262477886369
6910
  }
6911
  },
6912
  "Multi-Turn": {
6913
  "Time to Answer": {
6914
  "Min": 0,
6915
+ "Max": 1.4983344078063965,
6916
+ "Med": 0.2324150800704956
6917
  },
6918
  "Latency": {
6919
  "Min": 0,
6920
+ "Max": 888.0258376598358,
6921
+ "Med": 21.200571298599243
6922
  },
6923
  "Speed": {
6924
+ "Min": 11.841053492664015,
6925
+ "Max": 179.6668529421545,
6926
+ "Med": 52.505335801448915
6927
  }
6928
  }
6929
  },
6930
+ "gpt-oss-20B (Reasoning: medium)": {
6931
+ "NUM_GPUS": 4,
6932
+ "Overall": {
6933
+ "Time to Answer": {
6934
+ "Min": 2.0265204472169556,
6935
+ "Max": 129.33762687935325,
6936
+ "Med": 26.04652036871504
6937
+ },
6938
+ "Latency": {
6939
+ "Min": 7.263976097106934,
6940
+ "Max": 138.10640954971313,
6941
+ "Med": 29.767700791358948
6942
+ },
6943
+ "Speed": {
6944
+ "Min": 5.317348253806318,
6945
+ "Max": 369.6802851223203,
6946
+ "Med": 108.53633696847938
6947
+ }
6948
+ },
6949
+ "Content Generation": {
6950
+ "Time to Answer": {
6951
+ "Min": 2.919738582967988,
6952
+ "Max": 69.87850048414344,
6953
+ "Med": 25.258961631303542
6954
+ },
6955
+ "Latency": {
6956
+ "Min": 7.263976097106934,
6957
+ "Max": 73.72067332267761,
6958
+ "Med": 29.7125141620636
6959
+ },
6960
+ "Speed": {
6961
+ "Min": 13.604860508942371,
6962
+ "Max": 224.07024522186745,
6963
+ "Med": 124.14591263963385
6964
+ }
6965
+ },
6966
+ "Editing": {
6967
+ "Time to Answer": {
6968
+ "Min": 2.0265204472169556,
6969
+ "Max": 49.25446497234138,
6970
+ "Med": 25.30023380826225
6971
+ },
6972
+ "Latency": {
6973
+ "Min": 7.319023847579956,
6974
+ "Max": 56.10624122619629,
6975
+ "Med": 27.072497606277466
6976
+ },
6977
+ "Speed": {
6978
+ "Min": 12.140381961027476,
6979
+ "Max": 250.484990902761,
6980
+ "Med": 104.93834609385715
6981
+ }
6982
+ },
6983
+ "Data Analysis": {
6984
+ "Time to Answer": {
6985
+ "Min": 3.9151465271321517,
6986
+ "Max": 75.99197716704057,
6987
+ "Med": 26.328562295325447
6988
+ },
6989
+ "Latency": {
6990
+ "Min": 7.286376476287842,
6991
+ "Max": 84.5227108001709,
6992
+ "Med": 28.301609992980957
6993
+ },
6994
+ "Speed": {
6995
+ "Min": 11.70440500661996,
6996
+ "Max": 306.1703155318347,
6997
+ "Med": 113.21800589706349
6998
+ }
6999
+ },
7000
+ "Reasoning": {
7001
+ "Time to Answer": {
7002
+ "Min": 8.324975468895651,
7003
+ "Max": 129.33762687935325,
7004
+ "Med": 27.582642474460418
7005
+ },
7006
+ "Latency": {
7007
+ "Min": 18.050434589385986,
7008
+ "Max": 138.10640954971313,
7009
+ "Med": 32.32542634010315
7010
+ },
7011
+ "Speed": {
7012
+ "Min": 17.82180729362148,
7013
+ "Max": 207.91746863187615,
7014
+ "Med": 106.44936231341633
7015
+ }
7016
+ },
7017
+ "Hallucination": {
7018
+ "Time to Answer": {
7019
+ "Min": 5.195440284614488,
7020
+ "Max": 61.22046760794428,
7021
+ "Med": 25.105323415675343
7022
+ },
7023
+ "Latency": {
7024
+ "Min": 7.313647270202637,
7025
+ "Max": 63.91348838806152,
7026
+ "Med": 32.319284319877625
7027
+ },
7028
+ "Speed": {
7029
+ "Min": 18.438031290688347,
7030
+ "Max": 226.50662559152707,
7031
+ "Med": 110.66899398987842
7032
+ }
7033
+ },
7034
+ "Safety": {
7035
+ "Time to Answer": {
7036
+ "Min": 4.932410193462053,
7037
+ "Max": 47.165975079516905,
7038
+ "Med": 23.45146352177868
7039
+ },
7040
+ "Latency": {
7041
+ "Min": 7.2830750942230225,
7042
+ "Max": 53.09182548522949,
7043
+ "Med": 24.52879786491394
7044
+ },
7045
+ "Speed": {
7046
+ "Min": 5.317348253806318,
7047
+ "Max": 250.7582211510182,
7048
+ "Med": 90.91284402754488
7049
+ }
7050
+ },
7051
+ "Repetition": {
7052
+ "Time to Answer": {
7053
+ "Min": 2.557051893849964,
7054
+ "Max": 120.32689256267814,
7055
+ "Med": 28.832852398544777
7056
+ },
7057
+ "Latency": {
7058
+ "Min": 20.129476308822632,
7059
+ "Max": 125.98315095901489,
7060
+ "Med": 33.87077188491821
7061
+ },
7062
+ "Speed": {
7063
+ "Min": 27.080736058951963,
7064
+ "Max": 266.27309790215196,
7065
+ "Med": 120.19876605631327
7066
+ }
7067
+ },
7068
+ "Summarization": {
7069
+ "Time to Answer": {
7070
+ "Min": 2.4394841513682888,
7071
+ "Max": 113.86724343465069,
7072
+ "Med": 23.029374821644574
7073
+ },
7074
+ "Latency": {
7075
+ "Min": 13.429885149002075,
7076
+ "Max": 115.32083773612976,
7077
+ "Med": 25.605836629867554
7078
+ },
7079
+ "Speed": {
7080
+ "Min": 10.53590514256035,
7081
+ "Max": 304.7862966713593,
7082
+ "Med": 101.49503613110383
7083
+ }
7084
+ },
7085
+ "Translation": {
7086
+ "Time to Answer": {
7087
+ "Min": 4.240170876932201,
7088
+ "Max": 72.23964902074354,
7089
+ "Med": 26.80100677708995
7090
+ },
7091
+ "Latency": {
7092
+ "Min": 17.525670528411865,
7093
+ "Max": 115.61775875091553,
7094
+ "Med": 30.075977206230164
7095
+ },
7096
+ "Speed": {
7097
+ "Min": 22.395176057735203,
7098
+ "Max": 369.6802851223203,
7099
+ "Med": 122.92398147980118
7100
+ }
7101
+ },
7102
+ "Multi-Turn": {
7103
+ "Time to Answer": {
7104
+ "Min": 9.995788375397986,
7105
+ "Max": 91.21509669950703,
7106
+ "Med": 39.29442425858453
7107
+ },
7108
+ "Latency": {
7109
+ "Min": 16.48517942428589,
7110
+ "Max": 112.05223345756531,
7111
+ "Med": 66.7337509393692
7112
+ },
7113
+ "Speed": {
7114
+ "Min": 21.53344632125304,
7115
+ "Max": 273.4009542241851,
7116
+ "Med": 87.97283614240237
7117
+ }
7118
+ }
7119
+ },
7120
+ "o3-pro (Reasoning: medium)": {
7121
  "NUM_GPUS": 0,
7122
  "Overall": {
7123
  "Time to Answer": {