Spaces:
Running
Running
j_yoon.song
commited on
Commit
·
ac0e84a
1
Parent(s):
055c28e
add models
Browse files- src/data/open/length_data.json +476 -68
- src/data/open/stats.csv +10 -0
- src/data/open/stats_lang.csv +10 -0
- src/data/open/time_data.json +1534 -306
src/data/open/length_data.json
CHANGED
|
@@ -1,4 +1,72 @@
|
|
| 1 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
"Claude 4.1 Opus (20250805) (think)": {
|
| 3 |
"Overall": {
|
| 4 |
"Min": -10,
|
|
@@ -475,6 +543,74 @@
|
|
| 475 |
"Med Resp": -3.0
|
| 476 |
}
|
| 477 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 478 |
"Solar Pro Preview (top_p:0.95, temp: 0.7)": {
|
| 479 |
"Overall": {
|
| 480 |
"Min": 1,
|
|
@@ -1087,6 +1223,74 @@
|
|
| 1087 |
"Med Resp": 2282.5
|
| 1088 |
}
|
| 1089 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1090 |
"GLM-4.5 FP8 (think)": {
|
| 1091 |
"Overall": {
|
| 1092 |
"Min": 75,
|
|
@@ -1223,6 +1427,74 @@
|
|
| 1223 |
"Med Resp": 1208.5
|
| 1224 |
}
|
| 1225 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1226 |
"gpt-oss-120B (Reasoning: medium)": {
|
| 1227 |
"Overall": {
|
| 1228 |
"Min": 43,
|
|
@@ -1563,6 +1835,74 @@
|
|
| 1563 |
"Med Resp": 1728.5
|
| 1564 |
}
|
| 1565 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1566 |
"Claude 4 Opus (20250514) (think)": {
|
| 1567 |
"Overall": {
|
| 1568 |
"Min": -10,
|
|
@@ -1971,6 +2311,142 @@
|
|
| 1971 |
"Med Resp": -3.0
|
| 1972 |
}
|
| 1973 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1974 |
"gpt-oss-20B (Reasoning: medium)": {
|
| 1975 |
"Overall": {
|
| 1976 |
"Min": 32,
|
|
@@ -2107,74 +2583,6 @@
|
|
| 2107 |
"Med Resp": -3.0
|
| 2108 |
}
|
| 2109 |
},
|
| 2110 |
-
"Grok-4": {
|
| 2111 |
-
"Overall": {
|
| 2112 |
-
"Min": -10,
|
| 2113 |
-
"Max": -2,
|
| 2114 |
-
"Med": -2.0,
|
| 2115 |
-
"Med Resp": -1.0
|
| 2116 |
-
},
|
| 2117 |
-
"Content Generation": {
|
| 2118 |
-
"Min": -2,
|
| 2119 |
-
"Max": -2,
|
| 2120 |
-
"Med": -2.0,
|
| 2121 |
-
"Med Resp": -1.0
|
| 2122 |
-
},
|
| 2123 |
-
"Editing": {
|
| 2124 |
-
"Min": -2,
|
| 2125 |
-
"Max": -2,
|
| 2126 |
-
"Med": -2.0,
|
| 2127 |
-
"Med Resp": -1.0
|
| 2128 |
-
},
|
| 2129 |
-
"Data Analysis": {
|
| 2130 |
-
"Min": -2,
|
| 2131 |
-
"Max": -2,
|
| 2132 |
-
"Med": -2.0,
|
| 2133 |
-
"Med Resp": -1.0
|
| 2134 |
-
},
|
| 2135 |
-
"Reasoning": {
|
| 2136 |
-
"Min": -2,
|
| 2137 |
-
"Max": -2,
|
| 2138 |
-
"Med": -2.0,
|
| 2139 |
-
"Med Resp": -1.0
|
| 2140 |
-
},
|
| 2141 |
-
"Hallucination": {
|
| 2142 |
-
"Min": -2,
|
| 2143 |
-
"Max": -2,
|
| 2144 |
-
"Med": -2.0,
|
| 2145 |
-
"Med Resp": -1.0
|
| 2146 |
-
},
|
| 2147 |
-
"Safety": {
|
| 2148 |
-
"Min": -2,
|
| 2149 |
-
"Max": -2,
|
| 2150 |
-
"Med": -2.0,
|
| 2151 |
-
"Med Resp": -1.0
|
| 2152 |
-
},
|
| 2153 |
-
"Repetition": {
|
| 2154 |
-
"Min": -2,
|
| 2155 |
-
"Max": -2,
|
| 2156 |
-
"Med": -2.0,
|
| 2157 |
-
"Med Resp": -1.0
|
| 2158 |
-
},
|
| 2159 |
-
"Summarization": {
|
| 2160 |
-
"Min": -2,
|
| 2161 |
-
"Max": -2,
|
| 2162 |
-
"Med": -2.0,
|
| 2163 |
-
"Med Resp": -1.0
|
| 2164 |
-
},
|
| 2165 |
-
"Translation": {
|
| 2166 |
-
"Min": -2,
|
| 2167 |
-
"Max": -2,
|
| 2168 |
-
"Med": -2.0,
|
| 2169 |
-
"Med Resp": -1.0
|
| 2170 |
-
},
|
| 2171 |
-
"Multi-Turn": {
|
| 2172 |
-
"Min": -10,
|
| 2173 |
-
"Max": -4,
|
| 2174 |
-
"Med": -6.0,
|
| 2175 |
-
"Med Resp": -3.0
|
| 2176 |
-
}
|
| 2177 |
-
},
|
| 2178 |
"Apriel 1.5 15B Thinker": {
|
| 2179 |
"Overall": {
|
| 2180 |
"Min": 118,
|
|
|
|
| 1 |
{
|
| 2 |
+
"Olmo 3 32B Think": {
|
| 3 |
+
"Overall": {
|
| 4 |
+
"Min": 210,
|
| 5 |
+
"Max": 65454,
|
| 6 |
+
"Med": 3360.5,
|
| 7 |
+
"Med Resp": 473.0
|
| 8 |
+
},
|
| 9 |
+
"Content Generation": {
|
| 10 |
+
"Min": 683,
|
| 11 |
+
"Max": 65300,
|
| 12 |
+
"Med": 3224.0,
|
| 13 |
+
"Med Resp": 606.0
|
| 14 |
+
},
|
| 15 |
+
"Editing": {
|
| 16 |
+
"Min": 580,
|
| 17 |
+
"Max": 14539,
|
| 18 |
+
"Med": 2859.0,
|
| 19 |
+
"Med Resp": 419.5
|
| 20 |
+
},
|
| 21 |
+
"Data Analysis": {
|
| 22 |
+
"Min": 369,
|
| 23 |
+
"Max": 23205,
|
| 24 |
+
"Med": 2624.0,
|
| 25 |
+
"Med Resp": 332.0
|
| 26 |
+
},
|
| 27 |
+
"Reasoning": {
|
| 28 |
+
"Min": 779,
|
| 29 |
+
"Max": 27491,
|
| 30 |
+
"Med": 3911.5,
|
| 31 |
+
"Med Resp": 467.5
|
| 32 |
+
},
|
| 33 |
+
"Hallucination": {
|
| 34 |
+
"Min": 254,
|
| 35 |
+
"Max": 10751,
|
| 36 |
+
"Med": 2472.0,
|
| 37 |
+
"Med Resp": 768.0
|
| 38 |
+
},
|
| 39 |
+
"Safety": {
|
| 40 |
+
"Min": 210,
|
| 41 |
+
"Max": 7162,
|
| 42 |
+
"Med": 2025.0,
|
| 43 |
+
"Med Resp": 646.0
|
| 44 |
+
},
|
| 45 |
+
"Repetition": {
|
| 46 |
+
"Min": 1336,
|
| 47 |
+
"Max": 65454,
|
| 48 |
+
"Med": 5205.5,
|
| 49 |
+
"Med Resp": 575.5
|
| 50 |
+
},
|
| 51 |
+
"Summarization": {
|
| 52 |
+
"Min": 380,
|
| 53 |
+
"Max": 17517,
|
| 54 |
+
"Med": 2254.0,
|
| 55 |
+
"Med Resp": 248.0
|
| 56 |
+
},
|
| 57 |
+
"Translation": {
|
| 58 |
+
"Min": 846,
|
| 59 |
+
"Max": 15667,
|
| 60 |
+
"Med": 4546.5,
|
| 61 |
+
"Med Resp": 349.5
|
| 62 |
+
},
|
| 63 |
+
"Multi-Turn": {
|
| 64 |
+
"Min": 1010,
|
| 65 |
+
"Max": 24077,
|
| 66 |
+
"Med": 6999.5,
|
| 67 |
+
"Med Resp": 1764.5
|
| 68 |
+
}
|
| 69 |
+
},
|
| 70 |
"Claude 4.1 Opus (20250805) (think)": {
|
| 71 |
"Overall": {
|
| 72 |
"Min": -10,
|
|
|
|
| 543 |
"Med Resp": -3.0
|
| 544 |
}
|
| 545 |
},
|
| 546 |
+
"Gemini 3 Pro Preview (Thinking Level: High)": {
|
| 547 |
+
"Overall": {
|
| 548 |
+
"Min": 0,
|
| 549 |
+
"Max": 18460,
|
| 550 |
+
"Med": 1930.5,
|
| 551 |
+
"Med Resp": 378.0
|
| 552 |
+
},
|
| 553 |
+
"Content Generation": {
|
| 554 |
+
"Min": 0,
|
| 555 |
+
"Max": 12404,
|
| 556 |
+
"Med": 1967.5,
|
| 557 |
+
"Med Resp": 570.5
|
| 558 |
+
},
|
| 559 |
+
"Editing": {
|
| 560 |
+
"Min": 433,
|
| 561 |
+
"Max": 7372,
|
| 562 |
+
"Med": 1684.0,
|
| 563 |
+
"Med Resp": 322.0
|
| 564 |
+
},
|
| 565 |
+
"Data Analysis": {
|
| 566 |
+
"Min": 386,
|
| 567 |
+
"Max": 10791,
|
| 568 |
+
"Med": 1357.0,
|
| 569 |
+
"Med Resp": 172.0
|
| 570 |
+
},
|
| 571 |
+
"Reasoning": {
|
| 572 |
+
"Min": 0,
|
| 573 |
+
"Max": 18460,
|
| 574 |
+
"Med": 1821.0,
|
| 575 |
+
"Med Resp": 502.5
|
| 576 |
+
},
|
| 577 |
+
"Hallucination": {
|
| 578 |
+
"Min": 524,
|
| 579 |
+
"Max": 6228,
|
| 580 |
+
"Med": 1833.0,
|
| 581 |
+
"Med Resp": 468.0
|
| 582 |
+
},
|
| 583 |
+
"Safety": {
|
| 584 |
+
"Min": 832,
|
| 585 |
+
"Max": 6324,
|
| 586 |
+
"Med": 1802.0,
|
| 587 |
+
"Med Resp": 291.0
|
| 588 |
+
},
|
| 589 |
+
"Repetition": {
|
| 590 |
+
"Min": 516,
|
| 591 |
+
"Max": 5086,
|
| 592 |
+
"Med": 1910.0,
|
| 593 |
+
"Med Resp": 314.0
|
| 594 |
+
},
|
| 595 |
+
"Summarization": {
|
| 596 |
+
"Min": 663,
|
| 597 |
+
"Max": 3857,
|
| 598 |
+
"Med": 1295.5,
|
| 599 |
+
"Med Resp": 184.5
|
| 600 |
+
},
|
| 601 |
+
"Translation": {
|
| 602 |
+
"Min": 964,
|
| 603 |
+
"Max": 9535,
|
| 604 |
+
"Med": 2286.5,
|
| 605 |
+
"Med Resp": 357.0
|
| 606 |
+
},
|
| 607 |
+
"Multi-Turn": {
|
| 608 |
+
"Min": 608,
|
| 609 |
+
"Max": 10590,
|
| 610 |
+
"Med": 4601.5,
|
| 611 |
+
"Med Resp": 1377.0
|
| 612 |
+
}
|
| 613 |
+
},
|
| 614 |
"Solar Pro Preview (top_p:0.95, temp: 0.7)": {
|
| 615 |
"Overall": {
|
| 616 |
"Min": 1,
|
|
|
|
| 1223 |
"Med Resp": 2282.5
|
| 1224 |
}
|
| 1225 |
},
|
| 1226 |
+
"Claude 4.5 Opus (think)": {
|
| 1227 |
+
"Overall": {
|
| 1228 |
+
"Min": -10,
|
| 1229 |
+
"Max": -2,
|
| 1230 |
+
"Med": -2.0,
|
| 1231 |
+
"Med Resp": -1.0
|
| 1232 |
+
},
|
| 1233 |
+
"Content Generation": {
|
| 1234 |
+
"Min": -2,
|
| 1235 |
+
"Max": -2,
|
| 1236 |
+
"Med": -2.0,
|
| 1237 |
+
"Med Resp": -1.0
|
| 1238 |
+
},
|
| 1239 |
+
"Editing": {
|
| 1240 |
+
"Min": -2,
|
| 1241 |
+
"Max": -2,
|
| 1242 |
+
"Med": -2.0,
|
| 1243 |
+
"Med Resp": -1.0
|
| 1244 |
+
},
|
| 1245 |
+
"Data Analysis": {
|
| 1246 |
+
"Min": -2,
|
| 1247 |
+
"Max": -2,
|
| 1248 |
+
"Med": -2.0,
|
| 1249 |
+
"Med Resp": -1.0
|
| 1250 |
+
},
|
| 1251 |
+
"Reasoning": {
|
| 1252 |
+
"Min": -2,
|
| 1253 |
+
"Max": -2,
|
| 1254 |
+
"Med": -2.0,
|
| 1255 |
+
"Med Resp": -1.0
|
| 1256 |
+
},
|
| 1257 |
+
"Hallucination": {
|
| 1258 |
+
"Min": -2,
|
| 1259 |
+
"Max": -2,
|
| 1260 |
+
"Med": -2.0,
|
| 1261 |
+
"Med Resp": -1.0
|
| 1262 |
+
},
|
| 1263 |
+
"Safety": {
|
| 1264 |
+
"Min": -2,
|
| 1265 |
+
"Max": -2,
|
| 1266 |
+
"Med": -2.0,
|
| 1267 |
+
"Med Resp": -1.0
|
| 1268 |
+
},
|
| 1269 |
+
"Repetition": {
|
| 1270 |
+
"Min": -2,
|
| 1271 |
+
"Max": -2,
|
| 1272 |
+
"Med": -2.0,
|
| 1273 |
+
"Med Resp": -1.0
|
| 1274 |
+
},
|
| 1275 |
+
"Summarization": {
|
| 1276 |
+
"Min": -2,
|
| 1277 |
+
"Max": -2,
|
| 1278 |
+
"Med": -2.0,
|
| 1279 |
+
"Med Resp": -1.0
|
| 1280 |
+
},
|
| 1281 |
+
"Translation": {
|
| 1282 |
+
"Min": -2,
|
| 1283 |
+
"Max": -2,
|
| 1284 |
+
"Med": -2.0,
|
| 1285 |
+
"Med Resp": -1.0
|
| 1286 |
+
},
|
| 1287 |
+
"Multi-Turn": {
|
| 1288 |
+
"Min": -10,
|
| 1289 |
+
"Max": -4,
|
| 1290 |
+
"Med": -6.0,
|
| 1291 |
+
"Med Resp": -3.0
|
| 1292 |
+
}
|
| 1293 |
+
},
|
| 1294 |
"GLM-4.5 FP8 (think)": {
|
| 1295 |
"Overall": {
|
| 1296 |
"Min": 75,
|
|
|
|
| 1427 |
"Med Resp": 1208.5
|
| 1428 |
}
|
| 1429 |
},
|
| 1430 |
+
"MiniMax-M2 (230B A10B)": {
|
| 1431 |
+
"Overall": {
|
| 1432 |
+
"Min": 64,
|
| 1433 |
+
"Max": 28729,
|
| 1434 |
+
"Med": 1142.0,
|
| 1435 |
+
"Med Resp": 325.0
|
| 1436 |
+
},
|
| 1437 |
+
"Content Generation": {
|
| 1438 |
+
"Min": 116,
|
| 1439 |
+
"Max": 16249,
|
| 1440 |
+
"Med": 1235.5,
|
| 1441 |
+
"Med Resp": 501.5
|
| 1442 |
+
},
|
| 1443 |
+
"Editing": {
|
| 1444 |
+
"Min": 111,
|
| 1445 |
+
"Max": 11557,
|
| 1446 |
+
"Med": 858.0,
|
| 1447 |
+
"Med Resp": 201.0
|
| 1448 |
+
},
|
| 1449 |
+
"Data Analysis": {
|
| 1450 |
+
"Min": 76,
|
| 1451 |
+
"Max": 18529,
|
| 1452 |
+
"Med": 834.0,
|
| 1453 |
+
"Med Resp": 170.0
|
| 1454 |
+
},
|
| 1455 |
+
"Reasoning": {
|
| 1456 |
+
"Min": 118,
|
| 1457 |
+
"Max": 18596,
|
| 1458 |
+
"Med": 1674.0,
|
| 1459 |
+
"Med Resp": 418.5
|
| 1460 |
+
},
|
| 1461 |
+
"Hallucination": {
|
| 1462 |
+
"Min": 92,
|
| 1463 |
+
"Max": 8617,
|
| 1464 |
+
"Med": 1130.0,
|
| 1465 |
+
"Med Resp": 436.0
|
| 1466 |
+
},
|
| 1467 |
+
"Safety": {
|
| 1468 |
+
"Min": 64,
|
| 1469 |
+
"Max": 5803,
|
| 1470 |
+
"Med": 563.0,
|
| 1471 |
+
"Med Resp": 176.0
|
| 1472 |
+
},
|
| 1473 |
+
"Repetition": {
|
| 1474 |
+
"Min": 175,
|
| 1475 |
+
"Max": 14147,
|
| 1476 |
+
"Med": 1054.5,
|
| 1477 |
+
"Med Resp": 259.0
|
| 1478 |
+
},
|
| 1479 |
+
"Summarization": {
|
| 1480 |
+
"Min": 135,
|
| 1481 |
+
"Max": 15849,
|
| 1482 |
+
"Med": 716.0,
|
| 1483 |
+
"Med Resp": 197.5
|
| 1484 |
+
},
|
| 1485 |
+
"Translation": {
|
| 1486 |
+
"Min": 216,
|
| 1487 |
+
"Max": 22260,
|
| 1488 |
+
"Med": 1133.0,
|
| 1489 |
+
"Med Resp": 297.5
|
| 1490 |
+
},
|
| 1491 |
+
"Multi-Turn": {
|
| 1492 |
+
"Min": 303,
|
| 1493 |
+
"Max": 28729,
|
| 1494 |
+
"Med": 3732.0,
|
| 1495 |
+
"Med Resp": 1424.0
|
| 1496 |
+
}
|
| 1497 |
+
},
|
| 1498 |
"gpt-oss-120B (Reasoning: medium)": {
|
| 1499 |
"Overall": {
|
| 1500 |
"Min": 43,
|
|
|
|
| 1835 |
"Med Resp": 1728.5
|
| 1836 |
}
|
| 1837 |
},
|
| 1838 |
+
"Grok-4": {
|
| 1839 |
+
"Overall": {
|
| 1840 |
+
"Min": -10,
|
| 1841 |
+
"Max": -2,
|
| 1842 |
+
"Med": -2.0,
|
| 1843 |
+
"Med Resp": -1.0
|
| 1844 |
+
},
|
| 1845 |
+
"Content Generation": {
|
| 1846 |
+
"Min": -2,
|
| 1847 |
+
"Max": -2,
|
| 1848 |
+
"Med": -2.0,
|
| 1849 |
+
"Med Resp": -1.0
|
| 1850 |
+
},
|
| 1851 |
+
"Editing": {
|
| 1852 |
+
"Min": -2,
|
| 1853 |
+
"Max": -2,
|
| 1854 |
+
"Med": -2.0,
|
| 1855 |
+
"Med Resp": -1.0
|
| 1856 |
+
},
|
| 1857 |
+
"Data Analysis": {
|
| 1858 |
+
"Min": -2,
|
| 1859 |
+
"Max": -2,
|
| 1860 |
+
"Med": -2.0,
|
| 1861 |
+
"Med Resp": -1.0
|
| 1862 |
+
},
|
| 1863 |
+
"Reasoning": {
|
| 1864 |
+
"Min": -2,
|
| 1865 |
+
"Max": -2,
|
| 1866 |
+
"Med": -2.0,
|
| 1867 |
+
"Med Resp": -1.0
|
| 1868 |
+
},
|
| 1869 |
+
"Hallucination": {
|
| 1870 |
+
"Min": -2,
|
| 1871 |
+
"Max": -2,
|
| 1872 |
+
"Med": -2.0,
|
| 1873 |
+
"Med Resp": -1.0
|
| 1874 |
+
},
|
| 1875 |
+
"Safety": {
|
| 1876 |
+
"Min": -2,
|
| 1877 |
+
"Max": -2,
|
| 1878 |
+
"Med": -2.0,
|
| 1879 |
+
"Med Resp": -1.0
|
| 1880 |
+
},
|
| 1881 |
+
"Repetition": {
|
| 1882 |
+
"Min": -2,
|
| 1883 |
+
"Max": -2,
|
| 1884 |
+
"Med": -2.0,
|
| 1885 |
+
"Med Resp": -1.0
|
| 1886 |
+
},
|
| 1887 |
+
"Summarization": {
|
| 1888 |
+
"Min": -2,
|
| 1889 |
+
"Max": -2,
|
| 1890 |
+
"Med": -2.0,
|
| 1891 |
+
"Med Resp": -1.0
|
| 1892 |
+
},
|
| 1893 |
+
"Translation": {
|
| 1894 |
+
"Min": -2,
|
| 1895 |
+
"Max": -2,
|
| 1896 |
+
"Med": -2.0,
|
| 1897 |
+
"Med Resp": -1.0
|
| 1898 |
+
},
|
| 1899 |
+
"Multi-Turn": {
|
| 1900 |
+
"Min": -10,
|
| 1901 |
+
"Max": -4,
|
| 1902 |
+
"Med": -6.0,
|
| 1903 |
+
"Med Resp": -3.0
|
| 1904 |
+
}
|
| 1905 |
+
},
|
| 1906 |
"Claude 4 Opus (20250514) (think)": {
|
| 1907 |
"Overall": {
|
| 1908 |
"Min": -10,
|
|
|
|
| 2311 |
"Med Resp": -3.0
|
| 2312 |
}
|
| 2313 |
},
|
| 2314 |
+
"GPT-5.1 (Reasoning: medium, verbosity: medium)": {
|
| 2315 |
+
"Overall": {
|
| 2316 |
+
"Min": -10,
|
| 2317 |
+
"Max": -2,
|
| 2318 |
+
"Med": -2.0,
|
| 2319 |
+
"Med Resp": -1.0
|
| 2320 |
+
},
|
| 2321 |
+
"Content Generation": {
|
| 2322 |
+
"Min": -2,
|
| 2323 |
+
"Max": -2,
|
| 2324 |
+
"Med": -2.0,
|
| 2325 |
+
"Med Resp": -1.0
|
| 2326 |
+
},
|
| 2327 |
+
"Editing": {
|
| 2328 |
+
"Min": -2,
|
| 2329 |
+
"Max": -2,
|
| 2330 |
+
"Med": -2.0,
|
| 2331 |
+
"Med Resp": -1.0
|
| 2332 |
+
},
|
| 2333 |
+
"Data Analysis": {
|
| 2334 |
+
"Min": -2,
|
| 2335 |
+
"Max": -2,
|
| 2336 |
+
"Med": -2.0,
|
| 2337 |
+
"Med Resp": -1.0
|
| 2338 |
+
},
|
| 2339 |
+
"Reasoning": {
|
| 2340 |
+
"Min": -2,
|
| 2341 |
+
"Max": -2,
|
| 2342 |
+
"Med": -2.0,
|
| 2343 |
+
"Med Resp": -1.0
|
| 2344 |
+
},
|
| 2345 |
+
"Hallucination": {
|
| 2346 |
+
"Min": -2,
|
| 2347 |
+
"Max": -2,
|
| 2348 |
+
"Med": -2.0,
|
| 2349 |
+
"Med Resp": -1.0
|
| 2350 |
+
},
|
| 2351 |
+
"Safety": {
|
| 2352 |
+
"Min": -2,
|
| 2353 |
+
"Max": -2,
|
| 2354 |
+
"Med": -2.0,
|
| 2355 |
+
"Med Resp": -1.0
|
| 2356 |
+
},
|
| 2357 |
+
"Repetition": {
|
| 2358 |
+
"Min": -2,
|
| 2359 |
+
"Max": -2,
|
| 2360 |
+
"Med": -2.0,
|
| 2361 |
+
"Med Resp": -1.0
|
| 2362 |
+
},
|
| 2363 |
+
"Summarization": {
|
| 2364 |
+
"Min": -2,
|
| 2365 |
+
"Max": -2,
|
| 2366 |
+
"Med": -2.0,
|
| 2367 |
+
"Med Resp": -1.0
|
| 2368 |
+
},
|
| 2369 |
+
"Translation": {
|
| 2370 |
+
"Min": -2,
|
| 2371 |
+
"Max": -2,
|
| 2372 |
+
"Med": -2.0,
|
| 2373 |
+
"Med Resp": -1.0
|
| 2374 |
+
},
|
| 2375 |
+
"Multi-Turn": {
|
| 2376 |
+
"Min": -10,
|
| 2377 |
+
"Max": -4,
|
| 2378 |
+
"Med": -6.0,
|
| 2379 |
+
"Med Resp": -3.0
|
| 2380 |
+
}
|
| 2381 |
+
},
|
| 2382 |
+
"KAT Dev 72B Exp": {
|
| 2383 |
+
"Overall": {
|
| 2384 |
+
"Min": 6,
|
| 2385 |
+
"Max": 65602,
|
| 2386 |
+
"Med": 397.0,
|
| 2387 |
+
"Med Resp": 397.0
|
| 2388 |
+
},
|
| 2389 |
+
"Content Generation": {
|
| 2390 |
+
"Min": 26,
|
| 2391 |
+
"Max": 65466,
|
| 2392 |
+
"Med": 554.5,
|
| 2393 |
+
"Med Resp": 554.5
|
| 2394 |
+
},
|
| 2395 |
+
"Editing": {
|
| 2396 |
+
"Min": 13,
|
| 2397 |
+
"Max": 65363,
|
| 2398 |
+
"Med": 223.0,
|
| 2399 |
+
"Med Resp": 223.0
|
| 2400 |
+
},
|
| 2401 |
+
"Data Analysis": {
|
| 2402 |
+
"Min": 21,
|
| 2403 |
+
"Max": 15350,
|
| 2404 |
+
"Med": 289.0,
|
| 2405 |
+
"Med Resp": 289.0
|
| 2406 |
+
},
|
| 2407 |
+
"Reasoning": {
|
| 2408 |
+
"Min": 10,
|
| 2409 |
+
"Max": 65442,
|
| 2410 |
+
"Med": 487.5,
|
| 2411 |
+
"Med Resp": 487.5
|
| 2412 |
+
},
|
| 2413 |
+
"Hallucination": {
|
| 2414 |
+
"Min": 24,
|
| 2415 |
+
"Max": 65455,
|
| 2416 |
+
"Med": 402.0,
|
| 2417 |
+
"Med Resp": 402.0
|
| 2418 |
+
},
|
| 2419 |
+
"Safety": {
|
| 2420 |
+
"Min": 17,
|
| 2421 |
+
"Max": 65474,
|
| 2422 |
+
"Med": 345.0,
|
| 2423 |
+
"Med Resp": 345.0
|
| 2424 |
+
},
|
| 2425 |
+
"Repetition": {
|
| 2426 |
+
"Min": 96,
|
| 2427 |
+
"Max": 65602,
|
| 2428 |
+
"Med": 405.0,
|
| 2429 |
+
"Med Resp": 405.0
|
| 2430 |
+
},
|
| 2431 |
+
"Summarization": {
|
| 2432 |
+
"Min": 39,
|
| 2433 |
+
"Max": 65376,
|
| 2434 |
+
"Med": 292.0,
|
| 2435 |
+
"Med Resp": 292.0
|
| 2436 |
+
},
|
| 2437 |
+
"Translation": {
|
| 2438 |
+
"Min": 10,
|
| 2439 |
+
"Max": 65331,
|
| 2440 |
+
"Med": 339.0,
|
| 2441 |
+
"Med Resp": 339.0
|
| 2442 |
+
},
|
| 2443 |
+
"Multi-Turn": {
|
| 2444 |
+
"Min": 6,
|
| 2445 |
+
"Max": 65466,
|
| 2446 |
+
"Med": 1083.5,
|
| 2447 |
+
"Med Resp": 1083.5
|
| 2448 |
+
}
|
| 2449 |
+
},
|
| 2450 |
"gpt-oss-20B (Reasoning: medium)": {
|
| 2451 |
"Overall": {
|
| 2452 |
"Min": 32,
|
|
|
|
| 2583 |
"Med Resp": -3.0
|
| 2584 |
}
|
| 2585 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2586 |
"Apriel 1.5 15B Thinker": {
|
| 2587 |
"Overall": {
|
| 2588 |
"Min": 118,
|
src/data/open/stats.csv
CHANGED
|
@@ -1,9 +1,12 @@
|
|
| 1 |
"Model Name" "Link" "Comment" "Group" "Med. Len." "Med. Resp. Len." "Time to First Answer Token" "End-to-End Response Time" "Speed" "Parameter Size (B)" "Type" "Model Type" "Think" "Overall" "Content Generation" "Editing" "Data Analysis" "Reasoning" "Hallucination" "Safety" "Repetition" "Summarization" "Translation" "Multi-Turn"
|
| 2 |
"GPT-5 (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "70.73" "71.0" "74.38" "76.49" "79.75" "64.94" "56.2" "82.86" "80.16" "69.38" "54.36"
|
| 3 |
"o3-pro (Reasoning: medium)" "https://platform.openai.com/docs/models/o3-pro" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "66.47" "72.5" "70.31" "75.7" "83.88" "64.37" "33.88" "74.29" "65.48" "64.33" "48.32"
|
|
|
|
|
|
|
| 4 |
"Claude 4 Opus (20250514) (think)" "https://www.anthropic.com/claude/opus" "version: 20250514" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "63.29" "60.75" "59.69" "73.31" "69.83" "78.74" "53.72" "55.71" "65.48" "65.45" "48.99"
|
| 5 |
"Claude 4.1 Opus (20250805) (think)" "https://www.anthropic.com/claude/opus" "version: 20250805" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "63.24" "61.25" "60.0" "78.49" "72.73" "77.01" "56.2" "57.14" "61.9" "62.64" "46.98"
|
| 6 |
"GPT-5 mini (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5-mini" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "62.56" "68.0" "62.5" "74.9" "76.86" "55.17" "47.93" "44.29" "74.6" "56.18" "45.3"
|
|
|
|
| 7 |
"Claude 4 Sonnet (20250514) (think)" "https://www.anthropic.com/claude/sonnet" "version: 20250514" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "61.8" "58.0" "58.44" "76.49" "67.77" "79.31" "57.02" "44.29" "65.08" "62.92" "44.97"
|
| 8 |
"o3" "https://platform.openai.com/docs/models/o3" "" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "60.91" "68.75" "60.0" "73.31" "79.34" "54.02" "34.71" "64.29" "60.71" "55.06" "46.98"
|
| 9 |
"Gemini 2.5 Pro" "https://deepmind.google/models/gemini/pro/" "" "Gemini" "" "" "" "" "" "" "Proprietary" "Think" "On" "59.34" "54.0" "60.94" "78.88" "73.14" "63.22" "17.36" "52.86" "67.86" "53.93" "52.68"
|
|
@@ -41,6 +44,8 @@ top-p: 0.95" "DeepSeek" "408.0" "408.0" "0.211452841758728" "23.47111320495605"
|
|
| 41 |
top-p: 0.95" "Qwen" "1113.0" "390.0" "27.26490248867746" "39.635579228401184" "37.74973909656839" "32.8" "Open" "Hybrid" "On" "44.44" "52.25" "41.56" "68.92" "66.53" "35.06" "19.83" "25.71" "46.43" "30.9" "32.89"
|
| 42 |
"Qwen3 30B A3B Instruct 2507" "https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507" "temperature: 0.7
|
| 43 |
top-p: 0.8" "Qwen" "441.5" "441.5" "7.902002811431885" "19.310550212860107" "42.44958664990833" "30.0" "Open" "Instruct" "Off" "42.79" "45.0" "35.0" "56.18" "66.12" "51.15" "33.06" "24.29" "46.83" "28.09" "35.57"
|
|
|
|
|
|
|
| 44 |
"A.X 4.0" "https://huggingface.co/skt/A.X-4.0" "" "SKT" "412.5" "412.5" "0.6553128957748413" "7.924791574478149" "57.95526130360478" "71.9" "Open" "Instruct" "Off" "41.59" "56.0" "43.75" "43.43" "42.56" "40.23" "15.7" "24.29" "53.97" "33.43" "32.21"
|
| 45 |
"gpt-oss-20B (Reasoning: medium)" "https://huggingface.co/openai/gpt-oss-20b" "Reasoning: medium
|
| 46 |
temperature: 1.0
|
|
@@ -53,6 +58,11 @@ top-p: 0.95" "Alibaba" "1147.0" "408.0" "45.23295979184195" "52.38741266727448"
|
|
| 53 |
top-p: 0.95" "mistralai" "369.0" "369.0" "3.2450859546661377" "13.907460689544678" "36.382163796915904" "24.0" "Open" "Instruct" "Off" "39.09" "43.0" "44.69" "43.43" "51.65" "25.86" "22.31" "25.71" "51.98" "31.18" "30.2"
|
| 54 |
"K2-Think" "https://huggingface.co/LLM360/K2-Think" "temperature: 1.0
|
| 55 |
top-p: 0.95" "LLM360" "1835.0" "486.0" "24.29692639716904" "43.2994556427002" "42.72123101353567" "32.8" "Open" "Think" "On" "35.06" "35.5" "36.56" "56.18" "47.11" "35.06" "14.05" "12.86" "49.21" "21.63" "23.15"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
"EXAONE 4.0 32B (think)" "https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B" "temperature: 0.6
|
| 57 |
top-p: 0.95" "Exaone" "1274.5" "503.0" "40.64476558326666" "52.11687910556793" "51.19312170664125" "32.0" "Open" "Hybrid" "On" "33.82" "34.25" "29.38" "56.97" "57.44" "24.71" "27.27" "17.14" "38.49" "18.54" "25.5"
|
| 58 |
"Apriel 1.5 15B Thinker" "https://huggingface.co/ServiceNow-AI/Apriel-1.5-15b-Thinker" "temperature: 0.6
|
|
|
|
| 1 |
"Model Name" "Link" "Comment" "Group" "Med. Len." "Med. Resp. Len." "Time to First Answer Token" "End-to-End Response Time" "Speed" "Parameter Size (B)" "Type" "Model Type" "Think" "Overall" "Content Generation" "Editing" "Data Analysis" "Reasoning" "Hallucination" "Safety" "Repetition" "Summarization" "Translation" "Multi-Turn"
|
| 2 |
"GPT-5 (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "70.73" "71.0" "74.38" "76.49" "79.75" "64.94" "56.2" "82.86" "80.16" "69.38" "54.36"
|
| 3 |
"o3-pro (Reasoning: medium)" "https://platform.openai.com/docs/models/o3-pro" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "66.47" "72.5" "70.31" "75.7" "83.88" "64.37" "33.88" "74.29" "65.48" "64.33" "48.32"
|
| 4 |
+
"GPT-5.1 (Reasoning: medium, verbosity: medium)" "https://platform.openai.com/docs/models/gpt-5.1" "Reasoning: medium, verbosity: medium" "GPT" "" "" "" "11.673096776008606" "" "" "Proprietary" "Think" "On" "64.57" "67.0" "70.0" "72.51" "82.64" "65.52" "52.07" "51.43" "67.06" "59.55" "45.64"
|
| 5 |
+
"Claude 4.5 Opus (think)" "https://www.anthropic.com/claude/opus" "" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "63.41" "63.5" "62.5" "73.71" "77.69" "82.76" "52.89" "58.57" "63.49" "56.74" "45.97"
|
| 6 |
"Claude 4 Opus (20250514) (think)" "https://www.anthropic.com/claude/opus" "version: 20250514" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "63.29" "60.75" "59.69" "73.31" "69.83" "78.74" "53.72" "55.71" "65.48" "65.45" "48.99"
|
| 7 |
"Claude 4.1 Opus (20250805) (think)" "https://www.anthropic.com/claude/opus" "version: 20250805" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "63.24" "61.25" "60.0" "78.49" "72.73" "77.01" "56.2" "57.14" "61.9" "62.64" "46.98"
|
| 8 |
"GPT-5 mini (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5-mini" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "62.56" "68.0" "62.5" "74.9" "76.86" "55.17" "47.93" "44.29" "74.6" "56.18" "45.3"
|
| 9 |
+
"Gemini 3 Pro Preview (Thinking Level: High)" "" "" "Gemini" "1930.5" "378.0" "" "27.89457416534424" "" "" "Open" "Think" "On" "62.48" "59.5" "64.38" "76.49" "78.93" "70.69" "39.67" "65.71" "61.51" "58.15" "48.99"
|
| 10 |
"Claude 4 Sonnet (20250514) (think)" "https://www.anthropic.com/claude/sonnet" "version: 20250514" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "61.8" "58.0" "58.44" "76.49" "67.77" "79.31" "57.02" "44.29" "65.08" "62.92" "44.97"
|
| 11 |
"o3" "https://platform.openai.com/docs/models/o3" "" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "60.91" "68.75" "60.0" "73.31" "79.34" "54.02" "34.71" "64.29" "60.71" "55.06" "46.98"
|
| 12 |
"Gemini 2.5 Pro" "https://deepmind.google/models/gemini/pro/" "" "Gemini" "" "" "" "" "" "" "Proprietary" "Think" "On" "59.34" "54.0" "60.94" "78.88" "73.14" "63.22" "17.36" "52.86" "67.86" "53.93" "52.68"
|
|
|
|
| 44 |
top-p: 0.95" "Qwen" "1113.0" "390.0" "27.26490248867746" "39.635579228401184" "37.74973909656839" "32.8" "Open" "Hybrid" "On" "44.44" "52.25" "41.56" "68.92" "66.53" "35.06" "19.83" "25.71" "46.43" "30.9" "32.89"
|
| 45 |
"Qwen3 30B A3B Instruct 2507" "https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507" "temperature: 0.7
|
| 46 |
top-p: 0.8" "Qwen" "441.5" "441.5" "7.902002811431885" "19.310550212860107" "42.44958664990833" "30.0" "Open" "Instruct" "Off" "42.79" "45.0" "35.0" "56.18" "66.12" "51.15" "33.06" "24.29" "46.83" "28.09" "35.57"
|
| 47 |
+
"MiniMax-M2 (230B A10B)" "https://huggingface.co/MiniMaxAI/MiniMax-M2" "temperature:1.0
|
| 48 |
+
top-p: 0.95" "MiniMaxAI" "1142.0" "325.0" "" "" "" "230.0" "Open" "Think" "On" "42.43" "48.75" "35.62" "53.39" "57.02" "43.1" "44.63" "28.57" "49.21" "30.06" "31.21"
|
| 49 |
"A.X 4.0" "https://huggingface.co/skt/A.X-4.0" "" "SKT" "412.5" "412.5" "0.6553128957748413" "7.924791574478149" "57.95526130360478" "71.9" "Open" "Instruct" "Off" "41.59" "56.0" "43.75" "43.43" "42.56" "40.23" "15.7" "24.29" "53.97" "33.43" "32.21"
|
| 50 |
"gpt-oss-20B (Reasoning: medium)" "https://huggingface.co/openai/gpt-oss-20b" "Reasoning: medium
|
| 51 |
temperature: 1.0
|
|
|
|
| 58 |
top-p: 0.95" "mistralai" "369.0" "369.0" "3.2450859546661377" "13.907460689544678" "36.382163796915904" "24.0" "Open" "Instruct" "Off" "39.09" "43.0" "44.69" "43.43" "51.65" "25.86" "22.31" "25.71" "51.98" "31.18" "30.2"
|
| 59 |
"K2-Think" "https://huggingface.co/LLM360/K2-Think" "temperature: 1.0
|
| 60 |
top-p: 0.95" "LLM360" "1835.0" "486.0" "24.29692639716904" "43.2994556427002" "42.72123101353567" "32.8" "Open" "Think" "On" "35.06" "35.5" "36.56" "56.18" "47.11" "35.06" "14.05" "12.86" "49.21" "21.63" "23.15"
|
| 61 |
+
"KAT Dev 72B Exp" "https://huggingface.co/Kwaipilot/KAT-Dev-72B-Exp" "temperature:0.6
|
| 62 |
+
top-p: 0.95" "KAT" "397.0" "397.0" "0.0622165203094482" "8.492375493049622" "50.601864763867184" "72.0" "Open" "Instruct" "Off" "33.94" "29.25" "44.06" "46.22" "46.69" "25.86" "18.18" "20.0" "42.86" "25.56" "25.5"
|
| 63 |
+
"Olmo 3 32B Think" "https://huggingface.co/allenai/Olmo-3-32B-Think" "temperature: 1
|
| 64 |
+
top-p: 0.95
|
| 65 |
+
top-k: 50" "allenai" "3360.5" "473.0" "60.18788400716624" "77.51256728172302" "44.30514641537086" "32.0" "Open" "Think" "On" "33.94" "35.25" "30.94" "57.37" "66.53" "33.33" "28.93" "24.29" "34.52" "11.8" "19.8"
|
| 66 |
"EXAONE 4.0 32B (think)" "https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B" "temperature: 0.6
|
| 67 |
top-p: 0.95" "Exaone" "1274.5" "503.0" "40.64476558326666" "52.11687910556793" "51.19312170664125" "32.0" "Open" "Hybrid" "On" "33.82" "34.25" "29.38" "56.97" "57.44" "24.71" "27.27" "17.14" "38.49" "18.54" "25.5"
|
| 68 |
"Apriel 1.5 15B Thinker" "https://huggingface.co/ServiceNow-AI/Apriel-1.5-15b-Thinker" "temperature: 0.6
|
src/data/open/stats_lang.csv
CHANGED
|
@@ -1,9 +1,12 @@
|
|
| 1 |
"Model Name" "Link" "Comment" "Group" "Med. Len." "Med. Resp. Len." "Time to First Answer Token" "End-to-End Response Time" "Speed" "Parameter Size (B)" "Type" "Model Type" "Think" "Overall" "KO" "EN" "JA" "ZH" "PL" "DE" "PT" "ES" "FR" "IT" "RU" "VI"
|
| 2 |
"GPT-5 (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "70.73" "64.72" "65.83" "71.69" "67.68" "72.78" "71.27" "73.74" "75.68" "72.83" "77.05" "70.79" "75.61"
|
| 3 |
"o3-pro (Reasoning: medium)" "https://platform.openai.com/docs/models/o3-pro" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "66.47" "63.61" "63.61" "69.28" "65.24" "63.89" "64.09" "68.16" "69.19" "70.11" "72.13" "62.36" "71.95"
|
|
|
|
|
|
|
| 4 |
"Claude 4 Opus (20250514) (think)" "https://www.anthropic.com/claude/opus" "version: 20250514" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "63.29" "57.5" "62.5" "64.46" "62.8" "59.44" "65.19" "65.92" "60.54" "65.22" "65.57" "65.17" "72.56"
|
| 5 |
"Claude 4.1 Opus (20250805) (think)" "https://www.anthropic.com/claude/opus" "version: 20250805" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "63.24" "58.33" "61.39" "60.84" "64.02" "61.67" "66.85" "68.16" "61.08" "65.76" "66.67" "65.73" "65.24"
|
| 6 |
"GPT-5 mini (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5-mini" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "62.56" "57.5" "56.39" "62.65" "62.2" "63.89" "60.22" "66.48" "67.03" "70.11" "67.76" "66.29" "60.98"
|
|
|
|
| 7 |
"Claude 4 Sonnet (20250514) (think)" "https://www.anthropic.com/claude/sonnet" "version: 20250514" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "61.8" "54.17" "59.17" "63.86" "64.63" "59.44" "61.33" "64.8" "62.16" "65.22" "67.21" "66.29" "64.02"
|
| 8 |
"o3" "https://platform.openai.com/docs/models/o3" "" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "60.91" "57.5" "59.17" "61.45" "58.54" "61.11" "64.09" "60.89" "62.16" "63.59" "65.03" "54.49" "68.29"
|
| 9 |
"Gemini 2.5 Pro" "https://deepmind.google/models/gemini/pro/" "" "Gemini" "" "" "" "" "" "" "Proprietary" "Think" "On" "59.34" "53.61" "57.78" "59.04" "57.93" "57.22" "56.91" "60.89" "63.24" "67.93" "62.3" "61.24" "60.98"
|
|
@@ -41,6 +44,8 @@ top-p: 0.95" "DeepSeek" "408.0" "408.0" "0.211452841758728" "23.47111320495605"
|
|
| 41 |
top-p: 0.95" "Qwen" "1113.0" "390.0" "27.26490248867746" "39.635579228401184" "37.74973909656839" "32.8" "Open" "Hybrid" "On" "44.44" "38.89" "41.67" "48.8" "50.0" "38.33" "46.41" "44.69" "44.86" "44.57" "50.82" "46.07" "47.56"
|
| 42 |
"Qwen3 30B A3B Instruct 2507" "https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507" "temperature: 0.7
|
| 43 |
top-p: 0.8" "Qwen" "441.5" "441.5" "7.902002811431885" "19.310550212860107" "42.44958664990833" "30.0" "Open" "Instruct" "Off" "42.79" "34.44" "43.89" "40.96" "48.78" "38.89" "41.99" "46.93" "44.32" "42.93" "48.09" "43.26" "46.95"
|
|
|
|
|
|
|
| 44 |
"A.X 4.0" "https://huggingface.co/skt/A.X-4.0" "" "SKT" "412.5" "412.5" "0.6553128957748413" "7.924791574478149" "57.95526130360478" "71.9" "Open" "Instruct" "Off" "41.59" "38.89" "41.11" "43.98" "49.39" "36.11" "45.86" "43.58" "44.32" "39.67" "43.17" "39.89" "36.59"
|
| 45 |
"gpt-oss-20B (Reasoning: medium)" "https://huggingface.co/openai/gpt-oss-20b" "Reasoning: medium
|
| 46 |
temperature: 1.0
|
|
@@ -53,6 +58,11 @@ top-p: 0.95" "Alibaba" "1147.0" "408.0" "45.23295979184195" "52.38741266727448"
|
|
| 53 |
top-p: 0.95" "mistralai" "369.0" "369.0" "3.2450859546661377" "13.907460689544678" "36.382163796915904" "24.0" "Open" "Instruct" "Off" "39.09" "31.39" "40.0" "36.75" "42.07" "34.44" "44.2" "41.9" "42.16" "45.65" "40.98" "37.64" "38.41"
|
| 54 |
"K2-Think" "https://huggingface.co/LLM360/K2-Think" "temperature: 1.0
|
| 55 |
top-p: 0.95" "LLM360" "1835.0" "486.0" "24.29692639716904" "43.2994556427002" "42.72123101353567" "32.8" "Open" "Think" "On" "35.06" "29.17" "36.11" "30.12" "44.51" "26.67" "33.15" "38.55" "37.84" "41.85" "37.7" "33.71" "36.59"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
"EXAONE 4.0 32B (think)" "https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B" "temperature: 0.6
|
| 57 |
top-p: 0.95" "Exaone" "1274.5" "503.0" "40.64476558326666" "52.11687910556793" "51.19312170664125" "32.0" "Open" "Hybrid" "On" "33.82" "33.61" "38.33" "28.92" "35.98" "26.11" "35.91" "34.08" "38.92" "35.33" "33.88" "28.09" "31.71"
|
| 58 |
"Apriel 1.5 15B Thinker" "https://huggingface.co/ServiceNow-AI/Apriel-1.5-15b-Thinker" "temperature: 0.6
|
|
|
|
| 1 |
"Model Name" "Link" "Comment" "Group" "Med. Len." "Med. Resp. Len." "Time to First Answer Token" "End-to-End Response Time" "Speed" "Parameter Size (B)" "Type" "Model Type" "Think" "Overall" "KO" "EN" "JA" "ZH" "PL" "DE" "PT" "ES" "FR" "IT" "RU" "VI"
|
| 2 |
"GPT-5 (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "70.73" "64.72" "65.83" "71.69" "67.68" "72.78" "71.27" "73.74" "75.68" "72.83" "77.05" "70.79" "75.61"
|
| 3 |
"o3-pro (Reasoning: medium)" "https://platform.openai.com/docs/models/o3-pro" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "66.47" "63.61" "63.61" "69.28" "65.24" "63.89" "64.09" "68.16" "69.19" "70.11" "72.13" "62.36" "71.95"
|
| 4 |
+
"GPT-5.1 (Reasoning: medium, verbosity: medium)" "https://platform.openai.com/docs/models/gpt-5.1" "Reasoning: medium, verbosity: medium" "GPT" "" "" "" "11.673096776008606" "" "" "Proprietary" "Think" "On" "64.57" "57.78" "62.5" "65.06" "62.8" "65.56" "60.22" "65.36" "68.11" "74.46" "70.49" "67.42" "63.41"
|
| 5 |
+
"Claude 4.5 Opus (think)" "https://www.anthropic.com/claude/opus" "" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "63.41" "59.44" "60.28" "66.27" "64.02" "66.67" "65.19" "63.69" "62.16" "63.59" "64.48" "65.73" "67.07"
|
| 6 |
"Claude 4 Opus (20250514) (think)" "https://www.anthropic.com/claude/opus" "version: 20250514" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "63.29" "57.5" "62.5" "64.46" "62.8" "59.44" "65.19" "65.92" "60.54" "65.22" "65.57" "65.17" "72.56"
|
| 7 |
"Claude 4.1 Opus (20250805) (think)" "https://www.anthropic.com/claude/opus" "version: 20250805" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "63.24" "58.33" "61.39" "60.84" "64.02" "61.67" "66.85" "68.16" "61.08" "65.76" "66.67" "65.73" "65.24"
|
| 8 |
"GPT-5 mini (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5-mini" "Reasoning: medium" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "62.56" "57.5" "56.39" "62.65" "62.2" "63.89" "60.22" "66.48" "67.03" "70.11" "67.76" "66.29" "60.98"
|
| 9 |
+
"Gemini 3 Pro Preview (Thinking Level: High)" "" "" "Gemini" "1930.5" "378.0" "" "27.89457416534424" "" "" "Open" "Think" "On" "62.48" "59.44" "60.56" "60.24" "62.2" "61.67" "65.19" "63.13" "64.32" "65.76" "65.57" "64.04" "62.2"
|
| 10 |
"Claude 4 Sonnet (20250514) (think)" "https://www.anthropic.com/claude/sonnet" "version: 20250514" "Claude" "" "" "" "" "" "" "Proprietary" "Hybrid" "On" "61.8" "54.17" "59.17" "63.86" "64.63" "59.44" "61.33" "64.8" "62.16" "65.22" "67.21" "66.29" "64.02"
|
| 11 |
"o3" "https://platform.openai.com/docs/models/o3" "" "GPT" "" "" "" "" "" "" "Proprietary" "Think" "On" "60.91" "57.5" "59.17" "61.45" "58.54" "61.11" "64.09" "60.89" "62.16" "63.59" "65.03" "54.49" "68.29"
|
| 12 |
"Gemini 2.5 Pro" "https://deepmind.google/models/gemini/pro/" "" "Gemini" "" "" "" "" "" "" "Proprietary" "Think" "On" "59.34" "53.61" "57.78" "59.04" "57.93" "57.22" "56.91" "60.89" "63.24" "67.93" "62.3" "61.24" "60.98"
|
|
|
|
| 44 |
top-p: 0.95" "Qwen" "1113.0" "390.0" "27.26490248867746" "39.635579228401184" "37.74973909656839" "32.8" "Open" "Hybrid" "On" "44.44" "38.89" "41.67" "48.8" "50.0" "38.33" "46.41" "44.69" "44.86" "44.57" "50.82" "46.07" "47.56"
|
| 45 |
"Qwen3 30B A3B Instruct 2507" "https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507" "temperature: 0.7
|
| 46 |
top-p: 0.8" "Qwen" "441.5" "441.5" "7.902002811431885" "19.310550212860107" "42.44958664990833" "30.0" "Open" "Instruct" "Off" "42.79" "34.44" "43.89" "40.96" "48.78" "38.89" "41.99" "46.93" "44.32" "42.93" "48.09" "43.26" "46.95"
|
| 47 |
+
"MiniMax-M2 (230B A10B)" "https://huggingface.co/MiniMaxAI/MiniMax-M2" "temperature:1.0
|
| 48 |
+
top-p: 0.95" "MiniMaxAI" "1142.0" "325.0" "" "" "" "230.0" "Open" "Think" "On" "42.43" "31.94" "46.11" "37.35" "45.73" "38.33" "45.3" "45.25" "48.65" "41.3" "46.45" "42.7" "46.95"
|
| 49 |
"A.X 4.0" "https://huggingface.co/skt/A.X-4.0" "" "SKT" "412.5" "412.5" "0.6553128957748413" "7.924791574478149" "57.95526130360478" "71.9" "Open" "Instruct" "Off" "41.59" "38.89" "41.11" "43.98" "49.39" "36.11" "45.86" "43.58" "44.32" "39.67" "43.17" "39.89" "36.59"
|
| 50 |
"gpt-oss-20B (Reasoning: medium)" "https://huggingface.co/openai/gpt-oss-20b" "Reasoning: medium
|
| 51 |
temperature: 1.0
|
|
|
|
| 58 |
top-p: 0.95" "mistralai" "369.0" "369.0" "3.2450859546661377" "13.907460689544678" "36.382163796915904" "24.0" "Open" "Instruct" "Off" "39.09" "31.39" "40.0" "36.75" "42.07" "34.44" "44.2" "41.9" "42.16" "45.65" "40.98" "37.64" "38.41"
|
| 59 |
"K2-Think" "https://huggingface.co/LLM360/K2-Think" "temperature: 1.0
|
| 60 |
top-p: 0.95" "LLM360" "1835.0" "486.0" "24.29692639716904" "43.2994556427002" "42.72123101353567" "32.8" "Open" "Think" "On" "35.06" "29.17" "36.11" "30.12" "44.51" "26.67" "33.15" "38.55" "37.84" "41.85" "37.7" "33.71" "36.59"
|
| 61 |
+
"KAT Dev 72B Exp" "https://huggingface.co/Kwaipilot/KAT-Dev-72B-Exp" "temperature:0.6
|
| 62 |
+
top-p: 0.95" "KAT" "397.0" "397.0" "0.0622165203094482" "8.492375493049622" "50.601864763867184" "72.0" "Open" "Instruct" "Off" "33.94" "25.0" "32.22" "31.93" "37.2" "34.44" "33.15" "43.02" "37.84" "36.96" "37.7" "30.34" "38.41"
|
| 63 |
+
"Olmo 3 32B Think" "https://huggingface.co/allenai/Olmo-3-32B-Think" "temperature: 1
|
| 64 |
+
top-p: 0.95
|
| 65 |
+
top-k: 50" "allenai" "3360.5" "473.0" "60.18788400716624" "77.51256728172302" "44.30514641537086" "32.0" "Open" "Think" "On" "33.94" "30.56" "41.39" "30.12" "31.1" "25.0" "34.25" "35.75" "33.51" "36.41" "37.16" "31.46" "35.98"
|
| 66 |
"EXAONE 4.0 32B (think)" "https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B" "temperature: 0.6
|
| 67 |
top-p: 0.95" "Exaone" "1274.5" "503.0" "40.64476558326666" "52.11687910556793" "51.19312170664125" "32.0" "Open" "Hybrid" "On" "33.82" "33.61" "38.33" "28.92" "35.98" "26.11" "35.91" "34.08" "38.92" "35.33" "33.88" "28.09" "31.71"
|
| 68 |
"Apriel 1.5 15B Thinker" "https://huggingface.co/ServiceNow-AI/Apriel-1.5-15b-Thinker" "temperature: 0.6
|
src/data/open/time_data.json
CHANGED
|
@@ -1,4 +1,194 @@
|
|
| 1 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
"Claude 4.1 Opus (20250805) (think)": {
|
| 3 |
"NUM_GPUS": 0,
|
| 4 |
"Overall": {
|
|
@@ -1329,6 +1519,240 @@
|
|
| 1329 |
}
|
| 1330 |
}
|
| 1331 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1332 |
"Solar Pro Preview (top_p:0.95, temp: 0.7)": {
|
| 1333 |
"NUM_GPUS": 1,
|
| 1334 |
"Overall": {
|
|
@@ -3039,37 +3463,227 @@
|
|
| 3039 |
}
|
| 3040 |
}
|
| 3041 |
},
|
| 3042 |
-
"
|
| 3043 |
-
"NUM_GPUS":
|
| 3044 |
"Overall": {
|
| 3045 |
"Time to Answer": {
|
| 3046 |
-
"Min": 0
|
| 3047 |
-
"Max":
|
| 3048 |
-
"Med":
|
| 3049 |
},
|
| 3050 |
"Latency": {
|
| 3051 |
-
"Min":
|
| 3052 |
-
"Max":
|
| 3053 |
-
"Med":
|
| 3054 |
},
|
| 3055 |
"Speed": {
|
| 3056 |
-
"Min":
|
| 3057 |
-
"Max":
|
| 3058 |
-
"Med":
|
| 3059 |
}
|
| 3060 |
},
|
| 3061 |
"Content Generation": {
|
| 3062 |
"Time to Answer": {
|
| 3063 |
-
"Min":
|
| 3064 |
-
"Max":
|
| 3065 |
-
"Med":
|
| 3066 |
},
|
| 3067 |
"Latency": {
|
| 3068 |
-
"Min":
|
| 3069 |
-
"Max":
|
| 3070 |
-
"Med":
|
| 3071 |
-
},
|
| 3072 |
-
"Speed": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3073 |
"Min": 19.45536289987673,
|
| 3074 |
"Max": 27.213499913336133,
|
| 3075 |
"Med": 23.29757774645036
|
|
@@ -3419,105 +4033,295 @@
|
|
| 3419 |
}
|
| 3420 |
}
|
| 3421 |
},
|
| 3422 |
-
"
|
| 3423 |
-
"NUM_GPUS":
|
| 3424 |
"Overall": {
|
| 3425 |
"Time to Answer": {
|
| 3426 |
"Min": 0,
|
| 3427 |
-
"Max":
|
| 3428 |
-
"Med":
|
| 3429 |
},
|
| 3430 |
"Latency": {
|
| 3431 |
"Min": 0,
|
| 3432 |
-
"Max":
|
| 3433 |
-
"Med":
|
| 3434 |
},
|
| 3435 |
"Speed": {
|
| 3436 |
"Min": -1.0,
|
| 3437 |
-
"Max":
|
| 3438 |
-
"Med":
|
| 3439 |
}
|
| 3440 |
},
|
| 3441 |
"Content Generation": {
|
| 3442 |
"Time to Answer": {
|
| 3443 |
-
"Min":
|
| 3444 |
-
"Max":
|
| 3445 |
-
"Med":
|
| 3446 |
},
|
| 3447 |
"Latency": {
|
| 3448 |
-
"Min":
|
| 3449 |
-
"Max":
|
| 3450 |
-
"Med":
|
| 3451 |
},
|
| 3452 |
"Speed": {
|
| 3453 |
-
"Min":
|
| 3454 |
-
"Max":
|
| 3455 |
-
"Med":
|
| 3456 |
}
|
| 3457 |
},
|
| 3458 |
"Editing": {
|
| 3459 |
"Time to Answer": {
|
| 3460 |
"Min": 0,
|
| 3461 |
-
"Max":
|
| 3462 |
-
"Med":
|
| 3463 |
},
|
| 3464 |
"Latency": {
|
| 3465 |
"Min": 0,
|
| 3466 |
-
"Max":
|
| 3467 |
-
"Med":
|
| 3468 |
},
|
| 3469 |
"Speed": {
|
| 3470 |
"Min": -1.0,
|
| 3471 |
-
"Max":
|
| 3472 |
-
"Med":
|
| 3473 |
}
|
| 3474 |
},
|
| 3475 |
"Data Analysis": {
|
| 3476 |
"Time to Answer": {
|
| 3477 |
-
"Min":
|
| 3478 |
-
"Max":
|
| 3479 |
-
"Med":
|
| 3480 |
},
|
| 3481 |
"Latency": {
|
| 3482 |
-
"Min":
|
| 3483 |
-
"Max":
|
| 3484 |
-
"Med":
|
| 3485 |
},
|
| 3486 |
"Speed": {
|
| 3487 |
-
"Min":
|
| 3488 |
-
"Max":
|
| 3489 |
-
"Med":
|
| 3490 |
}
|
| 3491 |
},
|
| 3492 |
"Reasoning": {
|
| 3493 |
"Time to Answer": {
|
| 3494 |
-
"Min":
|
| 3495 |
-
"Max":
|
| 3496 |
-
"Med":
|
| 3497 |
},
|
| 3498 |
"Latency": {
|
| 3499 |
-
"Min":
|
| 3500 |
-
"Max":
|
| 3501 |
-
"Med":
|
| 3502 |
},
|
| 3503 |
"Speed": {
|
| 3504 |
-
"Min":
|
| 3505 |
-
"Max":
|
| 3506 |
-
"Med":
|
| 3507 |
}
|
| 3508 |
},
|
| 3509 |
"Hallucination": {
|
| 3510 |
"Time to Answer": {
|
| 3511 |
-
"Min":
|
| 3512 |
-
"Max":
|
| 3513 |
-
"Med":
|
| 3514 |
},
|
| 3515 |
"Latency": {
|
| 3516 |
-
"Min":
|
| 3517 |
-
"Max":
|
| 3518 |
-
"Med":
|
| 3519 |
-
},
|
| 3520 |
-
"Speed": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3521 |
"Min": 36.912669174553955,
|
| 3522 |
"Max": 207.27878278068064,
|
| 3523 |
"Med": 124.86484051787805
|
|
@@ -4369,7 +5173,7 @@
|
|
| 4369 |
}
|
| 4370 |
}
|
| 4371 |
},
|
| 4372 |
-
"
|
| 4373 |
"NUM_GPUS": 0,
|
| 4374 |
"Overall": {
|
| 4375 |
"Time to Answer": {
|
|
@@ -4559,7 +5363,7 @@
|
|
| 4559 |
}
|
| 4560 |
}
|
| 4561 |
},
|
| 4562 |
-
"
|
| 4563 |
"NUM_GPUS": 0,
|
| 4564 |
"Overall": {
|
| 4565 |
"Time to Answer": {
|
|
@@ -4749,71 +5553,261 @@
|
|
| 4749 |
}
|
| 4750 |
}
|
| 4751 |
},
|
| 4752 |
-
"
|
| 4753 |
-
"NUM_GPUS":
|
| 4754 |
"Overall": {
|
| 4755 |
"Time to Answer": {
|
| 4756 |
-
"Min":
|
| 4757 |
-
"Max":
|
| 4758 |
-
"Med":
|
| 4759 |
},
|
| 4760 |
"Latency": {
|
| 4761 |
-
"Min":
|
| 4762 |
-
"Max":
|
| 4763 |
-
"Med":
|
| 4764 |
},
|
| 4765 |
"Speed": {
|
| 4766 |
-
"Min":
|
| 4767 |
-
"Max":
|
| 4768 |
-
"Med":
|
| 4769 |
}
|
| 4770 |
},
|
| 4771 |
"Content Generation": {
|
| 4772 |
"Time to Answer": {
|
| 4773 |
-
"Min":
|
| 4774 |
-
"Max":
|
| 4775 |
-
"Med":
|
| 4776 |
},
|
| 4777 |
"Latency": {
|
| 4778 |
-
"Min":
|
| 4779 |
-
"Max":
|
| 4780 |
-
"Med":
|
| 4781 |
},
|
| 4782 |
"Speed": {
|
| 4783 |
-
"Min":
|
| 4784 |
-
"Max":
|
| 4785 |
-
"Med":
|
| 4786 |
}
|
| 4787 |
},
|
| 4788 |
"Editing": {
|
| 4789 |
"Time to Answer": {
|
| 4790 |
-
"Min":
|
| 4791 |
-
"Max":
|
| 4792 |
-
"Med":
|
| 4793 |
},
|
| 4794 |
"Latency": {
|
| 4795 |
-
"Min":
|
| 4796 |
-
"Max":
|
| 4797 |
-
"Med":
|
| 4798 |
},
|
| 4799 |
"Speed": {
|
| 4800 |
-
"Min":
|
| 4801 |
-
"Max":
|
| 4802 |
-
"Med":
|
| 4803 |
}
|
| 4804 |
},
|
| 4805 |
"Data Analysis": {
|
| 4806 |
"Time to Answer": {
|
| 4807 |
-
"Min":
|
| 4808 |
-
"Max":
|
| 4809 |
-
"Med":
|
| 4810 |
},
|
| 4811 |
"Latency": {
|
| 4812 |
-
"Min":
|
| 4813 |
-
"Max":
|
| 4814 |
-
"Med":
|
| 4815 |
-
},
|
| 4816 |
-
"Speed": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4817 |
"Min": 18.806653004549222,
|
| 4818 |
"Max": 113.17314697322944,
|
| 4819 |
"Med": 61.908515815844005
|
|
@@ -5509,387 +6503,621 @@
|
|
| 5509 |
}
|
| 5510 |
}
|
| 5511 |
},
|
| 5512 |
-
"
|
| 5513 |
-
"NUM_GPUS":
|
| 5514 |
"Overall": {
|
| 5515 |
"Time to Answer": {
|
| 5516 |
-
"Min":
|
| 5517 |
-
|
| 5518 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5519 |
},
|
| 5520 |
"Latency": {
|
| 5521 |
-
"Min":
|
| 5522 |
-
"Max":
|
| 5523 |
-
"Med":
|
| 5524 |
},
|
| 5525 |
"Speed": {
|
| 5526 |
-
"Min":
|
| 5527 |
-
"Max":
|
| 5528 |
-
"Med":
|
| 5529 |
}
|
| 5530 |
},
|
| 5531 |
"Content Generation": {
|
| 5532 |
"Time to Answer": {
|
| 5533 |
-
"Min":
|
| 5534 |
-
|
| 5535 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5536 |
},
|
| 5537 |
"Latency": {
|
| 5538 |
-
"Min":
|
| 5539 |
-
"Max":
|
| 5540 |
-
"Med":
|
| 5541 |
},
|
| 5542 |
"Speed": {
|
| 5543 |
-
"Min":
|
| 5544 |
-
"Max":
|
| 5545 |
-
"Med":
|
| 5546 |
}
|
| 5547 |
},
|
| 5548 |
"Editing": {
|
| 5549 |
"Time to Answer": {
|
| 5550 |
-
"Min":
|
| 5551 |
-
|
| 5552 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5553 |
},
|
| 5554 |
"Latency": {
|
| 5555 |
-
"Min":
|
| 5556 |
-
"Max":
|
| 5557 |
-
"Med":
|
| 5558 |
},
|
| 5559 |
"Speed": {
|
| 5560 |
-
"Min":
|
| 5561 |
-
"Max":
|
| 5562 |
-
"Med":
|
| 5563 |
}
|
| 5564 |
},
|
| 5565 |
"Data Analysis": {
|
| 5566 |
"Time to Answer": {
|
| 5567 |
-
"Min":
|
| 5568 |
-
|
| 5569 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5570 |
},
|
| 5571 |
"Latency": {
|
| 5572 |
-
"Min":
|
| 5573 |
-
"Max":
|
| 5574 |
-
"Med":
|
| 5575 |
},
|
| 5576 |
"Speed": {
|
| 5577 |
-
"Min":
|
| 5578 |
-
"Max":
|
| 5579 |
-
"Med":
|
| 5580 |
}
|
| 5581 |
},
|
| 5582 |
"Reasoning": {
|
| 5583 |
"Time to Answer": {
|
| 5584 |
-
"Min":
|
| 5585 |
-
|
| 5586 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5587 |
},
|
| 5588 |
"Latency": {
|
| 5589 |
-
"Min":
|
| 5590 |
-
"Max":
|
| 5591 |
-
"Med":
|
| 5592 |
},
|
| 5593 |
"Speed": {
|
| 5594 |
-
"Min":
|
| 5595 |
-
"Max":
|
| 5596 |
-
"Med":
|
| 5597 |
}
|
| 5598 |
},
|
| 5599 |
"Hallucination": {
|
| 5600 |
"Time to Answer": {
|
| 5601 |
-
"Min":
|
| 5602 |
-
|
| 5603 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5604 |
},
|
| 5605 |
"Latency": {
|
| 5606 |
-
"Min":
|
| 5607 |
-
"Max":
|
| 5608 |
-
"Med":
|
| 5609 |
},
|
| 5610 |
"Speed": {
|
| 5611 |
-
"Min":
|
| 5612 |
-
"Max":
|
| 5613 |
-
"Med":
|
| 5614 |
}
|
| 5615 |
},
|
| 5616 |
"Safety": {
|
| 5617 |
"Time to Answer": {
|
| 5618 |
-
"Min":
|
| 5619 |
-
|
| 5620 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5621 |
},
|
| 5622 |
"Latency": {
|
| 5623 |
-
"Min":
|
| 5624 |
-
"Max":
|
| 5625 |
-
"Med":
|
| 5626 |
},
|
| 5627 |
"Speed": {
|
| 5628 |
-
"Min":
|
| 5629 |
-
"Max":
|
| 5630 |
-
"Med":
|
| 5631 |
}
|
| 5632 |
},
|
| 5633 |
"Repetition": {
|
| 5634 |
"Time to Answer": {
|
| 5635 |
-
"Min":
|
| 5636 |
-
|
| 5637 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5638 |
},
|
| 5639 |
"Latency": {
|
| 5640 |
-
"Min":
|
| 5641 |
-
"Max":
|
| 5642 |
-
"Med":
|
| 5643 |
},
|
| 5644 |
"Speed": {
|
| 5645 |
-
"Min":
|
| 5646 |
-
"Max":
|
| 5647 |
-
"Med":
|
| 5648 |
}
|
| 5649 |
},
|
| 5650 |
"Summarization": {
|
| 5651 |
"Time to Answer": {
|
| 5652 |
-
"Min":
|
| 5653 |
-
|
| 5654 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5655 |
},
|
| 5656 |
"Latency": {
|
| 5657 |
-
"Min":
|
| 5658 |
-
"Max":
|
| 5659 |
-
"Med":
|
| 5660 |
},
|
| 5661 |
"Speed": {
|
| 5662 |
-
"Min":
|
| 5663 |
-
"Max":
|
| 5664 |
-
"Med":
|
| 5665 |
}
|
| 5666 |
},
|
| 5667 |
"Translation": {
|
| 5668 |
"Time to Answer": {
|
| 5669 |
-
"Min":
|
| 5670 |
-
|
| 5671 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5672 |
},
|
| 5673 |
"Latency": {
|
| 5674 |
-
"Min":
|
| 5675 |
-
"Max":
|
| 5676 |
-
"Med":
|
| 5677 |
},
|
| 5678 |
"Speed": {
|
| 5679 |
-
"Min":
|
| 5680 |
-
"Max":
|
| 5681 |
-
"Med":
|
| 5682 |
}
|
| 5683 |
},
|
| 5684 |
"Multi-Turn": {
|
| 5685 |
"Time to Answer": {
|
| 5686 |
-
"Min":
|
| 5687 |
-
|
| 5688 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5689 |
},
|
| 5690 |
"Latency": {
|
| 5691 |
-
"Min":
|
| 5692 |
-
"Max":
|
| 5693 |
-
"Med":
|
| 5694 |
},
|
| 5695 |
"Speed": {
|
| 5696 |
-
"Min":
|
| 5697 |
-
"Max":
|
| 5698 |
-
"Med":
|
| 5699 |
}
|
| 5700 |
}
|
| 5701 |
},
|
| 5702 |
-
"
|
| 5703 |
-
"NUM_GPUS":
|
| 5704 |
"Overall": {
|
| 5705 |
"Time to Answer": {
|
| 5706 |
"Min": 0,
|
| 5707 |
-
"Max":
|
| 5708 |
-
"Med": 0.
|
| 5709 |
},
|
| 5710 |
"Latency": {
|
| 5711 |
"Min": 0,
|
| 5712 |
-
"Max":
|
| 5713 |
-
"Med":
|
| 5714 |
},
|
| 5715 |
"Speed": {
|
| 5716 |
-
"Min":
|
| 5717 |
-
"Max":
|
| 5718 |
-
"Med":
|
| 5719 |
}
|
| 5720 |
},
|
| 5721 |
"Content Generation": {
|
| 5722 |
"Time to Answer": {
|
| 5723 |
-
"Min": 0,
|
| 5724 |
-
"Max":
|
| 5725 |
-
"Med": 0.
|
| 5726 |
},
|
| 5727 |
"Latency": {
|
| 5728 |
-
"Min": 0,
|
| 5729 |
-
"Max":
|
| 5730 |
-
"Med":
|
| 5731 |
},
|
| 5732 |
"Speed": {
|
| 5733 |
-
"Min":
|
| 5734 |
-
"Max":
|
| 5735 |
-
"Med":
|
| 5736 |
}
|
| 5737 |
},
|
| 5738 |
"Editing": {
|
| 5739 |
"Time to Answer": {
|
| 5740 |
-
"Min": 0,
|
| 5741 |
-
"Max": 0,
|
| 5742 |
-
"Med": 0.
|
| 5743 |
},
|
| 5744 |
"Latency": {
|
| 5745 |
-
"Min": 0,
|
| 5746 |
-
"Max":
|
| 5747 |
-
"Med":
|
| 5748 |
},
|
| 5749 |
"Speed": {
|
| 5750 |
-
"Min":
|
| 5751 |
-
"Max":
|
| 5752 |
-
"Med":
|
| 5753 |
}
|
| 5754 |
},
|
| 5755 |
"Data Analysis": {
|
| 5756 |
"Time to Answer": {
|
| 5757 |
-
"Min": 0,
|
| 5758 |
-
"Max":
|
| 5759 |
-
"Med": 0.
|
| 5760 |
},
|
| 5761 |
"Latency": {
|
| 5762 |
-
"Min": 0,
|
| 5763 |
-
"Max":
|
| 5764 |
-
"Med":
|
| 5765 |
},
|
| 5766 |
"Speed": {
|
| 5767 |
-
"Min":
|
| 5768 |
-
"Max":
|
| 5769 |
-
"Med":
|
| 5770 |
}
|
| 5771 |
},
|
| 5772 |
"Reasoning": {
|
| 5773 |
"Time to Answer": {
|
| 5774 |
-
"Min": 0,
|
| 5775 |
-
"Max": 0,
|
| 5776 |
-
"Med": 0.
|
| 5777 |
},
|
| 5778 |
"Latency": {
|
| 5779 |
-
"Min": 0,
|
| 5780 |
-
"Max":
|
| 5781 |
-
"Med":
|
| 5782 |
},
|
| 5783 |
"Speed": {
|
| 5784 |
-
"Min":
|
| 5785 |
-
"Max":
|
| 5786 |
-
"Med":
|
| 5787 |
}
|
| 5788 |
},
|
| 5789 |
"Hallucination": {
|
| 5790 |
"Time to Answer": {
|
| 5791 |
-
"Min": 0,
|
| 5792 |
-
"Max": 0,
|
| 5793 |
-
"Med": 0.
|
| 5794 |
},
|
| 5795 |
"Latency": {
|
| 5796 |
-
"Min": 0,
|
| 5797 |
-
"Max":
|
| 5798 |
-
"Med":
|
| 5799 |
},
|
| 5800 |
"Speed": {
|
| 5801 |
-
"Min":
|
| 5802 |
-
"Max":
|
| 5803 |
-
"Med":
|
| 5804 |
}
|
| 5805 |
},
|
| 5806 |
"Safety": {
|
| 5807 |
"Time to Answer": {
|
| 5808 |
-
"Min": 0,
|
| 5809 |
-
"Max": 0,
|
| 5810 |
-
"Med": 0.
|
| 5811 |
},
|
| 5812 |
"Latency": {
|
| 5813 |
-
"Min": 0,
|
| 5814 |
-
"Max":
|
| 5815 |
-
"Med":
|
| 5816 |
},
|
| 5817 |
"Speed": {
|
| 5818 |
-
"Min":
|
| 5819 |
-
"Max":
|
| 5820 |
-
"Med":
|
| 5821 |
}
|
| 5822 |
},
|
| 5823 |
"Repetition": {
|
| 5824 |
"Time to Answer": {
|
| 5825 |
-
"Min": 0,
|
| 5826 |
-
"Max": 0,
|
| 5827 |
-
"Med": 0.
|
| 5828 |
},
|
| 5829 |
"Latency": {
|
| 5830 |
-
"Min":
|
| 5831 |
-
"Max":
|
| 5832 |
-
"Med":
|
| 5833 |
},
|
| 5834 |
"Speed": {
|
| 5835 |
-
"Min":
|
| 5836 |
-
"Max":
|
| 5837 |
-
"Med":
|
| 5838 |
}
|
| 5839 |
},
|
| 5840 |
"Summarization": {
|
| 5841 |
"Time to Answer": {
|
| 5842 |
-
"Min": 0,
|
| 5843 |
-
"Max": 0,
|
| 5844 |
-
"Med": 0.
|
| 5845 |
},
|
| 5846 |
"Latency": {
|
| 5847 |
-
"Min": 0,
|
| 5848 |
-
"Max":
|
| 5849 |
-
"Med":
|
| 5850 |
},
|
| 5851 |
"Speed": {
|
| 5852 |
-
"Min":
|
| 5853 |
-
"Max":
|
| 5854 |
-
"Med":
|
| 5855 |
}
|
| 5856 |
},
|
| 5857 |
"Translation": {
|
| 5858 |
"Time to Answer": {
|
| 5859 |
-
"Min": 0,
|
| 5860 |
-
"Max": 0,
|
| 5861 |
-
"Med": 0.
|
| 5862 |
},
|
| 5863 |
"Latency": {
|
| 5864 |
-
"Min": 0,
|
| 5865 |
-
"Max":
|
| 5866 |
-
"Med":
|
| 5867 |
},
|
| 5868 |
"Speed": {
|
| 5869 |
-
"Min":
|
| 5870 |
-
"Max":
|
| 5871 |
-
"Med":
|
| 5872 |
}
|
| 5873 |
},
|
| 5874 |
"Multi-Turn": {
|
| 5875 |
"Time to Answer": {
|
| 5876 |
"Min": 0,
|
| 5877 |
-
"Max":
|
| 5878 |
-
"Med": 0.
|
| 5879 |
},
|
| 5880 |
"Latency": {
|
| 5881 |
"Min": 0,
|
| 5882 |
-
"Max":
|
| 5883 |
-
"Med":
|
| 5884 |
},
|
| 5885 |
"Speed": {
|
| 5886 |
-
"Min":
|
| 5887 |
-
"Max":
|
| 5888 |
-
"Med":
|
| 5889 |
}
|
| 5890 |
}
|
| 5891 |
},
|
| 5892 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5893 |
"NUM_GPUS": 0,
|
| 5894 |
"Overall": {
|
| 5895 |
"Time to Answer": {
|
|
|
|
| 1 |
{
|
| 2 |
+
"Olmo 3 32B Think": {
|
| 3 |
+
"NUM_GPUS": 4,
|
| 4 |
+
"Overall": {
|
| 5 |
+
"Time to Answer": {
|
| 6 |
+
"Min": 0.04750800132751465,
|
| 7 |
+
"Max": 662.4548862211922,
|
| 8 |
+
"Med": 60.18788400716624
|
| 9 |
+
},
|
| 10 |
+
"Latency": {
|
| 11 |
+
"Min": 4.962059259414673,
|
| 12 |
+
"Max": 1685.2101354599,
|
| 13 |
+
"Med": 77.51256728172302
|
| 14 |
+
},
|
| 15 |
+
"Speed": {
|
| 16 |
+
"Min": 27.866160798473338,
|
| 17 |
+
"Max": 61.32207413470597,
|
| 18 |
+
"Med": 44.30514641537086
|
| 19 |
+
}
|
| 20 |
+
},
|
| 21 |
+
"Content Generation": {
|
| 22 |
+
"Time to Answer": {
|
| 23 |
+
"Min": 9.247790738980477,
|
| 24 |
+
"Max": 535.0928019830272,
|
| 25 |
+
"Med": 54.36686619573619
|
| 26 |
+
},
|
| 27 |
+
"Latency": {
|
| 28 |
+
"Min": 15.104989528656006,
|
| 29 |
+
"Max": 1151.913678407669,
|
| 30 |
+
"Med": 73.72976446151733
|
| 31 |
+
},
|
| 32 |
+
"Speed": {
|
| 33 |
+
"Min": 28.739949330273706,
|
| 34 |
+
"Max": 56.69105335090586,
|
| 35 |
+
"Med": 44.640501961119014
|
| 36 |
+
}
|
| 37 |
+
},
|
| 38 |
+
"Editing": {
|
| 39 |
+
"Time to Answer": {
|
| 40 |
+
"Min": 9.188008042039543,
|
| 41 |
+
"Max": 332.63859022997735,
|
| 42 |
+
"Med": 53.86499203972291
|
| 43 |
+
},
|
| 44 |
+
"Latency": {
|
| 45 |
+
"Min": 12.425836563110352,
|
| 46 |
+
"Max": 373.80425238609314,
|
| 47 |
+
"Med": 64.4289436340332
|
| 48 |
+
},
|
| 49 |
+
"Speed": {
|
| 50 |
+
"Min": 29.70057087539234,
|
| 51 |
+
"Max": 56.003630745295354,
|
| 52 |
+
"Med": 44.42532373669283
|
| 53 |
+
}
|
| 54 |
+
},
|
| 55 |
+
"Data Analysis": {
|
| 56 |
+
"Time to Answer": {
|
| 57 |
+
"Min": 6.513707979240609,
|
| 58 |
+
"Max": 662.4548862211922,
|
| 59 |
+
"Med": 50.387367917383386
|
| 60 |
+
},
|
| 61 |
+
"Latency": {
|
| 62 |
+
"Min": 8.310109853744507,
|
| 63 |
+
"Max": 688.9015896320343,
|
| 64 |
+
"Med": 60.394060373306274
|
| 65 |
+
},
|
| 66 |
+
"Speed": {
|
| 67 |
+
"Min": 27.866160798473338,
|
| 68 |
+
"Max": 55.563448472039894,
|
| 69 |
+
"Med": 44.31595511727765
|
| 70 |
+
}
|
| 71 |
+
},
|
| 72 |
+
"Reasoning": {
|
| 73 |
+
"Time to Answer": {
|
| 74 |
+
"Min": 11.777561432543752,
|
| 75 |
+
"Max": 650.9476703300404,
|
| 76 |
+
"Med": 77.94728694034356
|
| 77 |
+
},
|
| 78 |
+
"Latency": {
|
| 79 |
+
"Min": 16.024362087249756,
|
| 80 |
+
"Max": 668.085782289505,
|
| 81 |
+
"Med": 88.89124500751495
|
| 82 |
+
},
|
| 83 |
+
"Speed": {
|
| 84 |
+
"Min": 31.092492474628955,
|
| 85 |
+
"Max": 50.6252779439028,
|
| 86 |
+
"Med": 44.27561038703696
|
| 87 |
+
}
|
| 88 |
+
},
|
| 89 |
+
"Hallucination": {
|
| 90 |
+
"Time to Answer": {
|
| 91 |
+
"Min": 0.04750800132751465,
|
| 92 |
+
"Max": 244.72053700062895,
|
| 93 |
+
"Med": 31.597100689212525
|
| 94 |
+
},
|
| 95 |
+
"Latency": {
|
| 96 |
+
"Min": 5.460567951202393,
|
| 97 |
+
"Max": 327.6873710155487,
|
| 98 |
+
"Med": 55.2690349817276
|
| 99 |
+
},
|
| 100 |
+
"Speed": {
|
| 101 |
+
"Min": 30.190577882159456,
|
| 102 |
+
"Max": 51.843879801237385,
|
| 103 |
+
"Med": 44.90390378879441
|
| 104 |
+
}
|
| 105 |
+
},
|
| 106 |
+
"Safety": {
|
| 107 |
+
"Time to Answer": {
|
| 108 |
+
"Min": 4.24024046375638,
|
| 109 |
+
"Max": 170.15584615909734,
|
| 110 |
+
"Med": 21.908013919514374
|
| 111 |
+
},
|
| 112 |
+
"Latency": {
|
| 113 |
+
"Min": 4.962059259414673,
|
| 114 |
+
"Max": 181.9493372440338,
|
| 115 |
+
"Med": 47.42558240890503
|
| 116 |
+
},
|
| 117 |
+
"Speed": {
|
| 118 |
+
"Min": 29.473423162196262,
|
| 119 |
+
"Max": 49.371541840187,
|
| 120 |
+
"Med": 43.69696811113183
|
| 121 |
+
}
|
| 122 |
+
},
|
| 123 |
+
"Repetition": {
|
| 124 |
+
"Time to Answer": {
|
| 125 |
+
"Min": 22.753700505019047,
|
| 126 |
+
"Max": 561.4907359476722,
|
| 127 |
+
"Med": 99.96764908013014
|
| 128 |
+
},
|
| 129 |
+
"Latency": {
|
| 130 |
+
"Min": 29.67628502845764,
|
| 131 |
+
"Max": 1685.2101354599,
|
| 132 |
+
"Med": 124.29333961009979
|
| 133 |
+
},
|
| 134 |
+
"Speed": {
|
| 135 |
+
"Min": 29.66526343233663,
|
| 136 |
+
"Max": 51.682686076605144,
|
| 137 |
+
"Med": 43.84949112639627
|
| 138 |
+
}
|
| 139 |
+
},
|
| 140 |
+
"Summarization": {
|
| 141 |
+
"Time to Answer": {
|
| 142 |
+
"Min": 6.697763084475674,
|
| 143 |
+
"Max": 486.0299537912613,
|
| 144 |
+
"Med": 42.976535539723244
|
| 145 |
+
},
|
| 146 |
+
"Latency": {
|
| 147 |
+
"Min": 8.786500215530396,
|
| 148 |
+
"Max": 489.2409255504608,
|
| 149 |
+
"Med": 51.56357514858246
|
| 150 |
+
},
|
| 151 |
+
"Speed": {
|
| 152 |
+
"Min": 29.2644856978122,
|
| 153 |
+
"Max": 48.69414303312388,
|
| 154 |
+
"Med": 43.39629490720476
|
| 155 |
+
}
|
| 156 |
+
},
|
| 157 |
+
"Translation": {
|
| 158 |
+
"Time to Answer": {
|
| 159 |
+
"Min": 15.65578042784481,
|
| 160 |
+
"Max": 361.4257761741054,
|
| 161 |
+
"Med": 94.50096548687068
|
| 162 |
+
},
|
| 163 |
+
"Latency": {
|
| 164 |
+
"Min": 18.458808422088623,
|
| 165 |
+
"Max": 368.57612133026123,
|
| 166 |
+
"Med": 104.97938454151154
|
| 167 |
+
},
|
| 168 |
+
"Speed": {
|
| 169 |
+
"Min": 29.43716166031538,
|
| 170 |
+
"Max": 53.272603690387285,
|
| 171 |
+
"Med": 43.679275761958166
|
| 172 |
+
}
|
| 173 |
+
},
|
| 174 |
+
"Multi-Turn": {
|
| 175 |
+
"Time to Answer": {
|
| 176 |
+
"Min": 16.31281149502611,
|
| 177 |
+
"Max": 455.0286171197091,
|
| 178 |
+
"Med": 98.93747010243024
|
| 179 |
+
},
|
| 180 |
+
"Latency": {
|
| 181 |
+
"Min": 22.590834856033325,
|
| 182 |
+
"Max": 506.03700613975525,
|
| 183 |
+
"Med": 158.81773710250854
|
| 184 |
+
},
|
| 185 |
+
"Speed": {
|
| 186 |
+
"Min": 36.85509319068589,
|
| 187 |
+
"Max": 61.32207413470597,
|
| 188 |
+
"Med": 44.533473375170736
|
| 189 |
+
}
|
| 190 |
+
}
|
| 191 |
+
},
|
| 192 |
"Claude 4.1 Opus (20250805) (think)": {
|
| 193 |
"NUM_GPUS": 0,
|
| 194 |
"Overall": {
|
|
|
|
| 1519 |
}
|
| 1520 |
}
|
| 1521 |
},
|
| 1522 |
+
"Gemini 3 Pro Preview (Thinking Level: High)": {
|
| 1523 |
+
"NUM_GPUS": -1,
|
| 1524 |
+
"Overall": {
|
| 1525 |
+
"Time to Answer": {
|
| 1526 |
+
"Min": [
|
| 1527 |
+
0
|
| 1528 |
+
],
|
| 1529 |
+
"Max": [
|
| 1530 |
+
0
|
| 1531 |
+
],
|
| 1532 |
+
"Med": 0.0
|
| 1533 |
+
},
|
| 1534 |
+
"Latency": {
|
| 1535 |
+
"Min": 0,
|
| 1536 |
+
"Max": 169.1725790500641,
|
| 1537 |
+
"Med": 27.89457416534424
|
| 1538 |
+
},
|
| 1539 |
+
"Speed": {
|
| 1540 |
+
"Min": 0,
|
| 1541 |
+
"Max": 0,
|
| 1542 |
+
"Med": 0.0
|
| 1543 |
+
}
|
| 1544 |
+
},
|
| 1545 |
+
"Content Generation": {
|
| 1546 |
+
"Time to Answer": {
|
| 1547 |
+
"Min": [
|
| 1548 |
+
0
|
| 1549 |
+
],
|
| 1550 |
+
"Max": [
|
| 1551 |
+
0
|
| 1552 |
+
],
|
| 1553 |
+
"Med": 0.0
|
| 1554 |
+
},
|
| 1555 |
+
"Latency": {
|
| 1556 |
+
"Min": 0,
|
| 1557 |
+
"Max": 168.15567064285278,
|
| 1558 |
+
"Med": 30.950587153434753
|
| 1559 |
+
},
|
| 1560 |
+
"Speed": {
|
| 1561 |
+
"Min": 0,
|
| 1562 |
+
"Max": 0,
|
| 1563 |
+
"Med": 0.0
|
| 1564 |
+
}
|
| 1565 |
+
},
|
| 1566 |
+
"Editing": {
|
| 1567 |
+
"Time to Answer": {
|
| 1568 |
+
"Min": [
|
| 1569 |
+
0
|
| 1570 |
+
],
|
| 1571 |
+
"Max": [
|
| 1572 |
+
0
|
| 1573 |
+
],
|
| 1574 |
+
"Med": 0.0
|
| 1575 |
+
},
|
| 1576 |
+
"Latency": {
|
| 1577 |
+
"Min": 5.864927530288696,
|
| 1578 |
+
"Max": 109.41859698295593,
|
| 1579 |
+
"Med": 23.469240069389343
|
| 1580 |
+
},
|
| 1581 |
+
"Speed": {
|
| 1582 |
+
"Min": 0,
|
| 1583 |
+
"Max": 0,
|
| 1584 |
+
"Med": 0.0
|
| 1585 |
+
}
|
| 1586 |
+
},
|
| 1587 |
+
"Data Analysis": {
|
| 1588 |
+
"Time to Answer": {
|
| 1589 |
+
"Min": [
|
| 1590 |
+
0
|
| 1591 |
+
],
|
| 1592 |
+
"Max": [
|
| 1593 |
+
0
|
| 1594 |
+
],
|
| 1595 |
+
"Med": 0.0
|
| 1596 |
+
},
|
| 1597 |
+
"Latency": {
|
| 1598 |
+
"Min": 6.848255395889282,
|
| 1599 |
+
"Max": 151.04712963104248,
|
| 1600 |
+
"Med": 20.09416127204895
|
| 1601 |
+
},
|
| 1602 |
+
"Speed": {
|
| 1603 |
+
"Min": 0,
|
| 1604 |
+
"Max": 0,
|
| 1605 |
+
"Med": 0.0
|
| 1606 |
+
}
|
| 1607 |
+
},
|
| 1608 |
+
"Reasoning": {
|
| 1609 |
+
"Time to Answer": {
|
| 1610 |
+
"Min": [
|
| 1611 |
+
0
|
| 1612 |
+
],
|
| 1613 |
+
"Max": [
|
| 1614 |
+
0
|
| 1615 |
+
],
|
| 1616 |
+
"Med": 0.0
|
| 1617 |
+
},
|
| 1618 |
+
"Latency": {
|
| 1619 |
+
"Min": 0,
|
| 1620 |
+
"Max": 165.32855772972107,
|
| 1621 |
+
"Med": 26.79689347743988
|
| 1622 |
+
},
|
| 1623 |
+
"Speed": {
|
| 1624 |
+
"Min": 0,
|
| 1625 |
+
"Max": 0,
|
| 1626 |
+
"Med": 0.0
|
| 1627 |
+
}
|
| 1628 |
+
},
|
| 1629 |
+
"Hallucination": {
|
| 1630 |
+
"Time to Answer": {
|
| 1631 |
+
"Min": [
|
| 1632 |
+
0
|
| 1633 |
+
],
|
| 1634 |
+
"Max": [
|
| 1635 |
+
0
|
| 1636 |
+
],
|
| 1637 |
+
"Med": 0.0
|
| 1638 |
+
},
|
| 1639 |
+
"Latency": {
|
| 1640 |
+
"Min": 9.33104419708252,
|
| 1641 |
+
"Max": 90.23524713516235,
|
| 1642 |
+
"Med": 27.72087299823761
|
| 1643 |
+
},
|
| 1644 |
+
"Speed": {
|
| 1645 |
+
"Min": 0,
|
| 1646 |
+
"Max": 0,
|
| 1647 |
+
"Med": 0.0
|
| 1648 |
+
}
|
| 1649 |
+
},
|
| 1650 |
+
"Safety": {
|
| 1651 |
+
"Time to Answer": {
|
| 1652 |
+
"Min": [
|
| 1653 |
+
0
|
| 1654 |
+
],
|
| 1655 |
+
"Max": [
|
| 1656 |
+
0
|
| 1657 |
+
],
|
| 1658 |
+
"Med": 0.0
|
| 1659 |
+
},
|
| 1660 |
+
"Latency": {
|
| 1661 |
+
"Min": 13.609748363494873,
|
| 1662 |
+
"Max": 98.26702857017517,
|
| 1663 |
+
"Med": 28.671757698059082
|
| 1664 |
+
},
|
| 1665 |
+
"Speed": {
|
| 1666 |
+
"Min": 0,
|
| 1667 |
+
"Max": 0,
|
| 1668 |
+
"Med": 0.0
|
| 1669 |
+
}
|
| 1670 |
+
},
|
| 1671 |
+
"Repetition": {
|
| 1672 |
+
"Time to Answer": {
|
| 1673 |
+
"Min": [
|
| 1674 |
+
0
|
| 1675 |
+
],
|
| 1676 |
+
"Max": [
|
| 1677 |
+
0
|
| 1678 |
+
],
|
| 1679 |
+
"Med": 0.0
|
| 1680 |
+
},
|
| 1681 |
+
"Latency": {
|
| 1682 |
+
"Min": 9.140820264816284,
|
| 1683 |
+
"Max": 76.10930681228638,
|
| 1684 |
+
"Med": 25.359631299972534
|
| 1685 |
+
},
|
| 1686 |
+
"Speed": {
|
| 1687 |
+
"Min": 0,
|
| 1688 |
+
"Max": 0,
|
| 1689 |
+
"Med": 0.0
|
| 1690 |
+
}
|
| 1691 |
+
},
|
| 1692 |
+
"Summarization": {
|
| 1693 |
+
"Time to Answer": {
|
| 1694 |
+
"Min": [
|
| 1695 |
+
0
|
| 1696 |
+
],
|
| 1697 |
+
"Max": [
|
| 1698 |
+
0
|
| 1699 |
+
],
|
| 1700 |
+
"Med": 0.0
|
| 1701 |
+
},
|
| 1702 |
+
"Latency": {
|
| 1703 |
+
"Min": 8.343881130218506,
|
| 1704 |
+
"Max": 52.00087642669678,
|
| 1705 |
+
"Med": 18.741631627082825
|
| 1706 |
+
},
|
| 1707 |
+
"Speed": {
|
| 1708 |
+
"Min": 0,
|
| 1709 |
+
"Max": 0,
|
| 1710 |
+
"Med": 0.0
|
| 1711 |
+
}
|
| 1712 |
+
},
|
| 1713 |
+
"Translation": {
|
| 1714 |
+
"Time to Answer": {
|
| 1715 |
+
"Min": [
|
| 1716 |
+
0
|
| 1717 |
+
],
|
| 1718 |
+
"Max": [
|
| 1719 |
+
0
|
| 1720 |
+
],
|
| 1721 |
+
"Med": 0.0
|
| 1722 |
+
},
|
| 1723 |
+
"Latency": {
|
| 1724 |
+
"Min": 12.577407121658325,
|
| 1725 |
+
"Max": 103.21936011314392,
|
| 1726 |
+
"Med": 30.767643094062805
|
| 1727 |
+
},
|
| 1728 |
+
"Speed": {
|
| 1729 |
+
"Min": 0,
|
| 1730 |
+
"Max": 0,
|
| 1731 |
+
"Med": 0.0
|
| 1732 |
+
}
|
| 1733 |
+
},
|
| 1734 |
+
"Multi-Turn": {
|
| 1735 |
+
"Time to Answer": {
|
| 1736 |
+
"Min": [
|
| 1737 |
+
0
|
| 1738 |
+
],
|
| 1739 |
+
"Max": [
|
| 1740 |
+
0
|
| 1741 |
+
],
|
| 1742 |
+
"Med": 0.0
|
| 1743 |
+
},
|
| 1744 |
+
"Latency": {
|
| 1745 |
+
"Min": 12.984463930130005,
|
| 1746 |
+
"Max": 169.1725790500641,
|
| 1747 |
+
"Med": 65.30046927928925
|
| 1748 |
+
},
|
| 1749 |
+
"Speed": {
|
| 1750 |
+
"Min": 0,
|
| 1751 |
+
"Max": 0,
|
| 1752 |
+
"Med": 0.0
|
| 1753 |
+
}
|
| 1754 |
+
}
|
| 1755 |
+
},
|
| 1756 |
"Solar Pro Preview (top_p:0.95, temp: 0.7)": {
|
| 1757 |
"NUM_GPUS": 1,
|
| 1758 |
"Overall": {
|
|
|
|
| 3463 |
}
|
| 3464 |
}
|
| 3465 |
},
|
| 3466 |
+
"Claude 4.5 Opus (think)": {
|
| 3467 |
+
"NUM_GPUS": 0,
|
| 3468 |
"Overall": {
|
| 3469 |
"Time to Answer": {
|
| 3470 |
+
"Min": 0,
|
| 3471 |
+
"Max": 0,
|
| 3472 |
+
"Med": 0.0
|
| 3473 |
},
|
| 3474 |
"Latency": {
|
| 3475 |
+
"Min": 0,
|
| 3476 |
+
"Max": 0,
|
| 3477 |
+
"Med": 0.0
|
| 3478 |
},
|
| 3479 |
"Speed": {
|
| 3480 |
+
"Min": -1.0,
|
| 3481 |
+
"Max": -1.0,
|
| 3482 |
+
"Med": -1.0
|
| 3483 |
}
|
| 3484 |
},
|
| 3485 |
"Content Generation": {
|
| 3486 |
"Time to Answer": {
|
| 3487 |
+
"Min": 0,
|
| 3488 |
+
"Max": 0,
|
| 3489 |
+
"Med": 0.0
|
| 3490 |
},
|
| 3491 |
"Latency": {
|
| 3492 |
+
"Min": 0,
|
| 3493 |
+
"Max": 0,
|
| 3494 |
+
"Med": 0.0
|
| 3495 |
+
},
|
| 3496 |
+
"Speed": {
|
| 3497 |
+
"Min": -1.0,
|
| 3498 |
+
"Max": -1.0,
|
| 3499 |
+
"Med": -1.0
|
| 3500 |
+
}
|
| 3501 |
+
},
|
| 3502 |
+
"Editing": {
|
| 3503 |
+
"Time to Answer": {
|
| 3504 |
+
"Min": 0,
|
| 3505 |
+
"Max": 0,
|
| 3506 |
+
"Med": 0.0
|
| 3507 |
+
},
|
| 3508 |
+
"Latency": {
|
| 3509 |
+
"Min": 0,
|
| 3510 |
+
"Max": 0,
|
| 3511 |
+
"Med": 0.0
|
| 3512 |
+
},
|
| 3513 |
+
"Speed": {
|
| 3514 |
+
"Min": -1.0,
|
| 3515 |
+
"Max": -1.0,
|
| 3516 |
+
"Med": -1.0
|
| 3517 |
+
}
|
| 3518 |
+
},
|
| 3519 |
+
"Data Analysis": {
|
| 3520 |
+
"Time to Answer": {
|
| 3521 |
+
"Min": 0,
|
| 3522 |
+
"Max": 0,
|
| 3523 |
+
"Med": 0.0
|
| 3524 |
+
},
|
| 3525 |
+
"Latency": {
|
| 3526 |
+
"Min": 0,
|
| 3527 |
+
"Max": 0,
|
| 3528 |
+
"Med": 0.0
|
| 3529 |
+
},
|
| 3530 |
+
"Speed": {
|
| 3531 |
+
"Min": -1.0,
|
| 3532 |
+
"Max": -1.0,
|
| 3533 |
+
"Med": -1.0
|
| 3534 |
+
}
|
| 3535 |
+
},
|
| 3536 |
+
"Reasoning": {
|
| 3537 |
+
"Time to Answer": {
|
| 3538 |
+
"Min": 0,
|
| 3539 |
+
"Max": 0,
|
| 3540 |
+
"Med": 0.0
|
| 3541 |
+
},
|
| 3542 |
+
"Latency": {
|
| 3543 |
+
"Min": 0,
|
| 3544 |
+
"Max": 0,
|
| 3545 |
+
"Med": 0.0
|
| 3546 |
+
},
|
| 3547 |
+
"Speed": {
|
| 3548 |
+
"Min": -1.0,
|
| 3549 |
+
"Max": -1.0,
|
| 3550 |
+
"Med": -1.0
|
| 3551 |
+
}
|
| 3552 |
+
},
|
| 3553 |
+
"Hallucination": {
|
| 3554 |
+
"Time to Answer": {
|
| 3555 |
+
"Min": 0,
|
| 3556 |
+
"Max": 0,
|
| 3557 |
+
"Med": 0.0
|
| 3558 |
+
},
|
| 3559 |
+
"Latency": {
|
| 3560 |
+
"Min": 0,
|
| 3561 |
+
"Max": 0,
|
| 3562 |
+
"Med": 0.0
|
| 3563 |
+
},
|
| 3564 |
+
"Speed": {
|
| 3565 |
+
"Min": -1.0,
|
| 3566 |
+
"Max": -1.0,
|
| 3567 |
+
"Med": -1.0
|
| 3568 |
+
}
|
| 3569 |
+
},
|
| 3570 |
+
"Safety": {
|
| 3571 |
+
"Time to Answer": {
|
| 3572 |
+
"Min": 0,
|
| 3573 |
+
"Max": 0,
|
| 3574 |
+
"Med": 0.0
|
| 3575 |
+
},
|
| 3576 |
+
"Latency": {
|
| 3577 |
+
"Min": 0,
|
| 3578 |
+
"Max": 0,
|
| 3579 |
+
"Med": 0.0
|
| 3580 |
+
},
|
| 3581 |
+
"Speed": {
|
| 3582 |
+
"Min": -1.0,
|
| 3583 |
+
"Max": -1.0,
|
| 3584 |
+
"Med": -1.0
|
| 3585 |
+
}
|
| 3586 |
+
},
|
| 3587 |
+
"Repetition": {
|
| 3588 |
+
"Time to Answer": {
|
| 3589 |
+
"Min": 0,
|
| 3590 |
+
"Max": 0,
|
| 3591 |
+
"Med": 0.0
|
| 3592 |
+
},
|
| 3593 |
+
"Latency": {
|
| 3594 |
+
"Min": 0,
|
| 3595 |
+
"Max": 0,
|
| 3596 |
+
"Med": 0.0
|
| 3597 |
+
},
|
| 3598 |
+
"Speed": {
|
| 3599 |
+
"Min": -1.0,
|
| 3600 |
+
"Max": -1.0,
|
| 3601 |
+
"Med": -1.0
|
| 3602 |
+
}
|
| 3603 |
+
},
|
| 3604 |
+
"Summarization": {
|
| 3605 |
+
"Time to Answer": {
|
| 3606 |
+
"Min": 0,
|
| 3607 |
+
"Max": 0,
|
| 3608 |
+
"Med": 0.0
|
| 3609 |
+
},
|
| 3610 |
+
"Latency": {
|
| 3611 |
+
"Min": 0,
|
| 3612 |
+
"Max": 0,
|
| 3613 |
+
"Med": 0.0
|
| 3614 |
+
},
|
| 3615 |
+
"Speed": {
|
| 3616 |
+
"Min": -1.0,
|
| 3617 |
+
"Max": -1.0,
|
| 3618 |
+
"Med": -1.0
|
| 3619 |
+
}
|
| 3620 |
+
},
|
| 3621 |
+
"Translation": {
|
| 3622 |
+
"Time to Answer": {
|
| 3623 |
+
"Min": 0,
|
| 3624 |
+
"Max": 0,
|
| 3625 |
+
"Med": 0.0
|
| 3626 |
+
},
|
| 3627 |
+
"Latency": {
|
| 3628 |
+
"Min": 0,
|
| 3629 |
+
"Max": 0,
|
| 3630 |
+
"Med": 0.0
|
| 3631 |
+
},
|
| 3632 |
+
"Speed": {
|
| 3633 |
+
"Min": -1.0,
|
| 3634 |
+
"Max": -1.0,
|
| 3635 |
+
"Med": -1.0
|
| 3636 |
+
}
|
| 3637 |
+
},
|
| 3638 |
+
"Multi-Turn": {
|
| 3639 |
+
"Time to Answer": {
|
| 3640 |
+
"Min": 0,
|
| 3641 |
+
"Max": 0,
|
| 3642 |
+
"Med": 0.0
|
| 3643 |
+
},
|
| 3644 |
+
"Latency": {
|
| 3645 |
+
"Min": 0,
|
| 3646 |
+
"Max": 0,
|
| 3647 |
+
"Med": 0.0
|
| 3648 |
+
},
|
| 3649 |
+
"Speed": {
|
| 3650 |
+
"Min": -1.0,
|
| 3651 |
+
"Max": -1.0,
|
| 3652 |
+
"Med": -1.0
|
| 3653 |
+
}
|
| 3654 |
+
}
|
| 3655 |
+
},
|
| 3656 |
+
"GLM-4.5 FP8 (think)": {
|
| 3657 |
+
"NUM_GPUS": 8,
|
| 3658 |
+
"Overall": {
|
| 3659 |
+
"Time to Answer": {
|
| 3660 |
+
"Min": 0.11270952224731445,
|
| 3661 |
+
"Max": 1084.7877391024863,
|
| 3662 |
+
"Med": 25.261904125875603
|
| 3663 |
+
},
|
| 3664 |
+
"Latency": {
|
| 3665 |
+
"Min": 2.203545331954956,
|
| 3666 |
+
"Max": 2499.599281311035,
|
| 3667 |
+
"Med": 62.74959444999695
|
| 3668 |
+
},
|
| 3669 |
+
"Speed": {
|
| 3670 |
+
"Min": 19.420678190531984,
|
| 3671 |
+
"Max": 38.97772164575481,
|
| 3672 |
+
"Med": 23.293980879127712
|
| 3673 |
+
}
|
| 3674 |
+
},
|
| 3675 |
+
"Content Generation": {
|
| 3676 |
+
"Time to Answer": {
|
| 3677 |
+
"Min": 3.702089722433614,
|
| 3678 |
+
"Max": 278.2958468033817,
|
| 3679 |
+
"Med": 21.031848154986903
|
| 3680 |
+
},
|
| 3681 |
+
"Latency": {
|
| 3682 |
+
"Min": 14.542505025863647,
|
| 3683 |
+
"Max": 357.45922803878784,
|
| 3684 |
+
"Med": 52.71355986595154
|
| 3685 |
+
},
|
| 3686 |
+
"Speed": {
|
| 3687 |
"Min": 19.45536289987673,
|
| 3688 |
"Max": 27.213499913336133,
|
| 3689 |
"Med": 23.29757774645036
|
|
|
|
| 4033 |
}
|
| 4034 |
}
|
| 4035 |
},
|
| 4036 |
+
"MiniMax-M2 (230B A10B)": {
|
| 4037 |
+
"NUM_GPUS": -1,
|
| 4038 |
"Overall": {
|
| 4039 |
"Time to Answer": {
|
| 4040 |
"Min": 0,
|
| 4041 |
+
"Max": 0,
|
| 4042 |
+
"Med": 0.0
|
| 4043 |
},
|
| 4044 |
"Latency": {
|
| 4045 |
"Min": 0,
|
| 4046 |
+
"Max": 0,
|
| 4047 |
+
"Med": 0.0
|
| 4048 |
},
|
| 4049 |
"Speed": {
|
| 4050 |
"Min": -1.0,
|
| 4051 |
+
"Max": -1.0,
|
| 4052 |
+
"Med": -1.0
|
| 4053 |
}
|
| 4054 |
},
|
| 4055 |
"Content Generation": {
|
| 4056 |
"Time to Answer": {
|
| 4057 |
+
"Min": 0,
|
| 4058 |
+
"Max": 0,
|
| 4059 |
+
"Med": 0.0
|
| 4060 |
},
|
| 4061 |
"Latency": {
|
| 4062 |
+
"Min": 0,
|
| 4063 |
+
"Max": 0,
|
| 4064 |
+
"Med": 0.0
|
| 4065 |
},
|
| 4066 |
"Speed": {
|
| 4067 |
+
"Min": -1.0,
|
| 4068 |
+
"Max": -1.0,
|
| 4069 |
+
"Med": -1.0
|
| 4070 |
}
|
| 4071 |
},
|
| 4072 |
"Editing": {
|
| 4073 |
"Time to Answer": {
|
| 4074 |
"Min": 0,
|
| 4075 |
+
"Max": 0,
|
| 4076 |
+
"Med": 0.0
|
| 4077 |
},
|
| 4078 |
"Latency": {
|
| 4079 |
"Min": 0,
|
| 4080 |
+
"Max": 0,
|
| 4081 |
+
"Med": 0.0
|
| 4082 |
},
|
| 4083 |
"Speed": {
|
| 4084 |
"Min": -1.0,
|
| 4085 |
+
"Max": -1.0,
|
| 4086 |
+
"Med": -1.0
|
| 4087 |
}
|
| 4088 |
},
|
| 4089 |
"Data Analysis": {
|
| 4090 |
"Time to Answer": {
|
| 4091 |
+
"Min": 0,
|
| 4092 |
+
"Max": 0,
|
| 4093 |
+
"Med": 0.0
|
| 4094 |
},
|
| 4095 |
"Latency": {
|
| 4096 |
+
"Min": 0,
|
| 4097 |
+
"Max": 0,
|
| 4098 |
+
"Med": 0.0
|
| 4099 |
},
|
| 4100 |
"Speed": {
|
| 4101 |
+
"Min": -1.0,
|
| 4102 |
+
"Max": -1.0,
|
| 4103 |
+
"Med": -1.0
|
| 4104 |
}
|
| 4105 |
},
|
| 4106 |
"Reasoning": {
|
| 4107 |
"Time to Answer": {
|
| 4108 |
+
"Min": 0,
|
| 4109 |
+
"Max": 0,
|
| 4110 |
+
"Med": 0.0
|
| 4111 |
},
|
| 4112 |
"Latency": {
|
| 4113 |
+
"Min": 0,
|
| 4114 |
+
"Max": 0,
|
| 4115 |
+
"Med": 0.0
|
| 4116 |
},
|
| 4117 |
"Speed": {
|
| 4118 |
+
"Min": -1.0,
|
| 4119 |
+
"Max": -1.0,
|
| 4120 |
+
"Med": -1.0
|
| 4121 |
}
|
| 4122 |
},
|
| 4123 |
"Hallucination": {
|
| 4124 |
"Time to Answer": {
|
| 4125 |
+
"Min": 0,
|
| 4126 |
+
"Max": 0,
|
| 4127 |
+
"Med": 0.0
|
| 4128 |
},
|
| 4129 |
"Latency": {
|
| 4130 |
+
"Min": 0,
|
| 4131 |
+
"Max": 0,
|
| 4132 |
+
"Med": 0.0
|
| 4133 |
+
},
|
| 4134 |
+
"Speed": {
|
| 4135 |
+
"Min": -1.0,
|
| 4136 |
+
"Max": -1.0,
|
| 4137 |
+
"Med": -1.0
|
| 4138 |
+
}
|
| 4139 |
+
},
|
| 4140 |
+
"Safety": {
|
| 4141 |
+
"Time to Answer": {
|
| 4142 |
+
"Min": 0,
|
| 4143 |
+
"Max": 0,
|
| 4144 |
+
"Med": 0.0
|
| 4145 |
+
},
|
| 4146 |
+
"Latency": {
|
| 4147 |
+
"Min": 0,
|
| 4148 |
+
"Max": 0,
|
| 4149 |
+
"Med": 0.0
|
| 4150 |
+
},
|
| 4151 |
+
"Speed": {
|
| 4152 |
+
"Min": -1.0,
|
| 4153 |
+
"Max": -1.0,
|
| 4154 |
+
"Med": -1.0
|
| 4155 |
+
}
|
| 4156 |
+
},
|
| 4157 |
+
"Repetition": {
|
| 4158 |
+
"Time to Answer": {
|
| 4159 |
+
"Min": 0,
|
| 4160 |
+
"Max": 0,
|
| 4161 |
+
"Med": 0.0
|
| 4162 |
+
},
|
| 4163 |
+
"Latency": {
|
| 4164 |
+
"Min": 0,
|
| 4165 |
+
"Max": 0,
|
| 4166 |
+
"Med": 0.0
|
| 4167 |
+
},
|
| 4168 |
+
"Speed": {
|
| 4169 |
+
"Min": -1.0,
|
| 4170 |
+
"Max": -1.0,
|
| 4171 |
+
"Med": -1.0
|
| 4172 |
+
}
|
| 4173 |
+
},
|
| 4174 |
+
"Summarization": {
|
| 4175 |
+
"Time to Answer": {
|
| 4176 |
+
"Min": 0,
|
| 4177 |
+
"Max": 0,
|
| 4178 |
+
"Med": 0.0
|
| 4179 |
+
},
|
| 4180 |
+
"Latency": {
|
| 4181 |
+
"Min": 0,
|
| 4182 |
+
"Max": 0,
|
| 4183 |
+
"Med": 0.0
|
| 4184 |
+
},
|
| 4185 |
+
"Speed": {
|
| 4186 |
+
"Min": -1.0,
|
| 4187 |
+
"Max": -1.0,
|
| 4188 |
+
"Med": -1.0
|
| 4189 |
+
}
|
| 4190 |
+
},
|
| 4191 |
+
"Translation": {
|
| 4192 |
+
"Time to Answer": {
|
| 4193 |
+
"Min": 0,
|
| 4194 |
+
"Max": 0,
|
| 4195 |
+
"Med": 0.0
|
| 4196 |
+
},
|
| 4197 |
+
"Latency": {
|
| 4198 |
+
"Min": 0,
|
| 4199 |
+
"Max": 0,
|
| 4200 |
+
"Med": 0.0
|
| 4201 |
+
},
|
| 4202 |
+
"Speed": {
|
| 4203 |
+
"Min": -1.0,
|
| 4204 |
+
"Max": -1.0,
|
| 4205 |
+
"Med": -1.0
|
| 4206 |
+
}
|
| 4207 |
+
},
|
| 4208 |
+
"Multi-Turn": {
|
| 4209 |
+
"Time to Answer": {
|
| 4210 |
+
"Min": 0,
|
| 4211 |
+
"Max": 0,
|
| 4212 |
+
"Med": 0.0
|
| 4213 |
+
},
|
| 4214 |
+
"Latency": {
|
| 4215 |
+
"Min": 0,
|
| 4216 |
+
"Max": 0,
|
| 4217 |
+
"Med": 0.0
|
| 4218 |
+
},
|
| 4219 |
+
"Speed": {
|
| 4220 |
+
"Min": -1.0,
|
| 4221 |
+
"Max": -1.0,
|
| 4222 |
+
"Med": -1.0
|
| 4223 |
+
}
|
| 4224 |
+
}
|
| 4225 |
+
},
|
| 4226 |
+
"gpt-oss-120B (Reasoning: medium)": {
|
| 4227 |
+
"NUM_GPUS": 8,
|
| 4228 |
+
"Overall": {
|
| 4229 |
+
"Time to Answer": {
|
| 4230 |
+
"Min": 0,
|
| 4231 |
+
"Max": 101.66583281847353,
|
| 4232 |
+
"Med": 7.694922740481965
|
| 4233 |
+
},
|
| 4234 |
+
"Latency": {
|
| 4235 |
+
"Min": 0,
|
| 4236 |
+
"Max": 108.71509218215942,
|
| 4237 |
+
"Med": 12.121336698532104
|
| 4238 |
+
},
|
| 4239 |
+
"Speed": {
|
| 4240 |
+
"Min": -1.0,
|
| 4241 |
+
"Max": 295.4744570001622,
|
| 4242 |
+
"Med": 103.31935460342275
|
| 4243 |
+
}
|
| 4244 |
+
},
|
| 4245 |
+
"Content Generation": {
|
| 4246 |
+
"Time to Answer": {
|
| 4247 |
+
"Min": 1.4268165264489516,
|
| 4248 |
+
"Max": 76.09343232158227,
|
| 4249 |
+
"Med": 5.27987206336147
|
| 4250 |
+
},
|
| 4251 |
+
"Latency": {
|
| 4252 |
+
"Min": 2.564422369003296,
|
| 4253 |
+
"Max": 77.78296256065369,
|
| 4254 |
+
"Med": 12.131241917610168
|
| 4255 |
+
},
|
| 4256 |
+
"Speed": {
|
| 4257 |
+
"Min": 39.854399049254106,
|
| 4258 |
+
"Max": 164.11560898062044,
|
| 4259 |
+
"Med": 102.26319280893972
|
| 4260 |
+
}
|
| 4261 |
+
},
|
| 4262 |
+
"Editing": {
|
| 4263 |
+
"Time to Answer": {
|
| 4264 |
+
"Min": 0,
|
| 4265 |
+
"Max": 34.97314937730854,
|
| 4266 |
+
"Med": 7.72154927398273
|
| 4267 |
+
},
|
| 4268 |
+
"Latency": {
|
| 4269 |
+
"Min": 0,
|
| 4270 |
+
"Max": 38.421292781829834,
|
| 4271 |
+
"Med": 10.624043703079224
|
| 4272 |
+
},
|
| 4273 |
+
"Speed": {
|
| 4274 |
+
"Min": -1.0,
|
| 4275 |
+
"Max": 295.4744570001622,
|
| 4276 |
+
"Med": 97.16836666526689
|
| 4277 |
+
}
|
| 4278 |
+
},
|
| 4279 |
+
"Data Analysis": {
|
| 4280 |
+
"Time to Answer": {
|
| 4281 |
+
"Min": 1.5500787364112005,
|
| 4282 |
+
"Max": 47.44580010794223,
|
| 4283 |
+
"Med": 7.938084126425333
|
| 4284 |
+
},
|
| 4285 |
+
"Latency": {
|
| 4286 |
+
"Min": 2.21130108833313,
|
| 4287 |
+
"Max": 48.52851939201355,
|
| 4288 |
+
"Med": 10.561246871948242
|
| 4289 |
+
},
|
| 4290 |
+
"Speed": {
|
| 4291 |
+
"Min": 33.67196833472543,
|
| 4292 |
+
"Max": 218.4742163852268,
|
| 4293 |
+
"Med": 111.7668417486227
|
| 4294 |
+
}
|
| 4295 |
+
},
|
| 4296 |
+
"Reasoning": {
|
| 4297 |
+
"Time to Answer": {
|
| 4298 |
+
"Min": 1.313997881221957,
|
| 4299 |
+
"Max": 71.11774356968237,
|
| 4300 |
+
"Med": 9.761283050834567
|
| 4301 |
+
},
|
| 4302 |
+
"Latency": {
|
| 4303 |
+
"Min": 2.4778506755828857,
|
| 4304 |
+
"Max": 77.51551747322083,
|
| 4305 |
+
"Med": 14.6117924451828
|
| 4306 |
+
},
|
| 4307 |
+
"Speed": {
|
| 4308 |
+
"Min": 52.34248336770566,
|
| 4309 |
+
"Max": 162.3515087876845,
|
| 4310 |
+
"Med": 121.07198234072999
|
| 4311 |
+
}
|
| 4312 |
+
},
|
| 4313 |
+
"Hallucination": {
|
| 4314 |
+
"Time to Answer": {
|
| 4315 |
+
"Min": 2.2739883409985806,
|
| 4316 |
+
"Max": 25.163233752980585,
|
| 4317 |
+
"Med": 7.324965414201975
|
| 4318 |
+
},
|
| 4319 |
+
"Latency": {
|
| 4320 |
+
"Min": 3.2160396575927734,
|
| 4321 |
+
"Max": 41.42578959465027,
|
| 4322 |
+
"Med": 13.071247458457947
|
| 4323 |
+
},
|
| 4324 |
+
"Speed": {
|
| 4325 |
"Min": 36.912669174553955,
|
| 4326 |
"Max": 207.27878278068064,
|
| 4327 |
"Med": 124.86484051787805
|
|
|
|
| 5173 |
}
|
| 5174 |
}
|
| 5175 |
},
|
| 5176 |
+
"Grok-4": {
|
| 5177 |
"NUM_GPUS": 0,
|
| 5178 |
"Overall": {
|
| 5179 |
"Time to Answer": {
|
|
|
|
| 5363 |
}
|
| 5364 |
}
|
| 5365 |
},
|
| 5366 |
+
"Claude 4 Opus (20250514) (think)": {
|
| 5367 |
"NUM_GPUS": 0,
|
| 5368 |
"Overall": {
|
| 5369 |
"Time to Answer": {
|
|
|
|
| 5553 |
}
|
| 5554 |
}
|
| 5555 |
},
|
| 5556 |
+
"Gemini 2.5 Pro": {
|
| 5557 |
+
"NUM_GPUS": 0,
|
| 5558 |
"Overall": {
|
| 5559 |
"Time to Answer": {
|
| 5560 |
+
"Min": 0,
|
| 5561 |
+
"Max": 0,
|
| 5562 |
+
"Med": 0.0
|
| 5563 |
},
|
| 5564 |
"Latency": {
|
| 5565 |
+
"Min": 0,
|
| 5566 |
+
"Max": 0,
|
| 5567 |
+
"Med": 0.0
|
| 5568 |
},
|
| 5569 |
"Speed": {
|
| 5570 |
+
"Min": 0.0,
|
| 5571 |
+
"Max": 0.0,
|
| 5572 |
+
"Med": 0.0
|
| 5573 |
}
|
| 5574 |
},
|
| 5575 |
"Content Generation": {
|
| 5576 |
"Time to Answer": {
|
| 5577 |
+
"Min": 0,
|
| 5578 |
+
"Max": 0,
|
| 5579 |
+
"Med": 0.0
|
| 5580 |
},
|
| 5581 |
"Latency": {
|
| 5582 |
+
"Min": 0,
|
| 5583 |
+
"Max": 0,
|
| 5584 |
+
"Med": 0.0
|
| 5585 |
},
|
| 5586 |
"Speed": {
|
| 5587 |
+
"Min": 0.0,
|
| 5588 |
+
"Max": 0.0,
|
| 5589 |
+
"Med": 0.0
|
| 5590 |
}
|
| 5591 |
},
|
| 5592 |
"Editing": {
|
| 5593 |
"Time to Answer": {
|
| 5594 |
+
"Min": 0,
|
| 5595 |
+
"Max": 0,
|
| 5596 |
+
"Med": 0.0
|
| 5597 |
},
|
| 5598 |
"Latency": {
|
| 5599 |
+
"Min": 0,
|
| 5600 |
+
"Max": 0,
|
| 5601 |
+
"Med": 0.0
|
| 5602 |
},
|
| 5603 |
"Speed": {
|
| 5604 |
+
"Min": 0.0,
|
| 5605 |
+
"Max": 0.0,
|
| 5606 |
+
"Med": 0.0
|
| 5607 |
}
|
| 5608 |
},
|
| 5609 |
"Data Analysis": {
|
| 5610 |
"Time to Answer": {
|
| 5611 |
+
"Min": 0,
|
| 5612 |
+
"Max": 0,
|
| 5613 |
+
"Med": 0.0
|
| 5614 |
},
|
| 5615 |
"Latency": {
|
| 5616 |
+
"Min": 0,
|
| 5617 |
+
"Max": 0,
|
| 5618 |
+
"Med": 0.0
|
| 5619 |
+
},
|
| 5620 |
+
"Speed": {
|
| 5621 |
+
"Min": 0.0,
|
| 5622 |
+
"Max": 0.0,
|
| 5623 |
+
"Med": 0.0
|
| 5624 |
+
}
|
| 5625 |
+
},
|
| 5626 |
+
"Reasoning": {
|
| 5627 |
+
"Time to Answer": {
|
| 5628 |
+
"Min": 0,
|
| 5629 |
+
"Max": 0,
|
| 5630 |
+
"Med": 0.0
|
| 5631 |
+
},
|
| 5632 |
+
"Latency": {
|
| 5633 |
+
"Min": 0,
|
| 5634 |
+
"Max": 0,
|
| 5635 |
+
"Med": 0.0
|
| 5636 |
+
},
|
| 5637 |
+
"Speed": {
|
| 5638 |
+
"Min": 0.0,
|
| 5639 |
+
"Max": 0.0,
|
| 5640 |
+
"Med": 0.0
|
| 5641 |
+
}
|
| 5642 |
+
},
|
| 5643 |
+
"Hallucination": {
|
| 5644 |
+
"Time to Answer": {
|
| 5645 |
+
"Min": 0,
|
| 5646 |
+
"Max": 0,
|
| 5647 |
+
"Med": 0.0
|
| 5648 |
+
},
|
| 5649 |
+
"Latency": {
|
| 5650 |
+
"Min": 0,
|
| 5651 |
+
"Max": 0,
|
| 5652 |
+
"Med": 0.0
|
| 5653 |
+
},
|
| 5654 |
+
"Speed": {
|
| 5655 |
+
"Min": 0.0,
|
| 5656 |
+
"Max": 0.0,
|
| 5657 |
+
"Med": 0.0
|
| 5658 |
+
}
|
| 5659 |
+
},
|
| 5660 |
+
"Safety": {
|
| 5661 |
+
"Time to Answer": {
|
| 5662 |
+
"Min": 0,
|
| 5663 |
+
"Max": 0,
|
| 5664 |
+
"Med": 0.0
|
| 5665 |
+
},
|
| 5666 |
+
"Latency": {
|
| 5667 |
+
"Min": 0,
|
| 5668 |
+
"Max": 0,
|
| 5669 |
+
"Med": 0.0
|
| 5670 |
+
},
|
| 5671 |
+
"Speed": {
|
| 5672 |
+
"Min": 0.0,
|
| 5673 |
+
"Max": 0.0,
|
| 5674 |
+
"Med": 0.0
|
| 5675 |
+
}
|
| 5676 |
+
},
|
| 5677 |
+
"Repetition": {
|
| 5678 |
+
"Time to Answer": {
|
| 5679 |
+
"Min": 0,
|
| 5680 |
+
"Max": 0,
|
| 5681 |
+
"Med": 0.0
|
| 5682 |
+
},
|
| 5683 |
+
"Latency": {
|
| 5684 |
+
"Min": 0,
|
| 5685 |
+
"Max": 0,
|
| 5686 |
+
"Med": 0.0
|
| 5687 |
+
},
|
| 5688 |
+
"Speed": {
|
| 5689 |
+
"Min": 0.0,
|
| 5690 |
+
"Max": 0.0,
|
| 5691 |
+
"Med": 0.0
|
| 5692 |
+
}
|
| 5693 |
+
},
|
| 5694 |
+
"Summarization": {
|
| 5695 |
+
"Time to Answer": {
|
| 5696 |
+
"Min": 0,
|
| 5697 |
+
"Max": 0,
|
| 5698 |
+
"Med": 0.0
|
| 5699 |
+
},
|
| 5700 |
+
"Latency": {
|
| 5701 |
+
"Min": 0,
|
| 5702 |
+
"Max": 0,
|
| 5703 |
+
"Med": 0.0
|
| 5704 |
+
},
|
| 5705 |
+
"Speed": {
|
| 5706 |
+
"Min": 0.0,
|
| 5707 |
+
"Max": 0.0,
|
| 5708 |
+
"Med": 0.0
|
| 5709 |
+
}
|
| 5710 |
+
},
|
| 5711 |
+
"Translation": {
|
| 5712 |
+
"Time to Answer": {
|
| 5713 |
+
"Min": 0,
|
| 5714 |
+
"Max": 0,
|
| 5715 |
+
"Med": 0.0
|
| 5716 |
+
},
|
| 5717 |
+
"Latency": {
|
| 5718 |
+
"Min": 0,
|
| 5719 |
+
"Max": 0,
|
| 5720 |
+
"Med": 0.0
|
| 5721 |
+
},
|
| 5722 |
+
"Speed": {
|
| 5723 |
+
"Min": 0.0,
|
| 5724 |
+
"Max": 0.0,
|
| 5725 |
+
"Med": 0.0
|
| 5726 |
+
}
|
| 5727 |
+
},
|
| 5728 |
+
"Multi-Turn": {
|
| 5729 |
+
"Time to Answer": {
|
| 5730 |
+
"Min": 0,
|
| 5731 |
+
"Max": 0,
|
| 5732 |
+
"Med": 0.0
|
| 5733 |
+
},
|
| 5734 |
+
"Latency": {
|
| 5735 |
+
"Min": 0,
|
| 5736 |
+
"Max": 0,
|
| 5737 |
+
"Med": 0.0
|
| 5738 |
+
},
|
| 5739 |
+
"Speed": {
|
| 5740 |
+
"Min": 0.0,
|
| 5741 |
+
"Max": 0.0,
|
| 5742 |
+
"Med": 0.0
|
| 5743 |
+
}
|
| 5744 |
+
}
|
| 5745 |
+
},
|
| 5746 |
+
"Tongyi DeepResearch 30B A3B": {
|
| 5747 |
+
"NUM_GPUS": 4,
|
| 5748 |
+
"Overall": {
|
| 5749 |
+
"Time to Answer": {
|
| 5750 |
+
"Min": 1.4505500793457031,
|
| 5751 |
+
"Max": 244.41708384257143,
|
| 5752 |
+
"Med": 45.23295979184195
|
| 5753 |
+
},
|
| 5754 |
+
"Latency": {
|
| 5755 |
+
"Min": 9.191470384597778,
|
| 5756 |
+
"Max": 749.16233086586,
|
| 5757 |
+
"Med": 52.387412667274475
|
| 5758 |
+
},
|
| 5759 |
+
"Speed": {
|
| 5760 |
+
"Min": 18.069738498345682,
|
| 5761 |
+
"Max": 122.37478932044478,
|
| 5762 |
+
"Med": 62.676624491545525
|
| 5763 |
+
}
|
| 5764 |
+
},
|
| 5765 |
+
"Content Generation": {
|
| 5766 |
+
"Time to Answer": {
|
| 5767 |
+
"Min": 4.802471643031882,
|
| 5768 |
+
"Max": 203.55154156596308,
|
| 5769 |
+
"Med": 41.43268650270611
|
| 5770 |
+
},
|
| 5771 |
+
"Latency": {
|
| 5772 |
+
"Min": 22.65742540359497,
|
| 5773 |
+
"Max": 738.4437143802643,
|
| 5774 |
+
"Med": 51.50689494609833
|
| 5775 |
+
},
|
| 5776 |
+
"Speed": {
|
| 5777 |
+
"Min": 23.70791793357093,
|
| 5778 |
+
"Max": 118.23891726695051,
|
| 5779 |
+
"Med": 61.95675692618596
|
| 5780 |
+
}
|
| 5781 |
+
},
|
| 5782 |
+
"Editing": {
|
| 5783 |
+
"Time to Answer": {
|
| 5784 |
+
"Min": 9.566574335098267,
|
| 5785 |
+
"Max": 106.69052745386706,
|
| 5786 |
+
"Med": 43.39687190468506
|
| 5787 |
+
},
|
| 5788 |
+
"Latency": {
|
| 5789 |
+
"Min": 11.078340530395508,
|
| 5790 |
+
"Max": 716.9541938304901,
|
| 5791 |
+
"Med": 48.262219190597534
|
| 5792 |
+
},
|
| 5793 |
+
"Speed": {
|
| 5794 |
+
"Min": 21.45800243159038,
|
| 5795 |
+
"Max": 110.85390192747607,
|
| 5796 |
+
"Med": 60.728454906690686
|
| 5797 |
+
}
|
| 5798 |
+
},
|
| 5799 |
+
"Data Analysis": {
|
| 5800 |
+
"Time to Answer": {
|
| 5801 |
+
"Min": 11.874105177737047,
|
| 5802 |
+
"Max": 231.70031813596688,
|
| 5803 |
+
"Med": 45.77187383012706
|
| 5804 |
+
},
|
| 5805 |
+
"Latency": {
|
| 5806 |
+
"Min": 22.660792589187622,
|
| 5807 |
+
"Max": 716.5539243221283,
|
| 5808 |
+
"Med": 51.057066202163696
|
| 5809 |
+
},
|
| 5810 |
+
"Speed": {
|
| 5811 |
"Min": 18.806653004549222,
|
| 5812 |
"Max": 113.17314697322944,
|
| 5813 |
"Med": 61.908515815844005
|
|
|
|
| 6503 |
}
|
| 6504 |
}
|
| 6505 |
},
|
| 6506 |
+
"GPT-5.1 (Reasoning: medium, verbosity: medium)": {
|
| 6507 |
+
"NUM_GPUS": -1,
|
| 6508 |
"Overall": {
|
| 6509 |
"Time to Answer": {
|
| 6510 |
+
"Min": [
|
| 6511 |
+
0
|
| 6512 |
+
],
|
| 6513 |
+
"Max": [
|
| 6514 |
+
0
|
| 6515 |
+
],
|
| 6516 |
+
"Med": 0.0
|
| 6517 |
},
|
| 6518 |
"Latency": {
|
| 6519 |
+
"Min": 1.4775474071502686,
|
| 6520 |
+
"Max": 747.6701903343201,
|
| 6521 |
+
"Med": 11.673096776008606
|
| 6522 |
},
|
| 6523 |
"Speed": {
|
| 6524 |
+
"Min": 0,
|
| 6525 |
+
"Max": 0,
|
| 6526 |
+
"Med": 0.0
|
| 6527 |
}
|
| 6528 |
},
|
| 6529 |
"Content Generation": {
|
| 6530 |
"Time to Answer": {
|
| 6531 |
+
"Min": [
|
| 6532 |
+
0
|
| 6533 |
+
],
|
| 6534 |
+
"Max": [
|
| 6535 |
+
0
|
| 6536 |
+
],
|
| 6537 |
+
"Med": 0.0
|
| 6538 |
},
|
| 6539 |
"Latency": {
|
| 6540 |
+
"Min": 3.002990961074829,
|
| 6541 |
+
"Max": 421.94651198387146,
|
| 6542 |
+
"Med": 16.867193579673767
|
| 6543 |
},
|
| 6544 |
"Speed": {
|
| 6545 |
+
"Min": 0,
|
| 6546 |
+
"Max": 0,
|
| 6547 |
+
"Med": 0.0
|
| 6548 |
}
|
| 6549 |
},
|
| 6550 |
"Editing": {
|
| 6551 |
"Time to Answer": {
|
| 6552 |
+
"Min": [
|
| 6553 |
+
0
|
| 6554 |
+
],
|
| 6555 |
+
"Max": [
|
| 6556 |
+
0
|
| 6557 |
+
],
|
| 6558 |
+
"Med": 0.0
|
| 6559 |
},
|
| 6560 |
"Latency": {
|
| 6561 |
+
"Min": 1.4775474071502686,
|
| 6562 |
+
"Max": 82.63027286529541,
|
| 6563 |
+
"Med": 5.669041872024536
|
| 6564 |
},
|
| 6565 |
"Speed": {
|
| 6566 |
+
"Min": 0,
|
| 6567 |
+
"Max": 0,
|
| 6568 |
+
"Med": 0.0
|
| 6569 |
}
|
| 6570 |
},
|
| 6571 |
"Data Analysis": {
|
| 6572 |
"Time to Answer": {
|
| 6573 |
+
"Min": [
|
| 6574 |
+
0
|
| 6575 |
+
],
|
| 6576 |
+
"Max": [
|
| 6577 |
+
0
|
| 6578 |
+
],
|
| 6579 |
+
"Med": 0.0
|
| 6580 |
},
|
| 6581 |
"Latency": {
|
| 6582 |
+
"Min": 1.664919137954712,
|
| 6583 |
+
"Max": 358.5435652732849,
|
| 6584 |
+
"Med": 7.0718772411346436
|
| 6585 |
},
|
| 6586 |
"Speed": {
|
| 6587 |
+
"Min": 0,
|
| 6588 |
+
"Max": 0,
|
| 6589 |
+
"Med": 0.0
|
| 6590 |
}
|
| 6591 |
},
|
| 6592 |
"Reasoning": {
|
| 6593 |
"Time to Answer": {
|
| 6594 |
+
"Min": [
|
| 6595 |
+
0
|
| 6596 |
+
],
|
| 6597 |
+
"Max": [
|
| 6598 |
+
0
|
| 6599 |
+
],
|
| 6600 |
+
"Med": 0.0
|
| 6601 |
},
|
| 6602 |
"Latency": {
|
| 6603 |
+
"Min": 1.82639479637146,
|
| 6604 |
+
"Max": 747.6701903343201,
|
| 6605 |
+
"Med": 12.99689531326294
|
| 6606 |
},
|
| 6607 |
"Speed": {
|
| 6608 |
+
"Min": 0,
|
| 6609 |
+
"Max": 0,
|
| 6610 |
+
"Med": 0.0
|
| 6611 |
}
|
| 6612 |
},
|
| 6613 |
"Hallucination": {
|
| 6614 |
"Time to Answer": {
|
| 6615 |
+
"Min": [
|
| 6616 |
+
0
|
| 6617 |
+
],
|
| 6618 |
+
"Max": [
|
| 6619 |
+
0
|
| 6620 |
+
],
|
| 6621 |
+
"Med": 0.0
|
| 6622 |
},
|
| 6623 |
"Latency": {
|
| 6624 |
+
"Min": 2.273186445236206,
|
| 6625 |
+
"Max": 115.95099306106567,
|
| 6626 |
+
"Med": 22.67124307155609
|
| 6627 |
},
|
| 6628 |
"Speed": {
|
| 6629 |
+
"Min": 0,
|
| 6630 |
+
"Max": 0,
|
| 6631 |
+
"Med": 0.0
|
| 6632 |
}
|
| 6633 |
},
|
| 6634 |
"Safety": {
|
| 6635 |
"Time to Answer": {
|
| 6636 |
+
"Min": [
|
| 6637 |
+
0
|
| 6638 |
+
],
|
| 6639 |
+
"Max": [
|
| 6640 |
+
0
|
| 6641 |
+
],
|
| 6642 |
+
"Med": 0.0
|
| 6643 |
},
|
| 6644 |
"Latency": {
|
| 6645 |
+
"Min": 3.3134090900421143,
|
| 6646 |
+
"Max": 140.77250027656555,
|
| 6647 |
+
"Med": 18.410767793655396
|
| 6648 |
},
|
| 6649 |
"Speed": {
|
| 6650 |
+
"Min": 0,
|
| 6651 |
+
"Max": 0,
|
| 6652 |
+
"Med": 0.0
|
| 6653 |
}
|
| 6654 |
},
|
| 6655 |
"Repetition": {
|
| 6656 |
"Time to Answer": {
|
| 6657 |
+
"Min": [
|
| 6658 |
+
0
|
| 6659 |
+
],
|
| 6660 |
+
"Max": [
|
| 6661 |
+
0
|
| 6662 |
+
],
|
| 6663 |
+
"Med": 0.0
|
| 6664 |
},
|
| 6665 |
"Latency": {
|
| 6666 |
+
"Min": 2.3753366470336914,
|
| 6667 |
+
"Max": 428.47876358032227,
|
| 6668 |
+
"Med": 19.905622720718384
|
| 6669 |
},
|
| 6670 |
"Speed": {
|
| 6671 |
+
"Min": 0,
|
| 6672 |
+
"Max": 0,
|
| 6673 |
+
"Med": 0.0
|
| 6674 |
}
|
| 6675 |
},
|
| 6676 |
"Summarization": {
|
| 6677 |
"Time to Answer": {
|
| 6678 |
+
"Min": [
|
| 6679 |
+
0
|
| 6680 |
+
],
|
| 6681 |
+
"Max": [
|
| 6682 |
+
0
|
| 6683 |
+
],
|
| 6684 |
+
"Med": 0.0
|
| 6685 |
},
|
| 6686 |
"Latency": {
|
| 6687 |
+
"Min": 2.2187492847442627,
|
| 6688 |
+
"Max": 126.85083556175232,
|
| 6689 |
+
"Med": 5.20970344543457
|
| 6690 |
},
|
| 6691 |
"Speed": {
|
| 6692 |
+
"Min": 0,
|
| 6693 |
+
"Max": 0,
|
| 6694 |
+
"Med": 0.0
|
| 6695 |
}
|
| 6696 |
},
|
| 6697 |
"Translation": {
|
| 6698 |
"Time to Answer": {
|
| 6699 |
+
"Min": [
|
| 6700 |
+
0
|
| 6701 |
+
],
|
| 6702 |
+
"Max": [
|
| 6703 |
+
0
|
| 6704 |
+
],
|
| 6705 |
+
"Med": 0.0
|
| 6706 |
},
|
| 6707 |
"Latency": {
|
| 6708 |
+
"Min": 2.0158095359802246,
|
| 6709 |
+
"Max": 64.36819744110107,
|
| 6710 |
+
"Med": 9.735138773918152
|
| 6711 |
},
|
| 6712 |
"Speed": {
|
| 6713 |
+
"Min": 0,
|
| 6714 |
+
"Max": 0,
|
| 6715 |
+
"Med": 0.0
|
| 6716 |
}
|
| 6717 |
},
|
| 6718 |
"Multi-Turn": {
|
| 6719 |
"Time to Answer": {
|
| 6720 |
+
"Min": [
|
| 6721 |
+
0
|
| 6722 |
+
],
|
| 6723 |
+
"Max": [
|
| 6724 |
+
0
|
| 6725 |
+
],
|
| 6726 |
+
"Med": 0.0
|
| 6727 |
},
|
| 6728 |
"Latency": {
|
| 6729 |
+
"Min": 4.493000507354736,
|
| 6730 |
+
"Max": 501.9931924343109,
|
| 6731 |
+
"Med": 38.35947251319885
|
| 6732 |
},
|
| 6733 |
"Speed": {
|
| 6734 |
+
"Min": 0,
|
| 6735 |
+
"Max": 0,
|
| 6736 |
+
"Med": 0.0
|
| 6737 |
}
|
| 6738 |
}
|
| 6739 |
},
|
| 6740 |
+
"KAT Dev 72B Exp": {
|
| 6741 |
+
"NUM_GPUS": 8,
|
| 6742 |
"Overall": {
|
| 6743 |
"Time to Answer": {
|
| 6744 |
"Min": 0,
|
| 6745 |
+
"Max": 1.8852267265319824,
|
| 6746 |
+
"Med": 0.06221652030944824
|
| 6747 |
},
|
| 6748 |
"Latency": {
|
| 6749 |
"Min": 0,
|
| 6750 |
+
"Max": 1739.6013979911804,
|
| 6751 |
+
"Med": 8.492375493049622
|
| 6752 |
},
|
| 6753 |
"Speed": {
|
| 6754 |
+
"Min": 11.841053492664015,
|
| 6755 |
+
"Max": 179.6668529421545,
|
| 6756 |
+
"Med": 50.601864763867184
|
| 6757 |
}
|
| 6758 |
},
|
| 6759 |
"Content Generation": {
|
| 6760 |
"Time to Answer": {
|
| 6761 |
+
"Min": 0.05644536018371582,
|
| 6762 |
+
"Max": 1.8852267265319824,
|
| 6763 |
+
"Med": 0.06070876121520996
|
| 6764 |
},
|
| 6765 |
"Latency": {
|
| 6766 |
+
"Min": 0.5495977401733398,
|
| 6767 |
+
"Max": 1734.7112760543823,
|
| 6768 |
+
"Med": 11.42176365852356
|
| 6769 |
},
|
| 6770 |
"Speed": {
|
| 6771 |
+
"Min": 33.34421066358906,
|
| 6772 |
+
"Max": 61.99760541627945,
|
| 6773 |
+
"Med": 51.61603722996896
|
| 6774 |
}
|
| 6775 |
},
|
| 6776 |
"Editing": {
|
| 6777 |
"Time to Answer": {
|
| 6778 |
+
"Min": 0.056221723556518555,
|
| 6779 |
+
"Max": 0.21474575996398926,
|
| 6780 |
+
"Med": 0.06082558631896973
|
| 6781 |
},
|
| 6782 |
"Latency": {
|
| 6783 |
+
"Min": 0.2646908760070801,
|
| 6784 |
+
"Max": 1595.0533018112183,
|
| 6785 |
+
"Med": 4.54656982421875
|
| 6786 |
},
|
| 6787 |
"Speed": {
|
| 6788 |
+
"Min": 33.0970793748843,
|
| 6789 |
+
"Max": 67.8011225008837,
|
| 6790 |
+
"Med": 49.877649602198524
|
| 6791 |
}
|
| 6792 |
},
|
| 6793 |
"Data Analysis": {
|
| 6794 |
"Time to Answer": {
|
| 6795 |
+
"Min": 0.057355642318725586,
|
| 6796 |
+
"Max": 1.3980965614318848,
|
| 6797 |
+
"Med": 0.10868549346923828
|
| 6798 |
},
|
| 6799 |
"Latency": {
|
| 6800 |
+
"Min": 0.4547910690307617,
|
| 6801 |
+
"Max": 343.81701016426086,
|
| 6802 |
+
"Med": 6.172606706619263
|
| 6803 |
},
|
| 6804 |
"Speed": {
|
| 6805 |
+
"Min": 33.23552955310353,
|
| 6806 |
+
"Max": 61.68261581762401,
|
| 6807 |
+
"Med": 49.023444483063024
|
| 6808 |
}
|
| 6809 |
},
|
| 6810 |
"Reasoning": {
|
| 6811 |
"Time to Answer": {
|
| 6812 |
+
"Min": 0.05635571479797363,
|
| 6813 |
+
"Max": 0.11922383308410645,
|
| 6814 |
+
"Med": 0.06026041507720947
|
| 6815 |
},
|
| 6816 |
"Latency": {
|
| 6817 |
+
"Min": 0.3007025718688965,
|
| 6818 |
+
"Max": 1650.0614280700684,
|
| 6819 |
+
"Med": 10.36361300945282
|
| 6820 |
},
|
| 6821 |
"Speed": {
|
| 6822 |
+
"Min": 35.11266626219754,
|
| 6823 |
+
"Max": 61.64151659547226,
|
| 6824 |
+
"Med": 48.08432696922458
|
| 6825 |
}
|
| 6826 |
},
|
| 6827 |
"Hallucination": {
|
| 6828 |
"Time to Answer": {
|
| 6829 |
+
"Min": 0.05710172653198242,
|
| 6830 |
+
"Max": 0.2662782669067383,
|
| 6831 |
+
"Med": 0.06034708023071289
|
| 6832 |
},
|
| 6833 |
"Latency": {
|
| 6834 |
+
"Min": 0.4679543972015381,
|
| 6835 |
+
"Max": 1617.9812409877777,
|
| 6836 |
+
"Med": 8.860660910606384
|
| 6837 |
},
|
| 6838 |
"Speed": {
|
| 6839 |
+
"Min": 34.20033736500205,
|
| 6840 |
+
"Max": 63.47312348668281,
|
| 6841 |
+
"Med": 49.06857210020506
|
| 6842 |
}
|
| 6843 |
},
|
| 6844 |
"Safety": {
|
| 6845 |
"Time to Answer": {
|
| 6846 |
+
"Min": 0.05749773979187012,
|
| 6847 |
+
"Max": 0.1197052001953125,
|
| 6848 |
+
"Med": 0.06013894081115723
|
| 6849 |
},
|
| 6850 |
"Latency": {
|
| 6851 |
+
"Min": 0.4509849548339844,
|
| 6852 |
+
"Max": 1738.2601640224457,
|
| 6853 |
+
"Med": 7.764802932739258
|
| 6854 |
},
|
| 6855 |
"Speed": {
|
| 6856 |
+
"Min": 31.161307774126723,
|
| 6857 |
+
"Max": 62.96165283098918,
|
| 6858 |
+
"Med": 46.6376538248078
|
| 6859 |
}
|
| 6860 |
},
|
| 6861 |
"Repetition": {
|
| 6862 |
"Time to Answer": {
|
| 6863 |
+
"Min": 0.05688214302062988,
|
| 6864 |
+
"Max": 0.11974930763244629,
|
| 6865 |
+
"Med": 0.05986011028289795
|
| 6866 |
},
|
| 6867 |
"Latency": {
|
| 6868 |
+
"Min": 1.6353342533111572,
|
| 6869 |
+
"Max": 1736.6408779621124,
|
| 6870 |
+
"Med": 9.356394052505493
|
| 6871 |
},
|
| 6872 |
"Speed": {
|
| 6873 |
+
"Min": 33.02125276478502,
|
| 6874 |
+
"Max": 61.19217468044336,
|
| 6875 |
+
"Med": 46.27108150127529
|
| 6876 |
}
|
| 6877 |
},
|
| 6878 |
"Summarization": {
|
| 6879 |
"Time to Answer": {
|
| 6880 |
+
"Min": 0.05743670463562012,
|
| 6881 |
+
"Max": 0.4117448329925537,
|
| 6882 |
+
"Med": 0.1098024845123291
|
| 6883 |
},
|
| 6884 |
"Latency": {
|
| 6885 |
+
"Min": 0.989130973815918,
|
| 6886 |
+
"Max": 1565.9191603660583,
|
| 6887 |
+
"Med": 6.263204216957092
|
| 6888 |
},
|
| 6889 |
"Speed": {
|
| 6890 |
+
"Min": 33.11188589504452,
|
| 6891 |
+
"Max": 57.96248782433984,
|
| 6892 |
+
"Med": 49.54728638934513
|
| 6893 |
}
|
| 6894 |
},
|
| 6895 |
"Translation": {
|
| 6896 |
"Time to Answer": {
|
| 6897 |
+
"Min": 0.05727648735046387,
|
| 6898 |
+
"Max": 0.3603339195251465,
|
| 6899 |
+
"Med": 0.06304633617401123
|
| 6900 |
},
|
| 6901 |
"Latency": {
|
| 6902 |
+
"Min": 0.2665116786956787,
|
| 6903 |
+
"Max": 1739.6013979911804,
|
| 6904 |
+
"Med": 7.2887866497039795
|
| 6905 |
},
|
| 6906 |
"Speed": {
|
| 6907 |
+
"Min": 33.701689123361014,
|
| 6908 |
+
"Max": 61.86641373663972,
|
| 6909 |
+
"Med": 50.14262477886369
|
| 6910 |
}
|
| 6911 |
},
|
| 6912 |
"Multi-Turn": {
|
| 6913 |
"Time to Answer": {
|
| 6914 |
"Min": 0,
|
| 6915 |
+
"Max": 1.4983344078063965,
|
| 6916 |
+
"Med": 0.2324150800704956
|
| 6917 |
},
|
| 6918 |
"Latency": {
|
| 6919 |
"Min": 0,
|
| 6920 |
+
"Max": 888.0258376598358,
|
| 6921 |
+
"Med": 21.200571298599243
|
| 6922 |
},
|
| 6923 |
"Speed": {
|
| 6924 |
+
"Min": 11.841053492664015,
|
| 6925 |
+
"Max": 179.6668529421545,
|
| 6926 |
+
"Med": 52.505335801448915
|
| 6927 |
}
|
| 6928 |
}
|
| 6929 |
},
|
| 6930 |
+
"gpt-oss-20B (Reasoning: medium)": {
|
| 6931 |
+
"NUM_GPUS": 4,
|
| 6932 |
+
"Overall": {
|
| 6933 |
+
"Time to Answer": {
|
| 6934 |
+
"Min": 2.0265204472169556,
|
| 6935 |
+
"Max": 129.33762687935325,
|
| 6936 |
+
"Med": 26.04652036871504
|
| 6937 |
+
},
|
| 6938 |
+
"Latency": {
|
| 6939 |
+
"Min": 7.263976097106934,
|
| 6940 |
+
"Max": 138.10640954971313,
|
| 6941 |
+
"Med": 29.767700791358948
|
| 6942 |
+
},
|
| 6943 |
+
"Speed": {
|
| 6944 |
+
"Min": 5.317348253806318,
|
| 6945 |
+
"Max": 369.6802851223203,
|
| 6946 |
+
"Med": 108.53633696847938
|
| 6947 |
+
}
|
| 6948 |
+
},
|
| 6949 |
+
"Content Generation": {
|
| 6950 |
+
"Time to Answer": {
|
| 6951 |
+
"Min": 2.919738582967988,
|
| 6952 |
+
"Max": 69.87850048414344,
|
| 6953 |
+
"Med": 25.258961631303542
|
| 6954 |
+
},
|
| 6955 |
+
"Latency": {
|
| 6956 |
+
"Min": 7.263976097106934,
|
| 6957 |
+
"Max": 73.72067332267761,
|
| 6958 |
+
"Med": 29.7125141620636
|
| 6959 |
+
},
|
| 6960 |
+
"Speed": {
|
| 6961 |
+
"Min": 13.604860508942371,
|
| 6962 |
+
"Max": 224.07024522186745,
|
| 6963 |
+
"Med": 124.14591263963385
|
| 6964 |
+
}
|
| 6965 |
+
},
|
| 6966 |
+
"Editing": {
|
| 6967 |
+
"Time to Answer": {
|
| 6968 |
+
"Min": 2.0265204472169556,
|
| 6969 |
+
"Max": 49.25446497234138,
|
| 6970 |
+
"Med": 25.30023380826225
|
| 6971 |
+
},
|
| 6972 |
+
"Latency": {
|
| 6973 |
+
"Min": 7.319023847579956,
|
| 6974 |
+
"Max": 56.10624122619629,
|
| 6975 |
+
"Med": 27.072497606277466
|
| 6976 |
+
},
|
| 6977 |
+
"Speed": {
|
| 6978 |
+
"Min": 12.140381961027476,
|
| 6979 |
+
"Max": 250.484990902761,
|
| 6980 |
+
"Med": 104.93834609385715
|
| 6981 |
+
}
|
| 6982 |
+
},
|
| 6983 |
+
"Data Analysis": {
|
| 6984 |
+
"Time to Answer": {
|
| 6985 |
+
"Min": 3.9151465271321517,
|
| 6986 |
+
"Max": 75.99197716704057,
|
| 6987 |
+
"Med": 26.328562295325447
|
| 6988 |
+
},
|
| 6989 |
+
"Latency": {
|
| 6990 |
+
"Min": 7.286376476287842,
|
| 6991 |
+
"Max": 84.5227108001709,
|
| 6992 |
+
"Med": 28.301609992980957
|
| 6993 |
+
},
|
| 6994 |
+
"Speed": {
|
| 6995 |
+
"Min": 11.70440500661996,
|
| 6996 |
+
"Max": 306.1703155318347,
|
| 6997 |
+
"Med": 113.21800589706349
|
| 6998 |
+
}
|
| 6999 |
+
},
|
| 7000 |
+
"Reasoning": {
|
| 7001 |
+
"Time to Answer": {
|
| 7002 |
+
"Min": 8.324975468895651,
|
| 7003 |
+
"Max": 129.33762687935325,
|
| 7004 |
+
"Med": 27.582642474460418
|
| 7005 |
+
},
|
| 7006 |
+
"Latency": {
|
| 7007 |
+
"Min": 18.050434589385986,
|
| 7008 |
+
"Max": 138.10640954971313,
|
| 7009 |
+
"Med": 32.32542634010315
|
| 7010 |
+
},
|
| 7011 |
+
"Speed": {
|
| 7012 |
+
"Min": 17.82180729362148,
|
| 7013 |
+
"Max": 207.91746863187615,
|
| 7014 |
+
"Med": 106.44936231341633
|
| 7015 |
+
}
|
| 7016 |
+
},
|
| 7017 |
+
"Hallucination": {
|
| 7018 |
+
"Time to Answer": {
|
| 7019 |
+
"Min": 5.195440284614488,
|
| 7020 |
+
"Max": 61.22046760794428,
|
| 7021 |
+
"Med": 25.105323415675343
|
| 7022 |
+
},
|
| 7023 |
+
"Latency": {
|
| 7024 |
+
"Min": 7.313647270202637,
|
| 7025 |
+
"Max": 63.91348838806152,
|
| 7026 |
+
"Med": 32.319284319877625
|
| 7027 |
+
},
|
| 7028 |
+
"Speed": {
|
| 7029 |
+
"Min": 18.438031290688347,
|
| 7030 |
+
"Max": 226.50662559152707,
|
| 7031 |
+
"Med": 110.66899398987842
|
| 7032 |
+
}
|
| 7033 |
+
},
|
| 7034 |
+
"Safety": {
|
| 7035 |
+
"Time to Answer": {
|
| 7036 |
+
"Min": 4.932410193462053,
|
| 7037 |
+
"Max": 47.165975079516905,
|
| 7038 |
+
"Med": 23.45146352177868
|
| 7039 |
+
},
|
| 7040 |
+
"Latency": {
|
| 7041 |
+
"Min": 7.2830750942230225,
|
| 7042 |
+
"Max": 53.09182548522949,
|
| 7043 |
+
"Med": 24.52879786491394
|
| 7044 |
+
},
|
| 7045 |
+
"Speed": {
|
| 7046 |
+
"Min": 5.317348253806318,
|
| 7047 |
+
"Max": 250.7582211510182,
|
| 7048 |
+
"Med": 90.91284402754488
|
| 7049 |
+
}
|
| 7050 |
+
},
|
| 7051 |
+
"Repetition": {
|
| 7052 |
+
"Time to Answer": {
|
| 7053 |
+
"Min": 2.557051893849964,
|
| 7054 |
+
"Max": 120.32689256267814,
|
| 7055 |
+
"Med": 28.832852398544777
|
| 7056 |
+
},
|
| 7057 |
+
"Latency": {
|
| 7058 |
+
"Min": 20.129476308822632,
|
| 7059 |
+
"Max": 125.98315095901489,
|
| 7060 |
+
"Med": 33.87077188491821
|
| 7061 |
+
},
|
| 7062 |
+
"Speed": {
|
| 7063 |
+
"Min": 27.080736058951963,
|
| 7064 |
+
"Max": 266.27309790215196,
|
| 7065 |
+
"Med": 120.19876605631327
|
| 7066 |
+
}
|
| 7067 |
+
},
|
| 7068 |
+
"Summarization": {
|
| 7069 |
+
"Time to Answer": {
|
| 7070 |
+
"Min": 2.4394841513682888,
|
| 7071 |
+
"Max": 113.86724343465069,
|
| 7072 |
+
"Med": 23.029374821644574
|
| 7073 |
+
},
|
| 7074 |
+
"Latency": {
|
| 7075 |
+
"Min": 13.429885149002075,
|
| 7076 |
+
"Max": 115.32083773612976,
|
| 7077 |
+
"Med": 25.605836629867554
|
| 7078 |
+
},
|
| 7079 |
+
"Speed": {
|
| 7080 |
+
"Min": 10.53590514256035,
|
| 7081 |
+
"Max": 304.7862966713593,
|
| 7082 |
+
"Med": 101.49503613110383
|
| 7083 |
+
}
|
| 7084 |
+
},
|
| 7085 |
+
"Translation": {
|
| 7086 |
+
"Time to Answer": {
|
| 7087 |
+
"Min": 4.240170876932201,
|
| 7088 |
+
"Max": 72.23964902074354,
|
| 7089 |
+
"Med": 26.80100677708995
|
| 7090 |
+
},
|
| 7091 |
+
"Latency": {
|
| 7092 |
+
"Min": 17.525670528411865,
|
| 7093 |
+
"Max": 115.61775875091553,
|
| 7094 |
+
"Med": 30.075977206230164
|
| 7095 |
+
},
|
| 7096 |
+
"Speed": {
|
| 7097 |
+
"Min": 22.395176057735203,
|
| 7098 |
+
"Max": 369.6802851223203,
|
| 7099 |
+
"Med": 122.92398147980118
|
| 7100 |
+
}
|
| 7101 |
+
},
|
| 7102 |
+
"Multi-Turn": {
|
| 7103 |
+
"Time to Answer": {
|
| 7104 |
+
"Min": 9.995788375397986,
|
| 7105 |
+
"Max": 91.21509669950703,
|
| 7106 |
+
"Med": 39.29442425858453
|
| 7107 |
+
},
|
| 7108 |
+
"Latency": {
|
| 7109 |
+
"Min": 16.48517942428589,
|
| 7110 |
+
"Max": 112.05223345756531,
|
| 7111 |
+
"Med": 66.7337509393692
|
| 7112 |
+
},
|
| 7113 |
+
"Speed": {
|
| 7114 |
+
"Min": 21.53344632125304,
|
| 7115 |
+
"Max": 273.4009542241851,
|
| 7116 |
+
"Med": 87.97283614240237
|
| 7117 |
+
}
|
| 7118 |
+
}
|
| 7119 |
+
},
|
| 7120 |
+
"o3-pro (Reasoning: medium)": {
|
| 7121 |
"NUM_GPUS": 0,
|
| 7122 |
"Overall": {
|
| 7123 |
"Time to Answer": {
|