diff --git "a/src/data/open/time_data.json" "b/src/data/open/time_data.json" deleted file mode 100644--- "a/src/data/open/time_data.json" +++ /dev/null @@ -1,9020 +0,0 @@ -{ - "Olmo 3 32B Think": { - "NUM_GPUS": 4, - "Overall": { - "Time to Answer": { - "Min": 0.04750800132751465, - "Max": 662.4548862211922, - "Med": 60.18788400716624 - }, - "Latency": { - "Min": 4.962059259414673, - "Max": 1685.2101354599, - "Med": 77.51256728172302 - }, - "Speed": { - "Min": 27.866160798473338, - "Max": 61.32207413470597, - "Med": 44.30514641537086 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 9.247790738980477, - "Max": 535.0928019830272, - "Med": 54.36686619573619 - }, - "Latency": { - "Min": 15.104989528656006, - "Max": 1151.913678407669, - "Med": 73.72976446151733 - }, - "Speed": { - "Min": 28.739949330273706, - "Max": 56.69105335090586, - "Med": 44.640501961119014 - } - }, - "Editing": { - "Time to Answer": { - "Min": 9.188008042039543, - "Max": 332.63859022997735, - "Med": 53.86499203972291 - }, - "Latency": { - "Min": 12.425836563110352, - "Max": 373.80425238609314, - "Med": 64.4289436340332 - }, - "Speed": { - "Min": 29.70057087539234, - "Max": 56.003630745295354, - "Med": 44.42532373669283 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 6.513707979240609, - "Max": 662.4548862211922, - "Med": 50.387367917383386 - }, - "Latency": { - "Min": 8.310109853744507, - "Max": 688.9015896320343, - "Med": 60.394060373306274 - }, - "Speed": { - "Min": 27.866160798473338, - "Max": 55.563448472039894, - "Med": 44.31595511727765 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 11.777561432543752, - "Max": 650.9476703300404, - "Med": 77.94728694034356 - }, - "Latency": { - "Min": 16.024362087249756, - "Max": 668.085782289505, - "Med": 88.89124500751495 - }, - "Speed": { - "Min": 31.092492474628955, - "Max": 50.6252779439028, - "Med": 44.27561038703696 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0.04750800132751465, - "Max": 244.72053700062895, - "Med": 31.597100689212525 - }, - "Latency": { - "Min": 5.460567951202393, - "Max": 327.6873710155487, - "Med": 55.2690349817276 - }, - "Speed": { - "Min": 30.190577882159456, - "Max": 51.843879801237385, - "Med": 44.90390378879441 - } - }, - "Safety": { - "Time to Answer": { - "Min": 4.24024046375638, - "Max": 170.15584615909734, - "Med": 21.908013919514374 - }, - "Latency": { - "Min": 4.962059259414673, - "Max": 181.9493372440338, - "Med": 47.42558240890503 - }, - "Speed": { - "Min": 29.473423162196262, - "Max": 49.371541840187, - "Med": 43.69696811113183 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 22.753700505019047, - "Max": 561.4907359476722, - "Med": 99.96764908013014 - }, - "Latency": { - "Min": 29.67628502845764, - "Max": 1685.2101354599, - "Med": 124.29333961009979 - }, - "Speed": { - "Min": 29.66526343233663, - "Max": 51.682686076605144, - "Med": 43.84949112639627 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 6.697763084475674, - "Max": 486.0299537912613, - "Med": 42.976535539723244 - }, - "Latency": { - "Min": 8.786500215530396, - "Max": 489.2409255504608, - "Med": 51.56357514858246 - }, - "Speed": { - "Min": 29.2644856978122, - "Max": 48.69414303312388, - "Med": 43.39629490720476 - } - }, - "Translation": { - "Time to Answer": { - "Min": 15.65578042784481, - "Max": 361.4257761741054, - "Med": 94.50096548687068 - }, - "Latency": { - "Min": 18.458808422088623, - "Max": 368.57612133026123, - "Med": 104.97938454151154 - }, - "Speed": { - "Min": 29.43716166031538, - "Max": 53.272603690387285, - "Med": 43.679275761958166 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 16.31281149502611, - "Max": 455.0286171197091, - "Med": 98.93747010243024 - }, - "Latency": { - "Min": 22.590834856033325, - "Max": 506.03700613975525, - "Med": 158.81773710250854 - }, - "Speed": { - "Min": 36.85509319068589, - "Max": 61.32207413470597, - "Med": 44.533473375170736 - } - } - }, - "Claude 4.1 Opus (20250805) (think)": { - "NUM_GPUS": 0, - "Overall": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - } - }, - "EXAONE 4.0 32B (think)": { - "NUM_GPUS": 4, - "Overall": { - "Time to Answer": { - "Min": 3.4144101727869094, - "Max": 513.2891264184284, - "Med": 40.64476558326666 - }, - "Latency": { - "Min": 20.99729013442993, - "Max": 934.4734632968903, - "Med": 52.11687910556793 - }, - "Speed": { - "Min": 16.61057134664868, - "Max": 162.10935159553736, - "Med": 51.19312170664125 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 7.633849993628102, - "Max": 231.7280806002598, - "Med": 36.54088380167833 - }, - "Latency": { - "Min": 21.040523767471313, - "Max": 232.21923518180847, - "Med": 50.24720823764801 - }, - "Speed": { - "Min": 20.787888946136608, - "Max": 114.20518584211445, - "Med": 50.1604639402628 - } - }, - "Editing": { - "Time to Answer": { - "Min": 12.500720415079948, - "Max": 173.007215905767, - "Med": 38.760838347107224 - }, - "Latency": { - "Min": 22.887081623077393, - "Max": 849.9458158016205, - "Med": 46.940423011779785 - }, - "Speed": { - "Min": 16.979373098700997, - "Max": 121.20728194275716, - "Med": 48.33737263239857 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 14.844229666154776, - "Max": 300.4666561994639, - "Med": 45.13543711943175 - }, - "Latency": { - "Min": 24.72298240661621, - "Max": 869.9708209037781, - "Med": 52.51446557044983 - }, - "Speed": { - "Min": 18.523824227289303, - "Max": 89.4858437813844, - "Med": 53.716948749406555 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 14.410717908401937, - "Max": 513.2891264184284, - "Med": 83.9800359480807 - }, - "Latency": { - "Min": 24.732884168624878, - "Max": 877.5212540626526, - "Med": 96.65035057067871 - }, - "Speed": { - "Min": 23.647823414611626, - "Max": 107.57304182314931, - "Med": 67.40433996129815 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 3.4144101727869094, - "Max": 151.71826684676424, - "Med": 32.2312330293491 - }, - "Latency": { - "Min": 22.7819344997406, - "Max": 906.8727037906647, - "Med": 47.90342080593109 - }, - "Speed": { - "Min": 19.75722080101205, - "Max": 106.04260407717635, - "Med": 54.645308284660544 - } - }, - "Safety": { - "Time to Answer": { - "Min": 10.566644570988077, - "Max": 133.074610241198, - "Med": 35.94165070145218 - }, - "Latency": { - "Min": 24.716280698776245, - "Max": 918.9995102882385, - "Med": 50.52194547653198 - }, - "Speed": { - "Min": 18.889271374572864, - "Max": 91.60838632371379, - "Med": 48.55592538932731 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 9.202736462568625, - "Max": 335.47354093049165, - "Med": 40.20272950007865 - }, - "Latency": { - "Min": 25.63792395591736, - "Max": 934.4734632968903, - "Med": 64.67785906791687 - }, - "Speed": { - "Min": 31.14245792960796, - "Max": 81.95230771829375, - "Med": 64.25079400285846 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 11.943953691297523, - "Max": 119.86676215301935, - "Med": 33.615661293560336 - }, - "Latency": { - "Min": 20.99729013442993, - "Max": 123.07580637931824, - "Med": 41.15629196166992 - }, - "Speed": { - "Min": 16.61057134664868, - "Max": 162.10935159553736, - "Med": 41.75138646630421 - } - }, - "Translation": { - "Time to Answer": { - "Min": 8.466631611159183, - "Max": 217.10657279066066, - "Med": 37.943330019898575 - }, - "Latency": { - "Min": 24.699921369552612, - "Max": 912.5899341106415, - "Med": 46.74183750152588 - }, - "Speed": { - "Min": 20.5247673666553, - "Max": 118.11393114160076, - "Med": 52.023265197182106 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 16.078199565715938, - "Max": 368.63741803588266, - "Med": 62.4157434530232 - }, - "Latency": { - "Min": 32.095335245132446, - "Max": 875.7617001533508, - "Med": 101.20691084861755 - }, - "Speed": { - "Min": 20.182050423772992, - "Max": 86.75993195561644, - "Med": 49.33242302190656 - } - } - }, - "DeepSeek V3.1 (think)": { - "NUM_GPUS": 16, - "Overall": { - "Time to Answer": { - "Min": 0, - "Max": 1514.3208455816791, - "Med": 14.323043732258654 - }, - "Latency": { - "Min": 0, - "Max": 1548.4317767620087, - "Med": 35.32915151119232 - }, - "Speed": { - "Min": -1.0, - "Max": 28.380223497796603, - "Med": 16.64962453842425 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0, - "Max": 292.73746985682794, - "Med": 12.940513394842753 - }, - "Latency": { - "Min": 0, - "Max": 322.73060417175293, - "Med": 39.03947854042053 - }, - "Speed": { - "Min": -1.0, - "Max": 28.380223497796603, - "Med": 16.53439711366846 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0, - "Max": 102.25363540295453, - "Med": 13.516447650094339 - }, - "Latency": { - "Min": 0, - "Max": 128.12800359725952, - "Med": 29.857558369636536 - }, - "Speed": { - "Min": -1.0, - "Max": 20.64577150160594, - "Med": 16.73059544342773 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0, - "Max": 792.5189144205846, - "Med": 17.191278174137455 - }, - "Latency": { - "Min": 0, - "Max": 852.4884746074677, - "Med": 33.02548694610596 - }, - "Speed": { - "Min": -1.0, - "Max": 18.80186724003173, - "Med": 16.62865467531657 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0, - "Max": 1514.3208455816791, - "Med": 35.65744647163726 - }, - "Latency": { - "Min": 0, - "Max": 1548.4317767620087, - "Med": 52.65497827529907 - }, - "Speed": { - "Min": -1.0, - "Max": 21.66945947088214, - "Med": 16.885690193138235 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0, - "Max": 914.7124823739028, - "Med": 14.703980887673278 - }, - "Latency": { - "Min": 0, - "Max": 933.5806381702423, - "Med": 49.471826910972595 - }, - "Speed": { - "Min": -1.0, - "Max": 20.64133531206756, - "Med": 16.90358601838942 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0, - "Max": 117.22981431321395, - "Med": 14.034935183285102 - }, - "Latency": { - "Min": 0, - "Max": 162.29183912277222, - "Med": 27.645153045654297 - }, - "Speed": { - "Min": -1.0, - "Max": 19.172742080313082, - "Med": 16.74046817378121 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0, - "Max": 496.8597844394218, - "Med": 12.098461383382187 - }, - "Latency": { - "Min": 0, - "Max": 523.7148969173431, - "Med": 38.20433712005615 - }, - "Speed": { - "Min": -1.0, - "Max": 19.229234610965776, - "Med": 16.781320995773125 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0, - "Max": 35.473093050782396, - "Med": 11.53080370438754 - }, - "Latency": { - "Min": 0, - "Max": 88.18386340141296, - "Med": 24.485626935958862 - }, - "Speed": { - "Min": -1.0, - "Max": 18.834387813117782, - "Med": 16.30809724018161 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0, - "Max": 383.7323840006509, - "Med": 11.370327075525665 - }, - "Latency": { - "Min": 0, - "Max": 406.4747407436371, - "Med": 28.53658616542816 - }, - "Speed": { - "Min": -1.0, - "Max": 18.960980799351745, - "Med": 16.523481825406922 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 9.909065169295065, - "Max": 326.3710213392814, - "Med": 45.60986189453898 - }, - "Latency": { - "Min": 18.950550317764282, - "Max": 404.59281516075134, - "Med": 150.14288103580475 - }, - "Speed": { - "Min": 7.525416988646301, - "Max": 20.36390864889695, - "Med": 16.521596307021433 - } - } - }, - "Qwen3 30B A3B Thinking 2507": { - "NUM_GPUS": 4, - "Overall": { - "Time to Answer": { - "Min": 9.16166023526873, - "Max": 364.95654688521864, - "Med": 76.69636714346468 - }, - "Latency": { - "Min": 12.041510105133057, - "Max": 638.0842490196228, - "Med": 82.98819828033447 - }, - "Speed": { - "Min": 20.531162611679708, - "Max": 160.68073251847574, - "Med": 72.08537789542703 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 15.603441069680793, - "Max": 156.136086798576, - "Med": 74.2512004429204 - }, - "Latency": { - "Min": 25.29334020614624, - "Max": 185.96662783622742, - "Med": 81.27271902561188 - }, - "Speed": { - "Min": 23.55863710673906, - "Max": 119.56682470049108, - "Med": 70.5280253944976 - } - }, - "Editing": { - "Time to Answer": { - "Min": 18.931664539420087, - "Max": 140.52891474750427, - "Med": 72.34712931268933 - }, - "Latency": { - "Min": 25.34669280052185, - "Max": 149.4457712173462, - "Med": 76.52463150024414 - }, - "Speed": { - "Min": 29.141972461197753, - "Max": 153.14692409355675, - "Med": 71.85130189103711 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 17.90101413771371, - "Max": 240.63571648617832, - "Med": 63.29520852116787 - }, - "Latency": { - "Min": 25.32020354270935, - "Max": 247.22261834144592, - "Med": 66.36373686790466 - }, - "Speed": { - "Min": 21.62812172501451, - "Max": 112.01221458276083, - "Med": 70.29399612711825 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 22.21024972066539, - "Max": 259.5017886986011, - "Med": 70.73901353660817 - }, - "Latency": { - "Min": 25.334996461868286, - "Max": 271.74351620674133, - "Med": 76.10741257667542 - }, - "Speed": { - "Min": 20.531162611679708, - "Max": 120.30331096595025, - "Med": 75.42575217747617 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 19.701701633168454, - "Max": 100.74079518546846, - "Med": 60.796034373365224 - }, - "Latency": { - "Min": 25.253796339035034, - "Max": 114.25222158432007, - "Med": 72.2257593870163 - }, - "Speed": { - "Min": 23.8532082395033, - "Max": 103.3885698052657, - "Med": 72.67218795664121 - } - }, - "Safety": { - "Time to Answer": { - "Min": 9.16166023526873, - "Max": 111.38319833050322, - "Med": 60.724467924398965 - }, - "Latency": { - "Min": 12.043068885803223, - "Max": 638.0842490196228, - "Med": 67.80109643936157 - }, - "Speed": { - "Min": 21.86735467703005, - "Max": 132.72409234201703, - "Med": 70.4746933063947 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 37.93137662416011, - "Max": 364.95654688521864, - "Med": 88.71181365304685 - }, - "Latency": { - "Min": 44.675970792770386, - "Max": 540.3333976268768, - "Med": 92.67982578277588 - }, - "Speed": { - "Min": 38.430408956782586, - "Max": 143.8738963634062, - "Med": 78.14860060316678 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 10.028880362622221, - "Max": 151.20631558973804, - "Med": 69.38398601624172 - }, - "Latency": { - "Min": 12.041510105133057, - "Max": 154.90500736236572, - "Med": 72.39357209205627 - }, - "Speed": { - "Min": 24.578887801007905, - "Max": 144.1143078036554, - "Med": 71.0753593152036 - } - }, - "Translation": { - "Time to Answer": { - "Min": 22.066381338888483, - "Max": 193.3720690851547, - "Med": 92.6595917392112 - }, - "Latency": { - "Min": 25.218303203582764, - "Max": 218.31157541275024, - "Med": 97.52076303958893 - }, - "Speed": { - "Min": 23.268377999555767, - "Max": 160.68073251847574, - "Med": 75.40180261850259 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 43.39296992482472, - "Max": 217.28051232948138, - "Med": 125.749306148734 - }, - "Latency": { - "Min": 48.033796310424805, - "Max": 251.25360369682312, - "Med": 159.0379959344864 - }, - "Speed": { - "Min": 22.783500624220178, - "Max": 131.28729389175638, - "Med": 57.13612948239522 - } - } - }, - "o4-mini": { - "NUM_GPUS": 0, - "Overall": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - } - }, - "Gemini 2.5 Flash": { - "NUM_GPUS": 0, - "Overall": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - } - }, - "Claude 4 Sonnet (20250514) (think)": { - "NUM_GPUS": 0, - "Overall": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - } - }, - "Gemini 3 Pro Preview (Thinking Level: High)": { - "NUM_GPUS": -1, - "Overall": { - "Time to Answer": { - "Min": [ - 0 - ], - "Max": [ - 0 - ], - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 169.1725790500641, - "Med": 27.89457416534424 - }, - "Speed": { - "Min": 0, - "Max": 0, - "Med": 0.0 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": [ - 0 - ], - "Max": [ - 0 - ], - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 168.15567064285278, - "Med": 30.950587153434753 - }, - "Speed": { - "Min": 0, - "Max": 0, - "Med": 0.0 - } - }, - "Editing": { - "Time to Answer": { - "Min": [ - 0 - ], - "Max": [ - 0 - ], - "Med": 0.0 - }, - "Latency": { - "Min": 5.864927530288696, - "Max": 109.41859698295593, - "Med": 23.469240069389343 - }, - "Speed": { - "Min": 0, - "Max": 0, - "Med": 0.0 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": [ - 0 - ], - "Max": [ - 0 - ], - "Med": 0.0 - }, - "Latency": { - "Min": 6.848255395889282, - "Max": 151.04712963104248, - "Med": 20.09416127204895 - }, - "Speed": { - "Min": 0, - "Max": 0, - "Med": 0.0 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": [ - 0 - ], - "Max": [ - 0 - ], - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 165.32855772972107, - "Med": 26.79689347743988 - }, - "Speed": { - "Min": 0, - "Max": 0, - "Med": 0.0 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": [ - 0 - ], - "Max": [ - 0 - ], - "Med": 0.0 - }, - "Latency": { - "Min": 9.33104419708252, - "Max": 90.23524713516235, - "Med": 27.72087299823761 - }, - "Speed": { - "Min": 0, - "Max": 0, - "Med": 0.0 - } - }, - "Safety": { - "Time to Answer": { - "Min": [ - 0 - ], - "Max": [ - 0 - ], - "Med": 0.0 - }, - "Latency": { - "Min": 13.609748363494873, - "Max": 98.26702857017517, - "Med": 28.671757698059082 - }, - "Speed": { - "Min": 0, - "Max": 0, - "Med": 0.0 - } - }, - "Repetition": { - "Time to Answer": { - "Min": [ - 0 - ], - "Max": [ - 0 - ], - "Med": 0.0 - }, - "Latency": { - "Min": 9.140820264816284, - "Max": 76.10930681228638, - "Med": 25.359631299972534 - }, - "Speed": { - "Min": 0, - "Max": 0, - "Med": 0.0 - } - }, - "Summarization": { - "Time to Answer": { - "Min": [ - 0 - ], - "Max": [ - 0 - ], - "Med": 0.0 - }, - "Latency": { - "Min": 8.343881130218506, - "Max": 52.00087642669678, - "Med": 18.741631627082825 - }, - "Speed": { - "Min": 0, - "Max": 0, - "Med": 0.0 - } - }, - "Translation": { - "Time to Answer": { - "Min": [ - 0 - ], - "Max": [ - 0 - ], - "Med": 0.0 - }, - "Latency": { - "Min": 12.577407121658325, - "Max": 103.21936011314392, - "Med": 30.767643094062805 - }, - "Speed": { - "Min": 0, - "Max": 0, - "Med": 0.0 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": [ - 0 - ], - "Max": [ - 0 - ], - "Med": 0.0 - }, - "Latency": { - "Min": 12.984463930130005, - "Max": 169.1725790500641, - "Med": 65.30046927928925 - }, - "Speed": { - "Min": 0, - "Max": 0, - "Med": 0.0 - } - } - }, - "Solar Pro Preview (top_p:0.95, temp: 0.7)": { - "NUM_GPUS": 1, - "Overall": { - "Time to Answer": { - "Min": 0, - "Max": 52.261695861816406, - "Med": 12.68759036064148 - }, - "Latency": { - "Min": 0, - "Max": 128.3711211681366, - "Med": 39.932666063308716 - }, - "Speed": { - "Min": -1.0, - "Max": 176.29849781105915, - "Med": 11.341528558845873 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 2.5073390007019043, - "Max": 46.18695425987244, - "Med": 12.061493039131165 - }, - "Latency": { - "Min": 3.4709603786468506, - "Max": 99.35639119148254, - "Med": 41.951385855674744 - }, - "Speed": { - "Min": 0.9562495773678651, - "Max": 175.0839893904906, - "Med": 12.811208487516122 - } - }, - "Editing": { - "Time to Answer": { - "Min": 2.500551462173462, - "Max": 45.94177055358887, - "Med": 12.504145979881287 - }, - "Latency": { - "Min": 6.036438941955566, - "Max": 91.7774338722229, - "Med": 38.36840772628784 - }, - "Speed": { - "Min": 0.5818634515678456, - "Max": 176.29849781105915, - "Med": 9.401264124292883 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0, - "Max": 45.905545234680176, - "Med": 12.560058355331421 - }, - "Latency": { - "Min": 0, - "Max": 87.02808952331543, - "Med": 36.24109864234924 - }, - "Speed": { - "Min": -1.0, - "Max": 65.34941641982536, - "Med": 5.821421300518367 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 2.7311787605285645, - "Max": 46.377206563949585, - "Med": 12.860952854156494 - }, - "Latency": { - "Min": 3.253126859664917, - "Max": 92.76429653167725, - "Med": 38.43627142906189 - }, - "Speed": { - "Min": 0.12638988093619305, - "Max": 128.32896830253335, - "Med": 9.423681689970572 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0, - "Max": 46.227262020111084, - "Med": 12.791918754577637 - }, - "Latency": { - "Min": 0, - "Max": 91.77620100975037, - "Med": 38.5276939868927 - }, - "Speed": { - "Min": -1.0, - "Max": 50.61121343936651, - "Med": 7.155769956372106 - } - }, - "Safety": { - "Time to Answer": { - "Min": 2.721907377243042, - "Max": 45.699371337890625, - "Med": 11.323314189910889 - }, - "Latency": { - "Min": 6.107160806655884, - "Max": 79.03122425079346, - "Med": 36.222095012664795 - }, - "Speed": { - "Min": 1.62681542631282, - "Max": 145.32870756042576, - "Med": 6.758604853709128 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 2.7228612899780273, - "Max": 45.76867389678955, - "Med": 12.789806723594666 - }, - "Latency": { - "Min": 13.044578075408936, - "Max": 128.3711211681366, - "Med": 41.10053992271423 - }, - "Speed": { - "Min": 2.151628748553778, - "Max": 54.045861238451074, - "Med": 10.074748947258634 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0, - "Max": 45.63481116294861, - "Med": 12.297437906265259 - }, - "Latency": { - "Min": 0, - "Max": 78.26319169998169, - "Med": 40.80217635631561 - }, - "Speed": { - "Min": -1.0, - "Max": 65.44425836740314, - "Med": 7.998497924399052 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0, - "Max": 51.38699960708618, - "Med": 13.076258778572083 - }, - "Latency": { - "Min": 0, - "Max": 96.5646870136261, - "Med": 43.765286564826965 - }, - "Speed": { - "Min": -1.0, - "Max": 82.86680764205305, - "Med": 12.541236811520616 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 1.711451530456543, - "Max": 52.261695861816406, - "Med": 17.96825420856476 - }, - "Latency": { - "Min": 9.450973272323608, - "Max": 101.22957396507263, - "Med": 48.650840520858765 - }, - "Speed": { - "Min": 13.167392213279486, - "Max": 69.88855936616866, - "Med": 31.507930803476533 - } - } - }, - "DeepSeek R1 (0528) (top_p: 0.95, temp:0.6)": { - "NUM_GPUS": 16, - "Overall": { - "Time to Answer": { - "Min": 2.764301555573673, - "Max": 1768.1222511705028, - "Med": 28.558620557700998 - }, - "Latency": { - "Min": 13.4767746925354, - "Max": 1806.1744508743286, - "Med": 70.60028326511383 - }, - "Speed": { - "Min": 16.435337537728117, - "Max": 26.925445952838658, - "Med": 17.625838630215213 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 4.884691836275731, - "Max": 760.0874121810232, - "Med": 26.83833759766359 - }, - "Latency": { - "Min": 17.949687719345093, - "Max": 777.3156294822693, - "Med": 73.53469789028168 - }, - "Speed": { - "Min": 16.435337537728117, - "Max": 26.925445952838658, - "Med": 17.639176342810917 - } - }, - "Editing": { - "Time to Answer": { - "Min": 2.764301555573673, - "Max": 606.8995444370772, - "Med": 27.459145599987394 - }, - "Latency": { - "Min": 14.386600732803345, - "Max": 660.2321028709412, - "Med": 60.644097089767456 - }, - "Speed": { - "Min": 16.795739100887705, - "Max": 21.712067546201926, - "Med": 17.678619206496855 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 10.881833032185853, - "Max": 912.8203162033251, - "Med": 43.32579209743402 - }, - "Latency": { - "Min": 14.52626633644104, - "Max": 999.2858347892761, - "Med": 69.08415055274963 - }, - "Speed": { - "Min": 16.700107800979566, - "Max": 18.918757519366398, - "Med": 17.673182556294357 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 9.944265248816524, - "Max": 1768.1222511705028, - "Med": 167.58135768087703 - }, - "Latency": { - "Min": 30.2700412273407, - "Max": 1806.1744508743286, - "Med": 205.80969643592834 - }, - "Speed": { - "Min": 17.255187337252412, - "Max": 23.52525689517377, - "Med": 17.696726172790516 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 6.184463609348644, - "Max": 710.169617076485, - "Med": 23.44426478357876 - }, - "Latency": { - "Min": 15.7332923412323, - "Max": 723.5113129615784, - "Med": 70.26296675205231 - }, - "Speed": { - "Min": 16.96543152555245, - "Max": 20.156420916929314, - "Med": 17.7217068438469 - } - }, - "Safety": { - "Time to Answer": { - "Min": 6.436277964285442, - "Max": 95.17965811625005, - "Med": 22.51649953017488 - }, - "Latency": { - "Min": 13.4767746925354, - "Max": 293.1685571670532, - "Med": 57.650511026382446 - }, - "Speed": { - "Min": 16.760181071674328, - "Max": 18.066919194308944, - "Med": 17.644351721000103 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 4.954117686327766, - "Max": 759.8583697054489, - "Med": 25.7581103899819 - }, - "Latency": { - "Min": 30.598492860794067, - "Max": 899.0715620517731, - "Med": 89.80700302124023 - }, - "Speed": { - "Min": 17.108408319010014, - "Max": 19.625805315884072, - "Med": 17.654616963594634 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 7.104722378640202, - "Max": 235.5764555132692, - "Med": 22.36237211647579 - }, - "Latency": { - "Min": 18.451950550079346, - "Max": 247.45313572883606, - "Med": 41.69261300563812 - }, - "Speed": { - "Min": 16.73555874639253, - "Max": 18.625725897831447, - "Med": 17.408287017072333 - } - }, - "Translation": { - "Time to Answer": { - "Min": 3.557063277129533, - "Max": 566.7627203144982, - "Med": 24.635573384405554 - }, - "Latency": { - "Min": 14.454368591308594, - "Max": 610.273509979248, - "Med": 58.34543836116791 - }, - "Speed": { - "Min": 16.781036551228826, - "Max": 20.12358736830915, - "Med": 17.57016447334408 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 20.240695120835664, - "Max": 913.0140343389244, - "Med": 75.17357507472337 - }, - "Latency": { - "Min": 32.679646730422974, - "Max": 985.9503352642059, - "Med": 199.6272430419922 - }, - "Speed": { - "Min": 16.474947268250585, - "Max": 21.87697389763361, - "Med": 17.5033191879073 - } - } - }, - "A.X 4.0": { - "NUM_GPUS": 8, - "Overall": { - "Time to Answer": { - "Min": 0, - "Max": 10.526954889297485, - "Med": 0.6553128957748413 - }, - "Latency": { - "Min": 0, - "Max": 1308.2780318260193, - "Med": 7.924791574478149 - }, - "Speed": { - "Min": 11.02534014678359, - "Max": 125.73984471025572, - "Med": 57.95526130360478 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0.16420459747314453, - "Max": 2.7126715183258057, - "Med": 0.6686064004898071 - }, - "Latency": { - "Min": 0.5585525035858154, - "Max": 1106.499571800232, - "Med": 9.92771577835083 - }, - "Speed": { - "Min": 12.521039284376254, - "Max": 125.73984471025572, - "Med": 58.29241910041864 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0.08281683921813965, - "Max": 2.8162050247192383, - "Med": 0.6682661771774292 - }, - "Latency": { - "Min": 0.6691253185272217, - "Max": 1090.3874669075012, - "Med": 4.9546427726745605 - }, - "Speed": { - "Min": 23.981836615138615, - "Max": 77.22708848730595, - "Med": 57.99892645871563 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0.07539105415344238, - "Max": 2.8166723251342773, - "Med": 0.5909883975982666 - }, - "Latency": { - "Min": 0.45367908477783203, - "Max": 1133.0001804828644, - "Med": 5.2564778327941895 - }, - "Speed": { - "Min": 17.565116882899332, - "Max": 64.48262656118239, - "Med": 57.69638702455105 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0.06842374801635742, - "Max": 1.7800352573394775, - "Med": 0.6370612382888794 - }, - "Latency": { - "Min": 0.38808178901672363, - "Max": 1143.0868384838104, - "Med": 8.91679060459137 - }, - "Speed": { - "Min": 11.02534014678359, - "Max": 94.302763225937, - "Med": 59.14737395732456 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0.06906628608703613, - "Max": 2.123112201690674, - "Med": 0.6469535827636719 - }, - "Latency": { - "Min": 1.22810959815979, - "Max": 1061.943632364273, - "Med": 9.127522230148315 - }, - "Speed": { - "Min": 31.134162445710597, - "Max": 69.89706232565841, - "Med": 58.912189169814084 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0.11001443862915039, - "Max": 2.2236733436584473, - "Med": 0.6548991203308105 - }, - "Latency": { - "Min": 0.6948869228363037, - "Max": 1146.4017734527588, - "Med": 8.849064350128174 - }, - "Speed": { - "Min": 21.322296378068902, - "Max": 64.05987249863603, - "Med": 58.170504908654124 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0.14055109024047852, - "Max": 2.115959405899048, - "Med": 0.6609587669372559 - }, - "Latency": { - "Min": 1.7079215049743652, - "Max": 1051.8595185279846, - "Med": 7.366745352745056 - }, - "Speed": { - "Min": 39.525365322666715, - "Max": 68.83972775167882, - "Med": 58.07212212077468 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0.06618690490722656, - "Max": 1.8745179176330566, - "Med": 0.610399603843689 - }, - "Latency": { - "Min": 0.8354628086090088, - "Max": 224.89702439308167, - "Med": 5.501296639442444 - }, - "Speed": { - "Min": 38.93359501835824, - "Max": 82.45687743171538, - "Med": 57.35378052075117 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0.07382607460021973, - "Max": 6.137380361557007, - "Med": 0.5327284336090088 - }, - "Latency": { - "Min": 0.4180417060852051, - "Max": 116.0861234664917, - "Med": 6.411357164382935 - }, - "Speed": { - "Min": 48.0134203882505, - "Max": 66.00195171170768, - "Med": 58.10338367550699 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 0, - "Max": 10.526954889297485, - "Med": 1.9606951475143433 - }, - "Latency": { - "Min": 0, - "Max": 1308.2780318260193, - "Med": 28.2254718542099 - }, - "Speed": { - "Min": 18.30412983153189, - "Max": 92.931879716338, - "Med": 54.44005294446889 - } - } - }, - "DeepSeek V3.1 Terminus (think)": { - "NUM_GPUS": 16, - "Overall": { - "Time to Answer": { - "Min": 3.415008340563093, - "Max": 1545.5577580106956, - "Med": 17.055466594943752 - }, - "Latency": { - "Min": 7.143633842468262, - "Max": 3658.917438030243, - "Med": 47.552645206451416 - }, - "Speed": { - "Min": 14.880365173111262, - "Max": 26.82140614708103, - "Med": 17.890508425613742 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 6.386302808614877, - "Max": 550.3507635682957, - "Med": 15.336940233087702 - }, - "Latency": { - "Min": 7.143633842468262, - "Max": 671.2960011959076, - "Med": 51.84824335575104 - }, - "Speed": { - "Min": 16.537231123432836, - "Max": 23.86251292538409, - "Med": 17.96431779474341 - } - }, - "Editing": { - "Time to Answer": { - "Min": 6.485150018208463, - "Max": 401.7660860866688, - "Med": 15.089953586642348 - }, - "Latency": { - "Min": 7.158470869064331, - "Max": 409.95285391807556, - "Med": 38.49427795410156 - }, - "Speed": { - "Min": 14.880365173111262, - "Max": 18.63958504538977, - "Med": 17.983750440163814 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 5.312816272038576, - "Max": 804.9826353702472, - "Med": 22.785784674631532 - }, - "Latency": { - "Min": 7.302240371704102, - "Max": 842.7938885688782, - "Med": 41.25356364250183 - }, - "Speed": { - "Min": 15.86222512374095, - "Max": 22.730006888880855, - "Med": 17.89391027519262 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 7.8878182199032825, - "Max": 1545.5577580106956, - "Med": 57.998326347282315 - }, - "Latency": { - "Min": 15.681542158126831, - "Max": 3658.917438030243, - "Med": 77.8536776304245 - }, - "Speed": { - "Min": 16.507054642141266, - "Max": 26.82140614708103, - "Med": 18.062922832689473 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 7.510706953793408, - "Max": 578.5017807997543, - "Med": 16.021254616111158 - }, - "Latency": { - "Min": 12.842243909835815, - "Max": 597.9634590148926, - "Med": 64.10096645355225 - }, - "Speed": { - "Min": 16.559331266924172, - "Max": 18.758053541131343, - "Med": 18.115431787127967 - } - }, - "Safety": { - "Time to Answer": { - "Min": 7.833054889773213, - "Max": 99.65802871837266, - "Med": 15.631929102645582 - }, - "Latency": { - "Min": 11.54340410232544, - "Max": 216.2049753665924, - "Med": 37.590699672698975 - }, - "Speed": { - "Min": 16.6348213937014, - "Max": 18.972825829298692, - "Med": 17.941115349281368 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 6.6485912111353755, - "Max": 382.029754544358, - "Med": 14.313094206284964 - }, - "Latency": { - "Min": 16.13406538963318, - "Max": 3203.4312913417816, - "Med": 58.34576392173767 - }, - "Speed": { - "Min": 16.91267991284933, - "Max": 20.45223208175133, - "Med": 18.041494104195927 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 6.838876978295749, - "Max": 104.90984603633052, - "Med": 13.311735474109334 - }, - "Latency": { - "Min": 8.293706178665161, - "Max": 129.21354985237122, - "Med": 25.392707109451294 - }, - "Speed": { - "Min": 16.159825670667708, - "Max": 19.27061528907847, - "Med": 17.611228916123018 - } - }, - "Translation": { - "Time to Answer": { - "Min": 3.415008340563093, - "Max": 499.89064122746515, - "Med": 13.361526400106925 - }, - "Latency": { - "Min": 10.667519330978394, - "Max": 526.4252800941467, - "Med": 34.35068929195404 - }, - "Speed": { - "Min": 16.3019611465303, - "Max": 20.779157629032014, - "Med": 17.900308355974385 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 14.35590019735318, - "Max": 641.663802425182, - "Med": 43.34595441717043 - }, - "Latency": { - "Min": 17.120458126068115, - "Max": 853.4725525379181, - "Med": 155.66978204250336 - }, - "Speed": { - "Min": 15.33233580541384, - "Max": 22.23187227690289, - "Med": 17.67237883880672 - } - } - }, - "Dhanishtha-2.0 Preview": { - "NUM_GPUS": 1, - "Overall": { - "Time to Answer": { - "Min": 1.5839078426361084, - "Max": 31.054526805877686, - "Med": 4.368606805801392 - }, - "Latency": { - "Min": 9.942606449127197, - "Max": 131.58703541755676, - "Med": 35.15699875354767 - }, - "Speed": { - "Min": 3.7315392457146315, - "Max": 83.12757800749918, - "Med": 17.75738514863349 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 1.5839078426361084, - "Max": 14.249487161636353, - "Med": 4.174551248550415 - }, - "Latency": { - "Min": 15.61229681968689, - "Max": 68.42075729370117, - "Med": 35.556575536727905 - }, - "Speed": { - "Min": 3.7315392457146315, - "Max": 78.70559503273634, - "Med": 18.569924892372086 - } - }, - "Editing": { - "Time to Answer": { - "Min": 1.9209108352661133, - "Max": 14.220961093902588, - "Med": 4.214044094085693 - }, - "Latency": { - "Min": 15.878032684326172, - "Max": 51.72106313705444, - "Med": 34.23416888713837 - }, - "Speed": { - "Min": 4.349397272605004, - "Max": 53.4553157438125, - "Med": 15.59048741844877 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 2.0026886463165283, - "Max": 14.330097436904907, - "Med": 4.264540910720825 - }, - "Latency": { - "Min": 15.492263317108154, - "Max": 86.72672486305237, - "Med": 34.66960024833679 - }, - "Speed": { - "Min": 5.711889702488061, - "Max": 78.99341854325922, - "Med": 16.223466498569906 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 1.6599221229553223, - "Max": 14.43197774887085, - "Med": 3.9918036460876465 - }, - "Latency": { - "Min": 15.9655921459198, - "Max": 51.98690748214722, - "Med": 35.49437427520752 - }, - "Speed": { - "Min": 6.382185072861903, - "Max": 77.07097090018262, - "Med": 20.275446641877387 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 1.6478140354156494, - "Max": 12.624572038650513, - "Med": 4.188554286956787 - }, - "Latency": { - "Min": 14.814888954162598, - "Max": 84.91817879676819, - "Med": 34.31693959236145 - }, - "Speed": { - "Min": 5.576878248063783, - "Max": 79.0218001634417, - "Med": 16.994417851887242 - } - }, - "Safety": { - "Time to Answer": { - "Min": 2.003626585006714, - "Max": 12.981850862503052, - "Med": 4.0139992237091064 - }, - "Latency": { - "Min": 14.703532457351685, - "Max": 46.97670245170593, - "Med": 33.39446020126343 - }, - "Speed": { - "Min": 5.835292654577009, - "Max": 57.624133083858574, - "Med": 13.772987124238506 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 1.6010737419128418, - "Max": 12.8489511013031, - "Med": 3.9335577487945557 - }, - "Latency": { - "Min": 15.96399211883545, - "Max": 83.84577012062073, - "Med": 35.164939522743225 - }, - "Speed": { - "Min": 7.913820950042516, - "Max": 61.16802651575593, - "Med": 19.237509583858355 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 2.0594398975372314, - "Max": 12.99567985534668, - "Med": 4.562246680259705 - }, - "Latency": { - "Min": 9.942606449127197, - "Max": 49.9459753036499, - "Med": 33.85763645172119 - }, - "Speed": { - "Min": 5.511090930667453, - "Max": 74.90896346809664, - "Med": 14.041764327517052 - } - }, - "Translation": { - "Time to Answer": { - "Min": 1.6257987022399902, - "Max": 18.216102361679077, - "Med": 4.544387936592102 - }, - "Latency": { - "Min": 13.830479621887207, - "Max": 51.186935901641846, - "Med": 34.61378490924835 - }, - "Speed": { - "Min": 3.7880436790911665, - "Max": 83.12757800749918, - "Med": 15.479612589205376 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 2.355271100997925, - "Max": 31.054526805877686, - "Med": 14.455448269844055 - }, - "Latency": { - "Min": 24.809295892715454, - "Max": 131.58703541755676, - "Med": 61.22457039356232 - }, - "Speed": { - "Min": 24.85843420960008, - "Max": 78.04249041256476, - "Med": 43.742556439353024 - } - } - }, - "GPT-5 (Reasoning: medium)": { - "NUM_GPUS": 0, - "Overall": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - } - }, - "Kanana 1.5 15.7B A3B Instruct": { - "NUM_GPUS": 4, - "Overall": { - "Time to Answer": { - "Min": 0.2950704097747803, - "Max": 19.190000534057617, - "Med": 2.999279260635376 - }, - "Latency": { - "Min": 1.070239782333374, - "Max": 203.20437669754028, - "Med": 14.037613034248352 - }, - "Speed": { - "Min": 0.6410989541092627, - "Max": 1909.5329304684637, - "Med": 39.50831768498445 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0.490856409072876, - "Max": 15.175626277923584, - "Med": 3.3811464309692383 - }, - "Latency": { - "Min": 1.7633733749389648, - "Max": 49.41284513473511, - "Med": 14.430387616157532 - }, - "Speed": { - "Min": 5.5875015763942875, - "Max": 539.69077471023, - "Med": 42.75565553572446 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0.38567614555358887, - "Max": 13.241939783096313, - "Med": 2.523622512817383 - }, - "Latency": { - "Min": 1.3466732501983643, - "Max": 32.80991268157959, - "Med": 11.42404818534851 - }, - "Speed": { - "Min": 5.234891182135263, - "Max": 507.04835589941973, - "Med": 31.254749960295268 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0.6028447151184082, - "Max": 8.340348958969116, - "Med": 2.91544771194458 - }, - "Latency": { - "Min": 1.070239782333374, - "Max": 29.14335012435913, - "Med": 13.805753707885742 - }, - "Speed": { - "Min": 0.6410989541092627, - "Max": 141.1044409952281, - "Med": 40.787436278283884 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0.3281252384185791, - "Max": 13.222564220428467, - "Med": 2.2433005571365356 - }, - "Latency": { - "Min": 1.2685565948486328, - "Max": 28.794756412506104, - "Med": 14.119416952133179 - }, - "Speed": { - "Min": 13.641077677607358, - "Max": 163.52627706116195, - "Med": 48.15016969012067 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0.42233896255493164, - "Max": 11.167439222335815, - "Med": 2.5416932106018066 - }, - "Latency": { - "Min": 4.380939722061157, - "Max": 203.20437669754028, - "Med": 14.08097231388092 - }, - "Speed": { - "Min": 10.586995966039487, - "Max": 175.01734588401268, - "Med": 41.519872568736545 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0.574749231338501, - "Max": 6.5914833545684814, - "Med": 2.0713586807250977 - }, - "Latency": { - "Min": 2.4983015060424805, - "Max": 18.455265283584595, - "Med": 13.543225526809692 - }, - "Speed": { - "Min": 5.826858917772635, - "Max": 140.0235890228829, - "Med": 40.47326435464191 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0.4560980796813965, - "Max": 6.587333917617798, - "Med": 2.0242170095443726 - }, - "Latency": { - "Min": 5.732140779495239, - "Max": 17.347358226776123, - "Med": 12.338063716888428 - }, - "Speed": { - "Min": 14.0354186260475, - "Max": 150.95745017437025, - "Med": 33.40031712244046 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0.2950704097747803, - "Max": 14.430352687835693, - "Med": 2.8797518014907837 - }, - "Latency": { - "Min": 3.3674020767211914, - "Max": 41.07946181297302, - "Med": 12.902186274528503 - }, - "Speed": { - "Min": 7.964564870312544, - "Max": 1909.5329304684637, - "Med": 33.549174860572876 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0.42589521408081055, - "Max": 14.840283393859863, - "Med": 2.7693560123443604 - }, - "Latency": { - "Min": 2.22856068611145, - "Max": 33.374823331832886, - "Med": 12.456492066383362 - }, - "Speed": { - "Min": 11.784218581941628, - "Max": 261.568104315782, - "Med": 35.834903894877215 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 1.7325413227081299, - "Max": 19.190000534057617, - "Med": 5.854122042655945 - }, - "Latency": { - "Min": 4.46362829208374, - "Max": 178.65205764770508, - "Med": 29.114682912826538 - }, - "Speed": { - "Min": 10.455827672869331, - "Max": 1199.7649674591546, - "Med": 50.27327767302565 - } - } - }, - "DeepSeek V3 (0324) (top_p: 0.95, temp:1.3)": { - "NUM_GPUS": 16, - "Overall": { - "Time to Answer": { - "Min": 0.10785865783691406, - "Max": 8.64589548110962, - "Med": 0.21145284175872803 - }, - "Latency": { - "Min": 0.2305757999420166, - "Max": 403.65662026405334, - "Med": 23.471113204956055 - }, - "Speed": { - "Min": 9.426897650001193, - "Max": 76.5957011632791, - "Med": 17.62487523518351 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0.10920858383178711, - "Max": 6.918291091918945, - "Med": 0.16250300407409668 - }, - "Latency": { - "Min": 0.4426717758178711, - "Max": 150.40352630615234, - "Med": 25.61068093776703 - }, - "Speed": { - "Min": 11.342064076866532, - "Max": 23.99137142655132, - "Med": 17.626346629213206 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0.10785865783691406, - "Max": 8.645653486251831, - "Med": 0.17302322387695312 - }, - "Latency": { - "Min": 0.5186541080474854, - "Max": 65.11374545097351, - "Med": 18.160674571990967 - }, - "Speed": { - "Min": 15.364075059205426, - "Max": 19.618921485231414, - "Med": 17.688557316343122 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0.1098175048828125, - "Max": 8.64589548110962, - "Med": 0.21381735801696777 - }, - "Latency": { - "Min": 0.2305757999420166, - "Max": 197.29790377616882, - "Med": 18.819954872131348 - }, - "Speed": { - "Min": 14.758350807114091, - "Max": 76.5957011632791, - "Med": 17.54876951895393 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0.1092844009399414, - "Max": 1.1797599792480469, - "Med": 0.16037893295288086 - }, - "Latency": { - "Min": 0.8072242736816406, - "Max": 354.78934025764465, - "Med": 27.786561489105225 - }, - "Speed": { - "Min": 16.84148262159971, - "Max": 21.160641475350214, - "Med": 17.784017805373594 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0.11016988754272461, - "Max": 1.633366346359253, - "Med": 0.12740850448608398 - }, - "Latency": { - "Min": 4.553909540176392, - "Max": 111.47468686103821, - "Med": 26.221610069274902 - }, - "Speed": { - "Min": 16.05812025233548, - "Max": 18.864871338982017, - "Med": 17.892269666949126 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0.1089620590209961, - "Max": 0.22528696060180664, - "Med": 0.1165308952331543 - }, - "Latency": { - "Min": 1.2472314834594727, - "Max": 207.18200206756592, - "Med": 22.932410717010498 - }, - "Speed": { - "Min": 15.393203592277645, - "Max": 18.897589008005845, - "Med": 17.645136017022427 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0.10803794860839844, - "Max": 0.3633410930633545, - "Med": 0.12484323978424072 - }, - "Latency": { - "Min": 5.136878252029419, - "Max": 105.00382542610168, - "Med": 20.594692945480347 - }, - "Speed": { - "Min": 15.908643280336022, - "Max": 19.299295362008348, - "Med": 17.80418140664076 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0.1102299690246582, - "Max": 0.7326507568359375, - "Med": 0.22149121761322021 - }, - "Latency": { - "Min": 1.9279124736785889, - "Max": 55.85229849815369, - "Med": 13.784390568733215 - }, - "Speed": { - "Min": 13.446999915153956, - "Max": 18.962539066022238, - "Med": 17.26362941552464 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0.10993146896362305, - "Max": 0.47490572929382324, - "Med": 0.21384954452514648 - }, - "Latency": { - "Min": 0.7841920852661133, - "Max": 124.22478747367859, - "Med": 20.944645285606384 - }, - "Speed": { - "Min": 14.957888073607162, - "Max": 23.135152306098593, - "Med": 17.718757346734954 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 0.2285633087158203, - "Max": 7.203721284866333, - "Med": 0.6101471185684204 - }, - "Latency": { - "Min": 0.7318305969238281, - "Max": 403.65662026405334, - "Med": 81.63708293437958 - }, - "Speed": { - "Min": 9.426897650001193, - "Max": 21.987528001573125, - "Med": 17.31493440431644 - } - } - }, - "GLM-4.6 FP8": { - "NUM_GPUS": 8, - "Overall": { - "Time to Answer": { - "Min": 2.7801706412481884, - "Max": 905.4030683168569, - "Med": 81.414294828216 - }, - "Latency": { - "Min": 5.523756742477417, - "Max": 980.8252513408661, - "Med": 110.0251989364624 - }, - "Speed": { - "Min": 20.652261657167596, - "Max": 36.354950611136516, - "Med": 24.034975709814915 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 29.890998142677663, - "Max": 320.5243282603581, - "Med": 76.96281691191001 - }, - "Latency": { - "Min": 30.132781982421875, - "Max": 351.22037172317505, - "Med": 107.16252553462982 - }, - "Speed": { - "Min": 20.652261657167596, - "Max": 35.4397064742687, - "Med": 24.039399357118008 - } - }, - "Editing": { - "Time to Answer": { - "Min": 35.89468906466801, - "Max": 196.146778273612, - "Med": 77.49828573974564 - }, - "Latency": { - "Min": 41.15352416038513, - "Max": 202.58619260787964, - "Med": 98.65248310565948 - }, - "Speed": { - "Min": 21.295797397952708, - "Max": 33.831551273128895, - "Med": 24.082747556876612 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 19.664140036953057, - "Max": 337.85498350948535, - "Med": 63.25398513389442 - }, - "Latency": { - "Min": 20.856733560562134, - "Max": 378.37875533103943, - "Med": 72.9943277835846 - }, - "Speed": { - "Min": 20.920471319397066, - "Max": 25.168615131246383, - "Med": 24.003023739331354 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 11.92279735786102, - "Max": 905.4030683168569, - "Med": 72.8766796650493 - }, - "Latency": { - "Min": 21.14651870727539, - "Max": 980.8252513408661, - "Med": 107.8480840921402 - }, - "Speed": { - "Min": 21.215214398369586, - "Max": 36.31697692204238, - "Med": 24.003531614763205 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 30.728043304422876, - "Max": 418.7279145737321, - "Med": 65.38079940075421 - }, - "Latency": { - "Min": 35.595698595047, - "Max": 427.6212487220764, - "Med": 100.73350870609283 - }, - "Speed": { - "Min": 21.166907180284447, - "Max": 36.354950611136516, - "Med": 24.089094372942107 - } - }, - "Safety": { - "Time to Answer": { - "Min": 6.689368647883138, - "Max": 114.24630745221162, - "Med": 68.01278338339722 - }, - "Latency": { - "Min": 10.623645067214966, - "Max": 304.0291910171509, - "Med": 99.59718537330627 - }, - "Speed": { - "Min": 21.261282608777865, - "Max": 26.579140843066543, - "Med": 23.996135940138497 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 41.73994091380337, - "Max": 398.56112897990215, - "Med": 83.64501283479864 - }, - "Latency": { - "Min": 47.23124670982361, - "Max": 414.76913619041443, - "Med": 108.74924111366272 - }, - "Speed": { - "Min": 21.687954122536873, - "Max": 27.464165243137447, - "Med": 23.985507727750566 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 37.70941982508703, - "Max": 267.0972462650341, - "Med": 73.78317064204931 - }, - "Latency": { - "Min": 39.781272888183594, - "Max": 276.51572942733765, - "Med": 82.75416028499603 - }, - "Speed": { - "Min": 21.199364619742536, - "Max": 28.6440687779337, - "Med": 24.108296001406465 - } - }, - "Translation": { - "Time to Answer": { - "Min": 2.7801706412481884, - "Max": 335.1839381075824, - "Med": 120.5664046766091 - }, - "Latency": { - "Min": 5.523756742477417, - "Max": 441.54190707206726, - "Med": 142.0722097158432 - }, - "Speed": { - "Min": 22.85048852310985, - "Max": 34.848891493893916, - "Med": 24.116170361432413 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 67.31233654758816, - "Max": 394.56204520280323, - "Med": 177.67190055075207 - }, - "Latency": { - "Min": 70.5745906829834, - "Max": 576.4176054000854, - "Med": 284.6316658258438 - }, - "Speed": { - "Min": 21.172719034413866, - "Max": 35.1579950481419, - "Med": 23.57853134165275 - } - } - }, - "Claude 4.5 Opus (think)": { - "NUM_GPUS": 0, - "Overall": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": -1.0, - "Max": -1.0, - "Med": -1.0 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": -1.0, - "Max": -1.0, - "Med": -1.0 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": -1.0, - "Max": -1.0, - "Med": -1.0 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": -1.0, - "Max": -1.0, - "Med": -1.0 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": -1.0, - "Max": -1.0, - "Med": -1.0 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": -1.0, - "Max": -1.0, - "Med": -1.0 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": -1.0, - "Max": -1.0, - "Med": -1.0 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": -1.0, - "Max": -1.0, - "Med": -1.0 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": -1.0, - "Max": -1.0, - "Med": -1.0 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": -1.0, - "Max": -1.0, - "Med": -1.0 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": -1.0, - "Max": -1.0, - "Med": -1.0 - } - } - }, - "GLM-4.5 FP8 (think)": { - "NUM_GPUS": 8, - "Overall": { - "Time to Answer": { - "Min": 0.11270952224731445, - "Max": 1084.7877391024863, - "Med": 25.261904125875603 - }, - "Latency": { - "Min": 2.203545331954956, - "Max": 2499.599281311035, - "Med": 62.74959444999695 - }, - "Speed": { - "Min": 19.420678190531984, - "Max": 38.97772164575481, - "Med": 23.293980879127712 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 3.702089722433614, - "Max": 278.2958468033817, - "Med": 21.031848154986903 - }, - "Latency": { - "Min": 14.542505025863647, - "Max": 357.45922803878784, - "Med": 52.71355986595154 - }, - "Speed": { - "Min": 19.45536289987673, - "Max": 27.213499913336133, - "Med": 23.29757774645036 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0.19978117942810059, - "Max": 394.63220830460335, - "Med": 20.454006360666682 - }, - "Latency": { - "Min": 14.876377820968628, - "Max": 2066.514055490494, - "Med": 52.174468755722046 - }, - "Speed": { - "Min": 19.420678190531984, - "Max": 31.54312042960219, - "Med": 23.364851518772213 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 9.215853875000729, - "Max": 688.713021743005, - "Med": 38.58885724584223 - }, - "Latency": { - "Min": 12.500055313110352, - "Max": 752.6626207828522, - "Med": 61.195499897003174 - }, - "Speed": { - "Min": 19.990698401225018, - "Max": 28.047258818413045, - "Med": 23.297073068577525 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 9.862705428512008, - "Max": 1084.7877391024863, - "Med": 109.94514346540936 - }, - "Latency": { - "Min": 24.787205457687378, - "Max": 1097.3383736610413, - "Med": 140.0307160615921 - }, - "Speed": { - "Min": 20.437700069364222, - "Max": 31.250693111753982, - "Med": 23.35565404917262 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0.16067725088861254, - "Max": 601.690997086051, - "Med": 18.579426623188965 - }, - "Latency": { - "Min": 2.203545331954956, - "Max": 612.0083646774292, - "Med": 62.917097330093384 - }, - "Speed": { - "Min": 20.651703882224417, - "Max": 27.452833898163252, - "Med": 23.430786062799363 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0.15618736586294885, - "Max": 133.02884544318744, - "Med": 17.7997559837673 - }, - "Latency": { - "Min": 7.498217344284058, - "Max": 195.27267146110535, - "Med": 60.937514543533325 - }, - "Speed": { - "Min": 20.21535654077273, - "Max": 26.828424510310285, - "Med": 23.35511595078081 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0.11270952224731445, - "Max": 374.49009441974084, - "Med": 20.14366245721619 - }, - "Latency": { - "Min": 13.710553884506226, - "Max": 2499.599281311035, - "Med": 72.9760570526123 - }, - "Speed": { - "Min": 20.559959869060805, - "Max": 38.97772164575481, - "Med": 23.404308920757103 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 5.713983329566749, - "Max": 129.1120622830403, - "Med": 16.900230228371974 - }, - "Latency": { - "Min": 8.128101587295532, - "Max": 207.84777903556824, - "Med": 29.653738737106323 - }, - "Speed": { - "Min": 20.391392131811244, - "Max": 27.744718175546115, - "Med": 23.15602205338773 - } - }, - "Translation": { - "Time to Answer": { - "Min": 3.058561062776069, - "Max": 485.3224275992627, - "Med": 34.79738415302708 - }, - "Latency": { - "Min": 6.466905355453491, - "Max": 553.075273513794, - "Med": 62.73571598529816 - }, - "Speed": { - "Min": 21.90260361916803, - "Max": 32.51747932436603, - "Med": 23.33685855465572 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 22.30403824284371, - "Max": 587.8023806741965, - "Med": 61.16007972987599 - }, - "Latency": { - "Min": 36.948426723480225, - "Max": 667.0348196029663, - "Med": 176.54958140850067 - }, - "Speed": { - "Min": 19.95701235109698, - "Max": 34.41947095445466, - "Med": 23.028289777243636 - } - } - }, - "Gauss2.3 Hybrid": { - "NUM_GPUS": 4, - "Overall": { - "Time to Answer": { - "Min": 0.07430458068847656, - "Max": 404.43652454478104, - "Med": 6.750162363052368 - }, - "Latency": { - "Min": 1.1905558109283447, - "Max": 442.95297265052795, - "Med": 17.980867981910706 - }, - "Speed": { - "Min": 6.22494458920455, - "Max": 178.05142867740236, - "Med": 42.58336125102582 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0.33391857147216797, - "Max": 119.58586805627984, - "Med": 2.413415551185608 - }, - "Latency": { - "Min": 1.1905558109283447, - "Max": 124.55183362960815, - "Med": 12.04367458820343 - }, - "Speed": { - "Min": 8.539283660183946, - "Max": 148.61945633894183, - "Med": 45.59633315615385 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0.1668376922607422, - "Max": 68.57276079468438, - "Med": 2.8465179204940796 - }, - "Latency": { - "Min": 1.715730905532837, - "Max": 68.93872094154358, - "Med": 9.637513637542725 - }, - "Speed": { - "Min": 6.22494458920455, - "Max": 104.9981573500631, - "Med": 32.18832571491936 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0.33438849449157715, - "Max": 183.91956615075247, - "Med": 26.36451454345997 - }, - "Latency": { - "Min": 4.558962106704712, - "Max": 189.24558639526367, - "Med": 30.90804362297058 - }, - "Speed": { - "Min": 9.283606195364527, - "Max": 127.68257997438197, - "Med": 37.69453314606262 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 13.466628007751575, - "Max": 404.43652454478104, - "Med": 37.81252260218433 - }, - "Latency": { - "Min": 20.26698136329651, - "Max": 442.95297265052795, - "Med": 44.70000433921814 - }, - "Speed": { - "Min": 17.752318582810926, - "Max": 174.13943080903434, - "Med": 62.22939089962192 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0.07430458068847656, - "Max": 110.69626592765815, - "Med": 22.999072928542155 - }, - "Latency": { - "Min": 2.747601270675659, - "Max": 410.4162223339081, - "Med": 34.972920656204224 - }, - "Speed": { - "Min": 12.459183500486317, - "Max": 107.39356542249726, - "Med": 45.32455341067882 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0.23634743690490723, - "Max": 60.92045502601678, - "Med": 13.743300940255699 - }, - "Latency": { - "Min": 1.4686648845672607, - "Max": 439.31902408599854, - "Med": 23.66022753715515 - }, - "Speed": { - "Min": 11.705696315452787, - "Max": 162.84358621002892, - "Med": 35.70773160367466 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0.5989758968353271, - "Max": 140.0028244791762, - "Med": 7.549682021141052 - }, - "Latency": { - "Min": 5.534611463546753, - "Max": 406.3446002006531, - "Med": 24.169201731681824 - }, - "Speed": { - "Min": 13.567085589811283, - "Max": 107.04087463063387, - "Med": 56.17958055142098 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 1.4356021881103516, - "Max": 42.68623673570329, - "Med": 4.869752407073975 - }, - "Latency": { - "Min": 3.396906852722168, - "Max": 50.57925343513489, - "Med": 12.588263869285583 - }, - "Speed": { - "Min": 8.560161183194618, - "Max": 178.05142867740236, - "Med": 34.589997435220816 - } - }, - "Translation": { - "Time to Answer": { - "Min": 1.2858543395996094, - "Max": 93.25060401021577, - "Med": 3.3288110494613647 - }, - "Latency": { - "Min": 3.8765246868133545, - "Max": 95.34082579612732, - "Med": 11.053936958312988 - }, - "Speed": { - "Min": 8.171821256105877, - "Max": 97.29960107904722, - "Med": 40.55621157324127 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 1.6803455352783203, - "Max": 159.63909476885257, - "Med": 48.116984559203765 - }, - "Latency": { - "Min": 4.690212249755859, - "Max": 360.991272687912, - "Med": 73.32911562919617 - }, - "Speed": { - "Min": 12.399445429162894, - "Max": 101.21891838399478, - "Med": 50.829541415044645 - } - } - }, - "MiniMax-M2 (230B A10B)": { - "NUM_GPUS": -1, - "Overall": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": -1.0, - "Max": -1.0, - "Med": -1.0 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": -1.0, - "Max": -1.0, - "Med": -1.0 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": -1.0, - "Max": -1.0, - "Med": -1.0 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": -1.0, - "Max": -1.0, - "Med": -1.0 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": -1.0, - "Max": -1.0, - "Med": -1.0 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": -1.0, - "Max": -1.0, - "Med": -1.0 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": -1.0, - "Max": -1.0, - "Med": -1.0 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": -1.0, - "Max": -1.0, - "Med": -1.0 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": -1.0, - "Max": -1.0, - "Med": -1.0 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": -1.0, - "Max": -1.0, - "Med": -1.0 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": -1.0, - "Max": -1.0, - "Med": -1.0 - } - } - }, - "gpt-oss-120B (Reasoning: medium)": { - "NUM_GPUS": 8, - "Overall": { - "Time to Answer": { - "Min": 0, - "Max": 101.66583281847353, - "Med": 7.694922740481965 - }, - "Latency": { - "Min": 0, - "Max": 108.71509218215942, - "Med": 12.121336698532104 - }, - "Speed": { - "Min": -1.0, - "Max": 295.4744570001622, - "Med": 103.31935460342275 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 1.4268165264489516, - "Max": 76.09343232158227, - "Med": 5.27987206336147 - }, - "Latency": { - "Min": 2.564422369003296, - "Max": 77.78296256065369, - "Med": 12.131241917610168 - }, - "Speed": { - "Min": 39.854399049254106, - "Max": 164.11560898062044, - "Med": 102.26319280893972 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0, - "Max": 34.97314937730854, - "Med": 7.72154927398273 - }, - "Latency": { - "Min": 0, - "Max": 38.421292781829834, - "Med": 10.624043703079224 - }, - "Speed": { - "Min": -1.0, - "Max": 295.4744570001622, - "Med": 97.16836666526689 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 1.5500787364112005, - "Max": 47.44580010794223, - "Med": 7.938084126425333 - }, - "Latency": { - "Min": 2.21130108833313, - "Max": 48.52851939201355, - "Med": 10.561246871948242 - }, - "Speed": { - "Min": 33.67196833472543, - "Max": 218.4742163852268, - "Med": 111.7668417486227 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 1.313997881221957, - "Max": 71.11774356968237, - "Med": 9.761283050834567 - }, - "Latency": { - "Min": 2.4778506755828857, - "Max": 77.51551747322083, - "Med": 14.6117924451828 - }, - "Speed": { - "Min": 52.34248336770566, - "Max": 162.3515087876845, - "Med": 121.07198234072999 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 2.2739883409985806, - "Max": 25.163233752980585, - "Med": 7.324965414201975 - }, - "Latency": { - "Min": 3.2160396575927734, - "Max": 41.42578959465027, - "Med": 13.071247458457947 - }, - "Speed": { - "Min": 36.912669174553955, - "Max": 207.27878278068064, - "Med": 124.86484051787805 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0.9411572388240269, - "Max": 29.16168449305106, - "Med": 6.375555159399182 - }, - "Latency": { - "Min": 1.034454584121704, - "Max": 29.336507081985474, - "Med": 7.1419289112091064 - }, - "Speed": { - "Min": 17.16222430736172, - "Max": 196.00731233624074, - "Med": 78.16903714344288 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 1.2979496504257548, - "Max": 101.66583281847353, - "Med": 8.344213386152383 - }, - "Latency": { - "Min": 3.055628538131714, - "Max": 107.58205056190491, - "Med": 13.63849675655365 - }, - "Speed": { - "Min": 38.28047898391144, - "Max": 179.7345615456471, - "Med": 124.36280725656007 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0.5831270155427758, - "Max": 77.53254542272322, - "Med": 5.93259947539976 - }, - "Latency": { - "Min": 2.371164083480835, - "Max": 78.0918653011322, - "Med": 9.356532216072083 - }, - "Speed": { - "Min": 45.693706343648756, - "Max": 216.1960526719776, - "Med": 84.3925290745797 - } - }, - "Translation": { - "Time to Answer": { - "Min": 1.0900659526852396, - "Max": 33.898595255753975, - "Med": 7.079862544752899 - }, - "Latency": { - "Min": 2.618936777114868, - "Max": 46.68135929107666, - "Med": 10.769065737724304 - }, - "Speed": { - "Min": 31.37753288836188, - "Max": 208.64392138296813, - "Med": 101.55799933773132 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 2.6155751339606996, - "Max": 53.633529920125504, - "Med": 15.617037601160938 - }, - "Latency": { - "Min": 6.383960247039795, - "Max": 108.71509218215942, - "Med": 37.77703034877777 - }, - "Speed": { - "Min": 29.365181318212365, - "Max": 170.3217087668346, - "Med": 105.44867644748324 - } - } - }, - "K2-Think": { - "NUM_GPUS": 4, - "Overall": { - "Time to Answer": { - "Min": 0.39023494720458984, - "Max": 107.81846922492139, - "Med": 24.296926397169038 - }, - "Latency": { - "Min": 3.209083318710327, - "Max": 133.52706599235535, - "Med": 43.299455642700195 - }, - "Speed": { - "Min": 14.2702795912481, - "Max": 79.4706528361737, - "Med": 42.72123101353567 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0.44143056869506836, - "Max": 73.18523997840788, - "Med": 24.582850728465154 - }, - "Latency": { - "Min": 9.866496324539185, - "Max": 80.28995537757874, - "Med": 43.84091126918793 - }, - "Speed": { - "Min": 17.936151898557416, - "Max": 71.47744263787135, - "Med": 45.60329345803582 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0.4004693031311035, - "Max": 65.30304066862526, - "Med": 23.98450198337123 - }, - "Latency": { - "Min": 3.209083318710327, - "Max": 79.1864972114563, - "Med": 42.39327788352966 - }, - "Speed": { - "Min": 15.532097928971599, - "Max": 72.94322441998251, - "Med": 38.39627650122442 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0.4039599895477295, - "Max": 58.61894807022766, - "Med": 21.03662378743316 - }, - "Latency": { - "Min": 10.735275983810425, - "Max": 65.61320924758911, - "Med": 42.17452073097229 - }, - "Speed": { - "Min": 17.142349804946267, - "Max": 71.73705521705818, - "Med": 36.94472519816862 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0.44495224952697754, - "Max": 58.786338485296355, - "Med": 17.792553327433026 - }, - "Latency": { - "Min": 15.264659881591797, - "Max": 64.66554498672485, - "Med": 41.75425410270691 - }, - "Speed": { - "Min": 17.027642949195506, - "Max": 73.09506796506054, - "Med": 44.956746493572254 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0.44803428649902344, - "Max": 65.09010206188158, - "Med": 25.54643277689422 - }, - "Latency": { - "Min": 7.509713172912598, - "Max": 71.54717373847961, - "Med": 41.966774582862854 - }, - "Speed": { - "Min": 15.824725477707522, - "Max": 73.3195073653948, - "Med": 44.77705206856967 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0.5748910903930664, - "Max": 56.63274266022045, - "Med": 15.733287176857257 - }, - "Latency": { - "Min": 7.720470666885376, - "Max": 64.05832099914551, - "Med": 41.12168335914612 - }, - "Speed": { - "Min": 15.283124913301318, - "Max": 72.2793442259468, - "Med": 39.460751271000966 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0.44273805618286133, - "Max": 62.37181219833113, - "Med": 1.8220877647399902 - }, - "Latency": { - "Min": 16.803712606430054, - "Max": 64.06053471565247, - "Med": 43.311216950416565 - }, - "Speed": { - "Min": 26.246213684599176, - "Max": 73.9054764779668, - "Med": 47.713578787928355 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0.5464873313903809, - "Max": 64.41515535422235, - "Med": 21.90518209568203 - }, - "Latency": { - "Min": 9.075395345687866, - "Max": 69.79841995239258, - "Med": 37.46022117137909 - }, - "Speed": { - "Min": 16.478342004537748, - "Max": 73.6811676907248, - "Med": 32.566976716039974 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0.39023494720458984, - "Max": 67.1600794855101, - "Med": 28.382482812355192 - }, - "Latency": { - "Min": 5.557899475097656, - "Max": 74.01309156417847, - "Med": 43.5409197807312 - }, - "Speed": { - "Min": 16.349977905241172, - "Max": 73.99961831441902, - "Med": 45.62670982618776 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 2.050525665283203, - "Max": 107.81846922492139, - "Med": 43.177629332715924 - }, - "Latency": { - "Min": 11.843125581741333, - "Max": 133.52706599235535, - "Med": 83.44266498088837 - }, - "Speed": { - "Min": 14.2702795912481, - "Max": 79.4706528361737, - "Med": 44.37472457880071 - } - } - }, - "Qwen3 32B (think)": { - "NUM_GPUS": 4, - "Overall": { - "Time to Answer": { - "Min": 0.54599928855896, - "Max": 253.51797756280675, - "Med": 27.26490248867746 - }, - "Latency": { - "Min": 8.805663108825684, - "Max": 473.0266854763031, - "Med": 39.635579228401184 - }, - "Speed": { - "Min": 15.712798254325218, - "Max": 127.8660606777894, - "Med": 37.74973909656839 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 8.375178462794038, - "Max": 70.13045678190097, - "Med": 26.449072357707664 - }, - "Latency": { - "Min": 8.805663108825684, - "Max": 426.83560967445374, - "Med": 40.536200761795044 - }, - "Speed": { - "Min": 17.14418071749721, - "Max": 78.08270978342156, - "Med": 36.93018587269333 - } - }, - "Editing": { - "Time to Answer": { - "Min": 5.969575643539429, - "Max": 70.8358676198869, - "Med": 23.843003199575065 - }, - "Latency": { - "Min": 15.329861640930176, - "Max": 473.0266854763031, - "Med": 33.783005118370056 - }, - "Speed": { - "Min": 18.512331053276277, - "Max": 75.89404763145406, - "Med": 32.71572442434736 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 8.216406373144354, - "Max": 208.6786009031018, - "Med": 30.210928570074664 - }, - "Latency": { - "Min": 15.335684299468994, - "Max": 215.6806709766388, - "Med": 38.575217723846436 - }, - "Speed": { - "Min": 16.948133101303448, - "Max": 83.92843333421662, - "Med": 34.782649874103726 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 2.4713733196258545, - "Max": 253.51797756280675, - "Med": 35.343380248684106 - }, - "Latency": { - "Min": 18.05628275871277, - "Max": 460.52875542640686, - "Med": 44.058998703956604 - }, - "Speed": { - "Min": 23.472023599135944, - "Max": 84.26334740614728, - "Med": 49.60432628225235 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 4.943585395812988, - "Max": 116.09944371879614, - "Med": 21.916232851223548 - }, - "Latency": { - "Min": 15.322106122970581, - "Max": 455.17924761772156, - "Med": 40.07017743587494 - }, - "Speed": { - "Min": 15.88939112779162, - "Max": 75.5911216056083, - "Med": 48.360419914178166 - } - }, - "Safety": { - "Time to Answer": { - "Min": 7.444906338818671, - "Max": 58.15606645859435, - "Med": 20.445144451794356 - }, - "Latency": { - "Min": 15.193650960922241, - "Max": 74.93927335739136, - "Med": 35.17757558822632 - }, - "Speed": { - "Min": 15.712798254325218, - "Max": 84.68622720278519, - "Med": 37.38984667711986 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0.54599928855896, - "Max": 161.4213222050101, - "Med": 27.37289314962119 - }, - "Latency": { - "Min": 20.44338822364807, - "Max": 468.6172957420349, - "Med": 43.49423110485077 - }, - "Speed": { - "Min": 25.123617720974075, - "Max": 80.21015089080971, - "Med": 56.65450640236608 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 7.059545403539997, - "Max": 85.15657951922476, - "Med": 18.01694448780064 - }, - "Latency": { - "Min": 12.706315279006958, - "Max": 89.78911590576172, - "Med": 27.14172089099884 - }, - "Speed": { - "Min": 17.672402752587608, - "Max": 78.49479198313107, - "Med": 29.84571055215188 - } - }, - "Translation": { - "Time to Answer": { - "Min": 11.448223740399959, - "Max": 148.23681348486792, - "Med": 28.346218079862798 - }, - "Latency": { - "Min": 15.745723724365234, - "Max": 435.20751571655273, - "Med": 38.3886981010437 - }, - "Speed": { - "Min": 20.82031285237568, - "Max": 85.56565569615155, - "Med": 38.70998117979687 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 12.764698800547027, - "Max": 128.81752944562336, - "Med": 49.972902765000214 - }, - "Latency": { - "Min": 25.775176763534546, - "Max": 154.98958587646484, - "Med": 86.45885813236237 - }, - "Speed": { - "Min": 17.754478471086838, - "Max": 127.8660606777894, - "Med": 45.78127621340502 - } - } - }, - "ERNIE 4.5 21B A3B Thinking": { - "NUM_GPUS": 4, - "Overall": { - "Time to Answer": { - "Min": 4.7185364037575805, - "Max": 381.4451079786094, - "Med": 48.24206436969081 - }, - "Latency": { - "Min": 5.468899726867676, - "Max": 385.8374490737915, - "Med": 56.95321476459503 - }, - "Speed": { - "Min": 13.077787135906146, - "Max": 417.72623387458174, - "Med": 78.52955859303597 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 4.7185364037575805, - "Max": 129.3670628652074, - "Med": 45.59457072222219 - }, - "Latency": { - "Min": 5.468899726867676, - "Max": 139.42385578155518, - "Med": 54.9374281167984 - }, - "Speed": { - "Min": 23.724916695181278, - "Max": 118.9078519152317, - "Med": 79.91422631176387 - } - }, - "Editing": { - "Time to Answer": { - "Min": 6.0666987432252375, - "Max": 126.93325961218814, - "Med": 44.36998889852029 - }, - "Latency": { - "Min": 25.85543918609619, - "Max": 134.31295132637024, - "Med": 50.293137311935425 - }, - "Speed": { - "Min": 13.077787135906146, - "Max": 121.91964927817727, - "Med": 75.21894332279093 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 15.90459621203699, - "Max": 245.1909949400944, - "Med": 49.41435811211081 - }, - "Latency": { - "Min": 23.064457893371582, - "Max": 262.91962575912476, - "Med": 55.597994565963745 - }, - "Speed": { - "Min": 25.860160543559193, - "Max": 178.7519783531233, - "Med": 83.1847008973788 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 23.29042621571812, - "Max": 381.4451079786094, - "Med": 83.1362957135191 - }, - "Latency": { - "Min": 37.18455362319946, - "Max": 385.8374490737915, - "Med": 90.58713603019714 - }, - "Speed": { - "Min": 33.073378076089554, - "Max": 176.22334624571502, - "Med": 91.87947652451497 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 12.17904435294651, - "Max": 108.41935803534291, - "Med": 43.472502338394364 - }, - "Latency": { - "Min": 27.566946983337402, - "Max": 113.22209453582764, - "Med": 55.10632348060608 - }, - "Speed": { - "Min": 31.383849405723346, - "Max": 140.26318805991497, - "Med": 79.95363531323457 - } - }, - "Safety": { - "Time to Answer": { - "Min": 12.202679136297206, - "Max": 92.32982209840914, - "Med": 45.34896118679985 - }, - "Latency": { - "Min": 27.01205086708069, - "Max": 124.52750062942505, - "Med": 53.7378511428833 - }, - "Speed": { - "Min": 19.79494652308793, - "Max": 117.6375440238341, - "Med": 71.73261494588591 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 13.347472214276472, - "Max": 208.86529871551306, - "Med": 54.5054725177881 - }, - "Latency": { - "Min": 42.52283978462219, - "Max": 216.81795072555542, - "Med": 65.68744933605194 - }, - "Speed": { - "Min": 30.676562954760897, - "Max": 175.54727768720753, - "Med": 83.16909413233267 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 11.983194166872682, - "Max": 84.55116112605583, - "Med": 44.33250133798725 - }, - "Latency": { - "Min": 25.412912845611572, - "Max": 85.89464926719666, - "Med": 48.754326939582825 - }, - "Speed": { - "Min": 16.55996270290764, - "Max": 136.80404527811402, - "Med": 71.55777387657217 - } - }, - "Translation": { - "Time to Answer": { - "Min": 7.315804266118575, - "Max": 189.66452063032918, - "Med": 50.54453291387922 - }, - "Latency": { - "Min": 27.38025212287903, - "Max": 198.81798601150513, - "Med": 57.48841047286987 - }, - "Speed": { - "Min": 24.94779667636523, - "Max": 195.0659165908506, - "Med": 80.65473253716607 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 29.315509924879656, - "Max": 235.27182159976888, - "Med": 82.68275937017145 - }, - "Latency": { - "Min": 55.98664903640747, - "Max": 255.9167935848236, - "Med": 117.85961723327637 - }, - "Speed": { - "Min": 21.929595166537542, - "Max": 417.72623387458174, - "Med": 64.8448510518927 - } - } - }, - "Qwen3 235B A22B Instruct 2507": { - "NUM_GPUS": 8, - "Overall": { - "Time to Answer": { - "Min": 0.0599977970123291, - "Max": 4.078007459640503, - "Med": 0.13876307010650635 - }, - "Latency": { - "Min": 0.12203764915466309, - "Max": 1261.3192930221558, - "Med": 14.262101531028748 - }, - "Speed": { - "Min": 10.556379666545459, - "Max": 617.5576615986813, - "Med": 31.359207215387023 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0.06099367141723633, - "Max": 2.687199831008911, - "Med": 0.12971913814544678 - }, - "Latency": { - "Min": 0.33237648010253906, - "Max": 183.88701963424683, - "Med": 16.43335199356079 - }, - "Speed": { - "Min": 10.556379666545459, - "Max": 38.608949238883106, - "Med": 31.445926161159562 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0.06218910217285156, - "Max": 2.642557144165039, - "Med": 0.1309736967086792 - }, - "Latency": { - "Min": 0.29999446868896484, - "Max": 65.33856987953186, - "Med": 8.179410696029663 - }, - "Speed": { - "Min": 12.6401908540417, - "Max": 44.45711026310425, - "Med": 31.36421987206994 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0.0599977970123291, - "Max": 2.645786762237549, - "Med": 0.148118257522583 - }, - "Latency": { - "Min": 0.15612459182739258, - "Max": 162.63672947883606, - "Med": 11.867389917373657 - }, - "Speed": { - "Min": 18.4301654543505, - "Max": 43.61722961354029, - "Med": 31.28247348885467 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0.0611729621887207, - "Max": 2.6831188201904297, - "Med": 0.12159299850463867 - }, - "Latency": { - "Min": 0.12203764915466309, - "Max": 313.1483278274536, - "Med": 24.277913689613342 - }, - "Speed": { - "Min": 26.4874306098914, - "Max": 402.9497550196945, - "Med": 31.200576618350894 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0.06073927879333496, - "Max": 2.6409244537353516, - "Med": 0.11911571025848389 - }, - "Latency": { - "Min": 1.2278521060943604, - "Max": 81.46263074874878, - "Med": 21.183393478393555 - }, - "Speed": { - "Min": 22.858739915962545, - "Max": 35.232987689635024, - "Med": 31.565060329062877 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0.06101679801940918, - "Max": 2.676365852355957, - "Med": 0.1175997257232666 - }, - "Latency": { - "Min": 0.48310065269470215, - "Max": 71.77724814414978, - "Med": 11.744874715805054 - }, - "Speed": { - "Min": 23.513437972659286, - "Max": 36.19694244902809, - "Med": 31.303756894761722 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0.062200069427490234, - "Max": 2.4229557514190674, - "Med": 0.1241919994354248 - }, - "Latency": { - "Min": 2.4022955894470215, - "Max": 1261.3192930221558, - "Med": 12.741782903671265 - }, - "Speed": { - "Min": 23.5107156399246, - "Max": 68.06365929199522, - "Med": 31.52941810934405 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0.06840300559997559, - "Max": 4.078007459640503, - "Med": 0.16679656505584717 - }, - "Latency": { - "Min": 1.1936404705047607, - "Max": 62.33108639717102, - "Med": 8.373995900154114 - }, - "Speed": { - "Min": 23.08773481941014, - "Max": 39.41478301821861, - "Med": 31.03914567272312 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0.06093740463256836, - "Max": 1.074122667312622, - "Med": 0.15650403499603271 - }, - "Latency": { - "Min": 0.3120737075805664, - "Max": 76.32828378677368, - "Med": 9.615698099136353 - }, - "Speed": { - "Min": 19.415233107966593, - "Max": 47.822716790737076, - "Med": 31.548506884106185 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 0.14957475662231445, - "Max": 3.722768545150757, - "Med": 0.9216043949127197 - }, - "Latency": { - "Min": 0.8602428436279297, - "Max": 174.10348057746887, - "Med": 55.74303138256073 - }, - "Speed": { - "Min": 23.401832986167108, - "Max": 617.5576615986813, - "Med": 31.20416098029552 - } - } - }, - "Grok-4": { - "NUM_GPUS": 0, - "Overall": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - } - }, - "Claude 4 Opus (20250514) (think)": { - "NUM_GPUS": 0, - "Overall": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - } - }, - "Gemini 2.5 Pro": { - "NUM_GPUS": 0, - "Overall": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - } - }, - "Tongyi DeepResearch 30B A3B": { - "NUM_GPUS": 4, - "Overall": { - "Time to Answer": { - "Min": 1.4505500793457031, - "Max": 244.41708384257143, - "Med": 45.23295979184195 - }, - "Latency": { - "Min": 9.191470384597778, - "Max": 749.16233086586, - "Med": 52.387412667274475 - }, - "Speed": { - "Min": 18.069738498345682, - "Max": 122.37478932044478, - "Med": 62.676624491545525 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 4.802471643031882, - "Max": 203.55154156596308, - "Med": 41.43268650270611 - }, - "Latency": { - "Min": 22.65742540359497, - "Max": 738.4437143802643, - "Med": 51.50689494609833 - }, - "Speed": { - "Min": 23.70791793357093, - "Max": 118.23891726695051, - "Med": 61.95675692618596 - } - }, - "Editing": { - "Time to Answer": { - "Min": 9.566574335098267, - "Max": 106.69052745386706, - "Med": 43.39687190468506 - }, - "Latency": { - "Min": 11.078340530395508, - "Max": 716.9541938304901, - "Med": 48.262219190597534 - }, - "Speed": { - "Min": 21.45800243159038, - "Max": 110.85390192747607, - "Med": 60.728454906690686 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 11.874105177737047, - "Max": 231.70031813596688, - "Med": 45.77187383012706 - }, - "Latency": { - "Min": 22.660792589187622, - "Max": 716.5539243221283, - "Med": 51.057066202163696 - }, - "Speed": { - "Min": 18.806653004549222, - "Max": 113.17314697322944, - "Med": 61.908515815844005 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 10.599196672439575, - "Max": 201.39174237725598, - "Med": 53.02374980579952 - }, - "Latency": { - "Min": 22.712077379226685, - "Max": 739.214604139328, - "Med": 60.78978753089905 - }, - "Speed": { - "Min": 27.728295895773794, - "Max": 116.14861413460089, - "Med": 71.35437312649003 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 9.607359849257868, - "Max": 76.17806077624542, - "Med": 42.09523495410829 - }, - "Latency": { - "Min": 22.647829294204712, - "Max": 691.1959676742554, - "Med": 51.206714153289795 - }, - "Speed": { - "Min": 22.367360550403653, - "Max": 105.47876939683458, - "Med": 61.07899532945913 - } - }, - "Safety": { - "Time to Answer": { - "Min": 8.461828413862772, - "Max": 76.82411024149727, - "Med": 41.093502702586576 - }, - "Latency": { - "Min": 11.213352680206299, - "Max": 726.0364253520966, - "Med": 49.5799560546875 - }, - "Speed": { - "Min": 21.10544967333718, - "Max": 122.37478932044478, - "Med": 61.27658515689627 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 1.4505500793457031, - "Max": 244.41708384257143, - "Med": 43.37484079377687 - }, - "Latency": { - "Min": 34.8076434135437, - "Max": 741.4165444374084, - "Med": 65.52483582496643 - }, - "Speed": { - "Min": 44.05263366278615, - "Max": 114.31062221045683, - "Med": 80.14168131596111 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 8.468284732551986, - "Max": 89.47725016152823, - "Med": 41.727662494933156 - }, - "Latency": { - "Min": 11.212054014205933, - "Max": 90.75218868255615, - "Med": 47.14983403682709 - }, - "Speed": { - "Min": 18.069738498345682, - "Max": 114.2828308432808, - "Med": 58.47885951688892 - } - }, - "Translation": { - "Time to Answer": { - "Min": 11.533191122756978, - "Max": 148.20878136635764, - "Med": 47.005148283081056 - }, - "Latency": { - "Min": 22.636559009552002, - "Max": 749.16233086586, - "Med": 53.13997745513916 - }, - "Speed": { - "Min": 23.257318497563766, - "Max": 121.45067041024606, - "Med": 67.40439675128356 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 6.2053979334940434, - "Max": 169.23244223887497, - "Med": 67.0981146490129 - }, - "Latency": { - "Min": 9.191470384597778, - "Max": 596.1811451911926, - "Med": 98.24455618858337 - }, - "Speed": { - "Min": 22.742636950738575, - "Max": 118.95423414956997, - "Med": 58.58010161782478 - } - } - }, - "GPT-5 mini (Reasoning: medium)": { - "NUM_GPUS": 0, - "Overall": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - } - }, - "Gemma 3 27B it": { - "NUM_GPUS": 4, - "Overall": { - "Time to Answer": { - "Min": 0.22171759605407715, - "Max": 24.022038459777832, - "Med": 3.391351342201233 - }, - "Latency": { - "Min": 1.2437419891357422, - "Max": 80.82627582550049, - "Med": 13.303653597831726 - }, - "Speed": { - "Min": 3.01364195656977, - "Max": 229.08035233700036, - "Med": 39.94050750809835 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 1.231619119644165, - "Max": 12.02305817604065, - "Med": 3.3667547702789307 - }, - "Latency": { - "Min": 2.004427194595337, - "Max": 59.13508725166321, - "Med": 14.327769875526428 - }, - "Speed": { - "Min": 9.955632160192955, - "Max": 78.68711055707439, - "Med": 45.86855319507289 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0.429884672164917, - "Max": 11.786498069763184, - "Med": 2.6755807399749756 - }, - "Latency": { - "Min": 1.2437419891357422, - "Max": 28.552533388137817, - "Med": 10.97027313709259 - }, - "Speed": { - "Min": 5.711890466028699, - "Max": 122.61128069960643, - "Med": 34.90802071679944 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0.8460655212402344, - "Max": 9.334107398986816, - "Med": 2.7619566917419434 - }, - "Latency": { - "Min": 2.272484302520752, - "Max": 54.87088966369629, - "Med": 9.565546035766602 - }, - "Speed": { - "Min": 5.301823017395899, - "Max": 72.08295999954434, - "Med": 29.63113262120674 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0.22171759605407715, - "Max": 13.662389755249023, - "Med": 2.729213833808899 - }, - "Latency": { - "Min": 2.4233341217041016, - "Max": 30.015868663787842, - "Med": 13.837107419967651 - }, - "Speed": { - "Min": 3.01364195656977, - "Max": 79.71702934661838, - "Med": 47.28939186218568 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0.2795121669769287, - "Max": 11.32710599899292, - "Med": 2.559817671775818 - }, - "Latency": { - "Min": 1.7863786220550537, - "Max": 38.75650501251221, - "Med": 15.827868700027466 - }, - "Speed": { - "Min": 10.963159553275002, - "Max": 83.97842703674367, - "Med": 48.527739925349465 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0.2860133647918701, - "Max": 11.903903245925903, - "Med": 3.9196701049804688 - }, - "Latency": { - "Min": 6.169138193130493, - "Max": 40.045860052108765, - "Med": 15.976381063461304 - }, - "Speed": { - "Min": 16.68060195348541, - "Max": 152.72806642407141, - "Med": 50.35330616011156 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0.3676488399505615, - "Max": 12.006672382354736, - "Med": 3.640501856803894 - }, - "Latency": { - "Min": 5.874630928039551, - "Max": 55.18393516540527, - "Med": 12.986077189445496 - }, - "Speed": { - "Min": 20.223438086758204, - "Max": 79.36633494255925, - "Med": 36.375707045797675 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0.4374091625213623, - "Max": 10.824204444885254, - "Med": 4.0476555824279785 - }, - "Latency": { - "Min": 4.208073377609253, - "Max": 23.27540612220764, - "Med": 11.174941182136536 - }, - "Speed": { - "Min": 8.939035042072296, - "Max": 164.84850372963322, - "Med": 31.04757679094981 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0.2437894344329834, - "Max": 13.54941439628601, - "Med": 3.530141234397888 - }, - "Latency": { - "Min": 3.387590169906616, - "Max": 39.24691843986511, - "Med": 13.79154908657074 - }, - "Speed": { - "Min": 15.260651404402982, - "Max": 86.12252770200125, - "Med": 40.6410725402205 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 1.2491354942321777, - "Max": 24.022038459777832, - "Med": 6.727076172828674 - }, - "Latency": { - "Min": 2.615008592605591, - "Max": 80.82627582550049, - "Med": 38.974836587905884 - }, - "Speed": { - "Min": 7.1120288525089075, - "Max": 229.08035233700036, - "Med": 51.560560622009206 - } - } - }, - "GPT-5 nano (Reasoning: medium)": { - "NUM_GPUS": 0, - "Overall": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - } - }, - "GPT-5.1 (Reasoning: medium, verbosity: medium)": { - "NUM_GPUS": -1, - "Overall": { - "Time to Answer": { - "Min": [ - 0 - ], - "Max": [ - 0 - ], - "Med": 0.0 - }, - "Latency": { - "Min": 1.4775474071502686, - "Max": 747.6701903343201, - "Med": 11.673096776008606 - }, - "Speed": { - "Min": 0, - "Max": 0, - "Med": 0.0 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": [ - 0 - ], - "Max": [ - 0 - ], - "Med": 0.0 - }, - "Latency": { - "Min": 3.002990961074829, - "Max": 421.94651198387146, - "Med": 16.867193579673767 - }, - "Speed": { - "Min": 0, - "Max": 0, - "Med": 0.0 - } - }, - "Editing": { - "Time to Answer": { - "Min": [ - 0 - ], - "Max": [ - 0 - ], - "Med": 0.0 - }, - "Latency": { - "Min": 1.4775474071502686, - "Max": 82.63027286529541, - "Med": 5.669041872024536 - }, - "Speed": { - "Min": 0, - "Max": 0, - "Med": 0.0 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": [ - 0 - ], - "Max": [ - 0 - ], - "Med": 0.0 - }, - "Latency": { - "Min": 1.664919137954712, - "Max": 358.5435652732849, - "Med": 7.0718772411346436 - }, - "Speed": { - "Min": 0, - "Max": 0, - "Med": 0.0 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": [ - 0 - ], - "Max": [ - 0 - ], - "Med": 0.0 - }, - "Latency": { - "Min": 1.82639479637146, - "Max": 747.6701903343201, - "Med": 12.99689531326294 - }, - "Speed": { - "Min": 0, - "Max": 0, - "Med": 0.0 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": [ - 0 - ], - "Max": [ - 0 - ], - "Med": 0.0 - }, - "Latency": { - "Min": 2.273186445236206, - "Max": 115.95099306106567, - "Med": 22.67124307155609 - }, - "Speed": { - "Min": 0, - "Max": 0, - "Med": 0.0 - } - }, - "Safety": { - "Time to Answer": { - "Min": [ - 0 - ], - "Max": [ - 0 - ], - "Med": 0.0 - }, - "Latency": { - "Min": 3.3134090900421143, - "Max": 140.77250027656555, - "Med": 18.410767793655396 - }, - "Speed": { - "Min": 0, - "Max": 0, - "Med": 0.0 - } - }, - "Repetition": { - "Time to Answer": { - "Min": [ - 0 - ], - "Max": [ - 0 - ], - "Med": 0.0 - }, - "Latency": { - "Min": 2.3753366470336914, - "Max": 428.47876358032227, - "Med": 19.905622720718384 - }, - "Speed": { - "Min": 0, - "Max": 0, - "Med": 0.0 - } - }, - "Summarization": { - "Time to Answer": { - "Min": [ - 0 - ], - "Max": [ - 0 - ], - "Med": 0.0 - }, - "Latency": { - "Min": 2.2187492847442627, - "Max": 126.85083556175232, - "Med": 5.20970344543457 - }, - "Speed": { - "Min": 0, - "Max": 0, - "Med": 0.0 - } - }, - "Translation": { - "Time to Answer": { - "Min": [ - 0 - ], - "Max": [ - 0 - ], - "Med": 0.0 - }, - "Latency": { - "Min": 2.0158095359802246, - "Max": 64.36819744110107, - "Med": 9.735138773918152 - }, - "Speed": { - "Min": 0, - "Max": 0, - "Med": 0.0 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": [ - 0 - ], - "Max": [ - 0 - ], - "Med": 0.0 - }, - "Latency": { - "Min": 4.493000507354736, - "Max": 501.9931924343109, - "Med": 38.35947251319885 - }, - "Speed": { - "Min": 0, - "Max": 0, - "Med": 0.0 - } - } - }, - "KAT Dev 72B Exp": { - "NUM_GPUS": 8, - "Overall": { - "Time to Answer": { - "Min": 0, - "Max": 1.8852267265319824, - "Med": 0.06221652030944824 - }, - "Latency": { - "Min": 0, - "Max": 1739.6013979911804, - "Med": 8.492375493049622 - }, - "Speed": { - "Min": 11.841053492664015, - "Max": 179.6668529421545, - "Med": 50.601864763867184 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0.05644536018371582, - "Max": 1.8852267265319824, - "Med": 0.06070876121520996 - }, - "Latency": { - "Min": 0.5495977401733398, - "Max": 1734.7112760543823, - "Med": 11.42176365852356 - }, - "Speed": { - "Min": 33.34421066358906, - "Max": 61.99760541627945, - "Med": 51.61603722996896 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0.056221723556518555, - "Max": 0.21474575996398926, - "Med": 0.06082558631896973 - }, - "Latency": { - "Min": 0.2646908760070801, - "Max": 1595.0533018112183, - "Med": 4.54656982421875 - }, - "Speed": { - "Min": 33.0970793748843, - "Max": 67.8011225008837, - "Med": 49.877649602198524 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0.057355642318725586, - "Max": 1.3980965614318848, - "Med": 0.10868549346923828 - }, - "Latency": { - "Min": 0.4547910690307617, - "Max": 343.81701016426086, - "Med": 6.172606706619263 - }, - "Speed": { - "Min": 33.23552955310353, - "Max": 61.68261581762401, - "Med": 49.023444483063024 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0.05635571479797363, - "Max": 0.11922383308410645, - "Med": 0.06026041507720947 - }, - "Latency": { - "Min": 0.3007025718688965, - "Max": 1650.0614280700684, - "Med": 10.36361300945282 - }, - "Speed": { - "Min": 35.11266626219754, - "Max": 61.64151659547226, - "Med": 48.08432696922458 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0.05710172653198242, - "Max": 0.2662782669067383, - "Med": 0.06034708023071289 - }, - "Latency": { - "Min": 0.4679543972015381, - "Max": 1617.9812409877777, - "Med": 8.860660910606384 - }, - "Speed": { - "Min": 34.20033736500205, - "Max": 63.47312348668281, - "Med": 49.06857210020506 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0.05749773979187012, - "Max": 0.1197052001953125, - "Med": 0.06013894081115723 - }, - "Latency": { - "Min": 0.4509849548339844, - "Max": 1738.2601640224457, - "Med": 7.764802932739258 - }, - "Speed": { - "Min": 31.161307774126723, - "Max": 62.96165283098918, - "Med": 46.6376538248078 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0.05688214302062988, - "Max": 0.11974930763244629, - "Med": 0.05986011028289795 - }, - "Latency": { - "Min": 1.6353342533111572, - "Max": 1736.6408779621124, - "Med": 9.356394052505493 - }, - "Speed": { - "Min": 33.02125276478502, - "Max": 61.19217468044336, - "Med": 46.27108150127529 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0.05743670463562012, - "Max": 0.4117448329925537, - "Med": 0.1098024845123291 - }, - "Latency": { - "Min": 0.989130973815918, - "Max": 1565.9191603660583, - "Med": 6.263204216957092 - }, - "Speed": { - "Min": 33.11188589504452, - "Max": 57.96248782433984, - "Med": 49.54728638934513 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0.05727648735046387, - "Max": 0.3603339195251465, - "Med": 0.06304633617401123 - }, - "Latency": { - "Min": 0.2665116786956787, - "Max": 1739.6013979911804, - "Med": 7.2887866497039795 - }, - "Speed": { - "Min": 33.701689123361014, - "Max": 61.86641373663972, - "Med": 50.14262477886369 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 0, - "Max": 1.4983344078063965, - "Med": 0.2324150800704956 - }, - "Latency": { - "Min": 0, - "Max": 888.0258376598358, - "Med": 21.200571298599243 - }, - "Speed": { - "Min": 11.841053492664015, - "Max": 179.6668529421545, - "Med": 52.505335801448915 - } - } - }, - "gpt-oss-20B (Reasoning: medium)": { - "NUM_GPUS": 4, - "Overall": { - "Time to Answer": { - "Min": 2.0265204472169556, - "Max": 129.33762687935325, - "Med": 26.04652036871504 - }, - "Latency": { - "Min": 7.263976097106934, - "Max": 138.10640954971313, - "Med": 29.767700791358948 - }, - "Speed": { - "Min": 5.317348253806318, - "Max": 369.6802851223203, - "Med": 108.53633696847938 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 2.919738582967988, - "Max": 69.87850048414344, - "Med": 25.258961631303542 - }, - "Latency": { - "Min": 7.263976097106934, - "Max": 73.72067332267761, - "Med": 29.7125141620636 - }, - "Speed": { - "Min": 13.604860508942371, - "Max": 224.07024522186745, - "Med": 124.14591263963385 - } - }, - "Editing": { - "Time to Answer": { - "Min": 2.0265204472169556, - "Max": 49.25446497234138, - "Med": 25.30023380826225 - }, - "Latency": { - "Min": 7.319023847579956, - "Max": 56.10624122619629, - "Med": 27.072497606277466 - }, - "Speed": { - "Min": 12.140381961027476, - "Max": 250.484990902761, - "Med": 104.93834609385715 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 3.9151465271321517, - "Max": 75.99197716704057, - "Med": 26.328562295325447 - }, - "Latency": { - "Min": 7.286376476287842, - "Max": 84.5227108001709, - "Med": 28.301609992980957 - }, - "Speed": { - "Min": 11.70440500661996, - "Max": 306.1703155318347, - "Med": 113.21800589706349 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 8.324975468895651, - "Max": 129.33762687935325, - "Med": 27.582642474460418 - }, - "Latency": { - "Min": 18.050434589385986, - "Max": 138.10640954971313, - "Med": 32.32542634010315 - }, - "Speed": { - "Min": 17.82180729362148, - "Max": 207.91746863187615, - "Med": 106.44936231341633 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 5.195440284614488, - "Max": 61.22046760794428, - "Med": 25.105323415675343 - }, - "Latency": { - "Min": 7.313647270202637, - "Max": 63.91348838806152, - "Med": 32.319284319877625 - }, - "Speed": { - "Min": 18.438031290688347, - "Max": 226.50662559152707, - "Med": 110.66899398987842 - } - }, - "Safety": { - "Time to Answer": { - "Min": 4.932410193462053, - "Max": 47.165975079516905, - "Med": 23.45146352177868 - }, - "Latency": { - "Min": 7.2830750942230225, - "Max": 53.09182548522949, - "Med": 24.52879786491394 - }, - "Speed": { - "Min": 5.317348253806318, - "Max": 250.7582211510182, - "Med": 90.91284402754488 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 2.557051893849964, - "Max": 120.32689256267814, - "Med": 28.832852398544777 - }, - "Latency": { - "Min": 20.129476308822632, - "Max": 125.98315095901489, - "Med": 33.87077188491821 - }, - "Speed": { - "Min": 27.080736058951963, - "Max": 266.27309790215196, - "Med": 120.19876605631327 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 2.4394841513682888, - "Max": 113.86724343465069, - "Med": 23.029374821644574 - }, - "Latency": { - "Min": 13.429885149002075, - "Max": 115.32083773612976, - "Med": 25.605836629867554 - }, - "Speed": { - "Min": 10.53590514256035, - "Max": 304.7862966713593, - "Med": 101.49503613110383 - } - }, - "Translation": { - "Time to Answer": { - "Min": 4.240170876932201, - "Max": 72.23964902074354, - "Med": 26.80100677708995 - }, - "Latency": { - "Min": 17.525670528411865, - "Max": 115.61775875091553, - "Med": 30.075977206230164 - }, - "Speed": { - "Min": 22.395176057735203, - "Max": 369.6802851223203, - "Med": 122.92398147980118 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 9.995788375397986, - "Max": 91.21509669950703, - "Med": 39.29442425858453 - }, - "Latency": { - "Min": 16.48517942428589, - "Max": 112.05223345756531, - "Med": 66.7337509393692 - }, - "Speed": { - "Min": 21.53344632125304, - "Max": 273.4009542241851, - "Med": 87.97283614240237 - } - } - }, - "o3-pro (Reasoning: medium)": { - "NUM_GPUS": 0, - "Overall": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - } - }, - "Apriel 1.5 15B Thinker": { - "NUM_GPUS": 1, - "Overall": { - "Time to Answer": { - "Min": 1.2549350261688232, - "Max": 3056.138434088367, - "Med": 299.8162105011457 - }, - "Latency": { - "Min": 46.738779067993164, - "Max": 4136.366504430771, - "Med": 379.46853709220886 - }, - "Speed": { - "Min": 0.42360294235278373, - "Max": 54.947911654193604, - "Med": 14.662753397700879 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 3.7276084423065186, - "Max": 3056.138434088367, - "Med": 305.5469903359111 - }, - "Latency": { - "Min": 51.765430212020874, - "Max": 4136.366504430771, - "Med": 384.5022294521332 - }, - "Speed": { - "Min": 1.282095122702259, - "Max": 33.858585682008396, - "Med": 14.773627460039325 - } - }, - "Editing": { - "Time to Answer": { - "Min": 3.4761545658111572, - "Max": 2273.6135531569553, - "Med": 295.44786529319583 - }, - "Latency": { - "Min": 51.38218688964844, - "Max": 3585.821407556534, - "Med": 344.8719325065613 - }, - "Speed": { - "Min": 1.7095913279748924, - "Max": 32.881206474220114, - "Med": 13.117173086095441 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 3.531322479248047, - "Max": 2724.289971736822, - "Med": 276.6031069894072 - }, - "Latency": { - "Min": 46.738779067993164, - "Max": 3467.508316040039, - "Med": 316.27087450027466 - }, - "Speed": { - "Min": 1.4064350054193115, - "Max": 27.82944594624516, - "Med": 13.721272389823604 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 3.445833206176758, - "Max": 2683.223895152649, - "Med": 328.32872143197346 - }, - "Latency": { - "Min": 89.11460971832275, - "Max": 3187.0217077732086, - "Med": 382.7973885536194 - }, - "Speed": { - "Min": 1.705996948100313, - "Max": 30.580604854482463, - "Med": 14.40724059813003 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 4.491663694381714, - "Max": 2488.7236490093387, - "Med": 309.81985626728726 - }, - "Latency": { - "Min": 51.35005807876587, - "Max": 3339.238896369934, - "Med": 462.8714779615402 - }, - "Speed": { - "Min": 0.5354570626911433, - "Max": 27.440068007053224, - "Med": 14.289745767436045 - } - }, - "Safety": { - "Time to Answer": { - "Min": 5.234750270843506, - "Max": 1324.1906158491395, - "Med": 253.79249203656173 - }, - "Latency": { - "Min": 46.780503273010254, - "Max": 3173.258824825287, - "Med": 269.95654463768005 - }, - "Speed": { - "Min": 0.5017648181327705, - "Max": 23.20511103157671, - "Med": 11.307379239778234 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 3.6516125202178955, - "Max": 1805.0451255592727, - "Med": 244.28766749419418 - }, - "Latency": { - "Min": 112.79669785499573, - "Max": 3744.576027393341, - "Med": 2968.8194106817245 - }, - "Speed": { - "Min": 3.8282718743509396, - "Max": 33.00053650042228, - "Med": 20.689304252724725 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 3.3415656089782715, - "Max": 2078.876019242067, - "Med": 271.65310563543795 - }, - "Latency": { - "Min": 51.81553244590759, - "Max": 3436.665023326874, - "Med": 295.15072190761566 - }, - "Speed": { - "Min": 0.42360294235278373, - "Max": 23.435566620315598, - "Med": 13.18680453942073 - } - }, - "Translation": { - "Time to Answer": { - "Min": 3.201449155807495, - "Max": 2995.292006701791, - "Med": 344.80499021810056 - }, - "Latency": { - "Min": 92.43893480300903, - "Max": 3749.036164045334, - "Med": 455.2424204349518 - }, - "Speed": { - "Min": 1.9899594380385566, - "Max": 41.139672948079316, - "Med": 15.236640938695967 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 1.2549350261688232, - "Max": 2861.318123807745, - "Med": 307.18474078735335 - }, - "Latency": { - "Min": 69.42670488357544, - "Max": 3984.8897964954376, - "Med": 500.44790029525757 - }, - "Speed": { - "Min": 2.9660961868114635, - "Max": 54.947911654193604, - "Med": 29.65026123893604 - } - } - }, - "Gemini 2.5 Flash-lite Preview (09-2025)": { - "NUM_GPUS": 0, - "Overall": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - } - }, - "Mistral Small 3.2 24B Instruct 2506": { - "NUM_GPUS": 4, - "Overall": { - "Time to Answer": { - "Min": 0.12571310997009277, - "Max": 22.059533834457397, - "Med": 3.2450859546661377 - }, - "Latency": { - "Min": 2.161437511444092, - "Max": 659.4294443130493, - "Med": 13.907460689544678 - }, - "Speed": { - "Min": 1.4333493038473446, - "Max": 175.8910844947186, - "Med": 36.382163796915904 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0.6847662925720215, - "Max": 12.362573146820068, - "Med": 2.9440373182296753 - }, - "Latency": { - "Min": 2.757103443145752, - "Max": 29.507304906845093, - "Med": 13.806361079216003 - }, - "Speed": { - "Min": 6.792632069383921, - "Max": 115.66651254814248, - "Med": 38.30509405962542 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0.12571310997009277, - "Max": 10.516297817230225, - "Med": 3.194353699684143 - }, - "Latency": { - "Min": 2.8111395835876465, - "Max": 21.959777116775513, - "Med": 12.760318636894226 - }, - "Speed": { - "Min": 7.446414644335197, - "Max": 108.03523771174629, - "Med": 32.49664545962825 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0.5983908176422119, - "Max": 14.474515199661255, - "Med": 2.8958582878112793 - }, - "Latency": { - "Min": 2.161437511444092, - "Max": 70.41749835014343, - "Med": 12.771145343780518 - }, - "Speed": { - "Min": 1.4333493038473446, - "Max": 121.59775552277985, - "Med": 33.44103401287695 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0.45673561096191406, - "Max": 11.303606033325195, - "Med": 2.9606106281280518 - }, - "Latency": { - "Min": 6.284893989562988, - "Max": 659.4294443130493, - "Med": 14.92256474494934 - }, - "Speed": { - "Min": 2.143380742041602, - "Max": 121.46427610970838, - "Med": 44.1526370389208 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0.2953360080718994, - "Max": 8.605994462966919, - "Med": 3.012656331062317 - }, - "Latency": { - "Min": 5.203503370285034, - "Max": 33.03637361526489, - "Med": 14.344586730003357 - }, - "Speed": { - "Min": 12.832433455363452, - "Max": 110.59881480068599, - "Med": 42.05259464700501 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0.2732360363006592, - "Max": 7.271705627441406, - "Med": 2.629337787628174 - }, - "Latency": { - "Min": 4.365848541259766, - "Max": 583.3041687011719, - "Med": 13.461174726486206 - }, - "Speed": { - "Min": 3.1052915915849884, - "Max": 122.26175683908541, - "Med": 35.86365065294573 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0.7907335758209229, - "Max": 7.26047158241272, - "Med": 2.4488712549209595 - }, - "Latency": { - "Min": 5.475977659225464, - "Max": 597.4307239055634, - "Med": 13.749215602874756 - }, - "Speed": { - "Min": 14.729128799314253, - "Max": 116.95785456615033, - "Med": 37.38640264315987 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0.5957944393157959, - "Max": 12.12608790397644, - "Med": 3.13832688331604 - }, - "Latency": { - "Min": 3.0124306678771973, - "Max": 24.874180793762207, - "Med": 12.077808260917664 - }, - "Speed": { - "Min": 10.75172312839848, - "Max": 120.04771569717427, - "Med": 29.643297389369906 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0.14142727851867676, - "Max": 13.618714809417725, - "Med": 4.023127913475037 - }, - "Latency": { - "Min": 4.730882406234741, - "Max": 36.31072402000427, - "Med": 13.863484382629395 - }, - "Speed": { - "Min": 8.775478399107506, - "Max": 122.19589338428739, - "Med": 34.41867369729855 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 2.075855016708374, - "Max": 22.059533834457397, - "Med": 5.985246300697327 - }, - "Latency": { - "Min": 3.603008270263672, - "Max": 554.1096460819244, - "Med": 32.98428440093994 - }, - "Speed": { - "Min": 6.016974164083879, - "Max": 175.8910844947186, - "Med": 49.70564205164022 - } - } - }, - "Mi:dm 2.0 Base Instruct": { - "NUM_GPUS": 4, - "Overall": { - "Time to Answer": { - "Min": 0.24845504760742188, - "Max": 27.875138998031616, - "Med": 3.0741498470306396 - }, - "Latency": { - "Min": 0.5818257331848145, - "Max": 249.49325704574585, - "Med": 11.089128971099854 - }, - "Speed": { - "Min": 0.9191171942880371, - "Max": 2913.3504296776237, - "Med": 41.135152993186374 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0.5397839546203613, - "Max": 11.207699298858643, - "Med": 3.0754138231277466 - }, - "Latency": { - "Min": 1.456855297088623, - "Max": 39.203630208969116, - "Med": 11.675567388534546 - }, - "Speed": { - "Min": 8.504514817517254, - "Max": 675.4817098156768, - "Med": 47.87241431486668 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0.24845504760742188, - "Max": 9.77567982673645, - "Med": 2.8453644514083862 - }, - "Latency": { - "Min": 0.5818257331848145, - "Max": 224.3253448009491, - "Med": 9.610992550849915 - }, - "Speed": { - "Min": 5.074386933558473, - "Max": 413.36607650121164, - "Med": 33.953996096770275 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0.5000381469726562, - "Max": 9.693275213241577, - "Med": 2.911353349685669 - }, - "Latency": { - "Min": 1.8060665130615234, - "Max": 32.512078523635864, - "Med": 9.730275392532349 - }, - "Speed": { - "Min": 3.4720944632633666, - "Max": 413.22852850140805, - "Med": 34.66437999811754 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0.3369002342224121, - "Max": 9.685616493225098, - "Med": 2.735180377960205 - }, - "Latency": { - "Min": 1.0355570316314697, - "Max": 249.49325704574585, - "Med": 10.179033398628235 - }, - "Speed": { - "Min": 0.9191171942880371, - "Max": 308.33669043593324, - "Med": 43.38231851314892 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0.38843846321105957, - "Max": 11.417652606964111, - "Med": 3.228148341178894 - }, - "Latency": { - "Min": 1.7227349281311035, - "Max": 23.154359340667725, - "Med": 9.956019878387451 - }, - "Speed": { - "Min": 8.106144245316518, - "Max": 186.90265292586238, - "Med": 35.60764221611936 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0.279982328414917, - "Max": 10.393004894256592, - "Med": 2.695093870162964 - }, - "Latency": { - "Min": 1.0236964225769043, - "Max": 226.86424326896667, - "Med": 8.788090467453003 - }, - "Speed": { - "Min": 6.36970168085182, - "Max": 142.28841482366659, - "Med": 29.044122877260026 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0.5482354164123535, - "Max": 9.20586085319519, - "Med": 2.924035429954529 - }, - "Latency": { - "Min": 2.055861711502075, - "Max": 245.093603849411, - "Med": 11.194562554359436 - }, - "Speed": { - "Min": 13.28376535796711, - "Max": 145.05043657625245, - "Med": 40.96906247070804 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0.5676538944244385, - "Max": 18.00051760673523, - "Med": 2.799908995628357 - }, - "Latency": { - "Min": 2.7604880332946777, - "Max": 222.4989938735962, - "Med": 10.76824402809143 - }, - "Speed": { - "Min": 11.438325476102152, - "Max": 600.4280253822659, - "Med": 36.117375325987155 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0.7314670085906982, - "Max": 18.019218683242798, - "Med": 3.098744034767151 - }, - "Latency": { - "Min": 2.326751947402954, - "Max": 45.220077991485596, - "Med": 11.047031044960022 - }, - "Speed": { - "Min": 8.295501061090546, - "Max": 2913.3504296776237, - "Med": 36.83611090850337 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 1.7066113948822021, - "Max": 27.875138998031616, - "Med": 7.51822829246521 - }, - "Latency": { - "Min": 5.285593032836914, - "Max": 64.81745195388794, - "Med": 22.035712718963623 - }, - "Speed": { - "Min": 10.37699857340723, - "Max": 174.55576987440293, - "Med": 74.16331887567887 - } - } - }, - "Qwen3 235B A22B Thinking 2507": { - "NUM_GPUS": 8, - "Overall": { - "Time to Answer": { - "Min": 0.06256508827209473, - "Max": 736.0421651714236, - "Med": 58.364528823897146 - }, - "Latency": { - "Min": 9.065596580505371, - "Max": 1732.575201034546, - "Med": 80.01045334339142 - }, - "Speed": { - "Min": 28.07917633059166, - "Max": 54.119336061700785, - "Med": 31.05335185752473 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 8.32601126502542, - "Max": 470.31466367465873, - "Med": 49.92947529336209 - }, - "Latency": { - "Min": 10.867412567138672, - "Max": 484.6364676952362, - "Med": 77.77793419361115 - }, - "Speed": { - "Min": 28.07917633059166, - "Max": 39.90586673547076, - "Med": 31.002491518580158 - } - }, - "Editing": { - "Time to Answer": { - "Min": 6.7613434317689505, - "Max": 372.06701306114655, - "Med": 55.46972915036075 - }, - "Latency": { - "Min": 12.784188508987427, - "Max": 407.30542731285095, - "Med": 64.62087953090668 - }, - "Speed": { - "Min": 28.747142212250516, - "Max": 37.5956207394253, - "Med": 31.06903190121833 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 7.8188813634279395, - "Max": 579.0974418856284, - "Med": 37.19298512071044 - }, - "Latency": { - "Min": 9.065596580505371, - "Max": 697.0528080463409, - "Med": 46.947518825531006 - }, - "Speed": { - "Min": 28.708839000854958, - "Max": 43.341493719125374, - "Med": 30.90819321498434 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 11.712259641220955, - "Max": 736.0421651714236, - "Med": 68.07581293220125 - }, - "Latency": { - "Min": 16.80387306213379, - "Max": 783.2390296459198, - "Med": 86.2214607000351 - }, - "Speed": { - "Min": 28.937313281060547, - "Max": 44.26187370353506, - "Med": 30.807720420014533 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 7.383297593049381, - "Max": 178.97415001159072, - "Med": 26.851773119277382 - }, - "Latency": { - "Min": 11.782234191894531, - "Max": 197.53329372406006, - "Med": 62.22791922092438 - }, - "Speed": { - "Min": 29.298819370251486, - "Max": 42.65193499777228, - "Med": 30.76656558149197 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0.06256508827209473, - "Max": 128.20847484760435, - "Med": 22.80348276652016 - }, - "Latency": { - "Min": 10.14313530921936, - "Max": 1589.4574007987976, - "Med": 46.40166354179382 - }, - "Speed": { - "Min": 29.062089243655688, - "Max": 53.51322871324012, - "Med": 30.894735584573983 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0.13854002952575684, - "Max": 368.7230666134867, - "Med": 71.71258273204108 - }, - "Latency": { - "Min": 28.03935408592224, - "Max": 1612.3236315250397, - "Med": 99.78373551368713 - }, - "Speed": { - "Min": 29.446475947658715, - "Max": 54.119336061700785, - "Med": 31.028204368467765 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 7.619767228890078, - "Max": 338.73873281057024, - "Med": 37.45078318875014 - }, - "Latency": { - "Min": 9.203788757324219, - "Max": 344.54028940200806, - "Med": 47.191033244132996 - }, - "Speed": { - "Min": 29.025468262798597, - "Max": 39.93690920819914, - "Med": 30.907140587464593 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0.06609606742858887, - "Max": 285.0211842707438, - "Med": 104.05180452709557 - }, - "Latency": { - "Min": 15.272495746612549, - "Max": 1732.575201034546, - "Med": 112.71562504768372 - }, - "Speed": { - "Min": 29.588853645201016, - "Max": 40.727003511635985, - "Med": 31.17443153247476 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 22.2004454781455, - "Max": 458.72581213557993, - "Med": 100.57843678633506 - }, - "Latency": { - "Min": 24.85186219215393, - "Max": 502.55407524108887, - "Med": 171.34558141231537 - }, - "Speed": { - "Min": 29.683723484882627, - "Max": 48.431172752538174, - "Med": 31.598426840841473 - } - } - }, - "HyperCLOVAX SEED Think 14B (think)": { - "NUM_GPUS": 4, - "Overall": { - "Time to Answer": { - "Min": 1.302618181705475, - "Max": 4689.285183429718, - "Med": 16.12651202553951 - }, - "Latency": { - "Min": 1.7391059398651123, - "Max": 4689.285183429718, - "Med": 24.703290462493896 - }, - "Speed": { - "Min": 7.307445345487631, - "Max": 132.39520584296585, - "Med": 83.75171982150616 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 1.9308034098072417, - "Max": 3182.6324622631073, - "Med": 9.365434941597282 - }, - "Latency": { - "Min": 2.639542579650879, - "Max": 3182.6324622631073, - "Med": 15.83870279788971 - }, - "Speed": { - "Min": 16.77744002340205, - "Max": 132.20948117740727, - "Med": 101.30194462557199 - } - }, - "Editing": { - "Time to Answer": { - "Min": 1.302618181705475, - "Max": 3684.873688697815, - "Med": 11.63883071546728 - }, - "Latency": { - "Min": 1.9943385124206543, - "Max": 3684.873688697815, - "Med": 17.01710867881775 - }, - "Speed": { - "Min": 16.904847795469426, - "Max": 127.9947226245074, - "Med": 82.78091784859947 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 1.722266919989335, - "Max": 3630.6213943958282, - "Med": 18.9117010317138 - }, - "Latency": { - "Min": 1.7391059398651123, - "Max": 3630.6213943958282, - "Med": 22.8030264377594 - }, - "Speed": { - "Min": 16.9533176796045, - "Max": 132.39520584296585, - "Med": 95.55230956717068 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 3.787565594225858, - "Max": 4689.285183429718, - "Med": 47.648784790465946 - }, - "Latency": { - "Min": 4.669194459915161, - "Max": 4689.285183429718, - "Med": 53.14876687526703 - }, - "Speed": { - "Min": 7.307445345487631, - "Max": 122.99707673061086, - "Med": 56.02760850437299 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 2.3909347255435116, - "Max": 4213.355564594269, - "Med": 12.233455278179349 - }, - "Latency": { - "Min": 5.310888767242432, - "Max": 4213.355564594269, - "Med": 17.837754607200623 - }, - "Speed": { - "Min": 15.55508107566778, - "Max": 123.52021564323292, - "Med": 67.40487932554299 - } - }, - "Safety": { - "Time to Answer": { - "Min": 1.5927799282402828, - "Max": 3345.7318699359894, - "Med": 11.678358185594366 - }, - "Latency": { - "Min": 2.3156676292419434, - "Max": 3345.7318699359894, - "Med": 19.955106258392334 - }, - "Speed": { - "Min": 11.31245153779103, - "Max": 122.64101353107978, - "Med": 67.30747920535906 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 2.605623053545928, - "Max": 3760.4994537830357, - "Med": 31.05022206741674 - }, - "Latency": { - "Min": 3.3749899864196777, - "Max": 3760.4994537830353, - "Med": 46.19044613838196 - }, - "Speed": { - "Min": 11.573290568687376, - "Max": 119.73492704762106, - "Med": 65.63865569385999 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 1.9824861443561057, - "Max": 155.0390686538344, - "Med": 9.848718469798364 - }, - "Latency": { - "Min": 3.1880249977111816, - "Max": 173.02495551109314, - "Med": 15.922844052314758 - }, - "Speed": { - "Min": 14.397409577449055, - "Max": 115.6932642853065, - "Med": 67.21227985703797 - } - }, - "Translation": { - "Time to Answer": { - "Min": 3.732137828045693, - "Max": 3044.260366678238, - "Med": 21.35948411961459 - }, - "Latency": { - "Min": 5.585608243942261, - "Max": 3044.260366678238, - "Med": 28.910189151763916 - }, - "Speed": { - "Min": 15.982972114429412, - "Max": 120.67903153413303, - "Med": 65.51372577968641 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 4.791775583638476, - "Max": 3020.2297556369836, - "Med": 22.534852630507356 - }, - "Latency": { - "Min": 7.323899745941162, - "Max": 3128.943753004074, - "Med": 39.47115349769592 - }, - "Speed": { - "Min": 33.1493590537365, - "Max": 120.58512845346421, - "Med": 91.88151991429632 - } - } - }, - "o3": { - "NUM_GPUS": 0, - "Overall": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Editing": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Translation": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Latency": { - "Min": 0, - "Max": 0, - "Med": 0.0 - }, - "Speed": { - "Min": 0.0, - "Max": 0.0, - "Med": 0.0 - } - } - }, - "Qwen3 30B A3B Instruct 2507": { - "NUM_GPUS": 4, - "Overall": { - "Time to Answer": { - "Min": 0.07036161422729492, - "Max": 31.39305281639099, - "Med": 7.902002811431885 - }, - "Latency": { - "Min": 1.7285857200622559, - "Max": 547.1464381217957, - "Med": 19.310550212860107 - }, - "Speed": { - "Min": 0.4588256271748607, - "Max": 1675.713403778012, - "Med": 42.44958664990833 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 0.07622957229614258, - "Max": 22.37809944152832, - "Med": 5.547954082489014 - }, - "Latency": { - "Min": 1.7285857200622559, - "Max": 54.02511215209961, - "Med": 17.99526071548462 - }, - "Speed": { - "Min": 1.7449605124170695, - "Max": 395.99575183878096, - "Med": 49.51125812595331 - } - }, - "Editing": { - "Time to Answer": { - "Min": 1.271263837814331, - "Max": 15.484830617904663, - "Med": 9.140550017356873 - }, - "Latency": { - "Min": 2.736412525177002, - "Max": 539.2392597198486, - "Med": 17.479472756385803 - }, - "Speed": { - "Min": 3.9319191699508407, - "Max": 449.3191069974718, - "Med": 34.76717528531384 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0.07036161422729492, - "Max": 18.753334522247314, - "Med": 11.012907266616821 - }, - "Latency": { - "Min": 4.687523126602173, - "Max": 59.57149577140808, - "Med": 21.979735851287842 - }, - "Speed": { - "Min": 12.198856846482125, - "Max": 1675.713403778012, - "Med": 39.39105098293068 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 1.2672924995422363, - "Max": 12.75803518295288, - "Med": 7.658918380737305 - }, - "Latency": { - "Min": 6.021063327789307, - "Max": 492.8253102302551, - "Med": 22.575589418411255 - }, - "Speed": { - "Min": 0.4588256271748607, - "Max": 136.7240615474058, - "Med": 52.43376380347458 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 1.326181411743164, - "Max": 13.884344339370728, - "Med": 8.749433875083923 - }, - "Latency": { - "Min": 6.732978582382202, - "Max": 523.8203475475311, - "Med": 22.27110004425049 - }, - "Speed": { - "Min": 13.670234094386595, - "Max": 127.01821961775451, - "Med": 48.010004713875546 - } - }, - "Safety": { - "Time to Answer": { - "Min": 0.7221760749816895, - "Max": 17.26791501045227, - "Med": 6.132864952087402 - }, - "Latency": { - "Min": 3.5488877296447754, - "Max": 519.6611423492432, - "Med": 18.625815629959106 - }, - "Speed": { - "Min": 9.42385351468209, - "Max": 136.11952964832034, - "Med": 39.3369653562358 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 1.2615349292755127, - "Max": 11.427521467208862, - "Med": 7.025874733924866 - }, - "Latency": { - "Min": 5.206911563873291, - "Max": 547.1464381217957, - "Med": 19.493096470832825 - }, - "Speed": { - "Min": 15.945074454375074, - "Max": 136.59016289375637, - "Med": 40.6654638059174 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 1.2998223304748535, - "Max": 22.830255270004272, - "Med": 5.679913282394409 - }, - "Latency": { - "Min": 6.018580198287964, - "Max": 34.019956827163696, - "Med": 14.682021975517273 - }, - "Speed": { - "Min": 10.786864734830647, - "Max": 104.8881829014035, - "Med": 34.532935023744685 - } - }, - "Translation": { - "Time to Answer": { - "Min": 1.2215449810028076, - "Max": 20.17802906036377, - "Med": 6.621237397193909 - }, - "Latency": { - "Min": 3.1604950428009033, - "Max": 35.560505390167236, - "Med": 15.364720463752747 - }, - "Speed": { - "Min": 3.7037511544211505, - "Max": 122.69665672393019, - "Med": 37.14367271023433 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 1.6285674571990967, - "Max": 31.39305281639099, - "Med": 9.501431345939636 - }, - "Latency": { - "Min": 3.1299006938934326, - "Max": 461.3084125518799, - "Med": 44.99225735664368 - }, - "Speed": { - "Min": 8.036510493759394, - "Max": 395.4933655049321, - "Med": 59.17519237542838 - } - } - }, - "Kimi K2 Thinking": { - "NUM_GPUS": 16, - "Overall": { - "Time to Answer": { - "Min": 0.11169028282165527, - "Max": 2241.13407095096, - "Med": 45.350714952828156 - }, - "Latency": { - "Min": 4.439338684082031, - "Max": 2728.568812608719, - "Med": 70.24291145801544 - }, - "Speed": { - "Min": 20.822681353212012, - "Max": 30.23625816531444, - "Med": 24.28866627458008 - } - }, - "Content Generation": { - "Time to Answer": { - "Min": 3.9137235890264095, - "Max": 1225.9386449544365, - "Med": 35.3905248505725 - }, - "Latency": { - "Min": 4.439338684082031, - "Max": 1227.7691378593445, - "Med": 69.77224278450012 - }, - "Speed": { - "Min": 22.19674076837345, - "Max": 28.607637310888318, - "Med": 24.40099504786854 - } - }, - "Editing": { - "Time to Answer": { - "Min": 11.06907760156818, - "Max": 471.7268326631406, - "Med": 45.441414409516526 - }, - "Latency": { - "Min": 12.470248699188232, - "Max": 497.5029664039612, - "Med": 56.23897922039032 - }, - "Speed": { - "Min": 22.109020450839882, - "Max": 29.13554703600312, - "Med": 24.070683050531777 - } - }, - "Data Analysis": { - "Time to Answer": { - "Min": 0.11449933052062988, - "Max": 1191.5499346137067, - "Med": 32.336552678242 - }, - "Latency": { - "Min": 7.8422017097473145, - "Max": 2388.0731995105743, - "Med": 41.18542838096619 - }, - "Speed": { - "Min": 22.05901226621864, - "Max": 29.533580819697747, - "Med": 24.28078923247624 - } - }, - "Reasoning": { - "Time to Answer": { - "Min": 9.640599855836832, - "Max": 2241.13407095096, - "Med": 54.49989092334097 - }, - "Latency": { - "Min": 11.675945520401001, - "Max": 2282.450988292694, - "Med": 77.0264184474945 - }, - "Speed": { - "Min": 22.63968842524605, - "Max": 27.45209055990165, - "Med": 24.40082929837471 - } - }, - "Hallucination": { - "Time to Answer": { - "Min": 4.6661487510523845, - "Max": 115.98579355956018, - "Med": 24.99396284027753 - }, - "Latency": { - "Min": 7.999700307846069, - "Max": 201.69537544250488, - "Med": 47.261369943618774 - }, - "Speed": { - "Min": 22.616629795811217, - "Max": 28.067817143682454, - "Med": 24.21991523840923 - } - }, - "Safety": { - "Time to Answer": { - "Min": 3.560721757119162, - "Max": 94.15018529295921, - "Med": 23.53879180359535 - }, - "Latency": { - "Min": 7.6663689613342285, - "Max": 242.5039336681366, - "Med": 42.37087416648865 - }, - "Speed": { - "Min": 22.02350964399482, - "Max": 25.83755322621515, - "Med": 24.36817211144977 - } - }, - "Repetition": { - "Time to Answer": { - "Min": 0.11169028282165527, - "Max": 823.9142600095446, - "Med": 51.967095582457276 - }, - "Latency": { - "Min": 10.11755895614624, - "Max": 2728.568812608719, - "Med": 75.55940163135529 - }, - "Speed": { - "Min": 22.511808549089448, - "Max": 29.03623731013598, - "Med": 24.555569181348574 - } - }, - "Summarization": { - "Time to Answer": { - "Min": 8.394754877090454, - "Max": 501.0477158718318, - "Med": 32.087957848732046 - }, - "Latency": { - "Min": 12.335896253585815, - "Max": 554.7887036800385, - "Med": 41.29298424720764 - }, - "Speed": { - "Min": 22.064383554209275, - "Max": 26.475146333904462, - "Med": 23.97254683466398 - } - }, - "Translation": { - "Time to Answer": { - "Min": 12.529863516024639, - "Max": 529.1059714276646, - "Med": 96.06599344376238 - }, - "Latency": { - "Min": 17.787765979766846, - "Max": 558.00350522995, - "Med": 108.60968720912933 - }, - "Speed": { - "Min": 20.822681353212012, - "Max": 26.82045824411476, - "Med": 24.103166794327993 - } - }, - "Multi-Turn": { - "Time to Answer": { - "Min": 14.937343551654969, - "Max": 438.05960693330314, - "Med": 72.95374519316395 - }, - "Latency": { - "Min": 16.352965593338013, - "Max": 621.9444324970245, - "Med": 156.30420565605164 - }, - "Speed": { - "Min": 23.156346672284208, - "Max": 30.23625816531444, - "Med": 24.427890087314992 - } - } - } -} \ No newline at end of file