Spaces:
Running
Running
| { | |
| "Olmo 3 32B Think": { | |
| "Overall": { | |
| "Min": 210, | |
| "Max": 65454, | |
| "Med": 3360.5, | |
| "Med Resp": 473.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 683, | |
| "Max": 65300, | |
| "Med": 3224.0, | |
| "Med Resp": 606.0 | |
| }, | |
| "Editing": { | |
| "Min": 580, | |
| "Max": 14539, | |
| "Med": 2859.0, | |
| "Med Resp": 419.5 | |
| }, | |
| "Data Analysis": { | |
| "Min": 369, | |
| "Max": 23205, | |
| "Med": 2624.0, | |
| "Med Resp": 332.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 779, | |
| "Max": 27491, | |
| "Med": 3911.5, | |
| "Med Resp": 467.5 | |
| }, | |
| "Hallucination": { | |
| "Min": 254, | |
| "Max": 10751, | |
| "Med": 2472.0, | |
| "Med Resp": 768.0 | |
| }, | |
| "Safety": { | |
| "Min": 210, | |
| "Max": 7162, | |
| "Med": 2025.0, | |
| "Med Resp": 646.0 | |
| }, | |
| "Repetition": { | |
| "Min": 1336, | |
| "Max": 65454, | |
| "Med": 5205.5, | |
| "Med Resp": 575.5 | |
| }, | |
| "Summarization": { | |
| "Min": 380, | |
| "Max": 17517, | |
| "Med": 2254.0, | |
| "Med Resp": 248.0 | |
| }, | |
| "Translation": { | |
| "Min": 846, | |
| "Max": 15667, | |
| "Med": 4546.5, | |
| "Med Resp": 349.5 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 1010, | |
| "Max": 24077, | |
| "Med": 6999.5, | |
| "Med Resp": 1764.5 | |
| } | |
| }, | |
| "Claude 4.1 Opus (20250805) (think)": { | |
| "Overall": { | |
| "Min": -10, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Content Generation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Editing": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Reasoning": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Hallucination": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Safety": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Repetition": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Summarization": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Translation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": -10, | |
| "Max": -4, | |
| "Med": -6.0, | |
| "Med Resp": -3.0 | |
| } | |
| }, | |
| "EXAONE 4.0 32B (think)": { | |
| "Overall": { | |
| "Min": 37, | |
| "Max": 142387, | |
| "Med": 1274.5, | |
| "Med Resp": 503.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 160, | |
| "Max": 131068, | |
| "Med": 1178.5, | |
| "Med Resp": 559.0 | |
| }, | |
| "Editing": { | |
| "Min": 37, | |
| "Max": 10786, | |
| "Med": 1041.0, | |
| "Med Resp": 423.5 | |
| }, | |
| "Data Analysis": { | |
| "Min": 229, | |
| "Max": 131072, | |
| "Med": 1412.0, | |
| "Med Resp": 345.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 567, | |
| "Max": 131076, | |
| "Med": 3961.5, | |
| "Med Resp": 585.5 | |
| }, | |
| "Hallucination": { | |
| "Min": 298, | |
| "Max": 65533, | |
| "Med": 1247.5, | |
| "Med Resp": 627.5 | |
| }, | |
| "Safety": { | |
| "Min": 227, | |
| "Max": 5093, | |
| "Med": 1145.0, | |
| "Med Resp": 589.0 | |
| }, | |
| "Repetition": { | |
| "Min": 441, | |
| "Max": 131072, | |
| "Med": 1744.5, | |
| "Med Resp": 579.5 | |
| }, | |
| "Summarization": { | |
| "Min": 149, | |
| "Max": 8423, | |
| "Med": 693.5, | |
| "Med Resp": 311.0 | |
| }, | |
| "Translation": { | |
| "Min": 227, | |
| "Max": 14234, | |
| "Med": 915.0, | |
| "Med Resp": 411.5 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 390, | |
| "Max": 142387, | |
| "Med": 3222.0, | |
| "Med Resp": 1488.0 | |
| } | |
| }, | |
| "DeepSeek V3.1 (think)": { | |
| "Overall": { | |
| "Min": 80, | |
| "Max": 31147, | |
| "Med": 710.5, | |
| "Med Resp": 356.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 132, | |
| "Max": 5354, | |
| "Med": 776.5, | |
| "Med Resp": 500.0 | |
| }, | |
| "Editing": { | |
| "Min": 119, | |
| "Max": 2063, | |
| "Med": 571.0, | |
| "Med Resp": 287.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 119, | |
| "Max": 13106, | |
| "Med": 644.0, | |
| "Med Resp": 218.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 259, | |
| "Max": 31147, | |
| "Med": 1340.5, | |
| "Med Resp": 338.0 | |
| }, | |
| "Hallucination": { | |
| "Min": 206, | |
| "Max": 10356, | |
| "Med": 1132.5, | |
| "Med Resp": 667.0 | |
| }, | |
| "Safety": { | |
| "Min": 80, | |
| "Max": 3412, | |
| "Med": 565.0, | |
| "Med Resp": 206.0 | |
| }, | |
| "Repetition": { | |
| "Min": 290, | |
| "Max": 6553, | |
| "Med": 826.5, | |
| "Med Resp": 450.0 | |
| }, | |
| "Summarization": { | |
| "Min": 148, | |
| "Max": 1533, | |
| "Med": 432.0, | |
| "Med Resp": 211.5 | |
| }, | |
| "Translation": { | |
| "Min": 147, | |
| "Max": 7448, | |
| "Med": 554.5, | |
| "Med Resp": 320.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 324, | |
| "Max": 7862, | |
| "Med": 2558.5, | |
| "Med Resp": 1545.0 | |
| } | |
| }, | |
| "Qwen3 30B A3B Thinking 2507": { | |
| "Overall": { | |
| "Min": 305, | |
| "Max": 32743, | |
| "Med": 2830.0, | |
| "Med Resp": 351.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 335, | |
| "Max": 10914, | |
| "Med": 2775.5, | |
| "Med Resp": 403.5 | |
| }, | |
| "Editing": { | |
| "Min": 371, | |
| "Max": 7617, | |
| "Med": 2358.5, | |
| "Med Resp": 220.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 305, | |
| "Max": 19749, | |
| "Med": 1702.0, | |
| "Med Resp": 227.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 485, | |
| "Max": 19485, | |
| "Med": 2504.0, | |
| "Med Resp": 505.0 | |
| }, | |
| "Hallucination": { | |
| "Min": 360, | |
| "Max": 6054, | |
| "Med": 2123.5, | |
| "Med Resp": 668.0 | |
| }, | |
| "Safety": { | |
| "Min": 306, | |
| "Max": 32688, | |
| "Med": 1667.0, | |
| "Med Resp": 447.0 | |
| }, | |
| "Repetition": { | |
| "Min": 1070, | |
| "Max": 32743, | |
| "Med": 3719.0, | |
| "Med Resp": 368.5 | |
| }, | |
| "Summarization": { | |
| "Min": 435, | |
| "Max": 14462, | |
| "Med": 2108.0, | |
| "Med Resp": 204.0 | |
| }, | |
| "Translation": { | |
| "Min": 513, | |
| "Max": 11340, | |
| "Med": 3869.5, | |
| "Med Resp": 276.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 536, | |
| "Max": 14557, | |
| "Med": 5822.5, | |
| "Med Resp": 1237.0 | |
| } | |
| }, | |
| "o4-mini": { | |
| "Overall": { | |
| "Min": -10, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Content Generation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Editing": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Reasoning": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Hallucination": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Safety": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Repetition": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Summarization": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Translation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": -10, | |
| "Max": -4, | |
| "Med": -6.0, | |
| "Med Resp": -3.0 | |
| } | |
| }, | |
| "Gemini 2.5 Flash": { | |
| "Overall": { | |
| "Min": -10, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Content Generation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Editing": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Reasoning": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Hallucination": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Safety": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Repetition": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Summarization": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Translation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": -10, | |
| "Max": -4, | |
| "Med": -6.0, | |
| "Med Resp": -3.0 | |
| } | |
| }, | |
| "Claude 4 Sonnet (20250514) (think)": { | |
| "Overall": { | |
| "Min": -10, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Content Generation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Editing": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Reasoning": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Hallucination": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Safety": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Repetition": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Summarization": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Translation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": -10, | |
| "Max": -4, | |
| "Med": -6.0, | |
| "Med Resp": -3.0 | |
| } | |
| }, | |
| "Gemini 3 Pro Preview (Thinking Level: High)": { | |
| "Overall": { | |
| "Min": 0, | |
| "Max": 18460, | |
| "Med": 1930.5, | |
| "Med Resp": 378.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 0, | |
| "Max": 12404, | |
| "Med": 1967.5, | |
| "Med Resp": 570.5 | |
| }, | |
| "Editing": { | |
| "Min": 433, | |
| "Max": 7372, | |
| "Med": 1684.0, | |
| "Med Resp": 322.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 386, | |
| "Max": 10791, | |
| "Med": 1357.0, | |
| "Med Resp": 172.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 0, | |
| "Max": 18460, | |
| "Med": 1821.0, | |
| "Med Resp": 502.5 | |
| }, | |
| "Hallucination": { | |
| "Min": 524, | |
| "Max": 6228, | |
| "Med": 1833.0, | |
| "Med Resp": 468.0 | |
| }, | |
| "Safety": { | |
| "Min": 832, | |
| "Max": 6324, | |
| "Med": 1802.0, | |
| "Med Resp": 291.0 | |
| }, | |
| "Repetition": { | |
| "Min": 516, | |
| "Max": 5086, | |
| "Med": 1910.0, | |
| "Med Resp": 314.0 | |
| }, | |
| "Summarization": { | |
| "Min": 663, | |
| "Max": 3857, | |
| "Med": 1295.5, | |
| "Med Resp": 184.5 | |
| }, | |
| "Translation": { | |
| "Min": 964, | |
| "Max": 9535, | |
| "Med": 2286.5, | |
| "Med Resp": 357.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 608, | |
| "Max": 10590, | |
| "Med": 4601.5, | |
| "Med Resp": 1377.0 | |
| } | |
| }, | |
| "Solar Pro Preview (top_p:0.95, temp: 0.7)": { | |
| "Overall": { | |
| "Min": 1, | |
| "Max": 4060, | |
| "Med": 260.0, | |
| "Med Resp": 260.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 15, | |
| "Max": 3643, | |
| "Med": 426.0, | |
| "Med Resp": 426.0 | |
| }, | |
| "Editing": { | |
| "Min": 14, | |
| "Max": 3948, | |
| "Med": 218.0, | |
| "Med Resp": 218.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 2, | |
| "Max": 3500, | |
| "Med": 89.0, | |
| "Med Resp": 89.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 1, | |
| "Max": 3338, | |
| "Med": 190.5, | |
| "Med Resp": 190.5 | |
| }, | |
| "Hallucination": { | |
| "Min": 20, | |
| "Max": 1093, | |
| "Med": 128.5, | |
| "Med Resp": 128.5 | |
| }, | |
| "Safety": { | |
| "Min": 11, | |
| "Max": 1507, | |
| "Med": 92.0, | |
| "Med Resp": 92.0 | |
| }, | |
| "Repetition": { | |
| "Min": 34, | |
| "Max": 4060, | |
| "Med": 214.0, | |
| "Med Resp": 214.0 | |
| }, | |
| "Summarization": { | |
| "Min": 43, | |
| "Max": 2478, | |
| "Med": 218.0, | |
| "Med Resp": 218.0 | |
| }, | |
| "Translation": { | |
| "Min": 20, | |
| "Max": 1711, | |
| "Med": 360.0, | |
| "Med Resp": 360.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 5, | |
| "Max": 3353, | |
| "Med": 530.0, | |
| "Med Resp": 530.0 | |
| } | |
| }, | |
| "DeepSeek R1 (0528) (top_p: 0.95, temp:0.6)": { | |
| "Overall": { | |
| "Min": 4, | |
| "Max": 16917, | |
| "Med": 1177.5, | |
| "Med Resp": 554.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 389, | |
| "Max": 7861, | |
| "Med": 1261.5, | |
| "Med Resp": 694.0 | |
| }, | |
| "Editing": { | |
| "Min": 4, | |
| "Max": 7611, | |
| "Med": 1054.5, | |
| "Med Resp": 517.5 | |
| }, | |
| "Data Analysis": { | |
| "Min": 4, | |
| "Max": 8191, | |
| "Med": 1112.0, | |
| "Med Resp": 355.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 4, | |
| "Max": 12257, | |
| "Med": 1913.0, | |
| "Med Resp": 455.5 | |
| }, | |
| "Hallucination": { | |
| "Min": 4, | |
| "Max": 7390, | |
| "Med": 1214.5, | |
| "Med Resp": 682.0 | |
| }, | |
| "Safety": { | |
| "Min": 227, | |
| "Max": 6387, | |
| "Med": 963.0, | |
| "Med Resp": 568.0 | |
| }, | |
| "Repetition": { | |
| "Min": 4, | |
| "Max": 7787, | |
| "Med": 1405.5, | |
| "Med Resp": 646.5 | |
| }, | |
| "Summarization": { | |
| "Min": 319, | |
| "Max": 2613, | |
| "Med": 711.5, | |
| "Med Resp": 321.0 | |
| }, | |
| "Translation": { | |
| "Min": 4, | |
| "Max": 7687, | |
| "Med": 1021.0, | |
| "Med Resp": 561.5 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 448, | |
| "Max": 16917, | |
| "Med": 3418.5, | |
| "Med Resp": 1874.0 | |
| } | |
| }, | |
| "A.X 4.0": { | |
| "Overall": { | |
| "Min": 1, | |
| "Max": 65581, | |
| "Med": 412.5, | |
| "Med Resp": 412.5 | |
| }, | |
| "Content Generation": { | |
| "Min": 2, | |
| "Max": 65581, | |
| "Med": 543.0, | |
| "Med Resp": 543.0 | |
| }, | |
| "Editing": { | |
| "Min": 8, | |
| "Max": 1791, | |
| "Med": 250.0, | |
| "Med Resp": 250.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 1, | |
| "Max": 65537, | |
| "Med": 267.0, | |
| "Med Resp": 267.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 2, | |
| "Max": 2046, | |
| "Med": 498.0, | |
| "Med Resp": 498.0 | |
| }, | |
| "Hallucination": { | |
| "Min": 12, | |
| "Max": 2639, | |
| "Med": 511.5, | |
| "Med Resp": 511.5 | |
| }, | |
| "Safety": { | |
| "Min": 4, | |
| "Max": 2942, | |
| "Med": 516.0, | |
| "Med Resp": 516.0 | |
| }, | |
| "Repetition": { | |
| "Min": 84, | |
| "Max": 65536, | |
| "Med": 341.5, | |
| "Med Resp": 341.5 | |
| }, | |
| "Summarization": { | |
| "Min": 26, | |
| "Max": 2369, | |
| "Med": 282.0, | |
| "Med Resp": 282.0 | |
| }, | |
| "Translation": { | |
| "Min": 7, | |
| "Max": 35068, | |
| "Med": 343.0, | |
| "Med Resp": 343.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 3, | |
| "Max": 9420, | |
| "Med": 1455.0, | |
| "Med Resp": 1455.0 | |
| } | |
| }, | |
| "DeepSeek V3.1 Terminus (think)": { | |
| "Overall": { | |
| "Min": 123, | |
| "Max": 36717, | |
| "Med": 831.5, | |
| "Med Resp": 377.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 152, | |
| "Max": 11674, | |
| "Med": 869.0, | |
| "Med Resp": 540.5 | |
| }, | |
| "Editing": { | |
| "Min": 153, | |
| "Max": 13465, | |
| "Med": 697.0, | |
| "Med Resp": 323.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 163, | |
| "Max": 18833, | |
| "Med": 731.0, | |
| "Med Resp": 208.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 273, | |
| "Max": 36717, | |
| "Med": 1471.0, | |
| "Med Resp": 337.5 | |
| }, | |
| "Hallucination": { | |
| "Min": 183, | |
| "Max": 8837, | |
| "Med": 1170.0, | |
| "Med Resp": 743.0 | |
| }, | |
| "Safety": { | |
| "Min": 167, | |
| "Max": 2442, | |
| "Med": 721.0, | |
| "Med Resp": 294.0 | |
| }, | |
| "Repetition": { | |
| "Min": 308, | |
| "Max": 9694, | |
| "Med": 997.0, | |
| "Med Resp": 508.0 | |
| }, | |
| "Summarization": { | |
| "Min": 157, | |
| "Max": 1562, | |
| "Med": 460.0, | |
| "Med Resp": 215.0 | |
| }, | |
| "Translation": { | |
| "Min": 123, | |
| "Max": 9706, | |
| "Med": 617.5, | |
| "Med Resp": 347.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 297, | |
| "Max": 16318, | |
| "Med": 2647.5, | |
| "Med Resp": 1623.0 | |
| } | |
| }, | |
| "Dhanishtha-2.0 Preview": { | |
| "Overall": { | |
| "Min": 93, | |
| "Max": 6076, | |
| "Med": 520.0, | |
| "Med Resp": 356.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 102, | |
| "Max": 3978, | |
| "Med": 589.0, | |
| "Med Resp": 439.0 | |
| }, | |
| "Editing": { | |
| "Min": 116, | |
| "Max": 1716, | |
| "Med": 437.5, | |
| "Med Resp": 301.5 | |
| }, | |
| "Data Analysis": { | |
| "Min": 116, | |
| "Max": 4070, | |
| "Med": 470.0, | |
| "Med Resp": 288.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 182, | |
| "Max": 2451, | |
| "Med": 625.0, | |
| "Med Resp": 366.0 | |
| }, | |
| "Hallucination": { | |
| "Min": 160, | |
| "Max": 4068, | |
| "Med": 494.0, | |
| "Med Resp": 318.5 | |
| }, | |
| "Safety": { | |
| "Min": 121, | |
| "Max": 1470, | |
| "Med": 381.0, | |
| "Med Resp": 236.0 | |
| }, | |
| "Repetition": { | |
| "Min": 240, | |
| "Max": 3982, | |
| "Med": 576.5, | |
| "Med Resp": 384.5 | |
| }, | |
| "Summarization": { | |
| "Min": 93, | |
| "Max": 2578, | |
| "Med": 385.0, | |
| "Med Resp": 289.0 | |
| }, | |
| "Translation": { | |
| "Min": 107, | |
| "Max": 3331, | |
| "Med": 426.0, | |
| "Med Resp": 331.5 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 362, | |
| "Max": 6076, | |
| "Med": 1462.0, | |
| "Med Resp": 1095.0 | |
| } | |
| }, | |
| "GPT-5 (Reasoning: medium)": { | |
| "Overall": { | |
| "Min": -10, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Content Generation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Editing": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Reasoning": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Hallucination": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Safety": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Repetition": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Summarization": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Translation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": -10, | |
| "Max": -4, | |
| "Med": -6.0, | |
| "Med Resp": -3.0 | |
| } | |
| }, | |
| "Kanana 1.5 15.7B A3B Instruct": { | |
| "Overall": { | |
| "Min": 1, | |
| "Max": 34276, | |
| "Med": 414.0, | |
| "Med Resp": 414.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 10, | |
| "Max": 22194, | |
| "Med": 463.5, | |
| "Med Resp": 463.5 | |
| }, | |
| "Editing": { | |
| "Min": 5, | |
| "Max": 1311, | |
| "Med": 249.5, | |
| "Med Resp": 249.5 | |
| }, | |
| "Data Analysis": { | |
| "Min": 1, | |
| "Max": 22211, | |
| "Med": 396.0, | |
| "Med Resp": 396.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 1, | |
| "Max": 20275, | |
| "Med": 581.0, | |
| "Med Resp": 581.0 | |
| }, | |
| "Hallucination": { | |
| "Min": 24, | |
| "Max": 21645, | |
| "Med": 441.5, | |
| "Med Resp": 441.5 | |
| }, | |
| "Safety": { | |
| "Min": 18, | |
| "Max": 1531, | |
| "Med": 414.0, | |
| "Med Resp": 414.0 | |
| }, | |
| "Repetition": { | |
| "Min": 76, | |
| "Max": 1912, | |
| "Med": 299.5, | |
| "Med Resp": 299.5 | |
| }, | |
| "Summarization": { | |
| "Min": 1, | |
| "Max": 29578, | |
| "Med": 275.5, | |
| "Med Resp": 275.5 | |
| }, | |
| "Translation": { | |
| "Min": 9, | |
| "Max": 31839, | |
| "Med": 308.5, | |
| "Med Resp": 308.5 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 3, | |
| "Max": 34276, | |
| "Med": 1167.5, | |
| "Med Resp": 1167.5 | |
| } | |
| }, | |
| "DeepSeek V3 (0324) (top_p: 0.95, temp:1.3)": { | |
| "Overall": { | |
| "Min": 1, | |
| "Max": 5178, | |
| "Med": 408.0, | |
| "Med Resp": 408.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 7, | |
| "Max": 1974, | |
| "Med": 439.5, | |
| "Med Resp": 439.5 | |
| }, | |
| "Editing": { | |
| "Min": 5, | |
| "Max": 1192, | |
| "Med": 293.0, | |
| "Med Resp": 293.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 1, | |
| "Max": 3155, | |
| "Med": 330.0, | |
| "Med Resp": 330.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 63, | |
| "Max": 5178, | |
| "Med": 519.0, | |
| "Med Resp": 519.0 | |
| }, | |
| "Hallucination": { | |
| "Min": 57, | |
| "Max": 1621, | |
| "Med": 502.5, | |
| "Med Resp": 502.5 | |
| }, | |
| "Safety": { | |
| "Min": 12, | |
| "Max": 1726, | |
| "Med": 337.0, | |
| "Med Resp": 337.0 | |
| }, | |
| "Repetition": { | |
| "Min": 98, | |
| "Max": 2754, | |
| "Med": 406.5, | |
| "Med Resp": 406.5 | |
| }, | |
| "Summarization": { | |
| "Min": 32, | |
| "Max": 959, | |
| "Med": 251.0, | |
| "Med Resp": 251.0 | |
| }, | |
| "Translation": { | |
| "Min": 60, | |
| "Max": 2197, | |
| "Med": 351.5, | |
| "Med Resp": 351.5 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 4, | |
| "Max": 4959, | |
| "Med": 1318.5, | |
| "Med Resp": 1318.5 | |
| } | |
| }, | |
| "GLM-4.6 FP8": { | |
| "Overall": { | |
| "Min": 126, | |
| "Max": 23404, | |
| "Med": 2645.5, | |
| "Med Resp": 522.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 669, | |
| "Max": 8438, | |
| "Med": 2561.5, | |
| "Med Resp": 687.5 | |
| }, | |
| "Editing": { | |
| "Min": 976, | |
| "Max": 4842, | |
| "Med": 2395.5, | |
| "Med Resp": 445.5 | |
| }, | |
| "Data Analysis": { | |
| "Min": 487, | |
| "Max": 9089, | |
| "Med": 1743.0, | |
| "Med Resp": 213.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 515, | |
| "Max": 23404, | |
| "Med": 2596.0, | |
| "Med Resp": 697.0 | |
| }, | |
| "Hallucination": { | |
| "Min": 838, | |
| "Max": 10287, | |
| "Med": 2426.0, | |
| "Med Resp": 838.5 | |
| }, | |
| "Safety": { | |
| "Min": 251, | |
| "Max": 7182, | |
| "Med": 2375.0, | |
| "Med Resp": 621.0 | |
| }, | |
| "Repetition": { | |
| "Min": 1115, | |
| "Max": 9952, | |
| "Med": 2572.5, | |
| "Med Resp": 544.5 | |
| }, | |
| "Summarization": { | |
| "Min": 956, | |
| "Max": 6571, | |
| "Med": 1978.5, | |
| "Med Resp": 206.0 | |
| }, | |
| "Translation": { | |
| "Min": 126, | |
| "Max": 10588, | |
| "Med": 3456.0, | |
| "Med Resp": 421.5 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 1512, | |
| "Max": 13662, | |
| "Med": 6810.5, | |
| "Med Resp": 2282.5 | |
| } | |
| }, | |
| "DeepSeek V3.2 Speciale": { | |
| "Overall": { | |
| "Min": 160, | |
| "Max": 65513, | |
| "Med": 3226.5, | |
| "Med Resp": 249.5 | |
| }, | |
| "Content Generation": { | |
| "Min": 186, | |
| "Max": 46347, | |
| "Med": 3634.0, | |
| "Med Resp": 364.0 | |
| }, | |
| "Editing": { | |
| "Min": 329, | |
| "Max": 24883, | |
| "Med": 3043.0, | |
| "Med Resp": 178.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 191, | |
| "Max": 64268, | |
| "Med": 1640.0, | |
| "Med Resp": 67.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 228, | |
| "Max": 65472, | |
| "Med": 2211.5, | |
| "Med Resp": 165.0 | |
| }, | |
| "Hallucination": { | |
| "Min": 373, | |
| "Max": 23653, | |
| "Med": 3253.5, | |
| "Med Resp": 258.0 | |
| }, | |
| "Safety": { | |
| "Min": 331, | |
| "Max": 39236, | |
| "Med": 2575.0, | |
| "Med Resp": 158.0 | |
| }, | |
| "Repetition": { | |
| "Min": 356, | |
| "Max": 65513, | |
| "Med": 3357.0, | |
| "Med Resp": 246.0 | |
| }, | |
| "Summarization": { | |
| "Min": 160, | |
| "Max": 56309, | |
| "Med": 1500.0, | |
| "Med Resp": 189.5 | |
| }, | |
| "Translation": { | |
| "Min": 522, | |
| "Max": 25619, | |
| "Med": 5143.5, | |
| "Med Resp": 281.5 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 244, | |
| "Max": 32258, | |
| "Med": 4282.0, | |
| "Med Resp": 854.0 | |
| } | |
| }, | |
| "Claude 4.5 Opus (think, budget: 16K)": { | |
| "Overall": { | |
| "Min": -10, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Content Generation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Editing": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Reasoning": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Hallucination": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Safety": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Repetition": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Summarization": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Translation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": -10, | |
| "Max": -4, | |
| "Med": -6.0, | |
| "Med Resp": -3.0 | |
| } | |
| }, | |
| "GLM-4.5 FP8": { | |
| "Overall": { | |
| "Min": 75, | |
| "Max": 65432, | |
| "Med": 1442.0, | |
| "Med Resp": 604.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 322, | |
| "Max": 9320, | |
| "Med": 1283.0, | |
| "Med Resp": 655.5 | |
| }, | |
| "Editing": { | |
| "Min": 232, | |
| "Max": 10227, | |
| "Med": 1163.5, | |
| "Med Resp": 571.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 318, | |
| "Max": 15748, | |
| "Med": 1328.0, | |
| "Med Resp": 481.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 558, | |
| "Max": 65432, | |
| "Med": 3187.5, | |
| "Med Resp": 653.0 | |
| }, | |
| "Hallucination": { | |
| "Min": 75, | |
| "Max": 10541, | |
| "Med": 1546.5, | |
| "Med Resp": 962.5 | |
| }, | |
| "Safety": { | |
| "Min": 159, | |
| "Max": 5552, | |
| "Med": 1418.0, | |
| "Med Resp": 808.0 | |
| }, | |
| "Repetition": { | |
| "Min": 284, | |
| "Max": 65409, | |
| "Med": 1492.0, | |
| "Med Resp": 729.5 | |
| }, | |
| "Summarization": { | |
| "Min": 242, | |
| "Max": 3610, | |
| "Med": 688.5, | |
| "Med Resp": 268.0 | |
| }, | |
| "Translation": { | |
| "Min": 156, | |
| "Max": 10043, | |
| "Med": 1448.5, | |
| "Med Resp": 414.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 630, | |
| "Max": 15831, | |
| "Med": 3977.5, | |
| "Med Resp": 2277.5 | |
| } | |
| }, | |
| "Gauss2.3 Hybrid": { | |
| "Overall": { | |
| "Min": 7, | |
| "Max": 134423, | |
| "Med": 546.0, | |
| "Med Resp": 308.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 16, | |
| "Max": 6706, | |
| "Med": 470.0, | |
| "Med Resp": 416.5 | |
| }, | |
| "Editing": { | |
| "Min": 9, | |
| "Max": 2943, | |
| "Med": 219.0, | |
| "Med Resp": 188.5 | |
| }, | |
| "Data Analysis": { | |
| "Min": 23, | |
| "Max": 131072, | |
| "Med": 585.0, | |
| "Med Resp": 192.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 329, | |
| "Max": 131072, | |
| "Med": 2091.0, | |
| "Med Resp": 387.0 | |
| }, | |
| "Hallucination": { | |
| "Min": 20, | |
| "Max": 131072, | |
| "Med": 972.5, | |
| "Med Resp": 387.0 | |
| }, | |
| "Safety": { | |
| "Min": 20, | |
| "Max": 131072, | |
| "Med": 603.0, | |
| "Med Resp": 270.0 | |
| }, | |
| "Repetition": { | |
| "Min": 60, | |
| "Max": 131085, | |
| "Med": 869.5, | |
| "Med Resp": 392.0 | |
| }, | |
| "Summarization": { | |
| "Min": 26, | |
| "Max": 2114, | |
| "Med": 320.0, | |
| "Med Resp": 208.0 | |
| }, | |
| "Translation": { | |
| "Min": 7, | |
| "Max": 71270, | |
| "Med": 322.0, | |
| "Med Resp": 273.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 7, | |
| "Max": 134423, | |
| "Med": 2478.5, | |
| "Med Resp": 1208.5 | |
| } | |
| }, | |
| "DeepSeek V3.2": { | |
| "Overall": { | |
| "Min": 134, | |
| "Max": 22816, | |
| "Med": 762.5, | |
| "Med Resp": 312.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 153, | |
| "Max": 5977, | |
| "Med": 845.0, | |
| "Med Resp": 462.0 | |
| }, | |
| "Editing": { | |
| "Min": 141, | |
| "Max": 6055, | |
| "Med": 587.5, | |
| "Med Resp": 245.5 | |
| }, | |
| "Data Analysis": { | |
| "Min": 157, | |
| "Max": 13414, | |
| "Med": 695.0, | |
| "Med Resp": 166.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 272, | |
| "Max": 22816, | |
| "Med": 1440.5, | |
| "Med Resp": 245.0 | |
| }, | |
| "Hallucination": { | |
| "Min": 213, | |
| "Max": 9501, | |
| "Med": 938.5, | |
| "Med Resp": 532.5 | |
| }, | |
| "Safety": { | |
| "Min": 184, | |
| "Max": 5304, | |
| "Med": 617.0, | |
| "Med Resp": 238.0 | |
| }, | |
| "Repetition": { | |
| "Min": 216, | |
| "Max": 7227, | |
| "Med": 919.5, | |
| "Med Resp": 399.0 | |
| }, | |
| "Summarization": { | |
| "Min": 134, | |
| "Max": 1750, | |
| "Med": 471.0, | |
| "Med Resp": 197.5 | |
| }, | |
| "Translation": { | |
| "Min": 154, | |
| "Max": 6364, | |
| "Med": 565.0, | |
| "Med Resp": 301.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 401, | |
| "Max": 14066, | |
| "Med": 2538.5, | |
| "Med Resp": 1261.0 | |
| } | |
| }, | |
| "MiniMax-M2 (230B A10B)": { | |
| "Overall": { | |
| "Min": 64, | |
| "Max": 28729, | |
| "Med": 1142.0, | |
| "Med Resp": 325.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 116, | |
| "Max": 16249, | |
| "Med": 1235.5, | |
| "Med Resp": 501.5 | |
| }, | |
| "Editing": { | |
| "Min": 111, | |
| "Max": 11557, | |
| "Med": 858.0, | |
| "Med Resp": 201.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 76, | |
| "Max": 18529, | |
| "Med": 834.0, | |
| "Med Resp": 170.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 118, | |
| "Max": 18596, | |
| "Med": 1674.0, | |
| "Med Resp": 418.5 | |
| }, | |
| "Hallucination": { | |
| "Min": 92, | |
| "Max": 8617, | |
| "Med": 1130.0, | |
| "Med Resp": 436.0 | |
| }, | |
| "Safety": { | |
| "Min": 64, | |
| "Max": 5803, | |
| "Med": 563.0, | |
| "Med Resp": 176.0 | |
| }, | |
| "Repetition": { | |
| "Min": 175, | |
| "Max": 14147, | |
| "Med": 1054.5, | |
| "Med Resp": 259.0 | |
| }, | |
| "Summarization": { | |
| "Min": 135, | |
| "Max": 15849, | |
| "Med": 716.0, | |
| "Med Resp": 197.5 | |
| }, | |
| "Translation": { | |
| "Min": 216, | |
| "Max": 22260, | |
| "Med": 1133.0, | |
| "Med Resp": 297.5 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 303, | |
| "Max": 28729, | |
| "Med": 3732.0, | |
| "Med Resp": 1424.0 | |
| } | |
| }, | |
| "gpt-oss-120B (Reasoning: medium)": { | |
| "Overall": { | |
| "Min": 43, | |
| "Max": 18693, | |
| "Med": 759.5, | |
| "Med Resp": 370.5 | |
| }, | |
| "Content Generation": { | |
| "Min": 126, | |
| "Max": 6264, | |
| "Med": 897.0, | |
| "Med Resp": 613.5 | |
| }, | |
| "Editing": { | |
| "Min": 61, | |
| "Max": 4605, | |
| "Med": 475.5, | |
| "Med Resp": 248.5 | |
| }, | |
| "Data Analysis": { | |
| "Min": 49, | |
| "Max": 6975, | |
| "Med": 596.0, | |
| "Med Resp": 213.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 147, | |
| "Max": 10387, | |
| "Med": 1170.5, | |
| "Med Resp": 635.0 | |
| }, | |
| "Hallucination": { | |
| "Min": 88, | |
| "Max": 5277, | |
| "Med": 1317.0, | |
| "Med Resp": 1106.5 | |
| }, | |
| "Safety": { | |
| "Min": 43, | |
| "Max": 3651, | |
| "Med": 199.0, | |
| "Med Resp": 12.0 | |
| }, | |
| "Repetition": { | |
| "Min": 122, | |
| "Max": 6986, | |
| "Med": 940.0, | |
| "Med Resp": 407.0 | |
| }, | |
| "Summarization": { | |
| "Min": 83, | |
| "Max": 15231, | |
| "Med": 378.0, | |
| "Med Resp": 246.0 | |
| }, | |
| "Translation": { | |
| "Min": 107, | |
| "Max": 3659, | |
| "Med": 737.0, | |
| "Med Resp": 299.5 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 135, | |
| "Max": 18693, | |
| "Med": 2826.0, | |
| "Med Resp": 2150.0 | |
| } | |
| }, | |
| "K2-Think": { | |
| "Overall": { | |
| "Min": 27, | |
| "Max": 8178, | |
| "Med": 1835.0, | |
| "Med Resp": 486.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 138, | |
| "Max": 2049, | |
| "Med": 1821.5, | |
| "Med Resp": 660.5 | |
| }, | |
| "Editing": { | |
| "Min": 169, | |
| "Max": 2054, | |
| "Med": 1433.5, | |
| "Med Resp": 283.5 | |
| }, | |
| "Data Analysis": { | |
| "Min": 150, | |
| "Max": 2053, | |
| "Med": 1349.0, | |
| "Med Resp": 264.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 419, | |
| "Max": 2048, | |
| "Med": 2045.5, | |
| "Med Resp": 576.5 | |
| }, | |
| "Hallucination": { | |
| "Min": 174, | |
| "Max": 2054, | |
| "Med": 1890.0, | |
| "Med Resp": 522.5 | |
| }, | |
| "Safety": { | |
| "Min": 27, | |
| "Max": 2048, | |
| "Med": 1393.0, | |
| "Med Resp": 405.0 | |
| }, | |
| "Repetition": { | |
| "Min": 870, | |
| "Max": 2070, | |
| "Med": 2048.0, | |
| "Med Resp": 2048.0 | |
| }, | |
| "Summarization": { | |
| "Min": 252, | |
| "Max": 2053, | |
| "Med": 1011.0, | |
| "Med Resp": 262.5 | |
| }, | |
| "Translation": { | |
| "Min": 195, | |
| "Max": 2051, | |
| "Med": 2006.0, | |
| "Med Resp": 371.5 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 110, | |
| "Max": 8178, | |
| "Med": 3224.0, | |
| "Med Resp": 1526.0 | |
| } | |
| }, | |
| "MiMo V2 Flash": { | |
| "Overall": { | |
| "Min": 125, | |
| "Max": 69375, | |
| "Med": 1477.5, | |
| "Med Resp": 373.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 222, | |
| "Max": 65445, | |
| "Med": 1321.5, | |
| "Med Resp": 500.5 | |
| }, | |
| "Editing": { | |
| "Min": 265, | |
| "Max": 65423, | |
| "Med": 1194.0, | |
| "Med Resp": 314.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 262, | |
| "Max": 65439, | |
| "Med": 1296.0, | |
| "Med Resp": 235.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 319, | |
| "Max": 65430, | |
| "Med": 2559.5, | |
| "Med Resp": 402.5 | |
| }, | |
| "Hallucination": { | |
| "Min": 129, | |
| "Max": 65447, | |
| "Med": 1179.5, | |
| "Med Resp": 499.0 | |
| }, | |
| "Safety": { | |
| "Min": 133, | |
| "Max": 5184, | |
| "Med": 717.0, | |
| "Med Resp": 294.0 | |
| }, | |
| "Repetition": { | |
| "Min": 295, | |
| "Max": 65472, | |
| "Med": 2153.5, | |
| "Med Resp": 573.5 | |
| }, | |
| "Summarization": { | |
| "Min": 188, | |
| "Max": 64302, | |
| "Med": 789.5, | |
| "Med Resp": 220.5 | |
| }, | |
| "Translation": { | |
| "Min": 125, | |
| "Max": 65041, | |
| "Med": 1738.5, | |
| "Med Resp": 339.5 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 323, | |
| "Max": 69375, | |
| "Med": 3331.5, | |
| "Med Resp": 1361.0 | |
| } | |
| }, | |
| "Qwen3 32B (think)": { | |
| "Overall": { | |
| "Min": 164, | |
| "Max": 34272, | |
| "Med": 1113.0, | |
| "Med Resp": 390.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 164, | |
| "Max": 32768, | |
| "Med": 1027.5, | |
| "Med Resp": 476.0 | |
| }, | |
| "Editing": { | |
| "Min": 285, | |
| "Max": 3646, | |
| "Med": 843.0, | |
| "Med Resp": 283.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 210, | |
| "Max": 18774, | |
| "Med": 968.0, | |
| "Med Resp": 278.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 477, | |
| "Max": 18676, | |
| "Med": 1759.0, | |
| "Med Resp": 459.0 | |
| }, | |
| "Hallucination": { | |
| "Min": 170, | |
| "Max": 3776, | |
| "Med": 1617.0, | |
| "Med Resp": 646.0 | |
| }, | |
| "Safety": { | |
| "Min": 169, | |
| "Max": 4053, | |
| "Med": 940.0, | |
| "Med Resp": 429.0 | |
| }, | |
| "Repetition": { | |
| "Min": 608, | |
| "Max": 32768, | |
| "Med": 2316.5, | |
| "Med Resp": 537.5 | |
| }, | |
| "Summarization": { | |
| "Min": 192, | |
| "Max": 2255, | |
| "Med": 586.0, | |
| "Med Resp": 236.5 | |
| }, | |
| "Translation": { | |
| "Min": 374, | |
| "Max": 10683, | |
| "Med": 1113.5, | |
| "Med Resp": 307.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 493, | |
| "Max": 34272, | |
| "Med": 3210.0, | |
| "Med Resp": 1481.0 | |
| } | |
| }, | |
| "ERNIE 4.5 21B A3B Thinking": { | |
| "Overall": { | |
| "Min": 186, | |
| "Max": 66114, | |
| "Med": 1637.0, | |
| "Med Resp": 541.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 302, | |
| "Max": 12760, | |
| "Med": 1586.5, | |
| "Med Resp": 654.5 | |
| }, | |
| "Editing": { | |
| "Min": 186, | |
| "Max": 8703, | |
| "Med": 1119.5, | |
| "Med Resp": 336.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 200, | |
| "Max": 31928, | |
| "Med": 1484.0, | |
| "Med Resp": 418.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 511, | |
| "Max": 29184, | |
| "Med": 5312.0, | |
| "Med Resp": 669.5 | |
| }, | |
| "Hallucination": { | |
| "Min": 313, | |
| "Max": 11452, | |
| "Med": 1716.0, | |
| "Med Resp": 797.5 | |
| }, | |
| "Safety": { | |
| "Min": 213, | |
| "Max": 6914, | |
| "Med": 1242.0, | |
| "Med Resp": 599.0 | |
| }, | |
| "Repetition": { | |
| "Min": 643, | |
| "Max": 65463, | |
| "Med": 2387.0, | |
| "Med Resp": 516.5 | |
| }, | |
| "Summarization": { | |
| "Min": 215, | |
| "Max": 12449, | |
| "Med": 884.0, | |
| "Med Resp": 269.5 | |
| }, | |
| "Translation": { | |
| "Min": 298, | |
| "Max": 19672, | |
| "Med": 1466.5, | |
| "Med Resp": 421.5 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 705, | |
| "Max": 66114, | |
| "Med": 4404.5, | |
| "Med Resp": 1819.0 | |
| } | |
| }, | |
| "Qwen3 235B A22B Instruct 2507": { | |
| "Overall": { | |
| "Min": 1, | |
| "Max": 65405, | |
| "Med": 433.0, | |
| "Med Resp": 433.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 7, | |
| "Max": 4604, | |
| "Med": 492.5, | |
| "Med Resp": 492.5 | |
| }, | |
| "Editing": { | |
| "Min": 6, | |
| "Max": 2067, | |
| "Med": 248.5, | |
| "Med Resp": 248.5 | |
| }, | |
| "Data Analysis": { | |
| "Min": 1, | |
| "Max": 5119, | |
| "Med": 357.0, | |
| "Med Resp": 357.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 1, | |
| "Max": 11933, | |
| "Med": 730.5, | |
| "Med Resp": 730.5 | |
| }, | |
| "Hallucination": { | |
| "Min": 38, | |
| "Max": 2395, | |
| "Med": 630.0, | |
| "Med Resp": 630.0 | |
| }, | |
| "Safety": { | |
| "Min": 12, | |
| "Max": 2497, | |
| "Med": 352.0, | |
| "Med Resp": 352.0 | |
| }, | |
| "Repetition": { | |
| "Min": 73, | |
| "Max": 65405, | |
| "Med": 468.5, | |
| "Med Resp": 468.5 | |
| }, | |
| "Summarization": { | |
| "Min": 24, | |
| "Max": 1899, | |
| "Med": 249.0, | |
| "Med Resp": 249.0 | |
| }, | |
| "Translation": { | |
| "Min": 10, | |
| "Max": 64183, | |
| "Med": 299.0, | |
| "Med Resp": 299.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 3, | |
| "Max": 8009, | |
| "Med": 1728.5, | |
| "Med Resp": 1728.5 | |
| } | |
| }, | |
| "Grok-4": { | |
| "Overall": { | |
| "Min": -10, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Content Generation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Editing": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Reasoning": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Hallucination": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Safety": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Repetition": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Summarization": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Translation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": -10, | |
| "Max": -4, | |
| "Med": -6.0, | |
| "Med Resp": -3.0 | |
| } | |
| }, | |
| "GPT-5.2 (Reasoning: medium)": { | |
| "Overall": { | |
| "Min": 11, | |
| "Max": 7735, | |
| "Med": 347.0, | |
| "Med Resp": 264.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 12, | |
| "Max": 7735, | |
| "Med": 537.0, | |
| "Med Resp": 370.0 | |
| }, | |
| "Editing": { | |
| "Min": 11, | |
| "Max": 1562, | |
| "Med": 173.5, | |
| "Med Resp": 166.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 18, | |
| "Max": 3954, | |
| "Med": 222.0, | |
| "Med Resp": 98.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 29, | |
| "Max": 6895, | |
| "Med": 445.5, | |
| "Med Resp": 246.5 | |
| }, | |
| "Hallucination": { | |
| "Min": 72, | |
| "Max": 3525, | |
| "Med": 633.0, | |
| "Med Resp": 357.5 | |
| }, | |
| "Safety": { | |
| "Min": 58, | |
| "Max": 2808, | |
| "Med": 434.0, | |
| "Med Resp": 285.0 | |
| }, | |
| "Repetition": { | |
| "Min": 34, | |
| "Max": 5202, | |
| "Med": 272.0, | |
| "Med Resp": 223.0 | |
| }, | |
| "Summarization": { | |
| "Min": 37, | |
| "Max": 2339, | |
| "Med": 201.0, | |
| "Med Resp": 194.5 | |
| }, | |
| "Translation": { | |
| "Min": 12, | |
| "Max": 3684, | |
| "Med": 307.0, | |
| "Med Resp": 283.5 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 41, | |
| "Max": 7003, | |
| "Med": 983.5, | |
| "Med Resp": 844.5 | |
| } | |
| }, | |
| "Gemini 3 Flash Preview (Thinking Level: High)": { | |
| "Overall": { | |
| "Min": 137, | |
| "Max": 24472, | |
| "Med": 1296.5, | |
| "Med Resp": 424.5 | |
| }, | |
| "Content Generation": { | |
| "Min": 248, | |
| "Max": 16374, | |
| "Med": 1368.5, | |
| "Med Resp": 535.5 | |
| }, | |
| "Editing": { | |
| "Min": 137, | |
| "Max": 10610, | |
| "Med": 1113.5, | |
| "Med Resp": 338.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 166, | |
| "Max": 13595, | |
| "Med": 923.0, | |
| "Med Resp": 232.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 318, | |
| "Max": 24472, | |
| "Med": 1210.5, | |
| "Med Resp": 556.0 | |
| }, | |
| "Hallucination": { | |
| "Min": 349, | |
| "Max": 5023, | |
| "Med": 1295.5, | |
| "Med Resp": 639.5 | |
| }, | |
| "Safety": { | |
| "Min": 380, | |
| "Max": 5510, | |
| "Med": 1297.0, | |
| "Med Resp": 482.0 | |
| }, | |
| "Repetition": { | |
| "Min": 309, | |
| "Max": 7743, | |
| "Med": 1477.5, | |
| "Med Resp": 389.5 | |
| }, | |
| "Summarization": { | |
| "Min": 306, | |
| "Max": 18709, | |
| "Med": 905.5, | |
| "Med Resp": 195.0 | |
| }, | |
| "Translation": { | |
| "Min": 289, | |
| "Max": 17871, | |
| "Med": 1421.0, | |
| "Med Resp": 381.5 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 231, | |
| "Max": 11926, | |
| "Med": 3075.5, | |
| "Med Resp": 1466.5 | |
| } | |
| }, | |
| "Kanana 2 30B A3B Thinking": { | |
| "Overall": { | |
| "Min": 584, | |
| "Max": 247274, | |
| "Med": 4263.0, | |
| "Med Resp": 854.5 | |
| }, | |
| "Content Generation": { | |
| "Min": 1055, | |
| "Max": 139421, | |
| "Med": 3898.5, | |
| "Med Resp": 1028.0 | |
| }, | |
| "Editing": { | |
| "Min": 747, | |
| "Max": 134253, | |
| "Med": 3199.0, | |
| "Med Resp": 606.5 | |
| }, | |
| "Data Analysis": { | |
| "Min": 618, | |
| "Max": 120325, | |
| "Med": 3402.0, | |
| "Med Resp": 509.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 1042, | |
| "Max": 160440, | |
| "Med": 6428.5, | |
| "Med Resp": 925.5 | |
| }, | |
| "Hallucination": { | |
| "Min": 760, | |
| "Max": 137639, | |
| "Med": 4215.0, | |
| "Med Resp": 1061.5 | |
| }, | |
| "Safety": { | |
| "Min": 787, | |
| "Max": 116591, | |
| "Med": 3686.0, | |
| "Med Resp": 867.0 | |
| }, | |
| "Repetition": { | |
| "Min": 1238, | |
| "Max": 134651, | |
| "Med": 8164.0, | |
| "Med Resp": 517.5 | |
| }, | |
| "Summarization": { | |
| "Min": 584, | |
| "Max": 59519, | |
| "Med": 2540.0, | |
| "Med Resp": 656.5 | |
| }, | |
| "Translation": { | |
| "Min": 899, | |
| "Max": 131258, | |
| "Med": 4796.0, | |
| "Med Resp": 894.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 1560, | |
| "Max": 247274, | |
| "Med": 12632.5, | |
| "Med Resp": 2593.0 | |
| } | |
| }, | |
| "Kanana 2 30B A3B Instruct": { | |
| "Overall": { | |
| "Min": 51, | |
| "Max": 177683, | |
| "Med": 1195.0, | |
| "Med Resp": 1195.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 58, | |
| "Max": 12603, | |
| "Med": 1448.0, | |
| "Med Resp": 1448.0 | |
| }, | |
| "Editing": { | |
| "Min": 69, | |
| "Max": 51628, | |
| "Med": 836.5, | |
| "Med Resp": 836.5 | |
| }, | |
| "Data Analysis": { | |
| "Min": 51, | |
| "Max": 11567, | |
| "Med": 916.0, | |
| "Med Resp": 916.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 51, | |
| "Max": 122001, | |
| "Med": 1587.0, | |
| "Med Resp": 1587.0 | |
| }, | |
| "Hallucination": { | |
| "Min": 104, | |
| "Max": 17989, | |
| "Med": 1419.0, | |
| "Med Resp": 1419.0 | |
| }, | |
| "Safety": { | |
| "Min": 96, | |
| "Max": 7485, | |
| "Med": 1377.0, | |
| "Med Resp": 1377.0 | |
| }, | |
| "Repetition": { | |
| "Min": 255, | |
| "Max": 177683, | |
| "Med": 844.0, | |
| "Med Resp": 844.0 | |
| }, | |
| "Summarization": { | |
| "Min": 108, | |
| "Max": 4592, | |
| "Med": 778.0, | |
| "Med Resp": 778.0 | |
| }, | |
| "Translation": { | |
| "Min": 69, | |
| "Max": 30611, | |
| "Med": 1059.0, | |
| "Med Resp": 1059.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 119, | |
| "Max": 74203, | |
| "Med": 3252.5, | |
| "Med Resp": 3252.5 | |
| } | |
| }, | |
| "Claude 4 Opus (20250514) (think)": { | |
| "Overall": { | |
| "Min": -10, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Content Generation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Editing": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Reasoning": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Hallucination": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Safety": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Repetition": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Summarization": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Translation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": -10, | |
| "Max": -4, | |
| "Med": -6.0, | |
| "Med Resp": -3.0 | |
| } | |
| }, | |
| "Gemini 2.5 Pro": { | |
| "Overall": { | |
| "Min": -10, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Content Generation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Editing": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Reasoning": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Hallucination": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Safety": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Repetition": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Summarization": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Translation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": -10, | |
| "Max": -4, | |
| "Med": -6.0, | |
| "Med Resp": -3.0 | |
| } | |
| }, | |
| "Tongyi DeepResearch 30B A3B": { | |
| "Overall": { | |
| "Min": 153, | |
| "Max": 68912, | |
| "Med": 1147.0, | |
| "Med Resp": 408.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 216, | |
| "Max": 65477, | |
| "Med": 1086.5, | |
| "Med Resp": 510.5 | |
| }, | |
| "Editing": { | |
| "Min": 251, | |
| "Max": 65470, | |
| "Med": 985.5, | |
| "Med Resp": 313.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 242, | |
| "Max": 65499, | |
| "Med": 998.0, | |
| "Med Resp": 239.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 333, | |
| "Max": 65477, | |
| "Med": 2043.5, | |
| "Med Resp": 388.5 | |
| }, | |
| "Hallucination": { | |
| "Min": 194, | |
| "Max": 65501, | |
| "Med": 1344.5, | |
| "Med Resp": 593.0 | |
| }, | |
| "Safety": { | |
| "Min": 153, | |
| "Max": 65472, | |
| "Med": 992.0, | |
| "Med Resp": 392.0 | |
| }, | |
| "Repetition": { | |
| "Min": 425, | |
| "Max": 65513, | |
| "Med": 1986.5, | |
| "Med Resp": 472.5 | |
| }, | |
| "Summarization": { | |
| "Min": 290, | |
| "Max": 2410, | |
| "Med": 662.5, | |
| "Med Resp": 262.0 | |
| }, | |
| "Translation": { | |
| "Min": 360, | |
| "Max": 65406, | |
| "Med": 1107.0, | |
| "Med Resp": 317.5 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 240, | |
| "Max": 68912, | |
| "Med": 3134.5, | |
| "Med Resp": 1349.5 | |
| } | |
| }, | |
| "GPT-5 mini (Reasoning: medium)": { | |
| "Overall": { | |
| "Min": -10, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Content Generation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Editing": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Reasoning": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Hallucination": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Safety": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Repetition": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Summarization": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Translation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": -10, | |
| "Max": -4, | |
| "Med": -6.0, | |
| "Med Resp": -3.0 | |
| } | |
| }, | |
| "Gemma 3 27B it": { | |
| "Overall": { | |
| "Min": 1, | |
| "Max": 65458, | |
| "Med": 380.0, | |
| "Med Resp": 380.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 7, | |
| "Max": 3893, | |
| "Med": 484.0, | |
| "Med Resp": 484.0 | |
| }, | |
| "Editing": { | |
| "Min": 6, | |
| "Max": 1776, | |
| "Med": 254.0, | |
| "Med Resp": 254.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 1, | |
| "Max": 63850, | |
| "Med": 180.0, | |
| "Med Resp": 180.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 2, | |
| "Max": 1926, | |
| "Med": 485.5, | |
| "Med Resp": 485.5 | |
| }, | |
| "Hallucination": { | |
| "Min": 13, | |
| "Max": 2494, | |
| "Med": 534.0, | |
| "Med Resp": 534.0 | |
| }, | |
| "Safety": { | |
| "Min": 31, | |
| "Max": 2440, | |
| "Med": 518.0, | |
| "Med Resp": 518.0 | |
| }, | |
| "Repetition": { | |
| "Min": 95, | |
| "Max": 65433, | |
| "Med": 299.0, | |
| "Med Resp": 299.0 | |
| }, | |
| "Summarization": { | |
| "Min": 30, | |
| "Max": 1080, | |
| "Med": 202.5, | |
| "Med Resp": 202.5 | |
| }, | |
| "Translation": { | |
| "Min": 46, | |
| "Max": 62659, | |
| "Med": 374.0, | |
| "Med Resp": 374.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 4, | |
| "Max": 65458, | |
| "Med": 1558.0, | |
| "Med Resp": 1558.0 | |
| } | |
| }, | |
| "GLM-4.7 FP8": { | |
| "Overall": { | |
| "Min": 212, | |
| "Max": 131072, | |
| "Med": 2252.5, | |
| "Med Resp": 328.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 383, | |
| "Max": 18712, | |
| "Med": 2094.0, | |
| "Med Resp": 423.0 | |
| }, | |
| "Editing": { | |
| "Min": 384, | |
| "Max": 14538, | |
| "Med": 2070.5, | |
| "Med Resp": 263.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 396, | |
| "Max": 13525, | |
| "Med": 1477.0, | |
| "Med Resp": 162.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 254, | |
| "Max": 40295, | |
| "Med": 2298.5, | |
| "Med Resp": 465.5 | |
| }, | |
| "Hallucination": { | |
| "Min": 443, | |
| "Max": 19838, | |
| "Med": 2156.5, | |
| "Med Resp": 481.0 | |
| }, | |
| "Safety": { | |
| "Min": 212, | |
| "Max": 10792, | |
| "Med": 2121.0, | |
| "Med Resp": 197.0 | |
| }, | |
| "Repetition": { | |
| "Min": 768, | |
| "Max": 131072, | |
| "Med": 2963.5, | |
| "Med Resp": 289.0 | |
| }, | |
| "Summarization": { | |
| "Min": 599, | |
| "Max": 10452, | |
| "Med": 1426.0, | |
| "Med Resp": 182.5 | |
| }, | |
| "Translation": { | |
| "Min": 796, | |
| "Max": 12247, | |
| "Med": 3159.5, | |
| "Med Resp": 312.5 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 420, | |
| "Max": 15706, | |
| "Med": 5514.5, | |
| "Med Resp": 1361.0 | |
| } | |
| }, | |
| "GPT-5 nano (Reasoning: medium)": { | |
| "Overall": { | |
| "Min": -10, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Content Generation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Editing": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Reasoning": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Hallucination": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Safety": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Repetition": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Summarization": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Translation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": -10, | |
| "Max": -4, | |
| "Med": -6.0, | |
| "Med Resp": -3.0 | |
| } | |
| }, | |
| "GPT-5.1 (Reasoning: medium, verbosity: medium)": { | |
| "Overall": { | |
| "Min": -10, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Content Generation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Editing": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Reasoning": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Hallucination": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Safety": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Repetition": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Summarization": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Translation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": -10, | |
| "Max": -4, | |
| "Med": -6.0, | |
| "Med Resp": -3.0 | |
| } | |
| }, | |
| "KAT Dev 72B Exp": { | |
| "Overall": { | |
| "Min": 6, | |
| "Max": 65602, | |
| "Med": 397.0, | |
| "Med Resp": 397.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 26, | |
| "Max": 65466, | |
| "Med": 554.5, | |
| "Med Resp": 554.5 | |
| }, | |
| "Editing": { | |
| "Min": 13, | |
| "Max": 65363, | |
| "Med": 223.0, | |
| "Med Resp": 223.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 21, | |
| "Max": 15350, | |
| "Med": 289.0, | |
| "Med Resp": 289.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 10, | |
| "Max": 65442, | |
| "Med": 487.5, | |
| "Med Resp": 487.5 | |
| }, | |
| "Hallucination": { | |
| "Min": 24, | |
| "Max": 65455, | |
| "Med": 402.0, | |
| "Med Resp": 402.0 | |
| }, | |
| "Safety": { | |
| "Min": 17, | |
| "Max": 65474, | |
| "Med": 345.0, | |
| "Med Resp": 345.0 | |
| }, | |
| "Repetition": { | |
| "Min": 96, | |
| "Max": 65602, | |
| "Med": 405.0, | |
| "Med Resp": 405.0 | |
| }, | |
| "Summarization": { | |
| "Min": 39, | |
| "Max": 65376, | |
| "Med": 292.0, | |
| "Med Resp": 292.0 | |
| }, | |
| "Translation": { | |
| "Min": 10, | |
| "Max": 65331, | |
| "Med": 339.0, | |
| "Med Resp": 339.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 6, | |
| "Max": 65466, | |
| "Med": 1083.5, | |
| "Med Resp": 1083.5 | |
| } | |
| }, | |
| "gpt-oss-20B (Reasoning: medium)": { | |
| "Overall": { | |
| "Min": 32, | |
| "Max": 18763, | |
| "Med": 953.5, | |
| "Med Resp": 326.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 126, | |
| "Max": 6343, | |
| "Med": 983.5, | |
| "Med Resp": 486.5 | |
| }, | |
| "Editing": { | |
| "Min": 107, | |
| "Max": 7213, | |
| "Med": 667.0, | |
| "Med Resp": 195.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 94, | |
| "Max": 14599, | |
| "Med": 750.0, | |
| "Med Resp": 192.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 109, | |
| "Max": 18763, | |
| "Med": 1290.5, | |
| "Med Resp": 475.5 | |
| }, | |
| "Hallucination": { | |
| "Min": 132, | |
| "Max": 7937, | |
| "Med": 1493.5, | |
| "Med Resp": 620.5 | |
| }, | |
| "Safety": { | |
| "Min": 32, | |
| "Max": 6678, | |
| "Med": 268.0, | |
| "Med Resp": 12.0 | |
| }, | |
| "Repetition": { | |
| "Min": 258, | |
| "Max": 17217, | |
| "Med": 1847.0, | |
| "Med Resp": 332.5 | |
| }, | |
| "Summarization": { | |
| "Min": 99, | |
| "Max": 4060, | |
| "Med": 438.5, | |
| "Med Resp": 219.0 | |
| }, | |
| "Translation": { | |
| "Min": 133, | |
| "Max": 10446, | |
| "Med": 1028.5, | |
| "Med Resp": 290.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 102, | |
| "Max": 14863, | |
| "Med": 2483.0, | |
| "Med Resp": 1514.0 | |
| } | |
| }, | |
| "o3-pro (Reasoning: medium)": { | |
| "Overall": { | |
| "Min": -10, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Content Generation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Editing": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Reasoning": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Hallucination": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Safety": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Repetition": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Summarization": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Translation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": -10, | |
| "Max": -4, | |
| "Med": -6.0, | |
| "Med Resp": -3.0 | |
| } | |
| }, | |
| "Apriel 1.5 15B Thinker": { | |
| "Overall": { | |
| "Min": 118, | |
| "Max": 74664, | |
| "Med": 2238.0, | |
| "Med Resp": 375.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 216, | |
| "Max": 65428, | |
| "Med": 2397.0, | |
| "Med Resp": 565.5 | |
| }, | |
| "Editing": { | |
| "Min": 318, | |
| "Max": 65412, | |
| "Med": 1577.0, | |
| "Med Resp": 247.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 213, | |
| "Max": 65413, | |
| "Med": 1562.0, | |
| "Med Resp": 225.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 307, | |
| "Max": 65372, | |
| "Med": 2393.5, | |
| "Med Resp": 548.0 | |
| }, | |
| "Hallucination": { | |
| "Min": 219, | |
| "Max": 65421, | |
| "Med": 2986.5, | |
| "Med Resp": 1016.5 | |
| }, | |
| "Safety": { | |
| "Min": 118, | |
| "Max": 65407, | |
| "Med": 380.0, | |
| "Med Resp": 12.0 | |
| }, | |
| "Repetition": { | |
| "Min": 181, | |
| "Max": 65431, | |
| "Med": 65362.5, | |
| "Med Resp": 65328.5 | |
| }, | |
| "Summarization": { | |
| "Min": 252, | |
| "Max": 65065, | |
| "Med": 981.5, | |
| "Med Resp": 238.5 | |
| }, | |
| "Translation": { | |
| "Min": 297, | |
| "Max": 65781, | |
| "Med": 3477.0, | |
| "Med Resp": 342.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 378, | |
| "Max": 74664, | |
| "Med": 6692.0, | |
| "Med Resp": 2079.0 | |
| } | |
| }, | |
| "Gemini 2.5 Flash-lite Preview (09-2025)": { | |
| "Overall": { | |
| "Min": -10, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Content Generation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Editing": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Reasoning": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Hallucination": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Safety": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Repetition": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Summarization": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Translation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": -10, | |
| "Max": -4, | |
| "Med": -6.0, | |
| "Med Resp": -3.0 | |
| } | |
| }, | |
| "Mistral Small 3.2 24B Instruct 2506": { | |
| "Overall": { | |
| "Min": 1, | |
| "Max": 65516, | |
| "Med": 369.0, | |
| "Med Resp": 369.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 7, | |
| "Max": 2684, | |
| "Med": 389.5, | |
| "Med Resp": 389.5 | |
| }, | |
| "Editing": { | |
| "Min": 9, | |
| "Max": 1172, | |
| "Med": 269.0, | |
| "Med Resp": 269.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 1, | |
| "Max": 3973, | |
| "Med": 295.0, | |
| "Med Resp": 295.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 1, | |
| "Max": 65462, | |
| "Med": 484.5, | |
| "Med Resp": 484.5 | |
| }, | |
| "Hallucination": { | |
| "Min": 61, | |
| "Max": 5920, | |
| "Med": 489.0, | |
| "Med Resp": 489.0 | |
| }, | |
| "Safety": { | |
| "Min": 10, | |
| "Max": 65465, | |
| "Med": 320.0, | |
| "Med Resp": 320.0 | |
| }, | |
| "Repetition": { | |
| "Min": 103, | |
| "Max": 65516, | |
| "Med": 376.5, | |
| "Med Resp": 376.5 | |
| }, | |
| "Summarization": { | |
| "Min": 28, | |
| "Max": 1266, | |
| "Med": 234.5, | |
| "Med Resp": 234.5 | |
| }, | |
| "Translation": { | |
| "Min": 9, | |
| "Max": 3248, | |
| "Med": 327.0, | |
| "Med Resp": 327.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 4, | |
| "Max": 65494, | |
| "Med": 1279.0, | |
| "Med Resp": 1279.0 | |
| } | |
| }, | |
| "Mistral Large 3 675B Instruct 2512": { | |
| "Overall": { | |
| "Min": 1, | |
| "Max": 12120, | |
| "Med": 448.0, | |
| "Med Resp": 448.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 13, | |
| "Max": 6162, | |
| "Med": 565.0, | |
| "Med Resp": 565.0 | |
| }, | |
| "Editing": { | |
| "Min": 12, | |
| "Max": 2369, | |
| "Med": 299.0, | |
| "Med Resp": 299.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 1, | |
| "Max": 3902, | |
| "Med": 295.0, | |
| "Med Resp": 295.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 1, | |
| "Max": 6293, | |
| "Med": 530.0, | |
| "Med Resp": 530.0 | |
| }, | |
| "Hallucination": { | |
| "Min": 54, | |
| "Max": 4461, | |
| "Med": 896.0, | |
| "Med Resp": 896.0 | |
| }, | |
| "Safety": { | |
| "Min": 27, | |
| "Max": 4250, | |
| "Med": 589.0, | |
| "Med Resp": 589.0 | |
| }, | |
| "Repetition": { | |
| "Min": 89, | |
| "Max": 5264, | |
| "Med": 448.0, | |
| "Med Resp": 448.0 | |
| }, | |
| "Summarization": { | |
| "Min": 31, | |
| "Max": 1357, | |
| "Med": 251.5, | |
| "Med Resp": 251.5 | |
| }, | |
| "Translation": { | |
| "Min": 22, | |
| "Max": 3529, | |
| "Med": 354.5, | |
| "Med Resp": 354.5 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 4, | |
| "Max": 12120, | |
| "Med": 2191.5, | |
| "Med Resp": 2191.5 | |
| } | |
| }, | |
| "Mi:dm 2.0 Base Instruct": { | |
| "Overall": { | |
| "Min": 1, | |
| "Max": 32764, | |
| "Med": 316.0, | |
| "Med Resp": 316.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 7, | |
| "Max": 3515, | |
| "Med": 400.0, | |
| "Med Resp": 400.0 | |
| }, | |
| "Editing": { | |
| "Min": 10, | |
| "Max": 1998, | |
| "Med": 191.0, | |
| "Med Resp": 191.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 1, | |
| "Max": 3302, | |
| "Med": 260.0, | |
| "Med Resp": 260.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 1, | |
| "Max": 32071, | |
| "Med": 398.0, | |
| "Med Resp": 398.0 | |
| }, | |
| "Hallucination": { | |
| "Min": 13, | |
| "Max": 3061, | |
| "Med": 191.5, | |
| "Med Resp": 191.5 | |
| }, | |
| "Safety": { | |
| "Min": 10, | |
| "Max": 1110, | |
| "Med": 159.0, | |
| "Med Resp": 159.0 | |
| }, | |
| "Repetition": { | |
| "Min": 50, | |
| "Max": 2734, | |
| "Med": 316.5, | |
| "Med Resp": 316.5 | |
| }, | |
| "Summarization": { | |
| "Min": 35, | |
| "Max": 2967, | |
| "Med": 261.0, | |
| "Med Resp": 261.0 | |
| }, | |
| "Translation": { | |
| "Min": 7, | |
| "Max": 4703, | |
| "Med": 289.5, | |
| "Med Resp": 289.5 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 3, | |
| "Max": 32764, | |
| "Med": 957.0, | |
| "Med Resp": 957.0 | |
| } | |
| }, | |
| "Qwen3 235B A22B Thinking 2507": { | |
| "Overall": { | |
| "Min": 8, | |
| "Max": 19533, | |
| "Med": 2404.5, | |
| "Med Resp": 423.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 402, | |
| "Max": 13776, | |
| "Med": 2337.0, | |
| "Med Resp": 577.5 | |
| }, | |
| "Editing": { | |
| "Min": 482, | |
| "Max": 13235, | |
| "Med": 1894.5, | |
| "Med Resp": 274.5 | |
| }, | |
| "Data Analysis": { | |
| "Min": 8, | |
| "Max": 13217, | |
| "Med": 1427.0, | |
| "Med Resp": 303.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 8, | |
| "Max": 19533, | |
| "Med": 2340.0, | |
| "Med Resp": 568.5 | |
| }, | |
| "Hallucination": { | |
| "Min": 305, | |
| "Max": 6670, | |
| "Med": 2005.0, | |
| "Med Resp": 848.0 | |
| }, | |
| "Safety": { | |
| "Min": 304, | |
| "Max": 8302, | |
| "Med": 1708.0, | |
| "Med Resp": 619.0 | |
| }, | |
| "Repetition": { | |
| "Min": 8, | |
| "Max": 11012, | |
| "Med": 3533.0, | |
| "Med Resp": 514.5 | |
| }, | |
| "Summarization": { | |
| "Min": 373, | |
| "Max": 11701, | |
| "Med": 1468.5, | |
| "Med Resp": 233.5 | |
| }, | |
| "Translation": { | |
| "Min": 381, | |
| "Max": 12124, | |
| "Med": 3332.5, | |
| "Med Resp": 284.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 721, | |
| "Max": 19299, | |
| "Med": 5745.0, | |
| "Med Resp": 1736.5 | |
| } | |
| }, | |
| "HyperCLOVAX SEED Think 14B (think)": { | |
| "Overall": { | |
| "Min": 223, | |
| "Max": 131436, | |
| "Med": 1444.0, | |
| "Med Resp": 382.5 | |
| }, | |
| "Content Generation": { | |
| "Min": 279, | |
| "Max": 72029, | |
| "Med": 1222.0, | |
| "Med Resp": 476.5 | |
| }, | |
| "Editing": { | |
| "Min": 304, | |
| "Max": 65536, | |
| "Med": 1228.5, | |
| "Med Resp": 351.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 240, | |
| "Max": 65536, | |
| "Med": 1352.0, | |
| "Med Resp": 234.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 414, | |
| "Max": 65536, | |
| "Med": 3010.0, | |
| "Med Resp": 315.0 | |
| }, | |
| "Hallucination": { | |
| "Min": 263, | |
| "Max": 65536, | |
| "Med": 1310.5, | |
| "Med Resp": 444.0 | |
| }, | |
| "Safety": { | |
| "Min": 241, | |
| "Max": 65536, | |
| "Med": 1100.0, | |
| "Med Resp": 412.0 | |
| }, | |
| "Repetition": { | |
| "Min": 389, | |
| "Max": 65536, | |
| "Med": 2233.0, | |
| "Med Resp": 355.0 | |
| }, | |
| "Summarization": { | |
| "Min": 223, | |
| "Max": 5987, | |
| "Med": 833.5, | |
| "Med Resp": 285.0 | |
| }, | |
| "Translation": { | |
| "Min": 457, | |
| "Max": 65536, | |
| "Med": 1611.0, | |
| "Med Resp": 352.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 648, | |
| "Max": 131436, | |
| "Med": 3234.5, | |
| "Med Resp": 1324.5 | |
| } | |
| }, | |
| "o3": { | |
| "Overall": { | |
| "Min": -10, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Content Generation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Editing": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Reasoning": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Hallucination": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Safety": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Repetition": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Summarization": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Translation": { | |
| "Min": -2, | |
| "Max": -2, | |
| "Med": -2.0, | |
| "Med Resp": -1.0 | |
| }, | |
| "Multi-Turn": { | |
| "Min": -10, | |
| "Max": -4, | |
| "Med": -6.0, | |
| "Med Resp": -3.0 | |
| } | |
| }, | |
| "Qwen3 30B A3B Instruct 2507": { | |
| "Overall": { | |
| "Min": 1, | |
| "Max": 65516, | |
| "Med": 441.5, | |
| "Med Resp": 441.5 | |
| }, | |
| "Content Generation": { | |
| "Min": 7, | |
| "Max": 5659, | |
| "Med": 510.5, | |
| "Med Resp": 510.5 | |
| }, | |
| "Editing": { | |
| "Min": 7, | |
| "Max": 2231, | |
| "Med": 255.0, | |
| "Med Resp": 255.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 1, | |
| "Max": 8094, | |
| "Med": 381.0, | |
| "Med Resp": 381.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 1, | |
| "Max": 9376, | |
| "Med": 753.5, | |
| "Med Resp": 753.5 | |
| }, | |
| "Hallucination": { | |
| "Min": 19, | |
| "Max": 65495, | |
| "Med": 689.5, | |
| "Med Resp": 689.5 | |
| }, | |
| "Safety": { | |
| "Min": 16, | |
| "Max": 65456, | |
| "Med": 445.0, | |
| "Med Resp": 445.0 | |
| }, | |
| "Repetition": { | |
| "Min": 81, | |
| "Max": 65516, | |
| "Med": 533.5, | |
| "Med Resp": 533.5 | |
| }, | |
| "Summarization": { | |
| "Min": 34, | |
| "Max": 1870, | |
| "Med": 251.0, | |
| "Med Resp": 251.0 | |
| }, | |
| "Translation": { | |
| "Min": 8, | |
| "Max": 3257, | |
| "Med": 292.5, | |
| "Med Resp": 292.5 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 3, | |
| "Max": 6825, | |
| "Med": 1809.5, | |
| "Med Resp": 1809.5 | |
| } | |
| }, | |
| "Kimi K2 Thinking": { | |
| "Overall": { | |
| "Min": 115, | |
| "Max": 65500, | |
| "Med": 1692.0, | |
| "Med Resp": 330.0 | |
| }, | |
| "Content Generation": { | |
| "Min": 115, | |
| "Max": 29508, | |
| "Med": 1696.0, | |
| "Med Resp": 478.0 | |
| }, | |
| "Editing": { | |
| "Min": 302, | |
| "Max": 11808, | |
| "Med": 1347.5, | |
| "Med Resp": 219.0 | |
| }, | |
| "Data Analysis": { | |
| "Min": 186, | |
| "Max": 65462, | |
| "Med": 978.0, | |
| "Med Resp": 156.0 | |
| }, | |
| "Reasoning": { | |
| "Min": 291, | |
| "Max": 55791, | |
| "Med": 1842.0, | |
| "Med Resp": 377.0 | |
| }, | |
| "Hallucination": { | |
| "Min": 194, | |
| "Max": 5063, | |
| "Med": 1140.5, | |
| "Med Resp": 382.5 | |
| }, | |
| "Safety": { | |
| "Min": 171, | |
| "Max": 5707, | |
| "Med": 1013.0, | |
| "Med Resp": 395.0 | |
| }, | |
| "Repetition": { | |
| "Min": 236, | |
| "Max": 65500, | |
| "Med": 1890.0, | |
| "Med Resp": 264.0 | |
| }, | |
| "Summarization": { | |
| "Min": 276, | |
| "Max": 13220, | |
| "Med": 996.0, | |
| "Med Resp": 196.5 | |
| }, | |
| "Translation": { | |
| "Min": 433, | |
| "Max": 13703, | |
| "Med": 2637.0, | |
| "Med Resp": 310.5 | |
| }, | |
| "Multi-Turn": { | |
| "Min": 333, | |
| "Max": 17384, | |
| "Med": 3771.5, | |
| "Med Resp": 1102.0 | |
| } | |
| } | |
| } |