{ "Olmo 3 32B Think": { "Overall": { "Min": 210, "Max": 65454, "Med": 3360.5, "Med Resp": 473.0 }, "Content Generation": { "Min": 683, "Max": 65300, "Med": 3224.0, "Med Resp": 606.0 }, "Editing": { "Min": 580, "Max": 14539, "Med": 2859.0, "Med Resp": 419.5 }, "Data Analysis": { "Min": 369, "Max": 23205, "Med": 2624.0, "Med Resp": 332.0 }, "Reasoning": { "Min": 779, "Max": 27491, "Med": 3911.5, "Med Resp": 467.5 }, "Hallucination": { "Min": 254, "Max": 10751, "Med": 2472.0, "Med Resp": 768.0 }, "Safety": { "Min": 210, "Max": 7162, "Med": 2025.0, "Med Resp": 646.0 }, "Repetition": { "Min": 1336, "Max": 65454, "Med": 5205.5, "Med Resp": 575.5 }, "Summarization": { "Min": 380, "Max": 17517, "Med": 2254.0, "Med Resp": 248.0 }, "Translation": { "Min": 846, "Max": 15667, "Med": 4546.5, "Med Resp": 349.5 }, "Multi-Turn": { "Min": 1010, "Max": 24077, "Med": 6999.5, "Med Resp": 1764.5 } }, "Claude 4.1 Opus (20250805) (think)": { "Overall": { "Min": -10, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Content Generation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Editing": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Data Analysis": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Reasoning": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Hallucination": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Safety": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Repetition": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Summarization": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Translation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Multi-Turn": { "Min": -10, "Max": -4, "Med": -6.0, "Med Resp": -3.0 } }, "EXAONE 4.0 32B (think)": { "Overall": { "Min": 37, "Max": 142387, "Med": 1274.5, "Med Resp": 503.0 }, "Content Generation": { "Min": 160, "Max": 131068, "Med": 1178.5, "Med Resp": 559.0 }, "Editing": { "Min": 37, "Max": 10786, "Med": 1041.0, "Med Resp": 423.5 }, "Data Analysis": { "Min": 229, "Max": 131072, "Med": 1412.0, "Med Resp": 345.0 }, "Reasoning": { "Min": 567, "Max": 131076, "Med": 3961.5, "Med Resp": 585.5 }, "Hallucination": { "Min": 298, "Max": 65533, "Med": 1247.5, "Med Resp": 627.5 }, "Safety": { "Min": 227, "Max": 5093, "Med": 1145.0, "Med Resp": 589.0 }, "Repetition": { "Min": 441, "Max": 131072, "Med": 1744.5, "Med Resp": 579.5 }, "Summarization": { "Min": 149, "Max": 8423, "Med": 693.5, "Med Resp": 311.0 }, "Translation": { "Min": 227, "Max": 14234, "Med": 915.0, "Med Resp": 411.5 }, "Multi-Turn": { "Min": 390, "Max": 142387, "Med": 3222.0, "Med Resp": 1488.0 } }, "DeepSeek V3.1 (think)": { "Overall": { "Min": 80, "Max": 31147, "Med": 710.5, "Med Resp": 356.0 }, "Content Generation": { "Min": 132, "Max": 5354, "Med": 776.5, "Med Resp": 500.0 }, "Editing": { "Min": 119, "Max": 2063, "Med": 571.0, "Med Resp": 287.0 }, "Data Analysis": { "Min": 119, "Max": 13106, "Med": 644.0, "Med Resp": 218.0 }, "Reasoning": { "Min": 259, "Max": 31147, "Med": 1340.5, "Med Resp": 338.0 }, "Hallucination": { "Min": 206, "Max": 10356, "Med": 1132.5, "Med Resp": 667.0 }, "Safety": { "Min": 80, "Max": 3412, "Med": 565.0, "Med Resp": 206.0 }, "Repetition": { "Min": 290, "Max": 6553, "Med": 826.5, "Med Resp": 450.0 }, "Summarization": { "Min": 148, "Max": 1533, "Med": 432.0, "Med Resp": 211.5 }, "Translation": { "Min": 147, "Max": 7448, "Med": 554.5, "Med Resp": 320.0 }, "Multi-Turn": { "Min": 324, "Max": 7862, "Med": 2558.5, "Med Resp": 1545.0 } }, "Qwen3 30B A3B Thinking 2507": { "Overall": { "Min": 305, "Max": 32743, "Med": 2830.0, "Med Resp": 351.0 }, "Content Generation": { "Min": 335, "Max": 10914, "Med": 2775.5, "Med Resp": 403.5 }, "Editing": { "Min": 371, "Max": 7617, "Med": 2358.5, "Med Resp": 220.0 }, "Data Analysis": { "Min": 305, "Max": 19749, "Med": 1702.0, "Med Resp": 227.0 }, "Reasoning": { "Min": 485, "Max": 19485, "Med": 2504.0, "Med Resp": 505.0 }, "Hallucination": { "Min": 360, "Max": 6054, "Med": 2123.5, "Med Resp": 668.0 }, "Safety": { "Min": 306, "Max": 32688, "Med": 1667.0, "Med Resp": 447.0 }, "Repetition": { "Min": 1070, "Max": 32743, "Med": 3719.0, "Med Resp": 368.5 }, "Summarization": { "Min": 435, "Max": 14462, "Med": 2108.0, "Med Resp": 204.0 }, "Translation": { "Min": 513, "Max": 11340, "Med": 3869.5, "Med Resp": 276.0 }, "Multi-Turn": { "Min": 536, "Max": 14557, "Med": 5822.5, "Med Resp": 1237.0 } }, "o4-mini": { "Overall": { "Min": -10, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Content Generation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Editing": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Data Analysis": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Reasoning": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Hallucination": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Safety": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Repetition": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Summarization": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Translation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Multi-Turn": { "Min": -10, "Max": -4, "Med": -6.0, "Med Resp": -3.0 } }, "Gemini 2.5 Flash": { "Overall": { "Min": -10, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Content Generation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Editing": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Data Analysis": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Reasoning": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Hallucination": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Safety": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Repetition": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Summarization": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Translation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Multi-Turn": { "Min": -10, "Max": -4, "Med": -6.0, "Med Resp": -3.0 } }, "Claude 4 Sonnet (20250514) (think)": { "Overall": { "Min": -10, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Content Generation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Editing": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Data Analysis": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Reasoning": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Hallucination": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Safety": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Repetition": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Summarization": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Translation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Multi-Turn": { "Min": -10, "Max": -4, "Med": -6.0, "Med Resp": -3.0 } }, "Gemini 3 Pro Preview (Thinking Level: High)": { "Overall": { "Min": 0, "Max": 18460, "Med": 1930.5, "Med Resp": 378.0 }, "Content Generation": { "Min": 0, "Max": 12404, "Med": 1967.5, "Med Resp": 570.5 }, "Editing": { "Min": 433, "Max": 7372, "Med": 1684.0, "Med Resp": 322.0 }, "Data Analysis": { "Min": 386, "Max": 10791, "Med": 1357.0, "Med Resp": 172.0 }, "Reasoning": { "Min": 0, "Max": 18460, "Med": 1821.0, "Med Resp": 502.5 }, "Hallucination": { "Min": 524, "Max": 6228, "Med": 1833.0, "Med Resp": 468.0 }, "Safety": { "Min": 832, "Max": 6324, "Med": 1802.0, "Med Resp": 291.0 }, "Repetition": { "Min": 516, "Max": 5086, "Med": 1910.0, "Med Resp": 314.0 }, "Summarization": { "Min": 663, "Max": 3857, "Med": 1295.5, "Med Resp": 184.5 }, "Translation": { "Min": 964, "Max": 9535, "Med": 2286.5, "Med Resp": 357.0 }, "Multi-Turn": { "Min": 608, "Max": 10590, "Med": 4601.5, "Med Resp": 1377.0 } }, "Solar Pro Preview (top_p:0.95, temp: 0.7)": { "Overall": { "Min": 1, "Max": 4060, "Med": 260.0, "Med Resp": 260.0 }, "Content Generation": { "Min": 15, "Max": 3643, "Med": 426.0, "Med Resp": 426.0 }, "Editing": { "Min": 14, "Max": 3948, "Med": 218.0, "Med Resp": 218.0 }, "Data Analysis": { "Min": 2, "Max": 3500, "Med": 89.0, "Med Resp": 89.0 }, "Reasoning": { "Min": 1, "Max": 3338, "Med": 190.5, "Med Resp": 190.5 }, "Hallucination": { "Min": 20, "Max": 1093, "Med": 128.5, "Med Resp": 128.5 }, "Safety": { "Min": 11, "Max": 1507, "Med": 92.0, "Med Resp": 92.0 }, "Repetition": { "Min": 34, "Max": 4060, "Med": 214.0, "Med Resp": 214.0 }, "Summarization": { "Min": 43, "Max": 2478, "Med": 218.0, "Med Resp": 218.0 }, "Translation": { "Min": 20, "Max": 1711, "Med": 360.0, "Med Resp": 360.0 }, "Multi-Turn": { "Min": 5, "Max": 3353, "Med": 530.0, "Med Resp": 530.0 } }, "DeepSeek R1 (0528) (top_p: 0.95, temp:0.6)": { "Overall": { "Min": 4, "Max": 16917, "Med": 1177.5, "Med Resp": 554.0 }, "Content Generation": { "Min": 389, "Max": 7861, "Med": 1261.5, "Med Resp": 694.0 }, "Editing": { "Min": 4, "Max": 7611, "Med": 1054.5, "Med Resp": 517.5 }, "Data Analysis": { "Min": 4, "Max": 8191, "Med": 1112.0, "Med Resp": 355.0 }, "Reasoning": { "Min": 4, "Max": 12257, "Med": 1913.0, "Med Resp": 455.5 }, "Hallucination": { "Min": 4, "Max": 7390, "Med": 1214.5, "Med Resp": 682.0 }, "Safety": { "Min": 227, "Max": 6387, "Med": 963.0, "Med Resp": 568.0 }, "Repetition": { "Min": 4, "Max": 7787, "Med": 1405.5, "Med Resp": 646.5 }, "Summarization": { "Min": 319, "Max": 2613, "Med": 711.5, "Med Resp": 321.0 }, "Translation": { "Min": 4, "Max": 7687, "Med": 1021.0, "Med Resp": 561.5 }, "Multi-Turn": { "Min": 448, "Max": 16917, "Med": 3418.5, "Med Resp": 1874.0 } }, "A.X 4.0": { "Overall": { "Min": 1, "Max": 65581, "Med": 412.5, "Med Resp": 412.5 }, "Content Generation": { "Min": 2, "Max": 65581, "Med": 543.0, "Med Resp": 543.0 }, "Editing": { "Min": 8, "Max": 1791, "Med": 250.0, "Med Resp": 250.0 }, "Data Analysis": { "Min": 1, "Max": 65537, "Med": 267.0, "Med Resp": 267.0 }, "Reasoning": { "Min": 2, "Max": 2046, "Med": 498.0, "Med Resp": 498.0 }, "Hallucination": { "Min": 12, "Max": 2639, "Med": 511.5, "Med Resp": 511.5 }, "Safety": { "Min": 4, "Max": 2942, "Med": 516.0, "Med Resp": 516.0 }, "Repetition": { "Min": 84, "Max": 65536, "Med": 341.5, "Med Resp": 341.5 }, "Summarization": { "Min": 26, "Max": 2369, "Med": 282.0, "Med Resp": 282.0 }, "Translation": { "Min": 7, "Max": 35068, "Med": 343.0, "Med Resp": 343.0 }, "Multi-Turn": { "Min": 3, "Max": 9420, "Med": 1455.0, "Med Resp": 1455.0 } }, "DeepSeek V3.1 Terminus (think)": { "Overall": { "Min": 123, "Max": 36717, "Med": 831.5, "Med Resp": 377.0 }, "Content Generation": { "Min": 152, "Max": 11674, "Med": 869.0, "Med Resp": 540.5 }, "Editing": { "Min": 153, "Max": 13465, "Med": 697.0, "Med Resp": 323.0 }, "Data Analysis": { "Min": 163, "Max": 18833, "Med": 731.0, "Med Resp": 208.0 }, "Reasoning": { "Min": 273, "Max": 36717, "Med": 1471.0, "Med Resp": 337.5 }, "Hallucination": { "Min": 183, "Max": 8837, "Med": 1170.0, "Med Resp": 743.0 }, "Safety": { "Min": 167, "Max": 2442, "Med": 721.0, "Med Resp": 294.0 }, "Repetition": { "Min": 308, "Max": 9694, "Med": 997.0, "Med Resp": 508.0 }, "Summarization": { "Min": 157, "Max": 1562, "Med": 460.0, "Med Resp": 215.0 }, "Translation": { "Min": 123, "Max": 9706, "Med": 617.5, "Med Resp": 347.0 }, "Multi-Turn": { "Min": 297, "Max": 16318, "Med": 2647.5, "Med Resp": 1623.0 } }, "Dhanishtha-2.0 Preview": { "Overall": { "Min": 93, "Max": 6076, "Med": 520.0, "Med Resp": 356.0 }, "Content Generation": { "Min": 102, "Max": 3978, "Med": 589.0, "Med Resp": 439.0 }, "Editing": { "Min": 116, "Max": 1716, "Med": 437.5, "Med Resp": 301.5 }, "Data Analysis": { "Min": 116, "Max": 4070, "Med": 470.0, "Med Resp": 288.0 }, "Reasoning": { "Min": 182, "Max": 2451, "Med": 625.0, "Med Resp": 366.0 }, "Hallucination": { "Min": 160, "Max": 4068, "Med": 494.0, "Med Resp": 318.5 }, "Safety": { "Min": 121, "Max": 1470, "Med": 381.0, "Med Resp": 236.0 }, "Repetition": { "Min": 240, "Max": 3982, "Med": 576.5, "Med Resp": 384.5 }, "Summarization": { "Min": 93, "Max": 2578, "Med": 385.0, "Med Resp": 289.0 }, "Translation": { "Min": 107, "Max": 3331, "Med": 426.0, "Med Resp": 331.5 }, "Multi-Turn": { "Min": 362, "Max": 6076, "Med": 1462.0, "Med Resp": 1095.0 } }, "GPT-5 (Reasoning: medium)": { "Overall": { "Min": -10, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Content Generation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Editing": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Data Analysis": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Reasoning": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Hallucination": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Safety": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Repetition": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Summarization": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Translation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Multi-Turn": { "Min": -10, "Max": -4, "Med": -6.0, "Med Resp": -3.0 } }, "Kanana 1.5 15.7B A3B Instruct": { "Overall": { "Min": 1, "Max": 34276, "Med": 414.0, "Med Resp": 414.0 }, "Content Generation": { "Min": 10, "Max": 22194, "Med": 463.5, "Med Resp": 463.5 }, "Editing": { "Min": 5, "Max": 1311, "Med": 249.5, "Med Resp": 249.5 }, "Data Analysis": { "Min": 1, "Max": 22211, "Med": 396.0, "Med Resp": 396.0 }, "Reasoning": { "Min": 1, "Max": 20275, "Med": 581.0, "Med Resp": 581.0 }, "Hallucination": { "Min": 24, "Max": 21645, "Med": 441.5, "Med Resp": 441.5 }, "Safety": { "Min": 18, "Max": 1531, "Med": 414.0, "Med Resp": 414.0 }, "Repetition": { "Min": 76, "Max": 1912, "Med": 299.5, "Med Resp": 299.5 }, "Summarization": { "Min": 1, "Max": 29578, "Med": 275.5, "Med Resp": 275.5 }, "Translation": { "Min": 9, "Max": 31839, "Med": 308.5, "Med Resp": 308.5 }, "Multi-Turn": { "Min": 3, "Max": 34276, "Med": 1167.5, "Med Resp": 1167.5 } }, "DeepSeek V3 (0324) (top_p: 0.95, temp:1.3)": { "Overall": { "Min": 1, "Max": 5178, "Med": 408.0, "Med Resp": 408.0 }, "Content Generation": { "Min": 7, "Max": 1974, "Med": 439.5, "Med Resp": 439.5 }, "Editing": { "Min": 5, "Max": 1192, "Med": 293.0, "Med Resp": 293.0 }, "Data Analysis": { "Min": 1, "Max": 3155, "Med": 330.0, "Med Resp": 330.0 }, "Reasoning": { "Min": 63, "Max": 5178, "Med": 519.0, "Med Resp": 519.0 }, "Hallucination": { "Min": 57, "Max": 1621, "Med": 502.5, "Med Resp": 502.5 }, "Safety": { "Min": 12, "Max": 1726, "Med": 337.0, "Med Resp": 337.0 }, "Repetition": { "Min": 98, "Max": 2754, "Med": 406.5, "Med Resp": 406.5 }, "Summarization": { "Min": 32, "Max": 959, "Med": 251.0, "Med Resp": 251.0 }, "Translation": { "Min": 60, "Max": 2197, "Med": 351.5, "Med Resp": 351.5 }, "Multi-Turn": { "Min": 4, "Max": 4959, "Med": 1318.5, "Med Resp": 1318.5 } }, "GLM-4.6 FP8": { "Overall": { "Min": 126, "Max": 23404, "Med": 2645.5, "Med Resp": 522.0 }, "Content Generation": { "Min": 669, "Max": 8438, "Med": 2561.5, "Med Resp": 687.5 }, "Editing": { "Min": 976, "Max": 4842, "Med": 2395.5, "Med Resp": 445.5 }, "Data Analysis": { "Min": 487, "Max": 9089, "Med": 1743.0, "Med Resp": 213.0 }, "Reasoning": { "Min": 515, "Max": 23404, "Med": 2596.0, "Med Resp": 697.0 }, "Hallucination": { "Min": 838, "Max": 10287, "Med": 2426.0, "Med Resp": 838.5 }, "Safety": { "Min": 251, "Max": 7182, "Med": 2375.0, "Med Resp": 621.0 }, "Repetition": { "Min": 1115, "Max": 9952, "Med": 2572.5, "Med Resp": 544.5 }, "Summarization": { "Min": 956, "Max": 6571, "Med": 1978.5, "Med Resp": 206.0 }, "Translation": { "Min": 126, "Max": 10588, "Med": 3456.0, "Med Resp": 421.5 }, "Multi-Turn": { "Min": 1512, "Max": 13662, "Med": 6810.5, "Med Resp": 2282.5 } }, "DeepSeek V3.2 Speciale": { "Overall": { "Min": 160, "Max": 65513, "Med": 3226.5, "Med Resp": 249.5 }, "Content Generation": { "Min": 186, "Max": 46347, "Med": 3634.0, "Med Resp": 364.0 }, "Editing": { "Min": 329, "Max": 24883, "Med": 3043.0, "Med Resp": 178.0 }, "Data Analysis": { "Min": 191, "Max": 64268, "Med": 1640.0, "Med Resp": 67.0 }, "Reasoning": { "Min": 228, "Max": 65472, "Med": 2211.5, "Med Resp": 165.0 }, "Hallucination": { "Min": 373, "Max": 23653, "Med": 3253.5, "Med Resp": 258.0 }, "Safety": { "Min": 331, "Max": 39236, "Med": 2575.0, "Med Resp": 158.0 }, "Repetition": { "Min": 356, "Max": 65513, "Med": 3357.0, "Med Resp": 246.0 }, "Summarization": { "Min": 160, "Max": 56309, "Med": 1500.0, "Med Resp": 189.5 }, "Translation": { "Min": 522, "Max": 25619, "Med": 5143.5, "Med Resp": 281.5 }, "Multi-Turn": { "Min": 244, "Max": 32258, "Med": 4282.0, "Med Resp": 854.0 } }, "Claude 4.5 Opus (think, budget: 16K)": { "Overall": { "Min": -10, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Content Generation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Editing": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Data Analysis": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Reasoning": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Hallucination": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Safety": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Repetition": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Summarization": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Translation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Multi-Turn": { "Min": -10, "Max": -4, "Med": -6.0, "Med Resp": -3.0 } }, "GLM-4.5 FP8": { "Overall": { "Min": 75, "Max": 65432, "Med": 1442.0, "Med Resp": 604.0 }, "Content Generation": { "Min": 322, "Max": 9320, "Med": 1283.0, "Med Resp": 655.5 }, "Editing": { "Min": 232, "Max": 10227, "Med": 1163.5, "Med Resp": 571.0 }, "Data Analysis": { "Min": 318, "Max": 15748, "Med": 1328.0, "Med Resp": 481.0 }, "Reasoning": { "Min": 558, "Max": 65432, "Med": 3187.5, "Med Resp": 653.0 }, "Hallucination": { "Min": 75, "Max": 10541, "Med": 1546.5, "Med Resp": 962.5 }, "Safety": { "Min": 159, "Max": 5552, "Med": 1418.0, "Med Resp": 808.0 }, "Repetition": { "Min": 284, "Max": 65409, "Med": 1492.0, "Med Resp": 729.5 }, "Summarization": { "Min": 242, "Max": 3610, "Med": 688.5, "Med Resp": 268.0 }, "Translation": { "Min": 156, "Max": 10043, "Med": 1448.5, "Med Resp": 414.0 }, "Multi-Turn": { "Min": 630, "Max": 15831, "Med": 3977.5, "Med Resp": 2277.5 } }, "Gauss2.3 Hybrid": { "Overall": { "Min": 7, "Max": 134423, "Med": 546.0, "Med Resp": 308.0 }, "Content Generation": { "Min": 16, "Max": 6706, "Med": 470.0, "Med Resp": 416.5 }, "Editing": { "Min": 9, "Max": 2943, "Med": 219.0, "Med Resp": 188.5 }, "Data Analysis": { "Min": 23, "Max": 131072, "Med": 585.0, "Med Resp": 192.0 }, "Reasoning": { "Min": 329, "Max": 131072, "Med": 2091.0, "Med Resp": 387.0 }, "Hallucination": { "Min": 20, "Max": 131072, "Med": 972.5, "Med Resp": 387.0 }, "Safety": { "Min": 20, "Max": 131072, "Med": 603.0, "Med Resp": 270.0 }, "Repetition": { "Min": 60, "Max": 131085, "Med": 869.5, "Med Resp": 392.0 }, "Summarization": { "Min": 26, "Max": 2114, "Med": 320.0, "Med Resp": 208.0 }, "Translation": { "Min": 7, "Max": 71270, "Med": 322.0, "Med Resp": 273.0 }, "Multi-Turn": { "Min": 7, "Max": 134423, "Med": 2478.5, "Med Resp": 1208.5 } }, "DeepSeek V3.2": { "Overall": { "Min": 134, "Max": 22816, "Med": 762.5, "Med Resp": 312.0 }, "Content Generation": { "Min": 153, "Max": 5977, "Med": 845.0, "Med Resp": 462.0 }, "Editing": { "Min": 141, "Max": 6055, "Med": 587.5, "Med Resp": 245.5 }, "Data Analysis": { "Min": 157, "Max": 13414, "Med": 695.0, "Med Resp": 166.0 }, "Reasoning": { "Min": 272, "Max": 22816, "Med": 1440.5, "Med Resp": 245.0 }, "Hallucination": { "Min": 213, "Max": 9501, "Med": 938.5, "Med Resp": 532.5 }, "Safety": { "Min": 184, "Max": 5304, "Med": 617.0, "Med Resp": 238.0 }, "Repetition": { "Min": 216, "Max": 7227, "Med": 919.5, "Med Resp": 399.0 }, "Summarization": { "Min": 134, "Max": 1750, "Med": 471.0, "Med Resp": 197.5 }, "Translation": { "Min": 154, "Max": 6364, "Med": 565.0, "Med Resp": 301.0 }, "Multi-Turn": { "Min": 401, "Max": 14066, "Med": 2538.5, "Med Resp": 1261.0 } }, "MiniMax-M2 (230B A10B)": { "Overall": { "Min": 64, "Max": 28729, "Med": 1142.0, "Med Resp": 325.0 }, "Content Generation": { "Min": 116, "Max": 16249, "Med": 1235.5, "Med Resp": 501.5 }, "Editing": { "Min": 111, "Max": 11557, "Med": 858.0, "Med Resp": 201.0 }, "Data Analysis": { "Min": 76, "Max": 18529, "Med": 834.0, "Med Resp": 170.0 }, "Reasoning": { "Min": 118, "Max": 18596, "Med": 1674.0, "Med Resp": 418.5 }, "Hallucination": { "Min": 92, "Max": 8617, "Med": 1130.0, "Med Resp": 436.0 }, "Safety": { "Min": 64, "Max": 5803, "Med": 563.0, "Med Resp": 176.0 }, "Repetition": { "Min": 175, "Max": 14147, "Med": 1054.5, "Med Resp": 259.0 }, "Summarization": { "Min": 135, "Max": 15849, "Med": 716.0, "Med Resp": 197.5 }, "Translation": { "Min": 216, "Max": 22260, "Med": 1133.0, "Med Resp": 297.5 }, "Multi-Turn": { "Min": 303, "Max": 28729, "Med": 3732.0, "Med Resp": 1424.0 } }, "gpt-oss-120B (Reasoning: medium)": { "Overall": { "Min": 43, "Max": 18693, "Med": 759.5, "Med Resp": 370.5 }, "Content Generation": { "Min": 126, "Max": 6264, "Med": 897.0, "Med Resp": 613.5 }, "Editing": { "Min": 61, "Max": 4605, "Med": 475.5, "Med Resp": 248.5 }, "Data Analysis": { "Min": 49, "Max": 6975, "Med": 596.0, "Med Resp": 213.0 }, "Reasoning": { "Min": 147, "Max": 10387, "Med": 1170.5, "Med Resp": 635.0 }, "Hallucination": { "Min": 88, "Max": 5277, "Med": 1317.0, "Med Resp": 1106.5 }, "Safety": { "Min": 43, "Max": 3651, "Med": 199.0, "Med Resp": 12.0 }, "Repetition": { "Min": 122, "Max": 6986, "Med": 940.0, "Med Resp": 407.0 }, "Summarization": { "Min": 83, "Max": 15231, "Med": 378.0, "Med Resp": 246.0 }, "Translation": { "Min": 107, "Max": 3659, "Med": 737.0, "Med Resp": 299.5 }, "Multi-Turn": { "Min": 135, "Max": 18693, "Med": 2826.0, "Med Resp": 2150.0 } }, "K2-Think": { "Overall": { "Min": 27, "Max": 8178, "Med": 1835.0, "Med Resp": 486.0 }, "Content Generation": { "Min": 138, "Max": 2049, "Med": 1821.5, "Med Resp": 660.5 }, "Editing": { "Min": 169, "Max": 2054, "Med": 1433.5, "Med Resp": 283.5 }, "Data Analysis": { "Min": 150, "Max": 2053, "Med": 1349.0, "Med Resp": 264.0 }, "Reasoning": { "Min": 419, "Max": 2048, "Med": 2045.5, "Med Resp": 576.5 }, "Hallucination": { "Min": 174, "Max": 2054, "Med": 1890.0, "Med Resp": 522.5 }, "Safety": { "Min": 27, "Max": 2048, "Med": 1393.0, "Med Resp": 405.0 }, "Repetition": { "Min": 870, "Max": 2070, "Med": 2048.0, "Med Resp": 2048.0 }, "Summarization": { "Min": 252, "Max": 2053, "Med": 1011.0, "Med Resp": 262.5 }, "Translation": { "Min": 195, "Max": 2051, "Med": 2006.0, "Med Resp": 371.5 }, "Multi-Turn": { "Min": 110, "Max": 8178, "Med": 3224.0, "Med Resp": 1526.0 } }, "MiMo V2 Flash": { "Overall": { "Min": 125, "Max": 69375, "Med": 1477.5, "Med Resp": 373.0 }, "Content Generation": { "Min": 222, "Max": 65445, "Med": 1321.5, "Med Resp": 500.5 }, "Editing": { "Min": 265, "Max": 65423, "Med": 1194.0, "Med Resp": 314.0 }, "Data Analysis": { "Min": 262, "Max": 65439, "Med": 1296.0, "Med Resp": 235.0 }, "Reasoning": { "Min": 319, "Max": 65430, "Med": 2559.5, "Med Resp": 402.5 }, "Hallucination": { "Min": 129, "Max": 65447, "Med": 1179.5, "Med Resp": 499.0 }, "Safety": { "Min": 133, "Max": 5184, "Med": 717.0, "Med Resp": 294.0 }, "Repetition": { "Min": 295, "Max": 65472, "Med": 2153.5, "Med Resp": 573.5 }, "Summarization": { "Min": 188, "Max": 64302, "Med": 789.5, "Med Resp": 220.5 }, "Translation": { "Min": 125, "Max": 65041, "Med": 1738.5, "Med Resp": 339.5 }, "Multi-Turn": { "Min": 323, "Max": 69375, "Med": 3331.5, "Med Resp": 1361.0 } }, "Qwen3 32B (think)": { "Overall": { "Min": 164, "Max": 34272, "Med": 1113.0, "Med Resp": 390.0 }, "Content Generation": { "Min": 164, "Max": 32768, "Med": 1027.5, "Med Resp": 476.0 }, "Editing": { "Min": 285, "Max": 3646, "Med": 843.0, "Med Resp": 283.0 }, "Data Analysis": { "Min": 210, "Max": 18774, "Med": 968.0, "Med Resp": 278.0 }, "Reasoning": { "Min": 477, "Max": 18676, "Med": 1759.0, "Med Resp": 459.0 }, "Hallucination": { "Min": 170, "Max": 3776, "Med": 1617.0, "Med Resp": 646.0 }, "Safety": { "Min": 169, "Max": 4053, "Med": 940.0, "Med Resp": 429.0 }, "Repetition": { "Min": 608, "Max": 32768, "Med": 2316.5, "Med Resp": 537.5 }, "Summarization": { "Min": 192, "Max": 2255, "Med": 586.0, "Med Resp": 236.5 }, "Translation": { "Min": 374, "Max": 10683, "Med": 1113.5, "Med Resp": 307.0 }, "Multi-Turn": { "Min": 493, "Max": 34272, "Med": 3210.0, "Med Resp": 1481.0 } }, "ERNIE 4.5 21B A3B Thinking": { "Overall": { "Min": 186, "Max": 66114, "Med": 1637.0, "Med Resp": 541.0 }, "Content Generation": { "Min": 302, "Max": 12760, "Med": 1586.5, "Med Resp": 654.5 }, "Editing": { "Min": 186, "Max": 8703, "Med": 1119.5, "Med Resp": 336.0 }, "Data Analysis": { "Min": 200, "Max": 31928, "Med": 1484.0, "Med Resp": 418.0 }, "Reasoning": { "Min": 511, "Max": 29184, "Med": 5312.0, "Med Resp": 669.5 }, "Hallucination": { "Min": 313, "Max": 11452, "Med": 1716.0, "Med Resp": 797.5 }, "Safety": { "Min": 213, "Max": 6914, "Med": 1242.0, "Med Resp": 599.0 }, "Repetition": { "Min": 643, "Max": 65463, "Med": 2387.0, "Med Resp": 516.5 }, "Summarization": { "Min": 215, "Max": 12449, "Med": 884.0, "Med Resp": 269.5 }, "Translation": { "Min": 298, "Max": 19672, "Med": 1466.5, "Med Resp": 421.5 }, "Multi-Turn": { "Min": 705, "Max": 66114, "Med": 4404.5, "Med Resp": 1819.0 } }, "Qwen3 235B A22B Instruct 2507": { "Overall": { "Min": 1, "Max": 65405, "Med": 433.0, "Med Resp": 433.0 }, "Content Generation": { "Min": 7, "Max": 4604, "Med": 492.5, "Med Resp": 492.5 }, "Editing": { "Min": 6, "Max": 2067, "Med": 248.5, "Med Resp": 248.5 }, "Data Analysis": { "Min": 1, "Max": 5119, "Med": 357.0, "Med Resp": 357.0 }, "Reasoning": { "Min": 1, "Max": 11933, "Med": 730.5, "Med Resp": 730.5 }, "Hallucination": { "Min": 38, "Max": 2395, "Med": 630.0, "Med Resp": 630.0 }, "Safety": { "Min": 12, "Max": 2497, "Med": 352.0, "Med Resp": 352.0 }, "Repetition": { "Min": 73, "Max": 65405, "Med": 468.5, "Med Resp": 468.5 }, "Summarization": { "Min": 24, "Max": 1899, "Med": 249.0, "Med Resp": 249.0 }, "Translation": { "Min": 10, "Max": 64183, "Med": 299.0, "Med Resp": 299.0 }, "Multi-Turn": { "Min": 3, "Max": 8009, "Med": 1728.5, "Med Resp": 1728.5 } }, "Grok-4": { "Overall": { "Min": -10, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Content Generation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Editing": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Data Analysis": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Reasoning": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Hallucination": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Safety": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Repetition": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Summarization": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Translation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Multi-Turn": { "Min": -10, "Max": -4, "Med": -6.0, "Med Resp": -3.0 } }, "GPT-5.2 (Reasoning: medium)": { "Overall": { "Min": 11, "Max": 7735, "Med": 347.0, "Med Resp": 264.0 }, "Content Generation": { "Min": 12, "Max": 7735, "Med": 537.0, "Med Resp": 370.0 }, "Editing": { "Min": 11, "Max": 1562, "Med": 173.5, "Med Resp": 166.0 }, "Data Analysis": { "Min": 18, "Max": 3954, "Med": 222.0, "Med Resp": 98.0 }, "Reasoning": { "Min": 29, "Max": 6895, "Med": 445.5, "Med Resp": 246.5 }, "Hallucination": { "Min": 72, "Max": 3525, "Med": 633.0, "Med Resp": 357.5 }, "Safety": { "Min": 58, "Max": 2808, "Med": 434.0, "Med Resp": 285.0 }, "Repetition": { "Min": 34, "Max": 5202, "Med": 272.0, "Med Resp": 223.0 }, "Summarization": { "Min": 37, "Max": 2339, "Med": 201.0, "Med Resp": 194.5 }, "Translation": { "Min": 12, "Max": 3684, "Med": 307.0, "Med Resp": 283.5 }, "Multi-Turn": { "Min": 41, "Max": 7003, "Med": 983.5, "Med Resp": 844.5 } }, "Gemini 3 Flash Preview (Thinking Level: High)": { "Overall": { "Min": 137, "Max": 24472, "Med": 1296.5, "Med Resp": 424.5 }, "Content Generation": { "Min": 248, "Max": 16374, "Med": 1368.5, "Med Resp": 535.5 }, "Editing": { "Min": 137, "Max": 10610, "Med": 1113.5, "Med Resp": 338.0 }, "Data Analysis": { "Min": 166, "Max": 13595, "Med": 923.0, "Med Resp": 232.0 }, "Reasoning": { "Min": 318, "Max": 24472, "Med": 1210.5, "Med Resp": 556.0 }, "Hallucination": { "Min": 349, "Max": 5023, "Med": 1295.5, "Med Resp": 639.5 }, "Safety": { "Min": 380, "Max": 5510, "Med": 1297.0, "Med Resp": 482.0 }, "Repetition": { "Min": 309, "Max": 7743, "Med": 1477.5, "Med Resp": 389.5 }, "Summarization": { "Min": 306, "Max": 18709, "Med": 905.5, "Med Resp": 195.0 }, "Translation": { "Min": 289, "Max": 17871, "Med": 1421.0, "Med Resp": 381.5 }, "Multi-Turn": { "Min": 231, "Max": 11926, "Med": 3075.5, "Med Resp": 1466.5 } }, "Kanana 2 30B A3B Thinking": { "Overall": { "Min": 584, "Max": 247274, "Med": 4263.0, "Med Resp": 854.5 }, "Content Generation": { "Min": 1055, "Max": 139421, "Med": 3898.5, "Med Resp": 1028.0 }, "Editing": { "Min": 747, "Max": 134253, "Med": 3199.0, "Med Resp": 606.5 }, "Data Analysis": { "Min": 618, "Max": 120325, "Med": 3402.0, "Med Resp": 509.0 }, "Reasoning": { "Min": 1042, "Max": 160440, "Med": 6428.5, "Med Resp": 925.5 }, "Hallucination": { "Min": 760, "Max": 137639, "Med": 4215.0, "Med Resp": 1061.5 }, "Safety": { "Min": 787, "Max": 116591, "Med": 3686.0, "Med Resp": 867.0 }, "Repetition": { "Min": 1238, "Max": 134651, "Med": 8164.0, "Med Resp": 517.5 }, "Summarization": { "Min": 584, "Max": 59519, "Med": 2540.0, "Med Resp": 656.5 }, "Translation": { "Min": 899, "Max": 131258, "Med": 4796.0, "Med Resp": 894.0 }, "Multi-Turn": { "Min": 1560, "Max": 247274, "Med": 12632.5, "Med Resp": 2593.0 } }, "Kanana 2 30B A3B Instruct": { "Overall": { "Min": 51, "Max": 177683, "Med": 1195.0, "Med Resp": 1195.0 }, "Content Generation": { "Min": 58, "Max": 12603, "Med": 1448.0, "Med Resp": 1448.0 }, "Editing": { "Min": 69, "Max": 51628, "Med": 836.5, "Med Resp": 836.5 }, "Data Analysis": { "Min": 51, "Max": 11567, "Med": 916.0, "Med Resp": 916.0 }, "Reasoning": { "Min": 51, "Max": 122001, "Med": 1587.0, "Med Resp": 1587.0 }, "Hallucination": { "Min": 104, "Max": 17989, "Med": 1419.0, "Med Resp": 1419.0 }, "Safety": { "Min": 96, "Max": 7485, "Med": 1377.0, "Med Resp": 1377.0 }, "Repetition": { "Min": 255, "Max": 177683, "Med": 844.0, "Med Resp": 844.0 }, "Summarization": { "Min": 108, "Max": 4592, "Med": 778.0, "Med Resp": 778.0 }, "Translation": { "Min": 69, "Max": 30611, "Med": 1059.0, "Med Resp": 1059.0 }, "Multi-Turn": { "Min": 119, "Max": 74203, "Med": 3252.5, "Med Resp": 3252.5 } }, "Claude 4 Opus (20250514) (think)": { "Overall": { "Min": -10, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Content Generation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Editing": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Data Analysis": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Reasoning": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Hallucination": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Safety": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Repetition": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Summarization": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Translation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Multi-Turn": { "Min": -10, "Max": -4, "Med": -6.0, "Med Resp": -3.0 } }, "Gemini 2.5 Pro": { "Overall": { "Min": -10, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Content Generation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Editing": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Data Analysis": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Reasoning": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Hallucination": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Safety": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Repetition": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Summarization": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Translation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Multi-Turn": { "Min": -10, "Max": -4, "Med": -6.0, "Med Resp": -3.0 } }, "Tongyi DeepResearch 30B A3B": { "Overall": { "Min": 153, "Max": 68912, "Med": 1147.0, "Med Resp": 408.0 }, "Content Generation": { "Min": 216, "Max": 65477, "Med": 1086.5, "Med Resp": 510.5 }, "Editing": { "Min": 251, "Max": 65470, "Med": 985.5, "Med Resp": 313.0 }, "Data Analysis": { "Min": 242, "Max": 65499, "Med": 998.0, "Med Resp": 239.0 }, "Reasoning": { "Min": 333, "Max": 65477, "Med": 2043.5, "Med Resp": 388.5 }, "Hallucination": { "Min": 194, "Max": 65501, "Med": 1344.5, "Med Resp": 593.0 }, "Safety": { "Min": 153, "Max": 65472, "Med": 992.0, "Med Resp": 392.0 }, "Repetition": { "Min": 425, "Max": 65513, "Med": 1986.5, "Med Resp": 472.5 }, "Summarization": { "Min": 290, "Max": 2410, "Med": 662.5, "Med Resp": 262.0 }, "Translation": { "Min": 360, "Max": 65406, "Med": 1107.0, "Med Resp": 317.5 }, "Multi-Turn": { "Min": 240, "Max": 68912, "Med": 3134.5, "Med Resp": 1349.5 } }, "GPT-5 mini (Reasoning: medium)": { "Overall": { "Min": -10, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Content Generation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Editing": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Data Analysis": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Reasoning": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Hallucination": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Safety": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Repetition": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Summarization": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Translation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Multi-Turn": { "Min": -10, "Max": -4, "Med": -6.0, "Med Resp": -3.0 } }, "Gemma 3 27B it": { "Overall": { "Min": 1, "Max": 65458, "Med": 380.0, "Med Resp": 380.0 }, "Content Generation": { "Min": 7, "Max": 3893, "Med": 484.0, "Med Resp": 484.0 }, "Editing": { "Min": 6, "Max": 1776, "Med": 254.0, "Med Resp": 254.0 }, "Data Analysis": { "Min": 1, "Max": 63850, "Med": 180.0, "Med Resp": 180.0 }, "Reasoning": { "Min": 2, "Max": 1926, "Med": 485.5, "Med Resp": 485.5 }, "Hallucination": { "Min": 13, "Max": 2494, "Med": 534.0, "Med Resp": 534.0 }, "Safety": { "Min": 31, "Max": 2440, "Med": 518.0, "Med Resp": 518.0 }, "Repetition": { "Min": 95, "Max": 65433, "Med": 299.0, "Med Resp": 299.0 }, "Summarization": { "Min": 30, "Max": 1080, "Med": 202.5, "Med Resp": 202.5 }, "Translation": { "Min": 46, "Max": 62659, "Med": 374.0, "Med Resp": 374.0 }, "Multi-Turn": { "Min": 4, "Max": 65458, "Med": 1558.0, "Med Resp": 1558.0 } }, "GLM-4.7 FP8": { "Overall": { "Min": 212, "Max": 131072, "Med": 2252.5, "Med Resp": 328.0 }, "Content Generation": { "Min": 383, "Max": 18712, "Med": 2094.0, "Med Resp": 423.0 }, "Editing": { "Min": 384, "Max": 14538, "Med": 2070.5, "Med Resp": 263.0 }, "Data Analysis": { "Min": 396, "Max": 13525, "Med": 1477.0, "Med Resp": 162.0 }, "Reasoning": { "Min": 254, "Max": 40295, "Med": 2298.5, "Med Resp": 465.5 }, "Hallucination": { "Min": 443, "Max": 19838, "Med": 2156.5, "Med Resp": 481.0 }, "Safety": { "Min": 212, "Max": 10792, "Med": 2121.0, "Med Resp": 197.0 }, "Repetition": { "Min": 768, "Max": 131072, "Med": 2963.5, "Med Resp": 289.0 }, "Summarization": { "Min": 599, "Max": 10452, "Med": 1426.0, "Med Resp": 182.5 }, "Translation": { "Min": 796, "Max": 12247, "Med": 3159.5, "Med Resp": 312.5 }, "Multi-Turn": { "Min": 420, "Max": 15706, "Med": 5514.5, "Med Resp": 1361.0 } }, "GPT-5 nano (Reasoning: medium)": { "Overall": { "Min": -10, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Content Generation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Editing": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Data Analysis": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Reasoning": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Hallucination": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Safety": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Repetition": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Summarization": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Translation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Multi-Turn": { "Min": -10, "Max": -4, "Med": -6.0, "Med Resp": -3.0 } }, "GPT-5.1 (Reasoning: medium, verbosity: medium)": { "Overall": { "Min": -10, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Content Generation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Editing": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Data Analysis": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Reasoning": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Hallucination": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Safety": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Repetition": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Summarization": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Translation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Multi-Turn": { "Min": -10, "Max": -4, "Med": -6.0, "Med Resp": -3.0 } }, "KAT Dev 72B Exp": { "Overall": { "Min": 6, "Max": 65602, "Med": 397.0, "Med Resp": 397.0 }, "Content Generation": { "Min": 26, "Max": 65466, "Med": 554.5, "Med Resp": 554.5 }, "Editing": { "Min": 13, "Max": 65363, "Med": 223.0, "Med Resp": 223.0 }, "Data Analysis": { "Min": 21, "Max": 15350, "Med": 289.0, "Med Resp": 289.0 }, "Reasoning": { "Min": 10, "Max": 65442, "Med": 487.5, "Med Resp": 487.5 }, "Hallucination": { "Min": 24, "Max": 65455, "Med": 402.0, "Med Resp": 402.0 }, "Safety": { "Min": 17, "Max": 65474, "Med": 345.0, "Med Resp": 345.0 }, "Repetition": { "Min": 96, "Max": 65602, "Med": 405.0, "Med Resp": 405.0 }, "Summarization": { "Min": 39, "Max": 65376, "Med": 292.0, "Med Resp": 292.0 }, "Translation": { "Min": 10, "Max": 65331, "Med": 339.0, "Med Resp": 339.0 }, "Multi-Turn": { "Min": 6, "Max": 65466, "Med": 1083.5, "Med Resp": 1083.5 } }, "gpt-oss-20B (Reasoning: medium)": { "Overall": { "Min": 32, "Max": 18763, "Med": 953.5, "Med Resp": 326.0 }, "Content Generation": { "Min": 126, "Max": 6343, "Med": 983.5, "Med Resp": 486.5 }, "Editing": { "Min": 107, "Max": 7213, "Med": 667.0, "Med Resp": 195.0 }, "Data Analysis": { "Min": 94, "Max": 14599, "Med": 750.0, "Med Resp": 192.0 }, "Reasoning": { "Min": 109, "Max": 18763, "Med": 1290.5, "Med Resp": 475.5 }, "Hallucination": { "Min": 132, "Max": 7937, "Med": 1493.5, "Med Resp": 620.5 }, "Safety": { "Min": 32, "Max": 6678, "Med": 268.0, "Med Resp": 12.0 }, "Repetition": { "Min": 258, "Max": 17217, "Med": 1847.0, "Med Resp": 332.5 }, "Summarization": { "Min": 99, "Max": 4060, "Med": 438.5, "Med Resp": 219.0 }, "Translation": { "Min": 133, "Max": 10446, "Med": 1028.5, "Med Resp": 290.0 }, "Multi-Turn": { "Min": 102, "Max": 14863, "Med": 2483.0, "Med Resp": 1514.0 } }, "o3-pro (Reasoning: medium)": { "Overall": { "Min": -10, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Content Generation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Editing": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Data Analysis": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Reasoning": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Hallucination": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Safety": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Repetition": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Summarization": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Translation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Multi-Turn": { "Min": -10, "Max": -4, "Med": -6.0, "Med Resp": -3.0 } }, "Apriel 1.5 15B Thinker": { "Overall": { "Min": 118, "Max": 74664, "Med": 2238.0, "Med Resp": 375.0 }, "Content Generation": { "Min": 216, "Max": 65428, "Med": 2397.0, "Med Resp": 565.5 }, "Editing": { "Min": 318, "Max": 65412, "Med": 1577.0, "Med Resp": 247.0 }, "Data Analysis": { "Min": 213, "Max": 65413, "Med": 1562.0, "Med Resp": 225.0 }, "Reasoning": { "Min": 307, "Max": 65372, "Med": 2393.5, "Med Resp": 548.0 }, "Hallucination": { "Min": 219, "Max": 65421, "Med": 2986.5, "Med Resp": 1016.5 }, "Safety": { "Min": 118, "Max": 65407, "Med": 380.0, "Med Resp": 12.0 }, "Repetition": { "Min": 181, "Max": 65431, "Med": 65362.5, "Med Resp": 65328.5 }, "Summarization": { "Min": 252, "Max": 65065, "Med": 981.5, "Med Resp": 238.5 }, "Translation": { "Min": 297, "Max": 65781, "Med": 3477.0, "Med Resp": 342.0 }, "Multi-Turn": { "Min": 378, "Max": 74664, "Med": 6692.0, "Med Resp": 2079.0 } }, "Gemini 2.5 Flash-lite Preview (09-2025)": { "Overall": { "Min": -10, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Content Generation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Editing": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Data Analysis": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Reasoning": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Hallucination": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Safety": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Repetition": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Summarization": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Translation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Multi-Turn": { "Min": -10, "Max": -4, "Med": -6.0, "Med Resp": -3.0 } }, "Mistral Small 3.2 24B Instruct 2506": { "Overall": { "Min": 1, "Max": 65516, "Med": 369.0, "Med Resp": 369.0 }, "Content Generation": { "Min": 7, "Max": 2684, "Med": 389.5, "Med Resp": 389.5 }, "Editing": { "Min": 9, "Max": 1172, "Med": 269.0, "Med Resp": 269.0 }, "Data Analysis": { "Min": 1, "Max": 3973, "Med": 295.0, "Med Resp": 295.0 }, "Reasoning": { "Min": 1, "Max": 65462, "Med": 484.5, "Med Resp": 484.5 }, "Hallucination": { "Min": 61, "Max": 5920, "Med": 489.0, "Med Resp": 489.0 }, "Safety": { "Min": 10, "Max": 65465, "Med": 320.0, "Med Resp": 320.0 }, "Repetition": { "Min": 103, "Max": 65516, "Med": 376.5, "Med Resp": 376.5 }, "Summarization": { "Min": 28, "Max": 1266, "Med": 234.5, "Med Resp": 234.5 }, "Translation": { "Min": 9, "Max": 3248, "Med": 327.0, "Med Resp": 327.0 }, "Multi-Turn": { "Min": 4, "Max": 65494, "Med": 1279.0, "Med Resp": 1279.0 } }, "Mistral Large 3 675B Instruct 2512": { "Overall": { "Min": 1, "Max": 12120, "Med": 448.0, "Med Resp": 448.0 }, "Content Generation": { "Min": 13, "Max": 6162, "Med": 565.0, "Med Resp": 565.0 }, "Editing": { "Min": 12, "Max": 2369, "Med": 299.0, "Med Resp": 299.0 }, "Data Analysis": { "Min": 1, "Max": 3902, "Med": 295.0, "Med Resp": 295.0 }, "Reasoning": { "Min": 1, "Max": 6293, "Med": 530.0, "Med Resp": 530.0 }, "Hallucination": { "Min": 54, "Max": 4461, "Med": 896.0, "Med Resp": 896.0 }, "Safety": { "Min": 27, "Max": 4250, "Med": 589.0, "Med Resp": 589.0 }, "Repetition": { "Min": 89, "Max": 5264, "Med": 448.0, "Med Resp": 448.0 }, "Summarization": { "Min": 31, "Max": 1357, "Med": 251.5, "Med Resp": 251.5 }, "Translation": { "Min": 22, "Max": 3529, "Med": 354.5, "Med Resp": 354.5 }, "Multi-Turn": { "Min": 4, "Max": 12120, "Med": 2191.5, "Med Resp": 2191.5 } }, "Mi:dm 2.0 Base Instruct": { "Overall": { "Min": 1, "Max": 32764, "Med": 316.0, "Med Resp": 316.0 }, "Content Generation": { "Min": 7, "Max": 3515, "Med": 400.0, "Med Resp": 400.0 }, "Editing": { "Min": 10, "Max": 1998, "Med": 191.0, "Med Resp": 191.0 }, "Data Analysis": { "Min": 1, "Max": 3302, "Med": 260.0, "Med Resp": 260.0 }, "Reasoning": { "Min": 1, "Max": 32071, "Med": 398.0, "Med Resp": 398.0 }, "Hallucination": { "Min": 13, "Max": 3061, "Med": 191.5, "Med Resp": 191.5 }, "Safety": { "Min": 10, "Max": 1110, "Med": 159.0, "Med Resp": 159.0 }, "Repetition": { "Min": 50, "Max": 2734, "Med": 316.5, "Med Resp": 316.5 }, "Summarization": { "Min": 35, "Max": 2967, "Med": 261.0, "Med Resp": 261.0 }, "Translation": { "Min": 7, "Max": 4703, "Med": 289.5, "Med Resp": 289.5 }, "Multi-Turn": { "Min": 3, "Max": 32764, "Med": 957.0, "Med Resp": 957.0 } }, "Qwen3 235B A22B Thinking 2507": { "Overall": { "Min": 8, "Max": 19533, "Med": 2404.5, "Med Resp": 423.0 }, "Content Generation": { "Min": 402, "Max": 13776, "Med": 2337.0, "Med Resp": 577.5 }, "Editing": { "Min": 482, "Max": 13235, "Med": 1894.5, "Med Resp": 274.5 }, "Data Analysis": { "Min": 8, "Max": 13217, "Med": 1427.0, "Med Resp": 303.0 }, "Reasoning": { "Min": 8, "Max": 19533, "Med": 2340.0, "Med Resp": 568.5 }, "Hallucination": { "Min": 305, "Max": 6670, "Med": 2005.0, "Med Resp": 848.0 }, "Safety": { "Min": 304, "Max": 8302, "Med": 1708.0, "Med Resp": 619.0 }, "Repetition": { "Min": 8, "Max": 11012, "Med": 3533.0, "Med Resp": 514.5 }, "Summarization": { "Min": 373, "Max": 11701, "Med": 1468.5, "Med Resp": 233.5 }, "Translation": { "Min": 381, "Max": 12124, "Med": 3332.5, "Med Resp": 284.0 }, "Multi-Turn": { "Min": 721, "Max": 19299, "Med": 5745.0, "Med Resp": 1736.5 } }, "HyperCLOVAX SEED Think 14B (think)": { "Overall": { "Min": 223, "Max": 131436, "Med": 1444.0, "Med Resp": 382.5 }, "Content Generation": { "Min": 279, "Max": 72029, "Med": 1222.0, "Med Resp": 476.5 }, "Editing": { "Min": 304, "Max": 65536, "Med": 1228.5, "Med Resp": 351.0 }, "Data Analysis": { "Min": 240, "Max": 65536, "Med": 1352.0, "Med Resp": 234.0 }, "Reasoning": { "Min": 414, "Max": 65536, "Med": 3010.0, "Med Resp": 315.0 }, "Hallucination": { "Min": 263, "Max": 65536, "Med": 1310.5, "Med Resp": 444.0 }, "Safety": { "Min": 241, "Max": 65536, "Med": 1100.0, "Med Resp": 412.0 }, "Repetition": { "Min": 389, "Max": 65536, "Med": 2233.0, "Med Resp": 355.0 }, "Summarization": { "Min": 223, "Max": 5987, "Med": 833.5, "Med Resp": 285.0 }, "Translation": { "Min": 457, "Max": 65536, "Med": 1611.0, "Med Resp": 352.0 }, "Multi-Turn": { "Min": 648, "Max": 131436, "Med": 3234.5, "Med Resp": 1324.5 } }, "o3": { "Overall": { "Min": -10, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Content Generation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Editing": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Data Analysis": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Reasoning": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Hallucination": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Safety": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Repetition": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Summarization": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Translation": { "Min": -2, "Max": -2, "Med": -2.0, "Med Resp": -1.0 }, "Multi-Turn": { "Min": -10, "Max": -4, "Med": -6.0, "Med Resp": -3.0 } }, "Qwen3 30B A3B Instruct 2507": { "Overall": { "Min": 1, "Max": 65516, "Med": 441.5, "Med Resp": 441.5 }, "Content Generation": { "Min": 7, "Max": 5659, "Med": 510.5, "Med Resp": 510.5 }, "Editing": { "Min": 7, "Max": 2231, "Med": 255.0, "Med Resp": 255.0 }, "Data Analysis": { "Min": 1, "Max": 8094, "Med": 381.0, "Med Resp": 381.0 }, "Reasoning": { "Min": 1, "Max": 9376, "Med": 753.5, "Med Resp": 753.5 }, "Hallucination": { "Min": 19, "Max": 65495, "Med": 689.5, "Med Resp": 689.5 }, "Safety": { "Min": 16, "Max": 65456, "Med": 445.0, "Med Resp": 445.0 }, "Repetition": { "Min": 81, "Max": 65516, "Med": 533.5, "Med Resp": 533.5 }, "Summarization": { "Min": 34, "Max": 1870, "Med": 251.0, "Med Resp": 251.0 }, "Translation": { "Min": 8, "Max": 3257, "Med": 292.5, "Med Resp": 292.5 }, "Multi-Turn": { "Min": 3, "Max": 6825, "Med": 1809.5, "Med Resp": 1809.5 } }, "Kimi K2 Thinking": { "Overall": { "Min": 115, "Max": 65500, "Med": 1692.0, "Med Resp": 330.0 }, "Content Generation": { "Min": 115, "Max": 29508, "Med": 1696.0, "Med Resp": 478.0 }, "Editing": { "Min": 302, "Max": 11808, "Med": 1347.5, "Med Resp": 219.0 }, "Data Analysis": { "Min": 186, "Max": 65462, "Med": 978.0, "Med Resp": 156.0 }, "Reasoning": { "Min": 291, "Max": 55791, "Med": 1842.0, "Med Resp": 377.0 }, "Hallucination": { "Min": 194, "Max": 5063, "Med": 1140.5, "Med Resp": 382.5 }, "Safety": { "Min": 171, "Max": 5707, "Med": 1013.0, "Med Resp": 395.0 }, "Repetition": { "Min": 236, "Max": 65500, "Med": 1890.0, "Med Resp": 264.0 }, "Summarization": { "Min": 276, "Max": 13220, "Med": 996.0, "Med Resp": 196.5 }, "Translation": { "Min": 433, "Max": 13703, "Med": 2637.0, "Med Resp": 310.5 }, "Multi-Turn": { "Min": 333, "Max": 17384, "Med": 3771.5, "Med Resp": 1102.0 } } }