TRUEBench / src /data /open /length_data.json
Jongyoon Song
Update evaluation results (251224) & Remove time and speed-related results
ef2b66d
{
"Olmo 3 32B Think": {
"Overall": {
"Min": 210,
"Max": 65454,
"Med": 3360.5,
"Med Resp": 473.0
},
"Content Generation": {
"Min": 683,
"Max": 65300,
"Med": 3224.0,
"Med Resp": 606.0
},
"Editing": {
"Min": 580,
"Max": 14539,
"Med": 2859.0,
"Med Resp": 419.5
},
"Data Analysis": {
"Min": 369,
"Max": 23205,
"Med": 2624.0,
"Med Resp": 332.0
},
"Reasoning": {
"Min": 779,
"Max": 27491,
"Med": 3911.5,
"Med Resp": 467.5
},
"Hallucination": {
"Min": 254,
"Max": 10751,
"Med": 2472.0,
"Med Resp": 768.0
},
"Safety": {
"Min": 210,
"Max": 7162,
"Med": 2025.0,
"Med Resp": 646.0
},
"Repetition": {
"Min": 1336,
"Max": 65454,
"Med": 5205.5,
"Med Resp": 575.5
},
"Summarization": {
"Min": 380,
"Max": 17517,
"Med": 2254.0,
"Med Resp": 248.0
},
"Translation": {
"Min": 846,
"Max": 15667,
"Med": 4546.5,
"Med Resp": 349.5
},
"Multi-Turn": {
"Min": 1010,
"Max": 24077,
"Med": 6999.5,
"Med Resp": 1764.5
}
},
"Claude 4.1 Opus (20250805) (think)": {
"Overall": {
"Min": -10,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Content Generation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Editing": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Data Analysis": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Reasoning": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Hallucination": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Safety": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Repetition": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Summarization": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Translation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Multi-Turn": {
"Min": -10,
"Max": -4,
"Med": -6.0,
"Med Resp": -3.0
}
},
"EXAONE 4.0 32B (think)": {
"Overall": {
"Min": 37,
"Max": 142387,
"Med": 1274.5,
"Med Resp": 503.0
},
"Content Generation": {
"Min": 160,
"Max": 131068,
"Med": 1178.5,
"Med Resp": 559.0
},
"Editing": {
"Min": 37,
"Max": 10786,
"Med": 1041.0,
"Med Resp": 423.5
},
"Data Analysis": {
"Min": 229,
"Max": 131072,
"Med": 1412.0,
"Med Resp": 345.0
},
"Reasoning": {
"Min": 567,
"Max": 131076,
"Med": 3961.5,
"Med Resp": 585.5
},
"Hallucination": {
"Min": 298,
"Max": 65533,
"Med": 1247.5,
"Med Resp": 627.5
},
"Safety": {
"Min": 227,
"Max": 5093,
"Med": 1145.0,
"Med Resp": 589.0
},
"Repetition": {
"Min": 441,
"Max": 131072,
"Med": 1744.5,
"Med Resp": 579.5
},
"Summarization": {
"Min": 149,
"Max": 8423,
"Med": 693.5,
"Med Resp": 311.0
},
"Translation": {
"Min": 227,
"Max": 14234,
"Med": 915.0,
"Med Resp": 411.5
},
"Multi-Turn": {
"Min": 390,
"Max": 142387,
"Med": 3222.0,
"Med Resp": 1488.0
}
},
"DeepSeek V3.1 (think)": {
"Overall": {
"Min": 80,
"Max": 31147,
"Med": 710.5,
"Med Resp": 356.0
},
"Content Generation": {
"Min": 132,
"Max": 5354,
"Med": 776.5,
"Med Resp": 500.0
},
"Editing": {
"Min": 119,
"Max": 2063,
"Med": 571.0,
"Med Resp": 287.0
},
"Data Analysis": {
"Min": 119,
"Max": 13106,
"Med": 644.0,
"Med Resp": 218.0
},
"Reasoning": {
"Min": 259,
"Max": 31147,
"Med": 1340.5,
"Med Resp": 338.0
},
"Hallucination": {
"Min": 206,
"Max": 10356,
"Med": 1132.5,
"Med Resp": 667.0
},
"Safety": {
"Min": 80,
"Max": 3412,
"Med": 565.0,
"Med Resp": 206.0
},
"Repetition": {
"Min": 290,
"Max": 6553,
"Med": 826.5,
"Med Resp": 450.0
},
"Summarization": {
"Min": 148,
"Max": 1533,
"Med": 432.0,
"Med Resp": 211.5
},
"Translation": {
"Min": 147,
"Max": 7448,
"Med": 554.5,
"Med Resp": 320.0
},
"Multi-Turn": {
"Min": 324,
"Max": 7862,
"Med": 2558.5,
"Med Resp": 1545.0
}
},
"Qwen3 30B A3B Thinking 2507": {
"Overall": {
"Min": 305,
"Max": 32743,
"Med": 2830.0,
"Med Resp": 351.0
},
"Content Generation": {
"Min": 335,
"Max": 10914,
"Med": 2775.5,
"Med Resp": 403.5
},
"Editing": {
"Min": 371,
"Max": 7617,
"Med": 2358.5,
"Med Resp": 220.0
},
"Data Analysis": {
"Min": 305,
"Max": 19749,
"Med": 1702.0,
"Med Resp": 227.0
},
"Reasoning": {
"Min": 485,
"Max": 19485,
"Med": 2504.0,
"Med Resp": 505.0
},
"Hallucination": {
"Min": 360,
"Max": 6054,
"Med": 2123.5,
"Med Resp": 668.0
},
"Safety": {
"Min": 306,
"Max": 32688,
"Med": 1667.0,
"Med Resp": 447.0
},
"Repetition": {
"Min": 1070,
"Max": 32743,
"Med": 3719.0,
"Med Resp": 368.5
},
"Summarization": {
"Min": 435,
"Max": 14462,
"Med": 2108.0,
"Med Resp": 204.0
},
"Translation": {
"Min": 513,
"Max": 11340,
"Med": 3869.5,
"Med Resp": 276.0
},
"Multi-Turn": {
"Min": 536,
"Max": 14557,
"Med": 5822.5,
"Med Resp": 1237.0
}
},
"o4-mini": {
"Overall": {
"Min": -10,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Content Generation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Editing": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Data Analysis": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Reasoning": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Hallucination": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Safety": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Repetition": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Summarization": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Translation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Multi-Turn": {
"Min": -10,
"Max": -4,
"Med": -6.0,
"Med Resp": -3.0
}
},
"Gemini 2.5 Flash": {
"Overall": {
"Min": -10,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Content Generation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Editing": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Data Analysis": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Reasoning": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Hallucination": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Safety": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Repetition": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Summarization": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Translation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Multi-Turn": {
"Min": -10,
"Max": -4,
"Med": -6.0,
"Med Resp": -3.0
}
},
"Claude 4 Sonnet (20250514) (think)": {
"Overall": {
"Min": -10,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Content Generation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Editing": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Data Analysis": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Reasoning": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Hallucination": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Safety": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Repetition": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Summarization": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Translation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Multi-Turn": {
"Min": -10,
"Max": -4,
"Med": -6.0,
"Med Resp": -3.0
}
},
"Gemini 3 Pro Preview (Thinking Level: High)": {
"Overall": {
"Min": 0,
"Max": 18460,
"Med": 1930.5,
"Med Resp": 378.0
},
"Content Generation": {
"Min": 0,
"Max": 12404,
"Med": 1967.5,
"Med Resp": 570.5
},
"Editing": {
"Min": 433,
"Max": 7372,
"Med": 1684.0,
"Med Resp": 322.0
},
"Data Analysis": {
"Min": 386,
"Max": 10791,
"Med": 1357.0,
"Med Resp": 172.0
},
"Reasoning": {
"Min": 0,
"Max": 18460,
"Med": 1821.0,
"Med Resp": 502.5
},
"Hallucination": {
"Min": 524,
"Max": 6228,
"Med": 1833.0,
"Med Resp": 468.0
},
"Safety": {
"Min": 832,
"Max": 6324,
"Med": 1802.0,
"Med Resp": 291.0
},
"Repetition": {
"Min": 516,
"Max": 5086,
"Med": 1910.0,
"Med Resp": 314.0
},
"Summarization": {
"Min": 663,
"Max": 3857,
"Med": 1295.5,
"Med Resp": 184.5
},
"Translation": {
"Min": 964,
"Max": 9535,
"Med": 2286.5,
"Med Resp": 357.0
},
"Multi-Turn": {
"Min": 608,
"Max": 10590,
"Med": 4601.5,
"Med Resp": 1377.0
}
},
"Solar Pro Preview (top_p:0.95, temp: 0.7)": {
"Overall": {
"Min": 1,
"Max": 4060,
"Med": 260.0,
"Med Resp": 260.0
},
"Content Generation": {
"Min": 15,
"Max": 3643,
"Med": 426.0,
"Med Resp": 426.0
},
"Editing": {
"Min": 14,
"Max": 3948,
"Med": 218.0,
"Med Resp": 218.0
},
"Data Analysis": {
"Min": 2,
"Max": 3500,
"Med": 89.0,
"Med Resp": 89.0
},
"Reasoning": {
"Min": 1,
"Max": 3338,
"Med": 190.5,
"Med Resp": 190.5
},
"Hallucination": {
"Min": 20,
"Max": 1093,
"Med": 128.5,
"Med Resp": 128.5
},
"Safety": {
"Min": 11,
"Max": 1507,
"Med": 92.0,
"Med Resp": 92.0
},
"Repetition": {
"Min": 34,
"Max": 4060,
"Med": 214.0,
"Med Resp": 214.0
},
"Summarization": {
"Min": 43,
"Max": 2478,
"Med": 218.0,
"Med Resp": 218.0
},
"Translation": {
"Min": 20,
"Max": 1711,
"Med": 360.0,
"Med Resp": 360.0
},
"Multi-Turn": {
"Min": 5,
"Max": 3353,
"Med": 530.0,
"Med Resp": 530.0
}
},
"DeepSeek R1 (0528) (top_p: 0.95, temp:0.6)": {
"Overall": {
"Min": 4,
"Max": 16917,
"Med": 1177.5,
"Med Resp": 554.0
},
"Content Generation": {
"Min": 389,
"Max": 7861,
"Med": 1261.5,
"Med Resp": 694.0
},
"Editing": {
"Min": 4,
"Max": 7611,
"Med": 1054.5,
"Med Resp": 517.5
},
"Data Analysis": {
"Min": 4,
"Max": 8191,
"Med": 1112.0,
"Med Resp": 355.0
},
"Reasoning": {
"Min": 4,
"Max": 12257,
"Med": 1913.0,
"Med Resp": 455.5
},
"Hallucination": {
"Min": 4,
"Max": 7390,
"Med": 1214.5,
"Med Resp": 682.0
},
"Safety": {
"Min": 227,
"Max": 6387,
"Med": 963.0,
"Med Resp": 568.0
},
"Repetition": {
"Min": 4,
"Max": 7787,
"Med": 1405.5,
"Med Resp": 646.5
},
"Summarization": {
"Min": 319,
"Max": 2613,
"Med": 711.5,
"Med Resp": 321.0
},
"Translation": {
"Min": 4,
"Max": 7687,
"Med": 1021.0,
"Med Resp": 561.5
},
"Multi-Turn": {
"Min": 448,
"Max": 16917,
"Med": 3418.5,
"Med Resp": 1874.0
}
},
"A.X 4.0": {
"Overall": {
"Min": 1,
"Max": 65581,
"Med": 412.5,
"Med Resp": 412.5
},
"Content Generation": {
"Min": 2,
"Max": 65581,
"Med": 543.0,
"Med Resp": 543.0
},
"Editing": {
"Min": 8,
"Max": 1791,
"Med": 250.0,
"Med Resp": 250.0
},
"Data Analysis": {
"Min": 1,
"Max": 65537,
"Med": 267.0,
"Med Resp": 267.0
},
"Reasoning": {
"Min": 2,
"Max": 2046,
"Med": 498.0,
"Med Resp": 498.0
},
"Hallucination": {
"Min": 12,
"Max": 2639,
"Med": 511.5,
"Med Resp": 511.5
},
"Safety": {
"Min": 4,
"Max": 2942,
"Med": 516.0,
"Med Resp": 516.0
},
"Repetition": {
"Min": 84,
"Max": 65536,
"Med": 341.5,
"Med Resp": 341.5
},
"Summarization": {
"Min": 26,
"Max": 2369,
"Med": 282.0,
"Med Resp": 282.0
},
"Translation": {
"Min": 7,
"Max": 35068,
"Med": 343.0,
"Med Resp": 343.0
},
"Multi-Turn": {
"Min": 3,
"Max": 9420,
"Med": 1455.0,
"Med Resp": 1455.0
}
},
"DeepSeek V3.1 Terminus (think)": {
"Overall": {
"Min": 123,
"Max": 36717,
"Med": 831.5,
"Med Resp": 377.0
},
"Content Generation": {
"Min": 152,
"Max": 11674,
"Med": 869.0,
"Med Resp": 540.5
},
"Editing": {
"Min": 153,
"Max": 13465,
"Med": 697.0,
"Med Resp": 323.0
},
"Data Analysis": {
"Min": 163,
"Max": 18833,
"Med": 731.0,
"Med Resp": 208.0
},
"Reasoning": {
"Min": 273,
"Max": 36717,
"Med": 1471.0,
"Med Resp": 337.5
},
"Hallucination": {
"Min": 183,
"Max": 8837,
"Med": 1170.0,
"Med Resp": 743.0
},
"Safety": {
"Min": 167,
"Max": 2442,
"Med": 721.0,
"Med Resp": 294.0
},
"Repetition": {
"Min": 308,
"Max": 9694,
"Med": 997.0,
"Med Resp": 508.0
},
"Summarization": {
"Min": 157,
"Max": 1562,
"Med": 460.0,
"Med Resp": 215.0
},
"Translation": {
"Min": 123,
"Max": 9706,
"Med": 617.5,
"Med Resp": 347.0
},
"Multi-Turn": {
"Min": 297,
"Max": 16318,
"Med": 2647.5,
"Med Resp": 1623.0
}
},
"Dhanishtha-2.0 Preview": {
"Overall": {
"Min": 93,
"Max": 6076,
"Med": 520.0,
"Med Resp": 356.0
},
"Content Generation": {
"Min": 102,
"Max": 3978,
"Med": 589.0,
"Med Resp": 439.0
},
"Editing": {
"Min": 116,
"Max": 1716,
"Med": 437.5,
"Med Resp": 301.5
},
"Data Analysis": {
"Min": 116,
"Max": 4070,
"Med": 470.0,
"Med Resp": 288.0
},
"Reasoning": {
"Min": 182,
"Max": 2451,
"Med": 625.0,
"Med Resp": 366.0
},
"Hallucination": {
"Min": 160,
"Max": 4068,
"Med": 494.0,
"Med Resp": 318.5
},
"Safety": {
"Min": 121,
"Max": 1470,
"Med": 381.0,
"Med Resp": 236.0
},
"Repetition": {
"Min": 240,
"Max": 3982,
"Med": 576.5,
"Med Resp": 384.5
},
"Summarization": {
"Min": 93,
"Max": 2578,
"Med": 385.0,
"Med Resp": 289.0
},
"Translation": {
"Min": 107,
"Max": 3331,
"Med": 426.0,
"Med Resp": 331.5
},
"Multi-Turn": {
"Min": 362,
"Max": 6076,
"Med": 1462.0,
"Med Resp": 1095.0
}
},
"GPT-5 (Reasoning: medium)": {
"Overall": {
"Min": -10,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Content Generation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Editing": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Data Analysis": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Reasoning": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Hallucination": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Safety": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Repetition": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Summarization": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Translation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Multi-Turn": {
"Min": -10,
"Max": -4,
"Med": -6.0,
"Med Resp": -3.0
}
},
"Kanana 1.5 15.7B A3B Instruct": {
"Overall": {
"Min": 1,
"Max": 34276,
"Med": 414.0,
"Med Resp": 414.0
},
"Content Generation": {
"Min": 10,
"Max": 22194,
"Med": 463.5,
"Med Resp": 463.5
},
"Editing": {
"Min": 5,
"Max": 1311,
"Med": 249.5,
"Med Resp": 249.5
},
"Data Analysis": {
"Min": 1,
"Max": 22211,
"Med": 396.0,
"Med Resp": 396.0
},
"Reasoning": {
"Min": 1,
"Max": 20275,
"Med": 581.0,
"Med Resp": 581.0
},
"Hallucination": {
"Min": 24,
"Max": 21645,
"Med": 441.5,
"Med Resp": 441.5
},
"Safety": {
"Min": 18,
"Max": 1531,
"Med": 414.0,
"Med Resp": 414.0
},
"Repetition": {
"Min": 76,
"Max": 1912,
"Med": 299.5,
"Med Resp": 299.5
},
"Summarization": {
"Min": 1,
"Max": 29578,
"Med": 275.5,
"Med Resp": 275.5
},
"Translation": {
"Min": 9,
"Max": 31839,
"Med": 308.5,
"Med Resp": 308.5
},
"Multi-Turn": {
"Min": 3,
"Max": 34276,
"Med": 1167.5,
"Med Resp": 1167.5
}
},
"DeepSeek V3 (0324) (top_p: 0.95, temp:1.3)": {
"Overall": {
"Min": 1,
"Max": 5178,
"Med": 408.0,
"Med Resp": 408.0
},
"Content Generation": {
"Min": 7,
"Max": 1974,
"Med": 439.5,
"Med Resp": 439.5
},
"Editing": {
"Min": 5,
"Max": 1192,
"Med": 293.0,
"Med Resp": 293.0
},
"Data Analysis": {
"Min": 1,
"Max": 3155,
"Med": 330.0,
"Med Resp": 330.0
},
"Reasoning": {
"Min": 63,
"Max": 5178,
"Med": 519.0,
"Med Resp": 519.0
},
"Hallucination": {
"Min": 57,
"Max": 1621,
"Med": 502.5,
"Med Resp": 502.5
},
"Safety": {
"Min": 12,
"Max": 1726,
"Med": 337.0,
"Med Resp": 337.0
},
"Repetition": {
"Min": 98,
"Max": 2754,
"Med": 406.5,
"Med Resp": 406.5
},
"Summarization": {
"Min": 32,
"Max": 959,
"Med": 251.0,
"Med Resp": 251.0
},
"Translation": {
"Min": 60,
"Max": 2197,
"Med": 351.5,
"Med Resp": 351.5
},
"Multi-Turn": {
"Min": 4,
"Max": 4959,
"Med": 1318.5,
"Med Resp": 1318.5
}
},
"GLM-4.6 FP8": {
"Overall": {
"Min": 126,
"Max": 23404,
"Med": 2645.5,
"Med Resp": 522.0
},
"Content Generation": {
"Min": 669,
"Max": 8438,
"Med": 2561.5,
"Med Resp": 687.5
},
"Editing": {
"Min": 976,
"Max": 4842,
"Med": 2395.5,
"Med Resp": 445.5
},
"Data Analysis": {
"Min": 487,
"Max": 9089,
"Med": 1743.0,
"Med Resp": 213.0
},
"Reasoning": {
"Min": 515,
"Max": 23404,
"Med": 2596.0,
"Med Resp": 697.0
},
"Hallucination": {
"Min": 838,
"Max": 10287,
"Med": 2426.0,
"Med Resp": 838.5
},
"Safety": {
"Min": 251,
"Max": 7182,
"Med": 2375.0,
"Med Resp": 621.0
},
"Repetition": {
"Min": 1115,
"Max": 9952,
"Med": 2572.5,
"Med Resp": 544.5
},
"Summarization": {
"Min": 956,
"Max": 6571,
"Med": 1978.5,
"Med Resp": 206.0
},
"Translation": {
"Min": 126,
"Max": 10588,
"Med": 3456.0,
"Med Resp": 421.5
},
"Multi-Turn": {
"Min": 1512,
"Max": 13662,
"Med": 6810.5,
"Med Resp": 2282.5
}
},
"DeepSeek V3.2 Speciale": {
"Overall": {
"Min": 160,
"Max": 65513,
"Med": 3226.5,
"Med Resp": 249.5
},
"Content Generation": {
"Min": 186,
"Max": 46347,
"Med": 3634.0,
"Med Resp": 364.0
},
"Editing": {
"Min": 329,
"Max": 24883,
"Med": 3043.0,
"Med Resp": 178.0
},
"Data Analysis": {
"Min": 191,
"Max": 64268,
"Med": 1640.0,
"Med Resp": 67.0
},
"Reasoning": {
"Min": 228,
"Max": 65472,
"Med": 2211.5,
"Med Resp": 165.0
},
"Hallucination": {
"Min": 373,
"Max": 23653,
"Med": 3253.5,
"Med Resp": 258.0
},
"Safety": {
"Min": 331,
"Max": 39236,
"Med": 2575.0,
"Med Resp": 158.0
},
"Repetition": {
"Min": 356,
"Max": 65513,
"Med": 3357.0,
"Med Resp": 246.0
},
"Summarization": {
"Min": 160,
"Max": 56309,
"Med": 1500.0,
"Med Resp": 189.5
},
"Translation": {
"Min": 522,
"Max": 25619,
"Med": 5143.5,
"Med Resp": 281.5
},
"Multi-Turn": {
"Min": 244,
"Max": 32258,
"Med": 4282.0,
"Med Resp": 854.0
}
},
"Claude 4.5 Opus (think, budget: 16K)": {
"Overall": {
"Min": -10,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Content Generation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Editing": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Data Analysis": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Reasoning": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Hallucination": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Safety": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Repetition": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Summarization": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Translation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Multi-Turn": {
"Min": -10,
"Max": -4,
"Med": -6.0,
"Med Resp": -3.0
}
},
"GLM-4.5 FP8": {
"Overall": {
"Min": 75,
"Max": 65432,
"Med": 1442.0,
"Med Resp": 604.0
},
"Content Generation": {
"Min": 322,
"Max": 9320,
"Med": 1283.0,
"Med Resp": 655.5
},
"Editing": {
"Min": 232,
"Max": 10227,
"Med": 1163.5,
"Med Resp": 571.0
},
"Data Analysis": {
"Min": 318,
"Max": 15748,
"Med": 1328.0,
"Med Resp": 481.0
},
"Reasoning": {
"Min": 558,
"Max": 65432,
"Med": 3187.5,
"Med Resp": 653.0
},
"Hallucination": {
"Min": 75,
"Max": 10541,
"Med": 1546.5,
"Med Resp": 962.5
},
"Safety": {
"Min": 159,
"Max": 5552,
"Med": 1418.0,
"Med Resp": 808.0
},
"Repetition": {
"Min": 284,
"Max": 65409,
"Med": 1492.0,
"Med Resp": 729.5
},
"Summarization": {
"Min": 242,
"Max": 3610,
"Med": 688.5,
"Med Resp": 268.0
},
"Translation": {
"Min": 156,
"Max": 10043,
"Med": 1448.5,
"Med Resp": 414.0
},
"Multi-Turn": {
"Min": 630,
"Max": 15831,
"Med": 3977.5,
"Med Resp": 2277.5
}
},
"Gauss2.3 Hybrid": {
"Overall": {
"Min": 7,
"Max": 134423,
"Med": 546.0,
"Med Resp": 308.0
},
"Content Generation": {
"Min": 16,
"Max": 6706,
"Med": 470.0,
"Med Resp": 416.5
},
"Editing": {
"Min": 9,
"Max": 2943,
"Med": 219.0,
"Med Resp": 188.5
},
"Data Analysis": {
"Min": 23,
"Max": 131072,
"Med": 585.0,
"Med Resp": 192.0
},
"Reasoning": {
"Min": 329,
"Max": 131072,
"Med": 2091.0,
"Med Resp": 387.0
},
"Hallucination": {
"Min": 20,
"Max": 131072,
"Med": 972.5,
"Med Resp": 387.0
},
"Safety": {
"Min": 20,
"Max": 131072,
"Med": 603.0,
"Med Resp": 270.0
},
"Repetition": {
"Min": 60,
"Max": 131085,
"Med": 869.5,
"Med Resp": 392.0
},
"Summarization": {
"Min": 26,
"Max": 2114,
"Med": 320.0,
"Med Resp": 208.0
},
"Translation": {
"Min": 7,
"Max": 71270,
"Med": 322.0,
"Med Resp": 273.0
},
"Multi-Turn": {
"Min": 7,
"Max": 134423,
"Med": 2478.5,
"Med Resp": 1208.5
}
},
"DeepSeek V3.2": {
"Overall": {
"Min": 134,
"Max": 22816,
"Med": 762.5,
"Med Resp": 312.0
},
"Content Generation": {
"Min": 153,
"Max": 5977,
"Med": 845.0,
"Med Resp": 462.0
},
"Editing": {
"Min": 141,
"Max": 6055,
"Med": 587.5,
"Med Resp": 245.5
},
"Data Analysis": {
"Min": 157,
"Max": 13414,
"Med": 695.0,
"Med Resp": 166.0
},
"Reasoning": {
"Min": 272,
"Max": 22816,
"Med": 1440.5,
"Med Resp": 245.0
},
"Hallucination": {
"Min": 213,
"Max": 9501,
"Med": 938.5,
"Med Resp": 532.5
},
"Safety": {
"Min": 184,
"Max": 5304,
"Med": 617.0,
"Med Resp": 238.0
},
"Repetition": {
"Min": 216,
"Max": 7227,
"Med": 919.5,
"Med Resp": 399.0
},
"Summarization": {
"Min": 134,
"Max": 1750,
"Med": 471.0,
"Med Resp": 197.5
},
"Translation": {
"Min": 154,
"Max": 6364,
"Med": 565.0,
"Med Resp": 301.0
},
"Multi-Turn": {
"Min": 401,
"Max": 14066,
"Med": 2538.5,
"Med Resp": 1261.0
}
},
"MiniMax-M2 (230B A10B)": {
"Overall": {
"Min": 64,
"Max": 28729,
"Med": 1142.0,
"Med Resp": 325.0
},
"Content Generation": {
"Min": 116,
"Max": 16249,
"Med": 1235.5,
"Med Resp": 501.5
},
"Editing": {
"Min": 111,
"Max": 11557,
"Med": 858.0,
"Med Resp": 201.0
},
"Data Analysis": {
"Min": 76,
"Max": 18529,
"Med": 834.0,
"Med Resp": 170.0
},
"Reasoning": {
"Min": 118,
"Max": 18596,
"Med": 1674.0,
"Med Resp": 418.5
},
"Hallucination": {
"Min": 92,
"Max": 8617,
"Med": 1130.0,
"Med Resp": 436.0
},
"Safety": {
"Min": 64,
"Max": 5803,
"Med": 563.0,
"Med Resp": 176.0
},
"Repetition": {
"Min": 175,
"Max": 14147,
"Med": 1054.5,
"Med Resp": 259.0
},
"Summarization": {
"Min": 135,
"Max": 15849,
"Med": 716.0,
"Med Resp": 197.5
},
"Translation": {
"Min": 216,
"Max": 22260,
"Med": 1133.0,
"Med Resp": 297.5
},
"Multi-Turn": {
"Min": 303,
"Max": 28729,
"Med": 3732.0,
"Med Resp": 1424.0
}
},
"gpt-oss-120B (Reasoning: medium)": {
"Overall": {
"Min": 43,
"Max": 18693,
"Med": 759.5,
"Med Resp": 370.5
},
"Content Generation": {
"Min": 126,
"Max": 6264,
"Med": 897.0,
"Med Resp": 613.5
},
"Editing": {
"Min": 61,
"Max": 4605,
"Med": 475.5,
"Med Resp": 248.5
},
"Data Analysis": {
"Min": 49,
"Max": 6975,
"Med": 596.0,
"Med Resp": 213.0
},
"Reasoning": {
"Min": 147,
"Max": 10387,
"Med": 1170.5,
"Med Resp": 635.0
},
"Hallucination": {
"Min": 88,
"Max": 5277,
"Med": 1317.0,
"Med Resp": 1106.5
},
"Safety": {
"Min": 43,
"Max": 3651,
"Med": 199.0,
"Med Resp": 12.0
},
"Repetition": {
"Min": 122,
"Max": 6986,
"Med": 940.0,
"Med Resp": 407.0
},
"Summarization": {
"Min": 83,
"Max": 15231,
"Med": 378.0,
"Med Resp": 246.0
},
"Translation": {
"Min": 107,
"Max": 3659,
"Med": 737.0,
"Med Resp": 299.5
},
"Multi-Turn": {
"Min": 135,
"Max": 18693,
"Med": 2826.0,
"Med Resp": 2150.0
}
},
"K2-Think": {
"Overall": {
"Min": 27,
"Max": 8178,
"Med": 1835.0,
"Med Resp": 486.0
},
"Content Generation": {
"Min": 138,
"Max": 2049,
"Med": 1821.5,
"Med Resp": 660.5
},
"Editing": {
"Min": 169,
"Max": 2054,
"Med": 1433.5,
"Med Resp": 283.5
},
"Data Analysis": {
"Min": 150,
"Max": 2053,
"Med": 1349.0,
"Med Resp": 264.0
},
"Reasoning": {
"Min": 419,
"Max": 2048,
"Med": 2045.5,
"Med Resp": 576.5
},
"Hallucination": {
"Min": 174,
"Max": 2054,
"Med": 1890.0,
"Med Resp": 522.5
},
"Safety": {
"Min": 27,
"Max": 2048,
"Med": 1393.0,
"Med Resp": 405.0
},
"Repetition": {
"Min": 870,
"Max": 2070,
"Med": 2048.0,
"Med Resp": 2048.0
},
"Summarization": {
"Min": 252,
"Max": 2053,
"Med": 1011.0,
"Med Resp": 262.5
},
"Translation": {
"Min": 195,
"Max": 2051,
"Med": 2006.0,
"Med Resp": 371.5
},
"Multi-Turn": {
"Min": 110,
"Max": 8178,
"Med": 3224.0,
"Med Resp": 1526.0
}
},
"MiMo V2 Flash": {
"Overall": {
"Min": 125,
"Max": 69375,
"Med": 1477.5,
"Med Resp": 373.0
},
"Content Generation": {
"Min": 222,
"Max": 65445,
"Med": 1321.5,
"Med Resp": 500.5
},
"Editing": {
"Min": 265,
"Max": 65423,
"Med": 1194.0,
"Med Resp": 314.0
},
"Data Analysis": {
"Min": 262,
"Max": 65439,
"Med": 1296.0,
"Med Resp": 235.0
},
"Reasoning": {
"Min": 319,
"Max": 65430,
"Med": 2559.5,
"Med Resp": 402.5
},
"Hallucination": {
"Min": 129,
"Max": 65447,
"Med": 1179.5,
"Med Resp": 499.0
},
"Safety": {
"Min": 133,
"Max": 5184,
"Med": 717.0,
"Med Resp": 294.0
},
"Repetition": {
"Min": 295,
"Max": 65472,
"Med": 2153.5,
"Med Resp": 573.5
},
"Summarization": {
"Min": 188,
"Max": 64302,
"Med": 789.5,
"Med Resp": 220.5
},
"Translation": {
"Min": 125,
"Max": 65041,
"Med": 1738.5,
"Med Resp": 339.5
},
"Multi-Turn": {
"Min": 323,
"Max": 69375,
"Med": 3331.5,
"Med Resp": 1361.0
}
},
"Qwen3 32B (think)": {
"Overall": {
"Min": 164,
"Max": 34272,
"Med": 1113.0,
"Med Resp": 390.0
},
"Content Generation": {
"Min": 164,
"Max": 32768,
"Med": 1027.5,
"Med Resp": 476.0
},
"Editing": {
"Min": 285,
"Max": 3646,
"Med": 843.0,
"Med Resp": 283.0
},
"Data Analysis": {
"Min": 210,
"Max": 18774,
"Med": 968.0,
"Med Resp": 278.0
},
"Reasoning": {
"Min": 477,
"Max": 18676,
"Med": 1759.0,
"Med Resp": 459.0
},
"Hallucination": {
"Min": 170,
"Max": 3776,
"Med": 1617.0,
"Med Resp": 646.0
},
"Safety": {
"Min": 169,
"Max": 4053,
"Med": 940.0,
"Med Resp": 429.0
},
"Repetition": {
"Min": 608,
"Max": 32768,
"Med": 2316.5,
"Med Resp": 537.5
},
"Summarization": {
"Min": 192,
"Max": 2255,
"Med": 586.0,
"Med Resp": 236.5
},
"Translation": {
"Min": 374,
"Max": 10683,
"Med": 1113.5,
"Med Resp": 307.0
},
"Multi-Turn": {
"Min": 493,
"Max": 34272,
"Med": 3210.0,
"Med Resp": 1481.0
}
},
"ERNIE 4.5 21B A3B Thinking": {
"Overall": {
"Min": 186,
"Max": 66114,
"Med": 1637.0,
"Med Resp": 541.0
},
"Content Generation": {
"Min": 302,
"Max": 12760,
"Med": 1586.5,
"Med Resp": 654.5
},
"Editing": {
"Min": 186,
"Max": 8703,
"Med": 1119.5,
"Med Resp": 336.0
},
"Data Analysis": {
"Min": 200,
"Max": 31928,
"Med": 1484.0,
"Med Resp": 418.0
},
"Reasoning": {
"Min": 511,
"Max": 29184,
"Med": 5312.0,
"Med Resp": 669.5
},
"Hallucination": {
"Min": 313,
"Max": 11452,
"Med": 1716.0,
"Med Resp": 797.5
},
"Safety": {
"Min": 213,
"Max": 6914,
"Med": 1242.0,
"Med Resp": 599.0
},
"Repetition": {
"Min": 643,
"Max": 65463,
"Med": 2387.0,
"Med Resp": 516.5
},
"Summarization": {
"Min": 215,
"Max": 12449,
"Med": 884.0,
"Med Resp": 269.5
},
"Translation": {
"Min": 298,
"Max": 19672,
"Med": 1466.5,
"Med Resp": 421.5
},
"Multi-Turn": {
"Min": 705,
"Max": 66114,
"Med": 4404.5,
"Med Resp": 1819.0
}
},
"Qwen3 235B A22B Instruct 2507": {
"Overall": {
"Min": 1,
"Max": 65405,
"Med": 433.0,
"Med Resp": 433.0
},
"Content Generation": {
"Min": 7,
"Max": 4604,
"Med": 492.5,
"Med Resp": 492.5
},
"Editing": {
"Min": 6,
"Max": 2067,
"Med": 248.5,
"Med Resp": 248.5
},
"Data Analysis": {
"Min": 1,
"Max": 5119,
"Med": 357.0,
"Med Resp": 357.0
},
"Reasoning": {
"Min": 1,
"Max": 11933,
"Med": 730.5,
"Med Resp": 730.5
},
"Hallucination": {
"Min": 38,
"Max": 2395,
"Med": 630.0,
"Med Resp": 630.0
},
"Safety": {
"Min": 12,
"Max": 2497,
"Med": 352.0,
"Med Resp": 352.0
},
"Repetition": {
"Min": 73,
"Max": 65405,
"Med": 468.5,
"Med Resp": 468.5
},
"Summarization": {
"Min": 24,
"Max": 1899,
"Med": 249.0,
"Med Resp": 249.0
},
"Translation": {
"Min": 10,
"Max": 64183,
"Med": 299.0,
"Med Resp": 299.0
},
"Multi-Turn": {
"Min": 3,
"Max": 8009,
"Med": 1728.5,
"Med Resp": 1728.5
}
},
"Grok-4": {
"Overall": {
"Min": -10,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Content Generation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Editing": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Data Analysis": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Reasoning": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Hallucination": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Safety": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Repetition": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Summarization": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Translation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Multi-Turn": {
"Min": -10,
"Max": -4,
"Med": -6.0,
"Med Resp": -3.0
}
},
"GPT-5.2 (Reasoning: medium)": {
"Overall": {
"Min": 11,
"Max": 7735,
"Med": 347.0,
"Med Resp": 264.0
},
"Content Generation": {
"Min": 12,
"Max": 7735,
"Med": 537.0,
"Med Resp": 370.0
},
"Editing": {
"Min": 11,
"Max": 1562,
"Med": 173.5,
"Med Resp": 166.0
},
"Data Analysis": {
"Min": 18,
"Max": 3954,
"Med": 222.0,
"Med Resp": 98.0
},
"Reasoning": {
"Min": 29,
"Max": 6895,
"Med": 445.5,
"Med Resp": 246.5
},
"Hallucination": {
"Min": 72,
"Max": 3525,
"Med": 633.0,
"Med Resp": 357.5
},
"Safety": {
"Min": 58,
"Max": 2808,
"Med": 434.0,
"Med Resp": 285.0
},
"Repetition": {
"Min": 34,
"Max": 5202,
"Med": 272.0,
"Med Resp": 223.0
},
"Summarization": {
"Min": 37,
"Max": 2339,
"Med": 201.0,
"Med Resp": 194.5
},
"Translation": {
"Min": 12,
"Max": 3684,
"Med": 307.0,
"Med Resp": 283.5
},
"Multi-Turn": {
"Min": 41,
"Max": 7003,
"Med": 983.5,
"Med Resp": 844.5
}
},
"Gemini 3 Flash Preview (Thinking Level: High)": {
"Overall": {
"Min": 137,
"Max": 24472,
"Med": 1296.5,
"Med Resp": 424.5
},
"Content Generation": {
"Min": 248,
"Max": 16374,
"Med": 1368.5,
"Med Resp": 535.5
},
"Editing": {
"Min": 137,
"Max": 10610,
"Med": 1113.5,
"Med Resp": 338.0
},
"Data Analysis": {
"Min": 166,
"Max": 13595,
"Med": 923.0,
"Med Resp": 232.0
},
"Reasoning": {
"Min": 318,
"Max": 24472,
"Med": 1210.5,
"Med Resp": 556.0
},
"Hallucination": {
"Min": 349,
"Max": 5023,
"Med": 1295.5,
"Med Resp": 639.5
},
"Safety": {
"Min": 380,
"Max": 5510,
"Med": 1297.0,
"Med Resp": 482.0
},
"Repetition": {
"Min": 309,
"Max": 7743,
"Med": 1477.5,
"Med Resp": 389.5
},
"Summarization": {
"Min": 306,
"Max": 18709,
"Med": 905.5,
"Med Resp": 195.0
},
"Translation": {
"Min": 289,
"Max": 17871,
"Med": 1421.0,
"Med Resp": 381.5
},
"Multi-Turn": {
"Min": 231,
"Max": 11926,
"Med": 3075.5,
"Med Resp": 1466.5
}
},
"Kanana 2 30B A3B Thinking": {
"Overall": {
"Min": 584,
"Max": 247274,
"Med": 4263.0,
"Med Resp": 854.5
},
"Content Generation": {
"Min": 1055,
"Max": 139421,
"Med": 3898.5,
"Med Resp": 1028.0
},
"Editing": {
"Min": 747,
"Max": 134253,
"Med": 3199.0,
"Med Resp": 606.5
},
"Data Analysis": {
"Min": 618,
"Max": 120325,
"Med": 3402.0,
"Med Resp": 509.0
},
"Reasoning": {
"Min": 1042,
"Max": 160440,
"Med": 6428.5,
"Med Resp": 925.5
},
"Hallucination": {
"Min": 760,
"Max": 137639,
"Med": 4215.0,
"Med Resp": 1061.5
},
"Safety": {
"Min": 787,
"Max": 116591,
"Med": 3686.0,
"Med Resp": 867.0
},
"Repetition": {
"Min": 1238,
"Max": 134651,
"Med": 8164.0,
"Med Resp": 517.5
},
"Summarization": {
"Min": 584,
"Max": 59519,
"Med": 2540.0,
"Med Resp": 656.5
},
"Translation": {
"Min": 899,
"Max": 131258,
"Med": 4796.0,
"Med Resp": 894.0
},
"Multi-Turn": {
"Min": 1560,
"Max": 247274,
"Med": 12632.5,
"Med Resp": 2593.0
}
},
"Kanana 2 30B A3B Instruct": {
"Overall": {
"Min": 51,
"Max": 177683,
"Med": 1195.0,
"Med Resp": 1195.0
},
"Content Generation": {
"Min": 58,
"Max": 12603,
"Med": 1448.0,
"Med Resp": 1448.0
},
"Editing": {
"Min": 69,
"Max": 51628,
"Med": 836.5,
"Med Resp": 836.5
},
"Data Analysis": {
"Min": 51,
"Max": 11567,
"Med": 916.0,
"Med Resp": 916.0
},
"Reasoning": {
"Min": 51,
"Max": 122001,
"Med": 1587.0,
"Med Resp": 1587.0
},
"Hallucination": {
"Min": 104,
"Max": 17989,
"Med": 1419.0,
"Med Resp": 1419.0
},
"Safety": {
"Min": 96,
"Max": 7485,
"Med": 1377.0,
"Med Resp": 1377.0
},
"Repetition": {
"Min": 255,
"Max": 177683,
"Med": 844.0,
"Med Resp": 844.0
},
"Summarization": {
"Min": 108,
"Max": 4592,
"Med": 778.0,
"Med Resp": 778.0
},
"Translation": {
"Min": 69,
"Max": 30611,
"Med": 1059.0,
"Med Resp": 1059.0
},
"Multi-Turn": {
"Min": 119,
"Max": 74203,
"Med": 3252.5,
"Med Resp": 3252.5
}
},
"Claude 4 Opus (20250514) (think)": {
"Overall": {
"Min": -10,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Content Generation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Editing": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Data Analysis": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Reasoning": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Hallucination": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Safety": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Repetition": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Summarization": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Translation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Multi-Turn": {
"Min": -10,
"Max": -4,
"Med": -6.0,
"Med Resp": -3.0
}
},
"Gemini 2.5 Pro": {
"Overall": {
"Min": -10,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Content Generation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Editing": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Data Analysis": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Reasoning": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Hallucination": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Safety": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Repetition": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Summarization": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Translation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Multi-Turn": {
"Min": -10,
"Max": -4,
"Med": -6.0,
"Med Resp": -3.0
}
},
"Tongyi DeepResearch 30B A3B": {
"Overall": {
"Min": 153,
"Max": 68912,
"Med": 1147.0,
"Med Resp": 408.0
},
"Content Generation": {
"Min": 216,
"Max": 65477,
"Med": 1086.5,
"Med Resp": 510.5
},
"Editing": {
"Min": 251,
"Max": 65470,
"Med": 985.5,
"Med Resp": 313.0
},
"Data Analysis": {
"Min": 242,
"Max": 65499,
"Med": 998.0,
"Med Resp": 239.0
},
"Reasoning": {
"Min": 333,
"Max": 65477,
"Med": 2043.5,
"Med Resp": 388.5
},
"Hallucination": {
"Min": 194,
"Max": 65501,
"Med": 1344.5,
"Med Resp": 593.0
},
"Safety": {
"Min": 153,
"Max": 65472,
"Med": 992.0,
"Med Resp": 392.0
},
"Repetition": {
"Min": 425,
"Max": 65513,
"Med": 1986.5,
"Med Resp": 472.5
},
"Summarization": {
"Min": 290,
"Max": 2410,
"Med": 662.5,
"Med Resp": 262.0
},
"Translation": {
"Min": 360,
"Max": 65406,
"Med": 1107.0,
"Med Resp": 317.5
},
"Multi-Turn": {
"Min": 240,
"Max": 68912,
"Med": 3134.5,
"Med Resp": 1349.5
}
},
"GPT-5 mini (Reasoning: medium)": {
"Overall": {
"Min": -10,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Content Generation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Editing": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Data Analysis": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Reasoning": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Hallucination": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Safety": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Repetition": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Summarization": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Translation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Multi-Turn": {
"Min": -10,
"Max": -4,
"Med": -6.0,
"Med Resp": -3.0
}
},
"Gemma 3 27B it": {
"Overall": {
"Min": 1,
"Max": 65458,
"Med": 380.0,
"Med Resp": 380.0
},
"Content Generation": {
"Min": 7,
"Max": 3893,
"Med": 484.0,
"Med Resp": 484.0
},
"Editing": {
"Min": 6,
"Max": 1776,
"Med": 254.0,
"Med Resp": 254.0
},
"Data Analysis": {
"Min": 1,
"Max": 63850,
"Med": 180.0,
"Med Resp": 180.0
},
"Reasoning": {
"Min": 2,
"Max": 1926,
"Med": 485.5,
"Med Resp": 485.5
},
"Hallucination": {
"Min": 13,
"Max": 2494,
"Med": 534.0,
"Med Resp": 534.0
},
"Safety": {
"Min": 31,
"Max": 2440,
"Med": 518.0,
"Med Resp": 518.0
},
"Repetition": {
"Min": 95,
"Max": 65433,
"Med": 299.0,
"Med Resp": 299.0
},
"Summarization": {
"Min": 30,
"Max": 1080,
"Med": 202.5,
"Med Resp": 202.5
},
"Translation": {
"Min": 46,
"Max": 62659,
"Med": 374.0,
"Med Resp": 374.0
},
"Multi-Turn": {
"Min": 4,
"Max": 65458,
"Med": 1558.0,
"Med Resp": 1558.0
}
},
"GLM-4.7 FP8": {
"Overall": {
"Min": 212,
"Max": 131072,
"Med": 2252.5,
"Med Resp": 328.0
},
"Content Generation": {
"Min": 383,
"Max": 18712,
"Med": 2094.0,
"Med Resp": 423.0
},
"Editing": {
"Min": 384,
"Max": 14538,
"Med": 2070.5,
"Med Resp": 263.0
},
"Data Analysis": {
"Min": 396,
"Max": 13525,
"Med": 1477.0,
"Med Resp": 162.0
},
"Reasoning": {
"Min": 254,
"Max": 40295,
"Med": 2298.5,
"Med Resp": 465.5
},
"Hallucination": {
"Min": 443,
"Max": 19838,
"Med": 2156.5,
"Med Resp": 481.0
},
"Safety": {
"Min": 212,
"Max": 10792,
"Med": 2121.0,
"Med Resp": 197.0
},
"Repetition": {
"Min": 768,
"Max": 131072,
"Med": 2963.5,
"Med Resp": 289.0
},
"Summarization": {
"Min": 599,
"Max": 10452,
"Med": 1426.0,
"Med Resp": 182.5
},
"Translation": {
"Min": 796,
"Max": 12247,
"Med": 3159.5,
"Med Resp": 312.5
},
"Multi-Turn": {
"Min": 420,
"Max": 15706,
"Med": 5514.5,
"Med Resp": 1361.0
}
},
"GPT-5 nano (Reasoning: medium)": {
"Overall": {
"Min": -10,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Content Generation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Editing": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Data Analysis": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Reasoning": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Hallucination": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Safety": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Repetition": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Summarization": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Translation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Multi-Turn": {
"Min": -10,
"Max": -4,
"Med": -6.0,
"Med Resp": -3.0
}
},
"GPT-5.1 (Reasoning: medium, verbosity: medium)": {
"Overall": {
"Min": -10,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Content Generation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Editing": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Data Analysis": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Reasoning": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Hallucination": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Safety": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Repetition": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Summarization": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Translation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Multi-Turn": {
"Min": -10,
"Max": -4,
"Med": -6.0,
"Med Resp": -3.0
}
},
"KAT Dev 72B Exp": {
"Overall": {
"Min": 6,
"Max": 65602,
"Med": 397.0,
"Med Resp": 397.0
},
"Content Generation": {
"Min": 26,
"Max": 65466,
"Med": 554.5,
"Med Resp": 554.5
},
"Editing": {
"Min": 13,
"Max": 65363,
"Med": 223.0,
"Med Resp": 223.0
},
"Data Analysis": {
"Min": 21,
"Max": 15350,
"Med": 289.0,
"Med Resp": 289.0
},
"Reasoning": {
"Min": 10,
"Max": 65442,
"Med": 487.5,
"Med Resp": 487.5
},
"Hallucination": {
"Min": 24,
"Max": 65455,
"Med": 402.0,
"Med Resp": 402.0
},
"Safety": {
"Min": 17,
"Max": 65474,
"Med": 345.0,
"Med Resp": 345.0
},
"Repetition": {
"Min": 96,
"Max": 65602,
"Med": 405.0,
"Med Resp": 405.0
},
"Summarization": {
"Min": 39,
"Max": 65376,
"Med": 292.0,
"Med Resp": 292.0
},
"Translation": {
"Min": 10,
"Max": 65331,
"Med": 339.0,
"Med Resp": 339.0
},
"Multi-Turn": {
"Min": 6,
"Max": 65466,
"Med": 1083.5,
"Med Resp": 1083.5
}
},
"gpt-oss-20B (Reasoning: medium)": {
"Overall": {
"Min": 32,
"Max": 18763,
"Med": 953.5,
"Med Resp": 326.0
},
"Content Generation": {
"Min": 126,
"Max": 6343,
"Med": 983.5,
"Med Resp": 486.5
},
"Editing": {
"Min": 107,
"Max": 7213,
"Med": 667.0,
"Med Resp": 195.0
},
"Data Analysis": {
"Min": 94,
"Max": 14599,
"Med": 750.0,
"Med Resp": 192.0
},
"Reasoning": {
"Min": 109,
"Max": 18763,
"Med": 1290.5,
"Med Resp": 475.5
},
"Hallucination": {
"Min": 132,
"Max": 7937,
"Med": 1493.5,
"Med Resp": 620.5
},
"Safety": {
"Min": 32,
"Max": 6678,
"Med": 268.0,
"Med Resp": 12.0
},
"Repetition": {
"Min": 258,
"Max": 17217,
"Med": 1847.0,
"Med Resp": 332.5
},
"Summarization": {
"Min": 99,
"Max": 4060,
"Med": 438.5,
"Med Resp": 219.0
},
"Translation": {
"Min": 133,
"Max": 10446,
"Med": 1028.5,
"Med Resp": 290.0
},
"Multi-Turn": {
"Min": 102,
"Max": 14863,
"Med": 2483.0,
"Med Resp": 1514.0
}
},
"o3-pro (Reasoning: medium)": {
"Overall": {
"Min": -10,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Content Generation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Editing": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Data Analysis": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Reasoning": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Hallucination": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Safety": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Repetition": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Summarization": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Translation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Multi-Turn": {
"Min": -10,
"Max": -4,
"Med": -6.0,
"Med Resp": -3.0
}
},
"Apriel 1.5 15B Thinker": {
"Overall": {
"Min": 118,
"Max": 74664,
"Med": 2238.0,
"Med Resp": 375.0
},
"Content Generation": {
"Min": 216,
"Max": 65428,
"Med": 2397.0,
"Med Resp": 565.5
},
"Editing": {
"Min": 318,
"Max": 65412,
"Med": 1577.0,
"Med Resp": 247.0
},
"Data Analysis": {
"Min": 213,
"Max": 65413,
"Med": 1562.0,
"Med Resp": 225.0
},
"Reasoning": {
"Min": 307,
"Max": 65372,
"Med": 2393.5,
"Med Resp": 548.0
},
"Hallucination": {
"Min": 219,
"Max": 65421,
"Med": 2986.5,
"Med Resp": 1016.5
},
"Safety": {
"Min": 118,
"Max": 65407,
"Med": 380.0,
"Med Resp": 12.0
},
"Repetition": {
"Min": 181,
"Max": 65431,
"Med": 65362.5,
"Med Resp": 65328.5
},
"Summarization": {
"Min": 252,
"Max": 65065,
"Med": 981.5,
"Med Resp": 238.5
},
"Translation": {
"Min": 297,
"Max": 65781,
"Med": 3477.0,
"Med Resp": 342.0
},
"Multi-Turn": {
"Min": 378,
"Max": 74664,
"Med": 6692.0,
"Med Resp": 2079.0
}
},
"Gemini 2.5 Flash-lite Preview (09-2025)": {
"Overall": {
"Min": -10,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Content Generation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Editing": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Data Analysis": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Reasoning": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Hallucination": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Safety": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Repetition": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Summarization": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Translation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Multi-Turn": {
"Min": -10,
"Max": -4,
"Med": -6.0,
"Med Resp": -3.0
}
},
"Mistral Small 3.2 24B Instruct 2506": {
"Overall": {
"Min": 1,
"Max": 65516,
"Med": 369.0,
"Med Resp": 369.0
},
"Content Generation": {
"Min": 7,
"Max": 2684,
"Med": 389.5,
"Med Resp": 389.5
},
"Editing": {
"Min": 9,
"Max": 1172,
"Med": 269.0,
"Med Resp": 269.0
},
"Data Analysis": {
"Min": 1,
"Max": 3973,
"Med": 295.0,
"Med Resp": 295.0
},
"Reasoning": {
"Min": 1,
"Max": 65462,
"Med": 484.5,
"Med Resp": 484.5
},
"Hallucination": {
"Min": 61,
"Max": 5920,
"Med": 489.0,
"Med Resp": 489.0
},
"Safety": {
"Min": 10,
"Max": 65465,
"Med": 320.0,
"Med Resp": 320.0
},
"Repetition": {
"Min": 103,
"Max": 65516,
"Med": 376.5,
"Med Resp": 376.5
},
"Summarization": {
"Min": 28,
"Max": 1266,
"Med": 234.5,
"Med Resp": 234.5
},
"Translation": {
"Min": 9,
"Max": 3248,
"Med": 327.0,
"Med Resp": 327.0
},
"Multi-Turn": {
"Min": 4,
"Max": 65494,
"Med": 1279.0,
"Med Resp": 1279.0
}
},
"Mistral Large 3 675B Instruct 2512": {
"Overall": {
"Min": 1,
"Max": 12120,
"Med": 448.0,
"Med Resp": 448.0
},
"Content Generation": {
"Min": 13,
"Max": 6162,
"Med": 565.0,
"Med Resp": 565.0
},
"Editing": {
"Min": 12,
"Max": 2369,
"Med": 299.0,
"Med Resp": 299.0
},
"Data Analysis": {
"Min": 1,
"Max": 3902,
"Med": 295.0,
"Med Resp": 295.0
},
"Reasoning": {
"Min": 1,
"Max": 6293,
"Med": 530.0,
"Med Resp": 530.0
},
"Hallucination": {
"Min": 54,
"Max": 4461,
"Med": 896.0,
"Med Resp": 896.0
},
"Safety": {
"Min": 27,
"Max": 4250,
"Med": 589.0,
"Med Resp": 589.0
},
"Repetition": {
"Min": 89,
"Max": 5264,
"Med": 448.0,
"Med Resp": 448.0
},
"Summarization": {
"Min": 31,
"Max": 1357,
"Med": 251.5,
"Med Resp": 251.5
},
"Translation": {
"Min": 22,
"Max": 3529,
"Med": 354.5,
"Med Resp": 354.5
},
"Multi-Turn": {
"Min": 4,
"Max": 12120,
"Med": 2191.5,
"Med Resp": 2191.5
}
},
"Mi:dm 2.0 Base Instruct": {
"Overall": {
"Min": 1,
"Max": 32764,
"Med": 316.0,
"Med Resp": 316.0
},
"Content Generation": {
"Min": 7,
"Max": 3515,
"Med": 400.0,
"Med Resp": 400.0
},
"Editing": {
"Min": 10,
"Max": 1998,
"Med": 191.0,
"Med Resp": 191.0
},
"Data Analysis": {
"Min": 1,
"Max": 3302,
"Med": 260.0,
"Med Resp": 260.0
},
"Reasoning": {
"Min": 1,
"Max": 32071,
"Med": 398.0,
"Med Resp": 398.0
},
"Hallucination": {
"Min": 13,
"Max": 3061,
"Med": 191.5,
"Med Resp": 191.5
},
"Safety": {
"Min": 10,
"Max": 1110,
"Med": 159.0,
"Med Resp": 159.0
},
"Repetition": {
"Min": 50,
"Max": 2734,
"Med": 316.5,
"Med Resp": 316.5
},
"Summarization": {
"Min": 35,
"Max": 2967,
"Med": 261.0,
"Med Resp": 261.0
},
"Translation": {
"Min": 7,
"Max": 4703,
"Med": 289.5,
"Med Resp": 289.5
},
"Multi-Turn": {
"Min": 3,
"Max": 32764,
"Med": 957.0,
"Med Resp": 957.0
}
},
"Qwen3 235B A22B Thinking 2507": {
"Overall": {
"Min": 8,
"Max": 19533,
"Med": 2404.5,
"Med Resp": 423.0
},
"Content Generation": {
"Min": 402,
"Max": 13776,
"Med": 2337.0,
"Med Resp": 577.5
},
"Editing": {
"Min": 482,
"Max": 13235,
"Med": 1894.5,
"Med Resp": 274.5
},
"Data Analysis": {
"Min": 8,
"Max": 13217,
"Med": 1427.0,
"Med Resp": 303.0
},
"Reasoning": {
"Min": 8,
"Max": 19533,
"Med": 2340.0,
"Med Resp": 568.5
},
"Hallucination": {
"Min": 305,
"Max": 6670,
"Med": 2005.0,
"Med Resp": 848.0
},
"Safety": {
"Min": 304,
"Max": 8302,
"Med": 1708.0,
"Med Resp": 619.0
},
"Repetition": {
"Min": 8,
"Max": 11012,
"Med": 3533.0,
"Med Resp": 514.5
},
"Summarization": {
"Min": 373,
"Max": 11701,
"Med": 1468.5,
"Med Resp": 233.5
},
"Translation": {
"Min": 381,
"Max": 12124,
"Med": 3332.5,
"Med Resp": 284.0
},
"Multi-Turn": {
"Min": 721,
"Max": 19299,
"Med": 5745.0,
"Med Resp": 1736.5
}
},
"HyperCLOVAX SEED Think 14B (think)": {
"Overall": {
"Min": 223,
"Max": 131436,
"Med": 1444.0,
"Med Resp": 382.5
},
"Content Generation": {
"Min": 279,
"Max": 72029,
"Med": 1222.0,
"Med Resp": 476.5
},
"Editing": {
"Min": 304,
"Max": 65536,
"Med": 1228.5,
"Med Resp": 351.0
},
"Data Analysis": {
"Min": 240,
"Max": 65536,
"Med": 1352.0,
"Med Resp": 234.0
},
"Reasoning": {
"Min": 414,
"Max": 65536,
"Med": 3010.0,
"Med Resp": 315.0
},
"Hallucination": {
"Min": 263,
"Max": 65536,
"Med": 1310.5,
"Med Resp": 444.0
},
"Safety": {
"Min": 241,
"Max": 65536,
"Med": 1100.0,
"Med Resp": 412.0
},
"Repetition": {
"Min": 389,
"Max": 65536,
"Med": 2233.0,
"Med Resp": 355.0
},
"Summarization": {
"Min": 223,
"Max": 5987,
"Med": 833.5,
"Med Resp": 285.0
},
"Translation": {
"Min": 457,
"Max": 65536,
"Med": 1611.0,
"Med Resp": 352.0
},
"Multi-Turn": {
"Min": 648,
"Max": 131436,
"Med": 3234.5,
"Med Resp": 1324.5
}
},
"o3": {
"Overall": {
"Min": -10,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Content Generation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Editing": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Data Analysis": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Reasoning": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Hallucination": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Safety": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Repetition": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Summarization": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Translation": {
"Min": -2,
"Max": -2,
"Med": -2.0,
"Med Resp": -1.0
},
"Multi-Turn": {
"Min": -10,
"Max": -4,
"Med": -6.0,
"Med Resp": -3.0
}
},
"Qwen3 30B A3B Instruct 2507": {
"Overall": {
"Min": 1,
"Max": 65516,
"Med": 441.5,
"Med Resp": 441.5
},
"Content Generation": {
"Min": 7,
"Max": 5659,
"Med": 510.5,
"Med Resp": 510.5
},
"Editing": {
"Min": 7,
"Max": 2231,
"Med": 255.0,
"Med Resp": 255.0
},
"Data Analysis": {
"Min": 1,
"Max": 8094,
"Med": 381.0,
"Med Resp": 381.0
},
"Reasoning": {
"Min": 1,
"Max": 9376,
"Med": 753.5,
"Med Resp": 753.5
},
"Hallucination": {
"Min": 19,
"Max": 65495,
"Med": 689.5,
"Med Resp": 689.5
},
"Safety": {
"Min": 16,
"Max": 65456,
"Med": 445.0,
"Med Resp": 445.0
},
"Repetition": {
"Min": 81,
"Max": 65516,
"Med": 533.5,
"Med Resp": 533.5
},
"Summarization": {
"Min": 34,
"Max": 1870,
"Med": 251.0,
"Med Resp": 251.0
},
"Translation": {
"Min": 8,
"Max": 3257,
"Med": 292.5,
"Med Resp": 292.5
},
"Multi-Turn": {
"Min": 3,
"Max": 6825,
"Med": 1809.5,
"Med Resp": 1809.5
}
},
"Kimi K2 Thinking": {
"Overall": {
"Min": 115,
"Max": 65500,
"Med": 1692.0,
"Med Resp": 330.0
},
"Content Generation": {
"Min": 115,
"Max": 29508,
"Med": 1696.0,
"Med Resp": 478.0
},
"Editing": {
"Min": 302,
"Max": 11808,
"Med": 1347.5,
"Med Resp": 219.0
},
"Data Analysis": {
"Min": 186,
"Max": 65462,
"Med": 978.0,
"Med Resp": 156.0
},
"Reasoning": {
"Min": 291,
"Max": 55791,
"Med": 1842.0,
"Med Resp": 377.0
},
"Hallucination": {
"Min": 194,
"Max": 5063,
"Med": 1140.5,
"Med Resp": 382.5
},
"Safety": {
"Min": 171,
"Max": 5707,
"Med": 1013.0,
"Med Resp": 395.0
},
"Repetition": {
"Min": 236,
"Max": 65500,
"Med": 1890.0,
"Med Resp": 264.0
},
"Summarization": {
"Min": 276,
"Max": 13220,
"Med": 996.0,
"Med Resp": 196.5
},
"Translation": {
"Min": 433,
"Max": 13703,
"Med": 2637.0,
"Med Resp": 310.5
},
"Multi-Turn": {
"Min": 333,
"Max": 17384,
"Med": 3771.5,
"Med Resp": 1102.0
}
}
}