{
"Mistral-3.1-24B-2503": {
"comprehension_score": 4.707142857142857,
"structure_score": 4.796428571428572,
"prose_style_score": 4.2178571428571425,
"creativity_score": 3.914285714285714,
"depth_score": 3.9035714285714285,
"helpfulness_score": 4.242857142857143,
"overall_score": 4.203571428571428
},
"Phi-4-14B": {
"comprehension_score": 4.735714285714286,
"structure_score": 4.889285714285714,
"prose_style_score": 4.460714285714285,
"creativity_score": 4.242857142857143,
"depth_score": 4.178571428571429,
"helpfulness_score": 4.317857142857143,
"overall_score": 4.296428571428572
},
"gpt-4o-mini-2024-07-18": {
"comprehension_score": 4.760714285714286,
"structure_score": 4.9,
"prose_style_score": 4.542857142857143,
"creativity_score": 4.289285714285715,
"depth_score": 4.082142857142857,
"helpfulness_score": 4.35,
"overall_score": 4.321428571428571
},
"gpt-4.1-mini-2025-04-14": {
"comprehension_score": 5.042857142857143,
"structure_score": 4.935714285714286,
"prose_style_score": 4.675,
"creativity_score": 4.335714285714285,
"depth_score": 4.2,
"helpfulness_score": 4.564285714285714,
"overall_score": 4.435714285714286
},
"Qwen3-30B-A3B": {
"comprehension_score": 5.146428571428571,
"structure_score": 5.289285714285715,
"prose_style_score": 4.853571428571429,
"creativity_score": 4.714285714285714,
"depth_score": 4.5321428571428575,
"helpfulness_score": 4.925,
"overall_score": 4.814285714285714
},
"Qwen3-8B": {
"comprehension_score": 5.146428571428571,
"structure_score": 5.260714285714286,
"prose_style_score": 4.910714285714286,
"creativity_score": 4.689285714285714,
"depth_score": 4.496428571428571,
"helpfulness_score": 4.8464285714285715,
"overall_score": 4.817857142857143
},
"gpt-4o-2024-11-20": {
"comprehension_score": 5.2785714285714285,
"structure_score": 5.3464285714285715,
"prose_style_score": 5.207142857142857,
"creativity_score": 5.082142857142857,
"depth_score": 4.746428571428571,
"helpfulness_score": 5.010714285714286,
"overall_score": 4.985714285714286
},
"gemma3-27b": {
"comprehension_score": 5.428571428571429,
"structure_score": 5.535714285714286,
"prose_style_score": 5.0285714285714285,
"creativity_score": 5.121428571428571,
"depth_score": 4.8464285714285715,
"helpfulness_score": 5.2178571428571425,
"overall_score": 5.082142857142857
},
"Qwen3-32B": {
"comprehension_score": 5.457142857142857,
"structure_score": 5.628571428571429,
"prose_style_score": 5.285714285714286,
"creativity_score": 5.078571428571428,
"depth_score": 4.882142857142857,
"helpfulness_score": 5.314285714285714,
"overall_score": 5.2214285714285715
},
"Mistral-3.2-24B-2506": {
"comprehension_score": 5.5321428571428575,
"structure_score": 5.817857142857143,
"prose_style_score": 5.5321428571428575,
"creativity_score": 5.607142857142857,
"depth_score": 5.010714285714286,
"helpfulness_score": 5.3464285714285715,
"overall_score": 5.3464285714285715
},
"o4-mini-2025-04-16": {
"comprehension_score": 5.803571428571429,
"structure_score": 5.696428571428571,
"prose_style_score": 5.589285714285714,
"creativity_score": 5.442857142857143,
"depth_score": 5.1,
"helpfulness_score": 5.582142857142857,
"overall_score": 5.542857142857143
},
"Qwen3-235B": {
"comprehension_score": 5.735714285714286,
"structure_score": 5.875,
"prose_style_score": 5.628571428571429,
"creativity_score": 5.514285714285714,
"depth_score": 5.214285714285714,
"helpfulness_score": 5.55,
"overall_score": 5.5928571428571425
},
"Gemini-2.5-Flash": {
"comprehension_score": 6.196428571428571,
"structure_score": 6.2214285714285715,
"prose_style_score": 6.0,
"creativity_score": 5.828571428571428,
"depth_score": 5.75,
"helpfulness_score": 6.0321428571428575,
"overall_score": 6.007142857142857
},
"Qwen3-30B-A3B-Thinking": {
"comprehension_score": 5.914285714285715,
"structure_score": 6.053571428571429,
"prose_style_score": 6.5321428571428575,
"creativity_score": 6.5285714285714285,
"depth_score": 5.65,
"helpfulness_score": 5.817857142857143,
"overall_score": 6.064285714285714
},
"Deepseek-V3-0324": {
"comprehension_score": 6.082142857142857,
"structure_score": 6.242857142857143,
"prose_style_score": 6.560714285714286,
"creativity_score": 6.610714285714286,
"depth_score": 5.796428571428572,
"helpfulness_score": 5.957142857142857,
"overall_score": 6.203571428571428
},
"Qwen3-32B-Thinking": {
"comprehension_score": 6.339285714285714,
"structure_score": 6.425,
"prose_style_score": 7.089285714285714,
"creativity_score": 7.185714285714286,
"depth_score": 6.192857142857143,
"helpfulness_score": 6.182142857142857,
"overall_score": 6.578571428571428
},
"Qwen3-235B-Thinking": {
"comprehension_score": 6.385714285714286,
"structure_score": 6.45,
"prose_style_score": 7.296428571428572,
"creativity_score": 7.435714285714286,
"depth_score": 6.321428571428571,
"helpfulness_score": 6.285714285714286,
"overall_score": 6.660714285714286
},
"Deepseek-R1-0528": {
"comprehension_score": 6.7178571428571425,
"structure_score": 6.703571428571428,
"prose_style_score": 7.0964285714285715,
"creativity_score": 7.021428571428571,
"depth_score": 6.5964285714285715,
"helpfulness_score": 6.575,
"overall_score": 6.771428571428571
},
"o3-2025-04-16": {
"comprehension_score": 6.914285714285715,
"structure_score": 6.746428571428571,
"prose_style_score": 7.05,
"creativity_score": 6.896428571428571,
"depth_score": 6.439285714285714,
"helpfulness_score": 6.742857142857143,
"overall_score": 6.946428571428571
},
"gpt-5.4": {
"comprehension_score": 7.035714285714286,
"structure_score": 6.975,
"prose_style_score": 7.128571428571429,
"creativity_score": 6.9035714285714285,
"depth_score": 6.760714285714286,
"helpfulness_score": 7.025,
"overall_score": 7.060714285714286
},
"Gemini-3.1-Flash": {
"comprehension_score": 6.414285714285715,
"structure_score": 6.625,
"prose_style_score": 6.75,
"creativity_score": 6.660714285714286,
"depth_score": 6.057142857142857,
"helpfulness_score": 6.414285714285715,
"overall_score": 6.5285714285714285
},
"Gemini-3.1-Pro": {
"comprehension_score": 6.767857142857143,
"structure_score": 6.867857142857143,
"prose_style_score": 7.114285714285714,
"creativity_score": 6.875,
"depth_score": 6.325,
"helpfulness_score": 6.757142857142857,
"overall_score": 6.864285714285714
},
"MiniMax-M2.5": {
"comprehension_score": 6.364285714285714,
"structure_score": 6.428571428571429,
"prose_style_score": 6.021428571428571,
"creativity_score": 6.075,
"depth_score": 5.853571428571429,
"helpfulness_score": 6.275,
"overall_score": 6.260714285714286
},
"Qwen3.5-Plus": {
"comprehension_score": 6.7821428571428575,
"structure_score": 6.75,
"prose_style_score": 6.925,
"creativity_score": 6.735714285714286,
"depth_score": 6.296428571428572,
"helpfulness_score": 6.714285714285714,
"overall_score": 6.796428571428572
},
"GLM-5": {
"comprehension_score": 6.410714285714286,
"structure_score": 6.5285714285714285,
"prose_style_score": 6.689285714285714,
"creativity_score": 6.507142857142857,
"depth_score": 5.996428571428571,
"helpfulness_score": 6.360714285714286,
"overall_score": 6.492857142857143
},
"step-3.5-flash": {
"comprehension_score": 6.696428571428571,
"structure_score": 6.814285714285714,
"prose_style_score": 7.175,
"creativity_score": 7.260714285714286,
"depth_score": 6.714285714285714,
"helpfulness_score": 6.628571428571429,
"overall_score": 6.828571428571428
}
}