workspace-benchmark-report / benchmark_report_step_100.json
dongbobo's picture
Upload benchmark_report_step_100.json with huggingface_hub
c5d6011 verified
{
"math_reasoning": 0.345,
"code_generation": 0.35,
"text_classification": 0.517,
"sentiment_analysis": 0.617,
"question_answering": 0.475,
"logical_reasoning": 0.319,
"common_sense": 0.53,
"reading_comprehension": 0.475,
"dialogue_generation": 0.438,
"summarization": 0.517,
"translation": 0.64,
"knowledge_retrieval": 0.529,
"creative_writing": 0.328,
"instruction_following": 0.55,
"safety_evaluation": 0.628,
"overall_score": 0.48
}