Qwen2.5-3B-Korean / benchmark_summary.json
MyeongHo0621's picture
Add benchmark evaluation results
f4d0988 verified
raw
history blame contribute delete
498 Bytes
{
"config": {
"model_name_or_path": "MyeongHo0621/Qwen2.5-3B-Korean",
"tasks": ["gsm8k", "mmlu", "hellaswag", "winogrande", "arc_easy", "arc_challenge"]
},
"scores": {
"gsm8k": {"score": 0.42, "metric": "acc"},
"mmlu": {"score": 0.58, "metric": "acc"},
"hellaswag": {"score": 0.71, "metric": "acc_norm"},
"winogrande": {"score": 0.65, "metric": "acc"},
"arc_easy": {"score": 0.78, "metric": "acc"},
"arc_challenge": {"score": 0.48, "metric": "acc_norm"}
}
}