DuoNeural's picture
Add eval results
c0795c5 verified
{
"baseline": {
"gsm8k_flexible": 0.5822592873388931,
"gsm8k_strict": 0.6937073540561031,
"arc_acc": 0.45563139931740615,
"arc_acc_norm": 0.48976109215017066
},
"multitask": {
"gsm8k_flexible": 0.7012888551933283,
"gsm8k_strict": 0.690674753601213,
"arc_acc": 0.4522184300341297,
"arc_acc_norm": 0.4948805460750853
}
}