| task,metric,value,err,version | |
| anli_r1,acc,0.32,0.0147586523035749,0 | |
| anli_r2,acc,0.335,0.014933117490932572,0 | |
| anli_r3,acc,0.3641666666666667,0.013896714966807255,0 | |
| arc_challenge,acc,0.3199658703071672,0.013631345807016196,0 | |
| arc_challenge,acc_norm,0.3438566552901024,0.013880644570156217,0 | |
| arc_easy,acc,0.6531986531986532,0.009766326091716007,0 | |
| arc_easy,acc_norm,0.6418350168350169,0.009838331651451853,0 | |
| boolq,acc,0.6275229357798165,0.008455846866956081,1 | |
| cb,acc,0.44642857142857145,0.06703189227942398,1 | |
| cb,f1,0.3114930182599356,,1 | |
| copa,acc,0.86,0.03487350880197771,0 | |
| hellaswag,acc,0.4963154750049791,0.004989645929811442,0 | |
| hellaswag,acc_norm,0.6570404301931886,0.004737279691036198,0 | |
| piqa,acc,0.7595212187159956,0.009971345364651076,0 | |
| piqa,acc_norm,0.7671381936887922,0.009861236071080751,0 | |
| rte,acc,0.5234657039711191,0.03006330041190266,0 | |
| sciq,acc,0.916,0.008776162089491139,0 | |
| sciq,acc_norm,0.91,0.009054390204866442,0 | |
| storycloze_2016,acc,0.7466595403527525,0.010057563497401457,0 | |
| winogrande,acc,0.6045777426992897,0.013741678387545345,0 | |