| task,metric,value,err,version | |
| anli_r1,acc,0.335,0.014933117490932575,0 | |
| anli_r2,acc,0.327,0.014842213153411242,0 | |
| anli_r3,acc,0.3458333333333333,0.013736245342311012,0 | |
| arc_challenge,acc,0.2687713310580205,0.012955065963710691,0 | |
| arc_challenge,acc_norm,0.30204778156996587,0.013417519144716417,0 | |
| arc_easy,acc,0.6153198653198653,0.009983171707009008,0 | |
| arc_easy,acc_norm,0.5980639730639731,0.010060521220920566,0 | |
| boolq,acc,0.5908256880733945,0.008599563442397349,1 | |
| cb,acc,0.39285714285714285,0.0658538889806635,1 | |
| cb,f1,0.28503144654088053,,1 | |
| copa,acc,0.8,0.040201512610368445,0 | |
| hellaswag,acc,0.4422425811591316,0.004956378590571537,0 | |
| hellaswag,acc_norm,0.5832503485361482,0.004920130733271772,0 | |
| piqa,acc,0.7230685527747551,0.010440499969334535,0 | |
| piqa,acc_norm,0.733949945593036,0.010310039263352826,0 | |
| rte,acc,0.5415162454873647,0.029992535385373314,0 | |
| sciq,acc,0.889,0.009938701010583726,0 | |
| sciq,acc_norm,0.862,0.010912152632504394,0 | |
| storycloze_2016,acc,0.7220737573490112,0.010359403651225854,0 | |
| winogrande,acc,0.5659037095501184,0.01392988255569405,0 | |