task,metric,value,err,version anli_r1,acc,0.33,0.014876872027456732,0 anli_r2,acc,0.341,0.014998131348402702,0 anli_r3,acc,0.32166666666666666,0.013490095282989526,0 arc_challenge,acc,0.30887372013651876,0.013501770929344003,0 arc_challenge,acc_norm,0.3302047781569966,0.013743085603760427,0 arc_easy,acc,0.6296296296296297,0.009908978578665757,0 arc_easy,acc_norm,0.6123737373737373,0.00999730791444761,0 boolq,acc,0.6244648318042814,0.008469774334938068,1 cb,acc,0.3392857142857143,0.06384226561930825,1 cb,f1,0.2736908716975162,,1 copa,acc,0.78,0.04163331998932262,0 hellaswag,acc,0.4962158932483569,0.004989638507409918,0 hellaswag,acc_norm,0.6642103166699861,0.004713006072807722,0 piqa,acc,0.7573449401523396,0.010002002569708698,0 piqa,acc_norm,0.7714907508161044,0.009796313511829512,0 rte,acc,0.5415162454873647,0.029992535385373314,0 sciq,acc,0.916,0.008776162089491122,0 sciq,acc_norm,0.898,0.009575368801653902,0 storycloze_2016,acc,0.7338321753073223,0.010220104800551206,0 winogrande,acc,0.5935280189423836,0.013804448697753375,0