task,metric,value,err,version anli_r1,acc,0.341,0.014998131348402709,0 anli_r2,acc,0.358,0.01516792886540756,0 anli_r3,acc,0.3516666666666667,0.013789711695404794,0 arc_challenge,acc,0.3165529010238908,0.01359243151906808,0 arc_challenge,acc_norm,0.3378839590443686,0.013822047922283509,0 arc_easy,acc,0.6506734006734006,0.009782853449399284,0 arc_easy,acc_norm,0.6300505050505051,0.009906656266021148,0 boolq,acc,0.6425076452599389,0.008382336069484898,1 cb,acc,0.4642857142857143,0.06724777654937658,1 cb,f1,0.41697135221649567,,1 copa,acc,0.8,0.040201512610368445,0 hellaswag,acc,0.4998008364867556,0.0049897810155954715,0 hellaswag,acc_norm,0.6693885680143398,0.004694718918225764,0 piqa,acc,0.7622415669205659,0.009932525779525489,0 piqa,acc_norm,0.779651795429815,0.009670535456853148,0 rte,acc,0.5523465703971119,0.02993107036293953,0 sciq,acc,0.935,0.007799733061832011,0 sciq,acc_norm,0.925,0.008333333333333364,0 storycloze_2016,acc,0.7471940138963121,0.01005054390987858,0 winogrande,acc,0.5887924230465666,0.013829128358676862,0