| task,metric,value,err,version | |
| anli_r1,acc,0.33,0.014876872027456732,0 | |
| anli_r2,acc,0.341,0.014998131348402702,0 | |
| anli_r3,acc,0.32166666666666666,0.013490095282989526,0 | |
| arc_challenge,acc,0.30887372013651876,0.013501770929344003,0 | |
| arc_challenge,acc_norm,0.3302047781569966,0.013743085603760427,0 | |
| arc_easy,acc,0.6296296296296297,0.009908978578665757,0 | |
| arc_easy,acc_norm,0.6123737373737373,0.00999730791444761,0 | |
| boolq,acc,0.6244648318042814,0.008469774334938068,1 | |
| cb,acc,0.3392857142857143,0.06384226561930825,1 | |
| cb,f1,0.2736908716975162,,1 | |
| copa,acc,0.78,0.04163331998932262,0 | |
| hellaswag,acc,0.4962158932483569,0.004989638507409918,0 | |
| hellaswag,acc_norm,0.6642103166699861,0.004713006072807722,0 | |
| piqa,acc,0.7573449401523396,0.010002002569708698,0 | |
| piqa,acc_norm,0.7714907508161044,0.009796313511829512,0 | |
| rte,acc,0.5415162454873647,0.029992535385373314,0 | |
| sciq,acc,0.916,0.008776162089491122,0 | |
| sciq,acc_norm,0.898,0.009575368801653902,0 | |
| storycloze_2016,acc,0.7338321753073223,0.010220104800551206,0 | |
| winogrande,acc,0.5935280189423836,0.013804448697753375,0 | |