| task,metric,value,err,version | |
| anli_r1,acc,0.315,0.0146966319607925,0 | |
| anli_r2,acc,0.335,0.014933117490932575,0 | |
| anli_r3,acc,0.34,0.013680495725767789,0 | |
| arc_challenge,acc,0.3293515358361775,0.013734057652635473,0 | |
| arc_challenge,acc_norm,0.3387372013651877,0.01383056892797433,0 | |
| arc_easy,acc,0.6523569023569024,0.00977186884683091,0 | |
| arc_easy,acc_norm,0.6266835016835017,0.009925009142802893,0 | |
| boolq,acc,0.6217125382262997,0.008482001133930994,1 | |
| cb,acc,0.4107142857142857,0.0663363415035954,1 | |
| cb,f1,0.2854808590102708,,1 | |
| copa,acc,0.84,0.03684529491774709,0 | |
| hellaswag,acc,0.4934276040629357,0.004989350311751647,0 | |
| hellaswag,acc_norm,0.6552479585739892,0.004743160034271143,0 | |
| piqa,acc,0.7627856365614799,0.00992469493358637,0 | |
| piqa,acc_norm,0.7709466811751904,0.009804509865175505,0 | |
| rte,acc,0.5018050541516246,0.030096267148976626,0 | |
| sciq,acc,0.916,0.008776162089491132,0 | |
| sciq,acc_norm,0.892,0.009820001651345682,0 | |
| storycloze_2016,acc,0.7413148049171566,0.010126662138021712,0 | |
| winogrande,acc,0.6156274664561957,0.013671567600836192,0 | |