| task,metric,value,err,version | |
| anli_r1,acc,0.332,0.014899597242811483,0 | |
| anli_r2,acc,0.335,0.014933117490932572,0 | |
| anli_r3,acc,0.3383333333333333,0.013664144006618275,0 | |
| arc_challenge,acc,0.28071672354948807,0.013131238126975576,0 | |
| arc_challenge,acc_norm,0.28498293515358364,0.013191348179838793,0 | |
| arc_easy,acc,0.6106902356902357,0.01000521278287814,0 | |
| arc_easy,acc_norm,0.5315656565656566,0.010239317603199509,0 | |
| boolq,acc,0.5675840978593272,0.008664798701065799,1 | |
| cb,acc,0.44642857142857145,0.06703189227942398,1 | |
| cb,f1,0.24357864357864356,,1 | |
| copa,acc,0.81,0.03942772444036623,0 | |
| hellaswag,acc,0.46703843855805616,0.0049789271647928835,0 | |
| hellaswag,acc_norm,0.6088428599880502,0.004870121051762726,0 | |
| piqa,acc,0.7524483133841132,0.010069703966857102,0 | |
| piqa,acc_norm,0.7540805223068553,0.010047331865625184,0 | |
| rte,acc,0.5234657039711191,0.03006330041190266,0 | |
| sciq,acc,0.838,0.011657267771304405,0 | |
| sciq,acc_norm,0.734,0.01397996564514516,0 | |
| storycloze_2016,acc,0.7140566541956174,0.010449259851345843,0 | |
| winogrande,acc,0.590370955011839,0.013821049109655462,0 | |