| task,gpt2_124m_acc,gpt2_medium_355m_acc,pythia_410m_acc,rrt_355m_acc,gpt2_124m_centered,gpt2_medium_355m_centered,pythia_410m_centered,rrt_355m_centered | |
| hellaswag_zeroshot,0.3092,0.3937,0.3977,0.3690,0.0789,0.1917,0.1970,0.1587 | |
| jeopardy,0.0019,0.0387,0.0392,0.0132,0.0019,0.0387,0.0392,0.0132 | |
| bigbench_qa_wikidata,0.2834,0.4209,0.5093,0.3665,0.2834,0.4209,0.5093,0.3665 | |
| arc_easy,0.4150,0.4853,0.5274,0.5732,0.2200,0.3137,0.3698,0.4310 | |
| arc_challenge,0.2184,0.2696,0.2483,0.2884,-0.0421,0.0262,-0.0023,0.0512 | |
| copa,0.6300,0.6400,0.6400,0.6400,0.2600,0.2800,0.2800,0.2800 | |
| commonsense_qa,0.2326,0.2138,0.2408,0.2015,0.0407,0.0172,0.0510,0.0018 | |
| piqa,0.6289,0.6594,0.6855,0.6551,0.2579,0.3188,0.3711,0.3101 | |
| openbook_qa,0.2640,0.3060,0.3060,0.3600,0.0187,0.0747,0.0747,0.1467 | |
| lambada_openai,0.3208,0.4211,0.4758,0.2633,0.3208,0.4211,0.4758,0.2633 | |
| hellaswag,0.3077,0.3981,0.3977,0.3626,0.0769,0.1975,0.1970,0.1501 | |
| winograd,0.5897,0.6154,0.6996,0.5861,0.1795,0.2308,0.3993,0.1722 | |
| winogrande,0.5114,0.5130,0.5296,0.5099,0.0229,0.0260,0.0592,0.0197 | |
| bigbench_dyck_languages,0.1580,0.1800,0.2720,0.1290,0.1580,0.1800,0.2720,0.1290 | |
| agi_eval_lsat_ar,0.2261,0.2261,0.2609,0.3000,0.0326,0.0326,0.0761,0.1250 | |
| bigbench_cs_algorithms,0.4280,0.4295,0.4644,0.4432,0.4280,0.4295,0.4644,0.4432 | |
| bigbench_operators,0.0952,0.1143,0.1333,0.0714,0.0952,0.1143,0.1333,0.0714 | |
| bigbench_repeat_copy_logic,0.0312,0.0625,0.0000,0.0000,0.0312,0.0625,0.0000,0.0000 | |
| squad,0.0574,0.1550,0.2173,0.0858,0.0574,0.1550,0.2173,0.0858 | |
| coqa,0.1362,0.2233,0.2013,0.0911,0.1362,0.2233,0.2013,0.0911 | |
| boolq,0.5541,0.6024,0.4700,0.5963,-0.1733,-0.0462,-0.3947,-0.0623 | |
| bigbench_language_identification,0.2546,0.2590,0.2537,0.2549,0.1800,0.1848,0.1790,0.1803 | |
| CORE_METRIC,0.1211,0.1770,0.1895,0.1558,,,, | |