File size: 1,440 Bytes
cec7595
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
Task                               , Accuracy  , Centered  
hellaswag_zeroshot                 , 0.444633  , 0.259510  
jeopardy                           , 0.101559  , 0.101559  
bigbench_qa_wikidata               , 0.530830  , 0.530830  
arc_easy                           , 0.634259  , 0.512346  
arc_challenge                      , 0.339590  , 0.119454  
copa                               , 0.650000  , 0.300000  
commonsense_qa                     , 0.281736  , 0.102170  
piqa                               , 0.687704  , 0.375408  
openbook_qa                        , 0.330000  , 0.106667  
lambada_openai                     , 0.366000  , 0.366000  
hellaswag                          , 0.443637  , 0.258182  
winograd                           , 0.604396  , 0.208791  
winogrande                         , 0.528808  , 0.057616  
bigbench_dyck_languages            , 0.108000  , 0.108000  
agi_eval_lsat_ar                   , 0.260870  , 0.076087  
bigbench_cs_algorithms             , 0.350758  , 0.350758  
bigbench_operators                 , 0.185714  , 0.185714  
bigbench_repeat_copy_logic         , 0.000000  , 0.000000  
squad                              , 0.230558  , 0.230558  
coqa                               , 0.202054  , 0.202054  
boolq                              , 0.537003  , -0.218413 
bigbench_language_identification   , 0.254900  , 0.180308  
CORE                               ,           , 0.200618