File size: 1,620 Bytes
fb9a58b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
Task                               , Accuracy  , Centered  
hellaswag_zeroshot                 , 0.493328  , 0.324437  
jeopardy                           , 0.143599  , 0.143599  
bigbench_qa_wikidata               , 0.519069  , 0.519069  
arc_easy                           , 0.654040  , 0.538721  
arc_challenge                      , 0.376280  , 0.168373  
copa                               , 0.620000  , 0.240000  
commonsense_qa                     , 0.218673  , 0.023342  
piqa                               , 0.714363  , 0.428727  
openbook_qa                        , 0.372000  , 0.162667  
lambada_openai                     , 0.405977  , 0.405977  
hellaswag                          , 0.498208  , 0.330943  
winograd                           , 0.659341  , 0.318681  
winogrande                         , 0.543804  , 0.087609  
bigbench_dyck_languages            , 0.171000  , 0.171000  
agi_eval_lsat_ar                   , 0.217391  , 0.021739  
bigbench_cs_algorithms             , 0.428788  , 0.428788  
bigbench_operators                 , 0.180952  , 0.180952  
bigbench_repeat_copy_logic         , 0.031250  , 0.031250  
squad                              , 0.326301  , 0.326301  
coqa                               , 0.217587  , 0.217587  
boolq                              , 0.521713  , -0.258651 
bigbench_language_identification   , 0.260000  , 0.185919  
CORE                               ,           , 0.227138  
fwe_bpb                            , 0.758697  ,           
sv2_bpb                            , 0.444557  ,           
avg_bpb                            , 0.601627  ,