Upload folder using huggingface_hub

#329
Pretrain_language_model/save/slimpajama_moe_no_attmoe_660M_standardlb_remoe/export/result-model-340000.pth.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "val/loss": 2.2663046216207836,
3
+ "val/accuracy": 0.526702396453373,
4
+ "val/perplexity": 9.643697774330832,
5
+ "val/time_since_best_loss": 0,
6
+ "val/time_since_best_accuracy": 0,
7
+ "lambada/loss": 2.357945412582492,
8
+ "lambada/accuracy/total": 0.3825698757763975,
9
+ "lambada/accuracy/openai_last_token": 0.8060947204968945,
10
+ "lambada/perplexity": 6.443450138786187,
11
+ "lambada/lm_loss": 2.8742456989123397,
12
+ "lambada/lm_perplexity": 17.71205885611838,
13
+ "lambada/time_since_best_loss": 0,
14
+ "lambada/time_since_best_accuracy": 0,
15
+ "mean_accuracy": 0.4546361361148853,
16
+ "mean_loss": 2.312125017101638,
17
+ "blimp/accuracy/passive_2": 0.911,
18
+ "blimp/accuracy/determiner_noun_agreement_2": 0.984,
19
+ "blimp/accuracy/ellipsis_n_bar_1": 0.806,
20
+ "blimp/accuracy/tough_vs_raising_2": 0.899,
21
+ "blimp/accuracy/tough_vs_raising_1": 0.628,
22
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.899,
23
+ "blimp/accuracy/principle_A_reconstruction": 0.314,
24
+ "blimp/accuracy/wh_vs_that_with_gap": 0.485,
25
+ "blimp/accuracy/principle_A_domain_2": 0.875,
26
+ "blimp/accuracy/determiner_noun_agreement_1": 0.994,
27
+ "blimp/accuracy/ellipsis_n_bar_2": 0.908,
28
+ "blimp/accuracy/principle_A_domain_3": 0.634,
29
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.937,
30
+ "blimp/accuracy/animate_subject_trans": 0.928,
31
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.947,
32
+ "blimp/accuracy/distractor_agreement_relative_clause": 0.722,
33
+ "blimp/accuracy/transitive": 0.88,
34
+ "blimp/accuracy/sentential_subject_island": 0.352,
35
+ "blimp/accuracy/adjunct_island": 0.836,
36
+ "blimp/accuracy/intransitive": 0.751,
37
+ "blimp/accuracy/existential_there_subject_raising": 0.886,
38
+ "blimp/accuracy/irregular_past_participle_adjectives": 0.949,
39
+ "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.747,
40
+ "blimp/accuracy/principle_A_case_1": 1.0,
41
+ "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.328,
42
+ "blimp/accuracy/only_npi_scope": 0.638,
43
+ "blimp/accuracy/superlative_quantifiers_2": 0.848,
44
+ "blimp/accuracy/passive_1": 0.92,
45
+ "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.941,
46
+ "blimp/accuracy/inchoative": 0.616,
47
+ "blimp/accuracy/anaphor_gender_agreement": 0.981,
48
+ "blimp/accuracy/principle_A_c_command": 0.661,
49
+ "blimp/accuracy/only_npi_licensor_present": 0.611,
50
+ "blimp/accuracy/expletive_it_object_raising": 0.801,
51
+ "blimp/accuracy/left_branch_island_simple_question": 0.787,
52
+ "blimp/accuracy/wh_questions_subject_gap": 0.941,
53
+ "blimp/accuracy/existential_there_quantifiers_2": 0.489,
54
+ "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.939,
55
+ "blimp/accuracy/sentential_negation_npi_scope": 0.647,
56
+ "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.839,
57
+ "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.911,
58
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.92,
59
+ "blimp/accuracy/principle_A_case_2": 0.952,
60
+ "blimp/accuracy/distractor_agreement_relational_noun": 0.858,
61
+ "blimp/accuracy/sentential_negation_npi_licensor_present": 0.989,
62
+ "blimp/accuracy/superlative_quantifiers_1": 0.84,
63
+ "blimp/accuracy/wh_island": 0.805,
64
+ "blimp/accuracy/principle_A_domain_1": 0.988,
65
+ "blimp/accuracy/complex_NP_island": 0.595,
66
+ "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.977,
67
+ "blimp/accuracy/irregular_past_participle_verbs": 0.913,
68
+ "blimp/accuracy/drop_argument": 0.717,
69
+ "blimp/accuracy/wh_questions_object_gap": 0.85,
70
+ "blimp/accuracy/animate_subject_passive": 0.812,
71
+ "blimp/accuracy/existential_there_quantifiers_1": 0.979,
72
+ "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.929,
73
+ "blimp/accuracy/npi_present_2": 0.57,
74
+ "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.964,
75
+ "blimp/accuracy/anaphor_number_agreement": 0.99,
76
+ "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.965,
77
+ "blimp/accuracy/existential_there_object_raising": 0.87,
78
+ "blimp/accuracy/matrix_question_npi_licensor_present": 0.395,
79
+ "blimp/accuracy/npi_present_1": 0.587,
80
+ "blimp/accuracy/wh_vs_that_no_gap": 0.982,
81
+ "blimp/accuracy/left_branch_island_echo_question": 0.465,
82
+ "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.979,
83
+ "blimp/accuracy/causative": 0.759,
84
+ "blimp/accuracy/group_average": 0.8032835820895522,
85
+ "blimp/accuracy/seq_average": 0.8032835820895522,
86
+ "cbt/accuracy/NE": 0.8277243589743589,
87
+ "cbt/accuracy/V": 0.9384,
88
+ "cbt/accuracy/CN": 0.89,
89
+ "cbt/accuracy/P": 0.9212,
90
+ "cbt/accuracy/group_average": 0.8943310897435897,
91
+ "cbt/accuracy/seq_average": 0.8943577430972389,
92
+ "hellaswag/accuracy/val": 0.3657637920732922,
93
+ "hellaswag/accuracy/group_average": 0.3657637920732922,
94
+ "hellaswag/accuracy/seq_average": 0.3657637920732922,
95
+ "piqa/accuracy/val": 0.6338411316648531,
96
+ "piqa/accuracy/group_average": 0.6338411316648531,
97
+ "piqa/accuracy/seq_average": 0.6338411316648531,
98
+ "ai2arc/accuracy/ARC-Easy": 0.37420718816067655,
99
+ "ai2arc/accuracy/ARC-Challenge": 0.2334763948497854,
100
+ "ai2arc/accuracy/group_average": 0.303841791505231,
101
+ "ai2arc/accuracy/seq_average": 0.3277620396600567,
102
+ "race/accuracy/test/high": 0.2878787878787879,
103
+ "race/accuracy/test/middle": 0.3767409470752089,
104
+ "race/accuracy/group_average": 0.3323098674769984,
105
+ "race/accuracy/seq_average": 0.3137413862991488,
106
+ "siqa/accuracy/dev": 0.37615148413510746,
107
+ "siqa/accuracy/group_average": 0.37615148413510746,
108
+ "siqa/accuracy/seq_average": 0.37615148413510746,
109
+ "commonsenseqa/accuracy/dev_rand_split": 0.27764127764127766,
110
+ "commonsenseqa/accuracy/group_average": 0.27764127764127766,
111
+ "commonsenseqa/accuracy/seq_average": 0.27764127764127766
112
+ }
Pretrain_language_model/save/slimpajama_moe_no_attmoe_660M_standardlb_remoe/export/result-model-360000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.260709732297867, "val/accuracy": 0.5283164372519841, "val/perplexity": 9.589893008826998, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.3385417298500584, "lambada/accuracy/total": 0.37558229813664595, "lambada/accuracy/openai_last_token": 0.8072593167701864, "lambada/perplexity": 6.459880642874371, "lambada/lm_loss": 2.8613641744698417, "lambada/lm_perplexity": 17.485363760928298, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.45194936769431504, "mean_loss": 2.299625731073963, "blimp/accuracy/passive_2": 0.907, "blimp/accuracy/determiner_noun_agreement_2": 0.988, "blimp/accuracy/ellipsis_n_bar_1": 0.826, "blimp/accuracy/tough_vs_raising_2": 0.878, "blimp/accuracy/tough_vs_raising_1": 0.621, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.894, "blimp/accuracy/principle_A_reconstruction": 0.316, "blimp/accuracy/wh_vs_that_with_gap": 0.495, "blimp/accuracy/principle_A_domain_2": 0.873, "blimp/accuracy/determiner_noun_agreement_1": 0.99, "blimp/accuracy/ellipsis_n_bar_2": 0.904, "blimp/accuracy/principle_A_domain_3": 0.63, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.93, "blimp/accuracy/animate_subject_trans": 0.917, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.943, "blimp/accuracy/distractor_agreement_relative_clause": 0.713, "blimp/accuracy/transitive": 0.892, "blimp/accuracy/sentential_subject_island": 0.354, "blimp/accuracy/adjunct_island": 0.826, "blimp/accuracy/intransitive": 0.767, "blimp/accuracy/existential_there_subject_raising": 0.892, "blimp/accuracy/irregular_past_participle_adjectives": 0.905, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.712, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.372, "blimp/accuracy/only_npi_scope": 0.656, "blimp/accuracy/superlative_quantifiers_2": 0.81, "blimp/accuracy/passive_1": 0.917, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.927, "blimp/accuracy/inchoative": 0.634, "blimp/accuracy/anaphor_gender_agreement": 0.978, "blimp/accuracy/principle_A_c_command": 0.69, "blimp/accuracy/only_npi_licensor_present": 0.689, "blimp/accuracy/expletive_it_object_raising": 0.781, "blimp/accuracy/left_branch_island_simple_question": 0.742, "blimp/accuracy/wh_questions_subject_gap": 0.952, "blimp/accuracy/existential_there_quantifiers_2": 0.466, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.944, "blimp/accuracy/sentential_negation_npi_scope": 0.61, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.848, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.907, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.906, "blimp/accuracy/principle_A_case_2": 0.95, "blimp/accuracy/distractor_agreement_relational_noun": 0.852, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.991, "blimp/accuracy/superlative_quantifiers_1": 0.853, "blimp/accuracy/wh_island": 0.776, "blimp/accuracy/principle_A_domain_1": 0.991, "blimp/accuracy/complex_NP_island": 0.57, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.98, "blimp/accuracy/irregular_past_participle_verbs": 0.909, "blimp/accuracy/drop_argument": 0.71, "blimp/accuracy/wh_questions_object_gap": 0.862, "blimp/accuracy/animate_subject_passive": 0.803, "blimp/accuracy/existential_there_quantifiers_1": 0.985, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.912, "blimp/accuracy/npi_present_2": 0.565, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.957, "blimp/accuracy/anaphor_number_agreement": 0.991, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.97, "blimp/accuracy/existential_there_object_raising": 0.868, "blimp/accuracy/matrix_question_npi_licensor_present": 0.393, "blimp/accuracy/npi_present_1": 0.556, "blimp/accuracy/wh_vs_that_no_gap": 0.978, "blimp/accuracy/left_branch_island_echo_question": 0.411, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.97, "blimp/accuracy/causative": 0.755, "blimp/accuracy/group_average": 0.7994029850746269, "blimp/accuracy/seq_average": 0.7994029850746268, "cbt/accuracy/NE": 0.8261217948717948, "cbt/accuracy/V": 0.9456, "cbt/accuracy/CN": 0.8956, "cbt/accuracy/P": 0.9216, "cbt/accuracy/group_average": 0.8972304487179488, "cbt/accuracy/seq_average": 0.8972589035614246, "hellaswag/accuracy/val": 0.3686516630153356, "hellaswag/accuracy/group_average": 0.3686516630153356, "hellaswag/accuracy/seq_average": 0.3686516630153356, "piqa/accuracy/val": 0.6398258977149075, "piqa/accuracy/group_average": 0.6398258977149075, "piqa/accuracy/seq_average": 0.6398258977149075, "ai2arc/accuracy/ARC-Easy": 0.3763213530655391, "ai2arc/accuracy/ARC-Challenge": 0.23090128755364808, "ai2arc/accuracy/group_average": 0.3036113203095936, "ai2arc/accuracy/seq_average": 0.328328611898017, "race/accuracy/test/high": 0.2904516866781018, "race/accuracy/test/middle": 0.3697771587743733, "race/accuracy/group_average": 0.33011442272623753, "race/accuracy/seq_average": 0.313538710985002, "siqa/accuracy/dev": 0.3781985670419652, "siqa/accuracy/group_average": 0.3781985670419652, "siqa/accuracy/seq_average": 0.3781985670419652, "commonsenseqa/accuracy/dev_rand_split": 0.2800982800982801, "commonsenseqa/accuracy/group_average": 0.2800982800982801, "commonsenseqa/accuracy/seq_average": 0.2800982800982801}
Pretrain_language_model/save/slimpajama_moe_no_attmoe_660M_standardlb_remoe/export/result-model-40000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.5876406715029763, "val/accuracy": 0.4782288566468254, "val/perplexity": 13.298359363660653, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.6465018136160716, "lambada/accuracy/total": 0.2843555900621118, "lambada/accuracy/openai_last_token": 0.7635869565217391, "lambada/perplexity": 10.601802160659874, "lambada/lm_loss": 3.1739570216002497, "lambada/lm_perplexity": 23.901877721398986, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3812922233544686, "mean_loss": 2.6170712425595237, "blimp/accuracy/passive_2": 0.908, "blimp/accuracy/determiner_noun_agreement_2": 0.979, "blimp/accuracy/ellipsis_n_bar_1": 0.785, "blimp/accuracy/tough_vs_raising_2": 0.842, "blimp/accuracy/tough_vs_raising_1": 0.656, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.897, "blimp/accuracy/principle_A_reconstruction": 0.361, "blimp/accuracy/wh_vs_that_with_gap": 0.445, "blimp/accuracy/principle_A_domain_2": 0.815, "blimp/accuracy/determiner_noun_agreement_1": 0.988, "blimp/accuracy/ellipsis_n_bar_2": 0.922, "blimp/accuracy/principle_A_domain_3": 0.633, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.914, "blimp/accuracy/animate_subject_trans": 0.894, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.897, "blimp/accuracy/distractor_agreement_relative_clause": 0.613, "blimp/accuracy/transitive": 0.851, "blimp/accuracy/sentential_subject_island": 0.339, "blimp/accuracy/adjunct_island": 0.839, "blimp/accuracy/intransitive": 0.724, "blimp/accuracy/existential_there_subject_raising": 0.831, "blimp/accuracy/irregular_past_participle_adjectives": 0.991, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.584, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.243, "blimp/accuracy/only_npi_scope": 0.643, "blimp/accuracy/superlative_quantifiers_2": 0.742, "blimp/accuracy/passive_1": 0.898, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.918, "blimp/accuracy/inchoative": 0.579, "blimp/accuracy/anaphor_gender_agreement": 0.961, "blimp/accuracy/principle_A_c_command": 0.675, "blimp/accuracy/only_npi_licensor_present": 0.639, "blimp/accuracy/expletive_it_object_raising": 0.78, "blimp/accuracy/left_branch_island_simple_question": 0.659, "blimp/accuracy/wh_questions_subject_gap": 0.944, "blimp/accuracy/existential_there_quantifiers_2": 0.381, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.926, "blimp/accuracy/sentential_negation_npi_scope": 0.563, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.792, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.93, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.883, "blimp/accuracy/principle_A_case_2": 0.916, "blimp/accuracy/distractor_agreement_relational_noun": 0.859, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.996, "blimp/accuracy/superlative_quantifiers_1": 0.729, "blimp/accuracy/wh_island": 0.726, "blimp/accuracy/principle_A_domain_1": 0.97, "blimp/accuracy/complex_NP_island": 0.569, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.969, "blimp/accuracy/irregular_past_participle_verbs": 0.885, "blimp/accuracy/drop_argument": 0.737, "blimp/accuracy/wh_questions_object_gap": 0.836, "blimp/accuracy/animate_subject_passive": 0.798, "blimp/accuracy/existential_there_quantifiers_1": 0.966, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.908, "blimp/accuracy/npi_present_2": 0.542, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.937, "blimp/accuracy/anaphor_number_agreement": 0.981, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.966, "blimp/accuracy/existential_there_object_raising": 0.832, "blimp/accuracy/matrix_question_npi_licensor_present": 0.264, "blimp/accuracy/npi_present_1": 0.484, "blimp/accuracy/wh_vs_that_no_gap": 0.985, "blimp/accuracy/left_branch_island_echo_question": 0.401, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.982, "blimp/accuracy/causative": 0.688, "blimp/accuracy/group_average": 0.7729850746268658, "blimp/accuracy/seq_average": 0.7729850746268657, "cbt/accuracy/NE": 0.7752403846153846, "cbt/accuracy/V": 0.9176, "cbt/accuracy/CN": 0.8332, "cbt/accuracy/P": 0.8952, "cbt/accuracy/group_average": 0.8553100961538461, "cbt/accuracy/seq_average": 0.8553421368547419, "hellaswag/accuracy/val": 0.30033857797251545, "hellaswag/accuracy/group_average": 0.30033857797251545, "hellaswag/accuracy/seq_average": 0.30033857797251545, "piqa/accuracy/val": 0.6050054406964092, "piqa/accuracy/group_average": 0.6050054406964092, "piqa/accuracy/seq_average": 0.6050054406964092, "ai2arc/accuracy/ARC-Easy": 0.3412262156448203, "ai2arc/accuracy/ARC-Challenge": 0.21545064377682405, "ai2arc/accuracy/group_average": 0.2783384297108222, "ai2arc/accuracy/seq_average": 0.29971671388101984, "race/accuracy/test/high": 0.26300743281875355, "race/accuracy/test/middle": 0.3363509749303621, "race/accuracy/group_average": 0.2996792038745578, "race/accuracy/seq_average": 0.2843534657478719, "siqa/accuracy/dev": 0.35516888433981575, "siqa/accuracy/group_average": 0.35516888433981575, "siqa/accuracy/seq_average": 0.35516888433981575, "commonsenseqa/accuracy/dev_rand_split": 0.26044226044226043, "commonsenseqa/accuracy/group_average": 0.26044226044226043, "commonsenseqa/accuracy/seq_average": 0.26044226044226043}
Pretrain_language_model/save/slimpajama_moe_no_attmoe_660M_standardlb_remoe/export/result-model-400000.pth.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "val/loss": 2.252528357127356,
3
+ "val/accuracy": 0.5296892438616071,
4
+ "val/perplexity": 9.511754572058493,
5
+ "val/time_since_best_loss": 0,
6
+ "val/time_since_best_accuracy": 0,
7
+ "lambada/loss": 2.396760170504173,
8
+ "lambada/accuracy/total": 0.3705357142857143,
9
+ "lambada/accuracy/openai_last_token": 0.8082298136645962,
10
+ "lambada/perplexity": 6.511672332228969,
11
+ "lambada/lm_loss": 2.857919794746293,
12
+ "lambada/lm_perplexity": 17.425241130555758,
13
+ "lambada/time_since_best_loss": 0,
14
+ "lambada/time_since_best_accuracy": 0,
15
+ "mean_accuracy": 0.4501124790736607,
16
+ "mean_loss": 2.3246442638157645,
17
+ "blimp/accuracy/passive_2": 0.907,
18
+ "blimp/accuracy/determiner_noun_agreement_2": 0.986,
19
+ "blimp/accuracy/ellipsis_n_bar_1": 0.813,
20
+ "blimp/accuracy/tough_vs_raising_2": 0.886,
21
+ "blimp/accuracy/tough_vs_raising_1": 0.636,
22
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.921,
23
+ "blimp/accuracy/principle_A_reconstruction": 0.347,
24
+ "blimp/accuracy/wh_vs_that_with_gap": 0.471,
25
+ "blimp/accuracy/principle_A_domain_2": 0.87,
26
+ "blimp/accuracy/determiner_noun_agreement_1": 0.995,
27
+ "blimp/accuracy/ellipsis_n_bar_2": 0.907,
28
+ "blimp/accuracy/principle_A_domain_3": 0.656,
29
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.936,
30
+ "blimp/accuracy/animate_subject_trans": 0.916,
31
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.941,
32
+ "blimp/accuracy/distractor_agreement_relative_clause": 0.719,
33
+ "blimp/accuracy/transitive": 0.899,
34
+ "blimp/accuracy/sentential_subject_island": 0.354,
35
+ "blimp/accuracy/adjunct_island": 0.848,
36
+ "blimp/accuracy/intransitive": 0.77,
37
+ "blimp/accuracy/existential_there_subject_raising": 0.895,
38
+ "blimp/accuracy/irregular_past_participle_adjectives": 0.874,
39
+ "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.722,
40
+ "blimp/accuracy/principle_A_case_1": 1.0,
41
+ "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.344,
42
+ "blimp/accuracy/only_npi_scope": 0.692,
43
+ "blimp/accuracy/superlative_quantifiers_2": 0.79,
44
+ "blimp/accuracy/passive_1": 0.927,
45
+ "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.926,
46
+ "blimp/accuracy/inchoative": 0.618,
47
+ "blimp/accuracy/anaphor_gender_agreement": 0.981,
48
+ "blimp/accuracy/principle_A_c_command": 0.676,
49
+ "blimp/accuracy/only_npi_licensor_present": 0.692,
50
+ "blimp/accuracy/expletive_it_object_raising": 0.787,
51
+ "blimp/accuracy/left_branch_island_simple_question": 0.768,
52
+ "blimp/accuracy/wh_questions_subject_gap": 0.938,
53
+ "blimp/accuracy/existential_there_quantifiers_2": 0.451,
54
+ "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.945,
55
+ "blimp/accuracy/sentential_negation_npi_scope": 0.616,
56
+ "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.837,
57
+ "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.909,
58
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.916,
59
+ "blimp/accuracy/principle_A_case_2": 0.972,
60
+ "blimp/accuracy/distractor_agreement_relational_noun": 0.857,
61
+ "blimp/accuracy/sentential_negation_npi_licensor_present": 0.991,
62
+ "blimp/accuracy/superlative_quantifiers_1": 0.787,
63
+ "blimp/accuracy/wh_island": 0.803,
64
+ "blimp/accuracy/principle_A_domain_1": 0.988,
65
+ "blimp/accuracy/complex_NP_island": 0.564,
66
+ "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.975,
67
+ "blimp/accuracy/irregular_past_participle_verbs": 0.915,
68
+ "blimp/accuracy/drop_argument": 0.719,
69
+ "blimp/accuracy/wh_questions_object_gap": 0.855,
70
+ "blimp/accuracy/animate_subject_passive": 0.805,
71
+ "blimp/accuracy/existential_there_quantifiers_1": 0.979,
72
+ "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.903,
73
+ "blimp/accuracy/npi_present_2": 0.58,
74
+ "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.963,
75
+ "blimp/accuracy/anaphor_number_agreement": 0.995,
76
+ "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.972,
77
+ "blimp/accuracy/existential_there_object_raising": 0.872,
78
+ "blimp/accuracy/matrix_question_npi_licensor_present": 0.417,
79
+ "blimp/accuracy/npi_present_1": 0.575,
80
+ "blimp/accuracy/wh_vs_that_no_gap": 0.981,
81
+ "blimp/accuracy/left_branch_island_echo_question": 0.423,
82
+ "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.976,
83
+ "blimp/accuracy/causative": 0.744,
84
+ "blimp/accuracy/group_average": 0.8013880597014924,
85
+ "blimp/accuracy/seq_average": 0.8013880597014925,
86
+ "cbt/accuracy/NE": 0.8293269230769231,
87
+ "cbt/accuracy/V": 0.9432,
88
+ "cbt/accuracy/CN": 0.8912,
89
+ "cbt/accuracy/P": 0.9252,
90
+ "cbt/accuracy/group_average": 0.8972317307692308,
91
+ "cbt/accuracy/seq_average": 0.8972589035614246,
92
+ "hellaswag/accuracy/val": 0.37134037044413465,
93
+ "hellaswag/accuracy/group_average": 0.37134037044413465,
94
+ "hellaswag/accuracy/seq_average": 0.37134037044413465,
95
+ "piqa/accuracy/val": 0.6365614798694232,
96
+ "piqa/accuracy/group_average": 0.6365614798694232,
97
+ "piqa/accuracy/seq_average": 0.6365614798694232,
98
+ "ai2arc/accuracy/ARC-Easy": 0.38012684989429174,
99
+ "ai2arc/accuracy/ARC-Challenge": 0.2257510729613734,
100
+ "ai2arc/accuracy/group_average": 0.30293896142783255,
101
+ "ai2arc/accuracy/seq_average": 0.3291784702549575,
102
+ "race/accuracy/test/high": 0.29273870783304745,
103
+ "race/accuracy/test/middle": 0.36559888579387184,
104
+ "race/accuracy/group_average": 0.3291687968134597,
105
+ "race/accuracy/seq_average": 0.3139440616132955,
106
+ "siqa/accuracy/dev": 0.37871033776867963,
107
+ "siqa/accuracy/group_average": 0.37871033776867963,
108
+ "siqa/accuracy/seq_average": 0.37871033776867963,
109
+ "commonsenseqa/accuracy/dev_rand_split": 0.2784602784602785,
110
+ "commonsenseqa/accuracy/group_average": 0.2784602784602785,
111
+ "commonsenseqa/accuracy/seq_average": 0.2784602784602785
112
+ }