DavidNguyen commited on
Commit
abe6b1b
·
verified ·
1 Parent(s): ae9026b

1b8152d875db5fac3e5490e634ee15335ecd67f7cea83853f58c6eaf0243dc6e

Browse files
Pretrain_language_model/save/slimpajama_xmoe_no_attmoe_154M_standard_lb/export/result-model-10000.pth.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "val/loss": 3.082154289124504,
3
+ "val/accuracy": 0.4153355189732143,
4
+ "val/perplexity": 21.805326812077322,
5
+ "val/time_since_best_loss": 0,
6
+ "val/time_since_best_accuracy": 0,
7
+ "lambada/loss": 2.7326447860054346,
8
+ "lambada/accuracy/total": 0.14460403726708074,
9
+ "lambada/accuracy/openai_last_token": 0.7109860248447205,
10
+ "lambada/perplexity": 27.812959440862006,
11
+ "lambada/lm_loss": 3.6232970926431887,
12
+ "lambada/lm_perplexity": 37.460876410448684,
13
+ "lambada/time_since_best_loss": 0,
14
+ "lambada/time_since_best_accuracy": 0,
15
+ "mean_accuracy": 0.2799697781201475,
16
+ "mean_loss": 2.9073995375649693,
17
+ "blimp/accuracy/passive_2": 0.868,
18
+ "blimp/accuracy/determiner_noun_agreement_2": 0.965,
19
+ "blimp/accuracy/ellipsis_n_bar_1": 0.692,
20
+ "blimp/accuracy/tough_vs_raising_2": 0.774,
21
+ "blimp/accuracy/tough_vs_raising_1": 0.5,
22
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.859,
23
+ "blimp/accuracy/principle_A_reconstruction": 0.333,
24
+ "blimp/accuracy/wh_vs_that_with_gap": 0.36,
25
+ "blimp/accuracy/principle_A_domain_2": 0.741,
26
+ "blimp/accuracy/determiner_noun_agreement_1": 0.98,
27
+ "blimp/accuracy/ellipsis_n_bar_2": 0.884,
28
+ "blimp/accuracy/principle_A_domain_3": 0.49,
29
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.855,
30
+ "blimp/accuracy/animate_subject_trans": 0.858,
31
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.773,
32
+ "blimp/accuracy/distractor_agreement_relative_clause": 0.486,
33
+ "blimp/accuracy/transitive": 0.792,
34
+ "blimp/accuracy/sentential_subject_island": 0.48,
35
+ "blimp/accuracy/adjunct_island": 0.763,
36
+ "blimp/accuracy/intransitive": 0.691,
37
+ "blimp/accuracy/existential_there_subject_raising": 0.768,
38
+ "blimp/accuracy/irregular_past_participle_adjectives": 0.986,
39
+ "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.135,
40
+ "blimp/accuracy/principle_A_case_1": 1.0,
41
+ "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.145,
42
+ "blimp/accuracy/only_npi_scope": 0.556,
43
+ "blimp/accuracy/superlative_quantifiers_2": 0.785,
44
+ "blimp/accuracy/passive_1": 0.883,
45
+ "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.853,
46
+ "blimp/accuracy/inchoative": 0.538,
47
+ "blimp/accuracy/anaphor_gender_agreement": 0.929,
48
+ "blimp/accuracy/principle_A_c_command": 0.459,
49
+ "blimp/accuracy/only_npi_licensor_present": 0.421,
50
+ "blimp/accuracy/expletive_it_object_raising": 0.767,
51
+ "blimp/accuracy/left_branch_island_simple_question": 0.143,
52
+ "blimp/accuracy/wh_questions_subject_gap": 0.917,
53
+ "blimp/accuracy/existential_there_quantifiers_2": 0.346,
54
+ "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.903,
55
+ "blimp/accuracy/sentential_negation_npi_scope": 0.393,
56
+ "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.747,
57
+ "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.888,
58
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.79,
59
+ "blimp/accuracy/principle_A_case_2": 0.942,
60
+ "blimp/accuracy/distractor_agreement_relational_noun": 0.66,
61
+ "blimp/accuracy/sentential_negation_npi_licensor_present": 0.998,
62
+ "blimp/accuracy/superlative_quantifiers_1": 0.645,
63
+ "blimp/accuracy/wh_island": 0.779,
64
+ "blimp/accuracy/principle_A_domain_1": 0.951,
65
+ "blimp/accuracy/complex_NP_island": 0.554,
66
+ "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.899,
67
+ "blimp/accuracy/irregular_past_participle_verbs": 0.796,
68
+ "blimp/accuracy/drop_argument": 0.743,
69
+ "blimp/accuracy/wh_questions_object_gap": 0.735,
70
+ "blimp/accuracy/animate_subject_passive": 0.725,
71
+ "blimp/accuracy/existential_there_quantifiers_1": 0.96,
72
+ "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.891,
73
+ "blimp/accuracy/npi_present_2": 0.583,
74
+ "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.836,
75
+ "blimp/accuracy/anaphor_number_agreement": 0.971,
76
+ "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.938,
77
+ "blimp/accuracy/existential_there_object_raising": 0.718,
78
+ "blimp/accuracy/matrix_question_npi_licensor_present": 0.047,
79
+ "blimp/accuracy/npi_present_1": 0.492,
80
+ "blimp/accuracy/wh_vs_that_no_gap": 0.957,
81
+ "blimp/accuracy/left_branch_island_echo_question": 0.351,
82
+ "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.977,
83
+ "blimp/accuracy/causative": 0.64,
84
+ "blimp/accuracy/group_average": 0.7057313432835821,
85
+ "blimp/accuracy/seq_average": 0.7057313432835821,
86
+ "cbt/accuracy/NE": 0.6875,
87
+ "cbt/accuracy/V": 0.8544,
88
+ "cbt/accuracy/CN": 0.716,
89
+ "cbt/accuracy/P": 0.8212,
90
+ "cbt/accuracy/group_average": 0.7697750000000001,
91
+ "cbt/accuracy/seq_average": 0.7698079231692677,
92
+ "hellaswag/accuracy/val": 0.26628161720772753,
93
+ "hellaswag/accuracy/group_average": 0.26628161720772753,
94
+ "hellaswag/accuracy/seq_average": 0.26628161720772753,
95
+ "piqa/accuracy/val": 0.5631120783460283,
96
+ "piqa/accuracy/group_average": 0.5631120783460283,
97
+ "piqa/accuracy/seq_average": 0.5631120783460283,
98
+ "ai2arc/accuracy/ARC-Easy": 0.30528541226215644,
99
+ "ai2arc/accuracy/ARC-Challenge": 0.2034334763948498,
100
+ "ai2arc/accuracy/group_average": 0.2543594443285031,
101
+ "ai2arc/accuracy/seq_average": 0.271671388101983,
102
+ "race/accuracy/test/high": 0.2567181246426529,
103
+ "race/accuracy/test/middle": 0.31545961002785516,
104
+ "race/accuracy/group_average": 0.28608886733525407,
105
+ "race/accuracy/seq_average": 0.2738143494122416,
106
+ "siqa/accuracy/dev": 0.3638689866939611,
107
+ "siqa/accuracy/group_average": 0.3638689866939611,
108
+ "siqa/accuracy/seq_average": 0.3638689866939611,
109
+ "commonsenseqa/accuracy/dev_rand_split": 0.23095823095823095,
110
+ "commonsenseqa/accuracy/group_average": 0.23095823095823095,
111
+ "commonsenseqa/accuracy/seq_average": 0.23095823095823095
112
+ }
Pretrain_language_model/save/slimpajama_xmoe_no_attmoe_154M_standard_lb/export/result-model-100000.pth.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "val/loss": 2.6378416031125993,
3
+ "val/accuracy": 0.4758814251612103,
4
+ "val/perplexity": 13.982990170707938,
5
+ "val/time_since_best_loss": 0,
6
+ "val/time_since_best_accuracy": 0,
7
+ "lambada/loss": 2.591333803923234,
8
+ "lambada/accuracy/total": 0.24572981366459629,
9
+ "lambada/accuracy/openai_last_token": 0.7556288819875776,
10
+ "lambada/perplexity": 12.418063515018053,
11
+ "lambada/lm_loss": 3.2057600278248044,
12
+ "lambada/lm_perplexity": 24.674246005337423,
13
+ "lambada/time_since_best_loss": 0,
14
+ "lambada/time_since_best_accuracy": 0,
15
+ "mean_accuracy": 0.3608056194129033,
16
+ "mean_loss": 2.6145877035179166,
17
+ "blimp/accuracy/passive_2": 0.91,
18
+ "blimp/accuracy/determiner_noun_agreement_2": 0.985,
19
+ "blimp/accuracy/ellipsis_n_bar_1": 0.843,
20
+ "blimp/accuracy/tough_vs_raising_2": 0.848,
21
+ "blimp/accuracy/tough_vs_raising_1": 0.636,
22
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.889,
23
+ "blimp/accuracy/principle_A_reconstruction": 0.423,
24
+ "blimp/accuracy/wh_vs_that_with_gap": 0.541,
25
+ "blimp/accuracy/principle_A_domain_2": 0.781,
26
+ "blimp/accuracy/determiner_noun_agreement_1": 0.987,
27
+ "blimp/accuracy/ellipsis_n_bar_2": 0.903,
28
+ "blimp/accuracy/principle_A_domain_3": 0.544,
29
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.925,
30
+ "blimp/accuracy/animate_subject_trans": 0.898,
31
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.898,
32
+ "blimp/accuracy/distractor_agreement_relative_clause": 0.684,
33
+ "blimp/accuracy/transitive": 0.847,
34
+ "blimp/accuracy/sentential_subject_island": 0.423,
35
+ "blimp/accuracy/adjunct_island": 0.825,
36
+ "blimp/accuracy/intransitive": 0.788,
37
+ "blimp/accuracy/existential_there_subject_raising": 0.845,
38
+ "blimp/accuracy/irregular_past_participle_adjectives": 0.846,
39
+ "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.252,
40
+ "blimp/accuracy/principle_A_case_1": 1.0,
41
+ "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.247,
42
+ "blimp/accuracy/only_npi_scope": 0.619,
43
+ "blimp/accuracy/superlative_quantifiers_2": 0.841,
44
+ "blimp/accuracy/passive_1": 0.894,
45
+ "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.902,
46
+ "blimp/accuracy/inchoative": 0.619,
47
+ "blimp/accuracy/anaphor_gender_agreement": 0.964,
48
+ "blimp/accuracy/principle_A_c_command": 0.614,
49
+ "blimp/accuracy/only_npi_licensor_present": 0.674,
50
+ "blimp/accuracy/expletive_it_object_raising": 0.794,
51
+ "blimp/accuracy/left_branch_island_simple_question": 0.338,
52
+ "blimp/accuracy/wh_questions_subject_gap": 0.916,
53
+ "blimp/accuracy/existential_there_quantifiers_2": 0.43,
54
+ "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.934,
55
+ "blimp/accuracy/sentential_negation_npi_scope": 0.569,
56
+ "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.824,
57
+ "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.868,
58
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.875,
59
+ "blimp/accuracy/principle_A_case_2": 0.965,
60
+ "blimp/accuracy/distractor_agreement_relational_noun": 0.799,
61
+ "blimp/accuracy/sentential_negation_npi_licensor_present": 0.989,
62
+ "blimp/accuracy/superlative_quantifiers_1": 0.748,
63
+ "blimp/accuracy/wh_island": 0.782,
64
+ "blimp/accuracy/principle_A_domain_1": 0.976,
65
+ "blimp/accuracy/complex_NP_island": 0.563,
66
+ "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.964,
67
+ "blimp/accuracy/irregular_past_participle_verbs": 0.887,
68
+ "blimp/accuracy/drop_argument": 0.766,
69
+ "blimp/accuracy/wh_questions_object_gap": 0.79,
70
+ "blimp/accuracy/animate_subject_passive": 0.788,
71
+ "blimp/accuracy/existential_there_quantifiers_1": 0.957,
72
+ "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.899,
73
+ "blimp/accuracy/npi_present_2": 0.535,
74
+ "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.929,
75
+ "blimp/accuracy/anaphor_number_agreement": 0.984,
76
+ "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.956,
77
+ "blimp/accuracy/existential_there_object_raising": 0.851,
78
+ "blimp/accuracy/matrix_question_npi_licensor_present": 0.16,
79
+ "blimp/accuracy/npi_present_1": 0.525,
80
+ "blimp/accuracy/wh_vs_that_no_gap": 0.975,
81
+ "blimp/accuracy/left_branch_island_echo_question": 0.378,
82
+ "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.978,
83
+ "blimp/accuracy/causative": 0.687,
84
+ "blimp/accuracy/group_average": 0.7652835820895522,
85
+ "blimp/accuracy/seq_average": 0.7652835820895523,
86
+ "cbt/accuracy/NE": 0.7588141025641025,
87
+ "cbt/accuracy/V": 0.9068,
88
+ "cbt/accuracy/CN": 0.8128,
89
+ "cbt/accuracy/P": 0.8864,
90
+ "cbt/accuracy/group_average": 0.8412035256410257,
91
+ "cbt/accuracy/seq_average": 0.8412364945978391,
92
+ "hellaswag/accuracy/val": 0.29336785500896234,
93
+ "hellaswag/accuracy/group_average": 0.29336785500896234,
94
+ "hellaswag/accuracy/seq_average": 0.29336785500896234,
95
+ "piqa/accuracy/val": 0.5826985854189336,
96
+ "piqa/accuracy/group_average": 0.5826985854189336,
97
+ "piqa/accuracy/seq_average": 0.5826985854189336,
98
+ "ai2arc/accuracy/ARC-Easy": 0.3226215644820296,
99
+ "ai2arc/accuracy/ARC-Challenge": 0.2094420600858369,
100
+ "ai2arc/accuracy/group_average": 0.26603181228393324,
101
+ "ai2arc/accuracy/seq_average": 0.28526912181303116,
102
+ "race/accuracy/test/high": 0.26300743281875355,
103
+ "race/accuracy/test/middle": 0.3307799442896936,
104
+ "race/accuracy/group_average": 0.2968936885542236,
105
+ "race/accuracy/seq_average": 0.282732063234698,
106
+ "siqa/accuracy/dev": 0.3546571136131013,
107
+ "siqa/accuracy/group_average": 0.3546571136131013,
108
+ "siqa/accuracy/seq_average": 0.3546571136131013,
109
+ "commonsenseqa/accuracy/dev_rand_split": 0.24488124488124488,
110
+ "commonsenseqa/accuracy/group_average": 0.24488124488124488,
111
+ "commonsenseqa/accuracy/seq_average": 0.24488124488124488
112
+ }
Pretrain_language_model/save/slimpajama_xmoe_no_attmoe_154M_standard_lb/export/result-model-20000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.927763439360119, "val/accuracy": 0.4344094897073413, "val/perplexity": 18.68579182023476, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.665010250873447, "lambada/accuracy/total": 0.1907996894409938, "lambada/accuracy/openai_last_token": 0.7327251552795031, "lambada/perplexity": 19.304637599854928, "lambada/lm_loss": 3.4492595417288254, "lambada/lm_perplexity": 31.477076216079823, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.31260458957416754, "mean_loss": 2.796386845116783, "blimp/accuracy/passive_2": 0.899, "blimp/accuracy/determiner_noun_agreement_2": 0.949, "blimp/accuracy/ellipsis_n_bar_1": 0.781, "blimp/accuracy/tough_vs_raising_2": 0.823, "blimp/accuracy/tough_vs_raising_1": 0.575, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.876, "blimp/accuracy/principle_A_reconstruction": 0.477, "blimp/accuracy/wh_vs_that_with_gap": 0.513, "blimp/accuracy/principle_A_domain_2": 0.78, "blimp/accuracy/determiner_noun_agreement_1": 0.975, "blimp/accuracy/ellipsis_n_bar_2": 0.906, "blimp/accuracy/principle_A_domain_3": 0.524, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.861, "blimp/accuracy/animate_subject_trans": 0.871, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.807, "blimp/accuracy/distractor_agreement_relative_clause": 0.601, "blimp/accuracy/transitive": 0.84, "blimp/accuracy/sentential_subject_island": 0.432, "blimp/accuracy/adjunct_island": 0.73, "blimp/accuracy/intransitive": 0.695, "blimp/accuracy/existential_there_subject_raising": 0.832, "blimp/accuracy/irregular_past_participle_adjectives": 0.891, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.188, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.214, "blimp/accuracy/only_npi_scope": 0.499, "blimp/accuracy/superlative_quantifiers_2": 0.759, "blimp/accuracy/passive_1": 0.895, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.879, "blimp/accuracy/inchoative": 0.531, "blimp/accuracy/anaphor_gender_agreement": 0.911, "blimp/accuracy/principle_A_c_command": 0.535, "blimp/accuracy/only_npi_licensor_present": 0.442, "blimp/accuracy/expletive_it_object_raising": 0.766, "blimp/accuracy/left_branch_island_simple_question": 0.225, "blimp/accuracy/wh_questions_subject_gap": 0.885, "blimp/accuracy/existential_there_quantifiers_2": 0.533, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.886, "blimp/accuracy/sentential_negation_npi_scope": 0.377, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.788, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.874, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.834, "blimp/accuracy/principle_A_case_2": 0.964, "blimp/accuracy/distractor_agreement_relational_noun": 0.76, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.969, "blimp/accuracy/superlative_quantifiers_1": 0.657, "blimp/accuracy/wh_island": 0.734, "blimp/accuracy/principle_A_domain_1": 0.983, "blimp/accuracy/complex_NP_island": 0.568, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.905, "blimp/accuracy/irregular_past_participle_verbs": 0.892, "blimp/accuracy/drop_argument": 0.732, "blimp/accuracy/wh_questions_object_gap": 0.775, "blimp/accuracy/animate_subject_passive": 0.752, "blimp/accuracy/existential_there_quantifiers_1": 0.982, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.889, "blimp/accuracy/npi_present_2": 0.587, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.881, "blimp/accuracy/anaphor_number_agreement": 0.961, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.932, "blimp/accuracy/existential_there_object_raising": 0.716, "blimp/accuracy/matrix_question_npi_licensor_present": 0.101, "blimp/accuracy/npi_present_1": 0.553, "blimp/accuracy/wh_vs_that_no_gap": 0.951, "blimp/accuracy/left_branch_island_echo_question": 0.332, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.973, "blimp/accuracy/causative": 0.693, "blimp/accuracy/group_average": 0.7298656716417911, "blimp/accuracy/seq_average": 0.7298656716417911, "cbt/accuracy/NE": 0.6955128205128205, "cbt/accuracy/V": 0.8748, "cbt/accuracy/CN": 0.7608, "cbt/accuracy/P": 0.8508, "cbt/accuracy/group_average": 0.7954782051282051, "cbt/accuracy/seq_average": 0.7955182072829131, "hellaswag/accuracy/val": 0.2741485759808803, "hellaswag/accuracy/group_average": 0.2741485759808803, "hellaswag/accuracy/seq_average": 0.2741485759808803, "piqa/accuracy/val": 0.5625680087051143, "piqa/accuracy/group_average": 0.5625680087051143, "piqa/accuracy/seq_average": 0.5625680087051143, "ai2arc/accuracy/ARC-Easy": 0.29978858350951376, "ai2arc/accuracy/ARC-Challenge": 0.20429184549356222, "ai2arc/accuracy/group_average": 0.252040214501538, "ai2arc/accuracy/seq_average": 0.26827195467422094, "race/accuracy/test/high": 0.25443110348770726, "race/accuracy/test/middle": 0.31337047353760444, "race/accuracy/group_average": 0.2839007885126559, "race/accuracy/seq_average": 0.2715849209566275, "siqa/accuracy/dev": 0.3633572159672467, "siqa/accuracy/group_average": 0.3633572159672467, "siqa/accuracy/seq_average": 0.3633572159672467, "commonsenseqa/accuracy/dev_rand_split": 0.23423423423423423, "commonsenseqa/accuracy/group_average": 0.23423423423423423, "commonsenseqa/accuracy/seq_average": 0.23423423423423423}
Pretrain_language_model/save/slimpajama_xmoe_no_attmoe_154M_standard_lb/export/result-model-30000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.8461814759269592, "val/accuracy": 0.4464498852926587, "val/perplexity": 17.221893906830505, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.6310036226829387, "lambada/accuracy/total": 0.1999223602484472, "lambada/accuracy/openai_last_token": 0.7404891304347826, "lambada/perplexity": 17.055114946895998, "lambada/lm_loss": 3.4029719950049446, "lambada/lm_perplexity": 30.053285667027353, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.323186122770553, "mean_loss": 2.7385925493049488, "blimp/accuracy/passive_2": 0.87, "blimp/accuracy/determiner_noun_agreement_2": 0.969, "blimp/accuracy/ellipsis_n_bar_1": 0.802, "blimp/accuracy/tough_vs_raising_2": 0.85, "blimp/accuracy/tough_vs_raising_1": 0.603, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.88, "blimp/accuracy/principle_A_reconstruction": 0.352, "blimp/accuracy/wh_vs_that_with_gap": 0.612, "blimp/accuracy/principle_A_domain_2": 0.742, "blimp/accuracy/determiner_noun_agreement_1": 0.983, "blimp/accuracy/ellipsis_n_bar_2": 0.897, "blimp/accuracy/principle_A_domain_3": 0.538, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.881, "blimp/accuracy/animate_subject_trans": 0.894, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.851, "blimp/accuracy/distractor_agreement_relative_clause": 0.632, "blimp/accuracy/transitive": 0.828, "blimp/accuracy/sentential_subject_island": 0.496, "blimp/accuracy/adjunct_island": 0.798, "blimp/accuracy/intransitive": 0.758, "blimp/accuracy/existential_there_subject_raising": 0.859, "blimp/accuracy/irregular_past_participle_adjectives": 0.906, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.179, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.298, "blimp/accuracy/only_npi_scope": 0.683, "blimp/accuracy/superlative_quantifiers_2": 0.736, "blimp/accuracy/passive_1": 0.89, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.868, "blimp/accuracy/inchoative": 0.598, "blimp/accuracy/anaphor_gender_agreement": 0.949, "blimp/accuracy/principle_A_c_command": 0.563, "blimp/accuracy/only_npi_licensor_present": 0.53, "blimp/accuracy/expletive_it_object_raising": 0.769, "blimp/accuracy/left_branch_island_simple_question": 0.226, "blimp/accuracy/wh_questions_subject_gap": 0.856, "blimp/accuracy/existential_there_quantifiers_2": 0.37, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.908, "blimp/accuracy/sentential_negation_npi_scope": 0.48, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.818, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.813, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.845, "blimp/accuracy/principle_A_case_2": 0.957, "blimp/accuracy/distractor_agreement_relational_noun": 0.789, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.979, "blimp/accuracy/superlative_quantifiers_1": 0.846, "blimp/accuracy/wh_island": 0.676, "blimp/accuracy/principle_A_domain_1": 0.976, "blimp/accuracy/complex_NP_island": 0.515, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.946, "blimp/accuracy/irregular_past_participle_verbs": 0.827, "blimp/accuracy/drop_argument": 0.769, "blimp/accuracy/wh_questions_object_gap": 0.711, "blimp/accuracy/animate_subject_passive": 0.808, "blimp/accuracy/existential_there_quantifiers_1": 0.977, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.86, "blimp/accuracy/npi_present_2": 0.571, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.918, "blimp/accuracy/anaphor_number_agreement": 0.972, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.937, "blimp/accuracy/existential_there_object_raising": 0.737, "blimp/accuracy/matrix_question_npi_licensor_present": 0.128, "blimp/accuracy/npi_present_1": 0.53, "blimp/accuracy/wh_vs_that_no_gap": 0.918, "blimp/accuracy/left_branch_island_echo_question": 0.219, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.937, "blimp/accuracy/causative": 0.691, "blimp/accuracy/group_average": 0.7399104477611937, "blimp/accuracy/seq_average": 0.7399104477611941, "cbt/accuracy/NE": 0.7263621794871795, "cbt/accuracy/V": 0.8844, "cbt/accuracy/CN": 0.7764, "cbt/accuracy/P": 0.8556, "cbt/accuracy/group_average": 0.8106905448717949, "cbt/accuracy/seq_average": 0.8107242897158864, "hellaswag/accuracy/val": 0.27673770165305717, "hellaswag/accuracy/group_average": 0.27673770165305717, "hellaswag/accuracy/seq_average": 0.27673770165305717, "piqa/accuracy/val": 0.5799782372143635, "piqa/accuracy/group_average": 0.5799782372143635, "piqa/accuracy/seq_average": 0.5799782372143635, "ai2arc/accuracy/ARC-Easy": 0.3090909090909091, "ai2arc/accuracy/ARC-Challenge": 0.21373390557939914, "ai2arc/accuracy/group_average": 0.26141240733515414, "ai2arc/accuracy/seq_average": 0.2776203966005666, "race/accuracy/test/high": 0.26043453401943967, "race/accuracy/test/middle": 0.318941504178273, "race/accuracy/group_average": 0.28968801909885633, "race/accuracy/seq_average": 0.27746250506688286, "siqa/accuracy/dev": 0.3623336745138178, "siqa/accuracy/group_average": 0.3623336745138178, "siqa/accuracy/seq_average": 0.3623336745138178, "commonsenseqa/accuracy/dev_rand_split": 0.23505323505323505, "commonsenseqa/accuracy/group_average": 0.23505323505323505, "commonsenseqa/accuracy/seq_average": 0.23505323505323505}
Pretrain_language_model/save/slimpajama_xmoe_no_attmoe_154M_standard_lb/export/result-model-40000.pth.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "val/loss": 2.7919435046968006,
3
+ "val/accuracy": 0.45302230592757936,
4
+ "val/perplexity": 16.312692808591017,
5
+ "val/time_since_best_loss": 0,
6
+ "val/time_since_best_accuracy": 0,
7
+ "lambada/loss": 2.6135883212829967,
8
+ "lambada/accuracy/total": 0.19157608695652173,
9
+ "lambada/accuracy/openai_last_token": 0.7393245341614907,
10
+ "lambada/perplexity": 16.95823182935926,
11
+ "lambada/lm_loss": 3.3357095115573507,
12
+ "lambada/lm_perplexity": 28.09831223030425,
13
+ "lambada/time_since_best_loss": 0,
14
+ "lambada/time_since_best_accuracy": 0,
15
+ "mean_accuracy": 0.3222991964420505,
16
+ "mean_loss": 2.7027659129898987,
17
+ "blimp/accuracy/passive_2": 0.878,
18
+ "blimp/accuracy/determiner_noun_agreement_2": 0.973,
19
+ "blimp/accuracy/ellipsis_n_bar_1": 0.822,
20
+ "blimp/accuracy/tough_vs_raising_2": 0.87,
21
+ "blimp/accuracy/tough_vs_raising_1": 0.592,
22
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.88,
23
+ "blimp/accuracy/principle_A_reconstruction": 0.344,
24
+ "blimp/accuracy/wh_vs_that_with_gap": 0.533,
25
+ "blimp/accuracy/principle_A_domain_2": 0.762,
26
+ "blimp/accuracy/determiner_noun_agreement_1": 0.987,
27
+ "blimp/accuracy/ellipsis_n_bar_2": 0.893,
28
+ "blimp/accuracy/principle_A_domain_3": 0.538,
29
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.88,
30
+ "blimp/accuracy/animate_subject_trans": 0.896,
31
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.875,
32
+ "blimp/accuracy/distractor_agreement_relative_clause": 0.73,
33
+ "blimp/accuracy/transitive": 0.851,
34
+ "blimp/accuracy/sentential_subject_island": 0.411,
35
+ "blimp/accuracy/adjunct_island": 0.773,
36
+ "blimp/accuracy/intransitive": 0.743,
37
+ "blimp/accuracy/existential_there_subject_raising": 0.858,
38
+ "blimp/accuracy/irregular_past_participle_adjectives": 0.857,
39
+ "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.132,
40
+ "blimp/accuracy/principle_A_case_1": 1.0,
41
+ "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.217,
42
+ "blimp/accuracy/only_npi_scope": 0.541,
43
+ "blimp/accuracy/superlative_quantifiers_2": 0.789,
44
+ "blimp/accuracy/passive_1": 0.897,
45
+ "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.867,
46
+ "blimp/accuracy/inchoative": 0.591,
47
+ "blimp/accuracy/anaphor_gender_agreement": 0.966,
48
+ "blimp/accuracy/principle_A_c_command": 0.551,
49
+ "blimp/accuracy/only_npi_licensor_present": 0.783,
50
+ "blimp/accuracy/expletive_it_object_raising": 0.793,
51
+ "blimp/accuracy/left_branch_island_simple_question": 0.187,
52
+ "blimp/accuracy/wh_questions_subject_gap": 0.908,
53
+ "blimp/accuracy/existential_there_quantifiers_2": 0.359,
54
+ "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.912,
55
+ "blimp/accuracy/sentential_negation_npi_scope": 0.558,
56
+ "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.807,
57
+ "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.889,
58
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.876,
59
+ "blimp/accuracy/principle_A_case_2": 0.959,
60
+ "blimp/accuracy/distractor_agreement_relational_noun": 0.817,
61
+ "blimp/accuracy/sentential_negation_npi_licensor_present": 0.993,
62
+ "blimp/accuracy/superlative_quantifiers_1": 0.793,
63
+ "blimp/accuracy/wh_island": 0.709,
64
+ "blimp/accuracy/principle_A_domain_1": 0.952,
65
+ "blimp/accuracy/complex_NP_island": 0.538,
66
+ "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.953,
67
+ "blimp/accuracy/irregular_past_participle_verbs": 0.901,
68
+ "blimp/accuracy/drop_argument": 0.782,
69
+ "blimp/accuracy/wh_questions_object_gap": 0.764,
70
+ "blimp/accuracy/animate_subject_passive": 0.756,
71
+ "blimp/accuracy/existential_there_quantifiers_1": 0.965,
72
+ "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.874,
73
+ "blimp/accuracy/npi_present_2": 0.49,
74
+ "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.915,
75
+ "blimp/accuracy/anaphor_number_agreement": 0.98,
76
+ "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.948,
77
+ "blimp/accuracy/existential_there_object_raising": 0.811,
78
+ "blimp/accuracy/matrix_question_npi_licensor_present": 0.098,
79
+ "blimp/accuracy/npi_present_1": 0.456,
80
+ "blimp/accuracy/wh_vs_that_no_gap": 0.97,
81
+ "blimp/accuracy/left_branch_island_echo_question": 0.313,
82
+ "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.969,
83
+ "blimp/accuracy/causative": 0.687,
84
+ "blimp/accuracy/group_average": 0.7457014925373137,
85
+ "blimp/accuracy/seq_average": 0.7457014925373134,
86
+ "cbt/accuracy/NE": 0.7287660256410257,
87
+ "cbt/accuracy/V": 0.8904,
88
+ "cbt/accuracy/CN": 0.78,
89
+ "cbt/accuracy/P": 0.8556,
90
+ "cbt/accuracy/group_average": 0.8136915064102563,
91
+ "cbt/accuracy/seq_average": 0.8137254901960784,
92
+ "hellaswag/accuracy/val": 0.2802230631348337,
93
+ "hellaswag/accuracy/group_average": 0.2802230631348337,
94
+ "hellaswag/accuracy/seq_average": 0.2802230631348337,
95
+ "piqa/accuracy/val": 0.5832426550598476,
96
+ "piqa/accuracy/group_average": 0.5832426550598476,
97
+ "piqa/accuracy/seq_average": 0.5832426550598476,
98
+ "ai2arc/accuracy/ARC-Easy": 0.3120507399577167,
99
+ "ai2arc/accuracy/ARC-Challenge": 0.20085836909871244,
100
+ "ai2arc/accuracy/group_average": 0.2564545545282146,
101
+ "ai2arc/accuracy/seq_average": 0.2753541076487252,
102
+ "race/accuracy/test/high": 0.26157804459691253,
103
+ "race/accuracy/test/middle": 0.3321727019498607,
104
+ "race/accuracy/group_average": 0.29687537327338664,
105
+ "race/accuracy/seq_average": 0.2821240372922578,
106
+ "siqa/accuracy/dev": 0.3572159672466735,
107
+ "siqa/accuracy/group_average": 0.3572159672466735,
108
+ "siqa/accuracy/seq_average": 0.3572159672466735,
109
+ "commonsenseqa/accuracy/dev_rand_split": 0.23587223587223588,
110
+ "commonsenseqa/accuracy/group_average": 0.23587223587223588,
111
+ "commonsenseqa/accuracy/seq_average": 0.23587223587223588
112
+ }
Pretrain_language_model/save/slimpajama_xmoe_no_attmoe_154M_standard_lb/export/result-model-50000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.744597904265873, "val/accuracy": 0.4605761331225198, "val/perplexity": 15.558356725401492, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.608001708984375, "lambada/accuracy/total": 0.21486801242236025, "lambada/accuracy/openai_last_token": 0.749805900621118, "lambada/perplexity": 14.954680203004754, "lambada/lm_loss": 3.318650318895724, "lambada/lm_perplexity": 27.62304308828747, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.33772207277244004, "mean_loss": 2.6762998066251242, "blimp/accuracy/passive_2": 0.896, "blimp/accuracy/determiner_noun_agreement_2": 0.98, "blimp/accuracy/ellipsis_n_bar_1": 0.826, "blimp/accuracy/tough_vs_raising_2": 0.861, "blimp/accuracy/tough_vs_raising_1": 0.574, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.892, "blimp/accuracy/principle_A_reconstruction": 0.26, "blimp/accuracy/wh_vs_that_with_gap": 0.487, "blimp/accuracy/principle_A_domain_2": 0.801, "blimp/accuracy/determiner_noun_agreement_1": 0.99, "blimp/accuracy/ellipsis_n_bar_2": 0.904, "blimp/accuracy/principle_A_domain_3": 0.561, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.886, "blimp/accuracy/animate_subject_trans": 0.876, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.86, "blimp/accuracy/distractor_agreement_relative_clause": 0.637, "blimp/accuracy/transitive": 0.838, "blimp/accuracy/sentential_subject_island": 0.437, "blimp/accuracy/adjunct_island": 0.727, "blimp/accuracy/intransitive": 0.757, "blimp/accuracy/existential_there_subject_raising": 0.849, "blimp/accuracy/irregular_past_participle_adjectives": 0.882, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.216, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.189, "blimp/accuracy/only_npi_scope": 0.625, "blimp/accuracy/superlative_quantifiers_2": 0.767, "blimp/accuracy/passive_1": 0.894, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.889, "blimp/accuracy/inchoative": 0.577, "blimp/accuracy/anaphor_gender_agreement": 0.95, "blimp/accuracy/principle_A_c_command": 0.574, "blimp/accuracy/only_npi_licensor_present": 0.409, "blimp/accuracy/expletive_it_object_raising": 0.793, "blimp/accuracy/left_branch_island_simple_question": 0.249, "blimp/accuracy/wh_questions_subject_gap": 0.93, "blimp/accuracy/existential_there_quantifiers_2": 0.279, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.912, "blimp/accuracy/sentential_negation_npi_scope": 0.459, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.827, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.903, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.847, "blimp/accuracy/principle_A_case_2": 0.951, "blimp/accuracy/distractor_agreement_relational_noun": 0.78, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.988, "blimp/accuracy/superlative_quantifiers_1": 0.713, "blimp/accuracy/wh_island": 0.793, "blimp/accuracy/principle_A_domain_1": 0.982, "blimp/accuracy/complex_NP_island": 0.534, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.954, "blimp/accuracy/irregular_past_participle_verbs": 0.845, "blimp/accuracy/drop_argument": 0.75, "blimp/accuracy/wh_questions_object_gap": 0.828, "blimp/accuracy/animate_subject_passive": 0.798, "blimp/accuracy/existential_there_quantifiers_1": 0.961, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.894, "blimp/accuracy/npi_present_2": 0.556, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.91, "blimp/accuracy/anaphor_number_agreement": 0.976, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.94, "blimp/accuracy/existential_there_object_raising": 0.833, "blimp/accuracy/matrix_question_npi_licensor_present": 0.151, "blimp/accuracy/npi_present_1": 0.468, "blimp/accuracy/wh_vs_that_no_gap": 0.978, "blimp/accuracy/left_branch_island_echo_question": 0.35, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.98, "blimp/accuracy/causative": 0.69, "blimp/accuracy/group_average": 0.7413880597014924, "blimp/accuracy/seq_average": 0.7413880597014926, "cbt/accuracy/NE": 0.7303685897435898, "cbt/accuracy/V": 0.8976, "cbt/accuracy/CN": 0.7972, "cbt/accuracy/P": 0.87, "cbt/accuracy/group_average": 0.8237921474358975, "cbt/accuracy/seq_average": 0.8238295318127251, "hellaswag/accuracy/val": 0.2846046604262099, "hellaswag/accuracy/group_average": 0.2846046604262099, "hellaswag/accuracy/seq_average": 0.2846046604262099, "piqa/accuracy/val": 0.5712731229597389, "piqa/accuracy/group_average": 0.5712731229597389, "piqa/accuracy/seq_average": 0.5712731229597389, "ai2arc/accuracy/ARC-Easy": 0.3120507399577167, "ai2arc/accuracy/ARC-Challenge": 0.21201716738197424, "ai2arc/accuracy/group_average": 0.2620339536698455, "ai2arc/accuracy/seq_average": 0.2790368271954674, "race/accuracy/test/high": 0.2564322469982847, "race/accuracy/test/middle": 0.3279944289693593, "race/accuracy/group_average": 0.292213337983822, "race/accuracy/seq_average": 0.2772598297527361, "siqa/accuracy/dev": 0.35977482088024565, "siqa/accuracy/group_average": 0.35977482088024565, "siqa/accuracy/seq_average": 0.35977482088024565, "commonsenseqa/accuracy/dev_rand_split": 0.23587223587223588, "commonsenseqa/accuracy/group_average": 0.23587223587223588, "commonsenseqa/accuracy/seq_average": 0.23587223587223588}
Pretrain_language_model/save/slimpajama_xmoe_no_attmoe_154M_standard_lb/export/result-model-60000.pth.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "val/loss": 2.7088145906963046,
3
+ "val/accuracy": 0.4653746589781746,
4
+ "val/perplexity": 15.011470227214083,
5
+ "val/time_since_best_loss": 0,
6
+ "val/time_since_best_accuracy": 0,
7
+ "lambada/loss": 2.5780768542556287,
8
+ "lambada/accuracy/total": 0.22243788819875776,
9
+ "lambada/accuracy/openai_last_token": 0.750194099378882,
10
+ "lambada/perplexity": 14.345165224725555,
11
+ "lambada/lm_loss": 3.264494879230292,
12
+ "lambada/lm_perplexity": 26.16689021233424,
13
+ "lambada/time_since_best_loss": 0,
14
+ "lambada/time_since_best_accuracy": 0,
15
+ "mean_accuracy": 0.34390627358846615,
16
+ "mean_loss": 2.643445722475967,
17
+ "blimp/accuracy/passive_2": 0.905,
18
+ "blimp/accuracy/determiner_noun_agreement_2": 0.975,
19
+ "blimp/accuracy/ellipsis_n_bar_1": 0.842,
20
+ "blimp/accuracy/tough_vs_raising_2": 0.858,
21
+ "blimp/accuracy/tough_vs_raising_1": 0.609,
22
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.89,
23
+ "blimp/accuracy/principle_A_reconstruction": 0.407,
24
+ "blimp/accuracy/wh_vs_that_with_gap": 0.55,
25
+ "blimp/accuracy/principle_A_domain_2": 0.782,
26
+ "blimp/accuracy/determiner_noun_agreement_1": 0.983,
27
+ "blimp/accuracy/ellipsis_n_bar_2": 0.895,
28
+ "blimp/accuracy/principle_A_domain_3": 0.578,
29
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.902,
30
+ "blimp/accuracy/animate_subject_trans": 0.891,
31
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.879,
32
+ "blimp/accuracy/distractor_agreement_relative_clause": 0.691,
33
+ "blimp/accuracy/transitive": 0.858,
34
+ "blimp/accuracy/sentential_subject_island": 0.435,
35
+ "blimp/accuracy/adjunct_island": 0.793,
36
+ "blimp/accuracy/intransitive": 0.784,
37
+ "blimp/accuracy/existential_there_subject_raising": 0.845,
38
+ "blimp/accuracy/irregular_past_participle_adjectives": 0.98,
39
+ "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.223,
40
+ "blimp/accuracy/principle_A_case_1": 1.0,
41
+ "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.24,
42
+ "blimp/accuracy/only_npi_scope": 0.688,
43
+ "blimp/accuracy/superlative_quantifiers_2": 0.669,
44
+ "blimp/accuracy/passive_1": 0.891,
45
+ "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.892,
46
+ "blimp/accuracy/inchoative": 0.626,
47
+ "blimp/accuracy/anaphor_gender_agreement": 0.972,
48
+ "blimp/accuracy/principle_A_c_command": 0.617,
49
+ "blimp/accuracy/only_npi_licensor_present": 0.586,
50
+ "blimp/accuracy/expletive_it_object_raising": 0.775,
51
+ "blimp/accuracy/left_branch_island_simple_question": 0.27,
52
+ "blimp/accuracy/wh_questions_subject_gap": 0.924,
53
+ "blimp/accuracy/existential_there_quantifiers_2": 0.344,
54
+ "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.919,
55
+ "blimp/accuracy/sentential_negation_npi_scope": 0.498,
56
+ "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.821,
57
+ "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.862,
58
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.865,
59
+ "blimp/accuracy/principle_A_case_2": 0.948,
60
+ "blimp/accuracy/distractor_agreement_relational_noun": 0.809,
61
+ "blimp/accuracy/sentential_negation_npi_licensor_present": 0.993,
62
+ "blimp/accuracy/superlative_quantifiers_1": 0.831,
63
+ "blimp/accuracy/wh_island": 0.78,
64
+ "blimp/accuracy/principle_A_domain_1": 0.981,
65
+ "blimp/accuracy/complex_NP_island": 0.558,
66
+ "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.951,
67
+ "blimp/accuracy/irregular_past_participle_verbs": 0.893,
68
+ "blimp/accuracy/drop_argument": 0.782,
69
+ "blimp/accuracy/wh_questions_object_gap": 0.801,
70
+ "blimp/accuracy/animate_subject_passive": 0.791,
71
+ "blimp/accuracy/existential_there_quantifiers_1": 0.949,
72
+ "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.914,
73
+ "blimp/accuracy/npi_present_2": 0.493,
74
+ "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.916,
75
+ "blimp/accuracy/anaphor_number_agreement": 0.977,
76
+ "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.944,
77
+ "blimp/accuracy/existential_there_object_raising": 0.785,
78
+ "blimp/accuracy/matrix_question_npi_licensor_present": 0.133,
79
+ "blimp/accuracy/npi_present_1": 0.462,
80
+ "blimp/accuracy/wh_vs_that_no_gap": 0.978,
81
+ "blimp/accuracy/left_branch_island_echo_question": 0.32,
82
+ "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.978,
83
+ "blimp/accuracy/causative": 0.696,
84
+ "blimp/accuracy/group_average": 0.7563731343283583,
85
+ "blimp/accuracy/seq_average": 0.7563731343283582,
86
+ "cbt/accuracy/NE": 0.7451923076923077,
87
+ "cbt/accuracy/V": 0.8992,
88
+ "cbt/accuracy/CN": 0.7976,
89
+ "cbt/accuracy/P": 0.8748,
90
+ "cbt/accuracy/group_average": 0.8291980769230769,
91
+ "cbt/accuracy/seq_average": 0.8292316926770709,
92
+ "hellaswag/accuracy/val": 0.286795459071898,
93
+ "hellaswag/accuracy/group_average": 0.286795459071898,
94
+ "hellaswag/accuracy/seq_average": 0.286795459071898,
95
+ "piqa/accuracy/val": 0.5854189336235038,
96
+ "piqa/accuracy/group_average": 0.5854189336235038,
97
+ "piqa/accuracy/seq_average": 0.5854189336235038,
98
+ "ai2arc/accuracy/ARC-Easy": 0.3150105708245243,
99
+ "ai2arc/accuracy/ARC-Challenge": 0.20429184549356222,
100
+ "ai2arc/accuracy/group_average": 0.2596512081590433,
101
+ "ai2arc/accuracy/seq_average": 0.2784702549575071,
102
+ "race/accuracy/test/high": 0.26014865637507145,
103
+ "race/accuracy/test/middle": 0.33008356545961004,
104
+ "race/accuracy/group_average": 0.29511611091734075,
105
+ "race/accuracy/seq_average": 0.2805026347790839,
106
+ "siqa/accuracy/dev": 0.3490276356192426,
107
+ "siqa/accuracy/group_average": 0.3490276356192426,
108
+ "siqa/accuracy/seq_average": 0.3490276356192426,
109
+ "commonsenseqa/accuracy/dev_rand_split": 0.23505323505323505,
110
+ "commonsenseqa/accuracy/group_average": 0.23505323505323505,
111
+ "commonsenseqa/accuracy/seq_average": 0.23505323505323505
112
+ }
Pretrain_language_model/save/slimpajama_xmoe_no_attmoe_154M_standard_lb/export/result-model-70000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.6803521050347223, "val/accuracy": 0.46969749813988093, "val/perplexity": 14.590229684883312, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.6440372822447595, "lambada/accuracy/total": 0.23020186335403728, "lambada/accuracy/openai_last_token": 0.7542701863354038, "lambada/perplexity": 13.618648521677962, "lambada/lm_loss": 3.2510871683233042, "lambada/lm_perplexity": 25.818393604575284, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3499496807469591, "mean_loss": 2.6621946936397407, "blimp/accuracy/passive_2": 0.905, "blimp/accuracy/determiner_noun_agreement_2": 0.98, "blimp/accuracy/ellipsis_n_bar_1": 0.843, "blimp/accuracy/tough_vs_raising_2": 0.823, "blimp/accuracy/tough_vs_raising_1": 0.619, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.856, "blimp/accuracy/principle_A_reconstruction": 0.394, "blimp/accuracy/wh_vs_that_with_gap": 0.541, "blimp/accuracy/principle_A_domain_2": 0.789, "blimp/accuracy/determiner_noun_agreement_1": 0.987, "blimp/accuracy/ellipsis_n_bar_2": 0.909, "blimp/accuracy/principle_A_domain_3": 0.572, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.91, "blimp/accuracy/animate_subject_trans": 0.906, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.899, "blimp/accuracy/distractor_agreement_relative_clause": 0.694, "blimp/accuracy/transitive": 0.863, "blimp/accuracy/sentential_subject_island": 0.427, "blimp/accuracy/adjunct_island": 0.784, "blimp/accuracy/intransitive": 0.762, "blimp/accuracy/existential_there_subject_raising": 0.841, "blimp/accuracy/irregular_past_participle_adjectives": 0.87, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.226, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.304, "blimp/accuracy/only_npi_scope": 0.684, "blimp/accuracy/superlative_quantifiers_2": 0.813, "blimp/accuracy/passive_1": 0.894, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.881, "blimp/accuracy/inchoative": 0.587, "blimp/accuracy/anaphor_gender_agreement": 0.962, "blimp/accuracy/principle_A_c_command": 0.605, "blimp/accuracy/only_npi_licensor_present": 0.402, "blimp/accuracy/expletive_it_object_raising": 0.782, "blimp/accuracy/left_branch_island_simple_question": 0.282, "blimp/accuracy/wh_questions_subject_gap": 0.92, "blimp/accuracy/existential_there_quantifiers_2": 0.307, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.926, "blimp/accuracy/sentential_negation_npi_scope": 0.552, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.835, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.839, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.865, "blimp/accuracy/principle_A_case_2": 0.956, "blimp/accuracy/distractor_agreement_relational_noun": 0.797, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.994, "blimp/accuracy/superlative_quantifiers_1": 0.829, "blimp/accuracy/wh_island": 0.756, "blimp/accuracy/principle_A_domain_1": 0.976, "blimp/accuracy/complex_NP_island": 0.553, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.958, "blimp/accuracy/irregular_past_participle_verbs": 0.863, "blimp/accuracy/drop_argument": 0.763, "blimp/accuracy/wh_questions_object_gap": 0.788, "blimp/accuracy/animate_subject_passive": 0.789, "blimp/accuracy/existential_there_quantifiers_1": 0.959, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.904, "blimp/accuracy/npi_present_2": 0.47, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.914, "blimp/accuracy/anaphor_number_agreement": 0.979, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.954, "blimp/accuracy/existential_there_object_raising": 0.843, "blimp/accuracy/matrix_question_npi_licensor_present": 0.148, "blimp/accuracy/npi_present_1": 0.469, "blimp/accuracy/wh_vs_that_no_gap": 0.974, "blimp/accuracy/left_branch_island_echo_question": 0.326, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.967, "blimp/accuracy/causative": 0.695, "blimp/accuracy/group_average": 0.7531940298507461, "blimp/accuracy/seq_average": 0.7531940298507462, "cbt/accuracy/NE": 0.7455929487179487, "cbt/accuracy/V": 0.8984, "cbt/accuracy/CN": 0.7996, "cbt/accuracy/P": 0.8752, "cbt/accuracy/group_average": 0.8296982371794871, "cbt/accuracy/seq_average": 0.8297318927571028, "hellaswag/accuracy/val": 0.2865962955586537, "hellaswag/accuracy/group_average": 0.2865962955586537, "hellaswag/accuracy/seq_average": 0.2865962955586537, "piqa/accuracy/val": 0.5865070729053319, "piqa/accuracy/group_average": 0.5865070729053319, "piqa/accuracy/seq_average": 0.5865070729053319, "ai2arc/accuracy/ARC-Easy": 0.32642706131078225, "ai2arc/accuracy/ARC-Challenge": 0.20515021459227467, "ai2arc/accuracy/group_average": 0.26578863795152846, "ai2arc/accuracy/seq_average": 0.2864022662889518, "race/accuracy/test/high": 0.2612921669525443, "race/accuracy/test/middle": 0.3286908077994429, "race/accuracy/group_average": 0.29499148737599357, "race/accuracy/seq_average": 0.2809079854073774, "siqa/accuracy/dev": 0.3602865916069601, "siqa/accuracy/group_average": 0.3602865916069601, "siqa/accuracy/seq_average": 0.3602865916069601, "commonsenseqa/accuracy/dev_rand_split": 0.23095823095823095, "commonsenseqa/accuracy/group_average": 0.23095823095823095, "commonsenseqa/accuracy/seq_average": 0.23095823095823095}
Pretrain_language_model/save/slimpajama_xmoe_no_attmoe_154M_standard_lb/export/result-model-80000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.6567273821149553, "val/accuracy": 0.4727124410962302, "val/perplexity": 14.24957928066503, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.5068359375, "lambada/accuracy/total": 0.23486024844720496, "lambada/accuracy/openai_last_token": 0.7585403726708074, "lambada/perplexity": 13.102069321724693, "lambada/lm_loss": 3.226651154896998, "lambada/lm_perplexity": 25.195140910094015, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.35378634477171755, "mean_loss": 2.5817816598074774, "blimp/accuracy/passive_2": 0.908, "blimp/accuracy/determiner_noun_agreement_2": 0.987, "blimp/accuracy/ellipsis_n_bar_1": 0.838, "blimp/accuracy/tough_vs_raising_2": 0.86, "blimp/accuracy/tough_vs_raising_1": 0.608, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.905, "blimp/accuracy/principle_A_reconstruction": 0.417, "blimp/accuracy/wh_vs_that_with_gap": 0.515, "blimp/accuracy/principle_A_domain_2": 0.818, "blimp/accuracy/determiner_noun_agreement_1": 0.985, "blimp/accuracy/ellipsis_n_bar_2": 0.886, "blimp/accuracy/principle_A_domain_3": 0.589, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.912, "blimp/accuracy/animate_subject_trans": 0.905, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.887, "blimp/accuracy/distractor_agreement_relative_clause": 0.709, "blimp/accuracy/transitive": 0.852, "blimp/accuracy/sentential_subject_island": 0.42, "blimp/accuracy/adjunct_island": 0.81, "blimp/accuracy/intransitive": 0.777, "blimp/accuracy/existential_there_subject_raising": 0.841, "blimp/accuracy/irregular_past_participle_adjectives": 0.831, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.228, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.243, "blimp/accuracy/only_npi_scope": 0.65, "blimp/accuracy/superlative_quantifiers_2": 0.849, "blimp/accuracy/passive_1": 0.9, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.887, "blimp/accuracy/inchoative": 0.603, "blimp/accuracy/anaphor_gender_agreement": 0.97, "blimp/accuracy/principle_A_c_command": 0.617, "blimp/accuracy/only_npi_licensor_present": 0.564, "blimp/accuracy/expletive_it_object_raising": 0.794, "blimp/accuracy/left_branch_island_simple_question": 0.276, "blimp/accuracy/wh_questions_subject_gap": 0.906, "blimp/accuracy/existential_there_quantifiers_2": 0.242, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.922, "blimp/accuracy/sentential_negation_npi_scope": 0.568, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.827, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.878, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.866, "blimp/accuracy/principle_A_case_2": 0.954, "blimp/accuracy/distractor_agreement_relational_noun": 0.798, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.989, "blimp/accuracy/superlative_quantifiers_1": 0.751, "blimp/accuracy/wh_island": 0.819, "blimp/accuracy/principle_A_domain_1": 0.974, "blimp/accuracy/complex_NP_island": 0.591, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.968, "blimp/accuracy/irregular_past_participle_verbs": 0.875, "blimp/accuracy/drop_argument": 0.763, "blimp/accuracy/wh_questions_object_gap": 0.796, "blimp/accuracy/animate_subject_passive": 0.78, "blimp/accuracy/existential_there_quantifiers_1": 0.957, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.882, "blimp/accuracy/npi_present_2": 0.54, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.935, "blimp/accuracy/anaphor_number_agreement": 0.98, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.954, "blimp/accuracy/existential_there_object_raising": 0.831, "blimp/accuracy/matrix_question_npi_licensor_present": 0.183, "blimp/accuracy/npi_present_1": 0.508, "blimp/accuracy/wh_vs_that_no_gap": 0.98, "blimp/accuracy/left_branch_island_echo_question": 0.324, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.974, "blimp/accuracy/causative": 0.698, "blimp/accuracy/group_average": 0.7590149253731341, "blimp/accuracy/seq_average": 0.7590149253731343, "cbt/accuracy/NE": 0.7580128205128205, "cbt/accuracy/V": 0.9024, "cbt/accuracy/CN": 0.8064, "cbt/accuracy/P": 0.8796, "cbt/accuracy/group_average": 0.8366032051282051, "cbt/accuracy/seq_average": 0.8366346538615446, "hellaswag/accuracy/val": 0.28818960366460866, "hellaswag/accuracy/group_average": 0.28818960366460866, "hellaswag/accuracy/seq_average": 0.28818960366460866, "piqa/accuracy/val": 0.5826985854189336, "piqa/accuracy/group_average": 0.5826985854189336, "piqa/accuracy/seq_average": 0.5826985854189336, "ai2arc/accuracy/ARC-Easy": 0.32600422832980974, "ai2arc/accuracy/ARC-Challenge": 0.21716738197424892, "ai2arc/accuracy/group_average": 0.27158580515202935, "ai2arc/accuracy/seq_average": 0.29008498583569403, "race/accuracy/test/high": 0.26186392224128074, "race/accuracy/test/middle": 0.33913649025069637, "race/accuracy/group_average": 0.30050020624598855, "race/accuracy/seq_average": 0.2843534657478719, "siqa/accuracy/dev": 0.34851586489252817, "siqa/accuracy/group_average": 0.34851586489252817, "siqa/accuracy/seq_average": 0.34851586489252817, "commonsenseqa/accuracy/dev_rand_split": 0.23996723996723995, "commonsenseqa/accuracy/group_average": 0.23996723996723995, "commonsenseqa/accuracy/seq_average": 0.23996723996723995}
Pretrain_language_model/save/slimpajama_xmoe_no_attmoe_154M_standard_lb/export/result-model-90000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.645333668542287, "val/accuracy": 0.4747556656125992, "val/perplexity": 14.088145069790919, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.625901500630823, "lambada/accuracy/total": 0.23680124223602483, "lambada/accuracy/openai_last_token": 0.7536878881987578, "lambada/perplexity": 12.904226120983314, "lambada/lm_loss": 3.200719139191069, "lambada/lm_perplexity": 24.55017884618719, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.355778453924312, "mean_loss": 2.635617584586555, "blimp/accuracy/passive_2": 0.908, "blimp/accuracy/determiner_noun_agreement_2": 0.984, "blimp/accuracy/ellipsis_n_bar_1": 0.822, "blimp/accuracy/tough_vs_raising_2": 0.858, "blimp/accuracy/tough_vs_raising_1": 0.619, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.899, "blimp/accuracy/principle_A_reconstruction": 0.409, "blimp/accuracy/wh_vs_that_with_gap": 0.495, "blimp/accuracy/principle_A_domain_2": 0.8, "blimp/accuracy/determiner_noun_agreement_1": 0.984, "blimp/accuracy/ellipsis_n_bar_2": 0.895, "blimp/accuracy/principle_A_domain_3": 0.567, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.905, "blimp/accuracy/animate_subject_trans": 0.894, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.891, "blimp/accuracy/distractor_agreement_relative_clause": 0.708, "blimp/accuracy/transitive": 0.86, "blimp/accuracy/sentential_subject_island": 0.443, "blimp/accuracy/adjunct_island": 0.801, "blimp/accuracy/intransitive": 0.792, "blimp/accuracy/existential_there_subject_raising": 0.865, "blimp/accuracy/irregular_past_participle_adjectives": 0.883, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.242, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.235, "blimp/accuracy/only_npi_scope": 0.617, "blimp/accuracy/superlative_quantifiers_2": 0.8, "blimp/accuracy/passive_1": 0.889, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.895, "blimp/accuracy/inchoative": 0.61, "blimp/accuracy/anaphor_gender_agreement": 0.969, "blimp/accuracy/principle_A_c_command": 0.618, "blimp/accuracy/only_npi_licensor_present": 0.526, "blimp/accuracy/expletive_it_object_raising": 0.796, "blimp/accuracy/left_branch_island_simple_question": 0.283, "blimp/accuracy/wh_questions_subject_gap": 0.919, "blimp/accuracy/existential_there_quantifiers_2": 0.376, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.928, "blimp/accuracy/sentential_negation_npi_scope": 0.543, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.836, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.848, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.893, "blimp/accuracy/principle_A_case_2": 0.964, "blimp/accuracy/distractor_agreement_relational_noun": 0.827, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.992, "blimp/accuracy/superlative_quantifiers_1": 0.812, "blimp/accuracy/wh_island": 0.788, "blimp/accuracy/principle_A_domain_1": 0.977, "blimp/accuracy/complex_NP_island": 0.575, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.961, "blimp/accuracy/irregular_past_participle_verbs": 0.906, "blimp/accuracy/drop_argument": 0.762, "blimp/accuracy/wh_questions_object_gap": 0.802, "blimp/accuracy/animate_subject_passive": 0.791, "blimp/accuracy/existential_there_quantifiers_1": 0.975, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.899, "blimp/accuracy/npi_present_2": 0.559, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.928, "blimp/accuracy/anaphor_number_agreement": 0.987, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.957, "blimp/accuracy/existential_there_object_raising": 0.844, "blimp/accuracy/matrix_question_npi_licensor_present": 0.198, "blimp/accuracy/npi_present_1": 0.557, "blimp/accuracy/wh_vs_that_no_gap": 0.98, "blimp/accuracy/left_branch_island_echo_question": 0.363, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.977, "blimp/accuracy/causative": 0.708, "blimp/accuracy/group_average": 0.7640895522388056, "blimp/accuracy/seq_average": 0.7640895522388059, "cbt/accuracy/NE": 0.750801282051282, "cbt/accuracy/V": 0.9084, "cbt/accuracy/CN": 0.8084, "cbt/accuracy/P": 0.8844, "cbt/accuracy/group_average": 0.8380003205128205, "cbt/accuracy/seq_average": 0.8380352140856343, "hellaswag/accuracy/val": 0.29117705636327423, "hellaswag/accuracy/group_average": 0.29117705636327423, "hellaswag/accuracy/seq_average": 0.29117705636327423, "piqa/accuracy/val": 0.5919477693144722, "piqa/accuracy/group_average": 0.5919477693144722, "piqa/accuracy/seq_average": 0.5919477693144722, "ai2arc/accuracy/ARC-Easy": 0.32558139534883723, "ai2arc/accuracy/ARC-Challenge": 0.21630901287553647, "ai2arc/accuracy/group_average": 0.27094520411218687, "ai2arc/accuracy/seq_average": 0.2895184135977337, "race/accuracy/test/high": 0.26500857632933106, "race/accuracy/test/middle": 0.3307799442896936, "race/accuracy/group_average": 0.2978942603095123, "race/accuracy/seq_average": 0.28415079043372515, "siqa/accuracy/dev": 0.35516888433981575, "siqa/accuracy/group_average": 0.35516888433981575, "siqa/accuracy/seq_average": 0.35516888433981575, "commonsenseqa/accuracy/dev_rand_split": 0.24078624078624078, "commonsenseqa/accuracy/group_average": 0.24078624078624078, "commonsenseqa/accuracy/seq_average": 0.24078624078624078}