DavidNguyen commited on
Commit
dbf0e1e
·
verified ·
1 Parent(s): bd5a8d5

73bebc12bb080ae84c7647473cae93e6ced99d3dbd319ef725f99f5e449c99fd

Browse files
Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_tcmoe/export/result-model-10000.pth.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "val/loss": 3.01871575249566,
3
+ "val/accuracy": 0.4236605205233135,
4
+ "val/perplexity": 20.464992684813527,
5
+ "val/time_since_best_loss": 0,
6
+ "val/time_since_best_accuracy": 0,
7
+ "lambada/loss": 2.7049215565557065,
8
+ "lambada/accuracy/total": 0.1653726708074534,
9
+ "lambada/accuracy/openai_last_token": 0.720108695652174,
10
+ "lambada/perplexity": 23.920566937013117,
11
+ "lambada/lm_loss": 3.554632107058449,
12
+ "lambada/lm_perplexity": 34.974950563113914,
13
+ "lambada/time_since_best_loss": 0,
14
+ "lambada/time_since_best_accuracy": 0,
15
+ "mean_accuracy": 0.29451659566538346,
16
+ "mean_loss": 2.861818654525683,
17
+ "blimp/accuracy/passive_2": 0.882,
18
+ "blimp/accuracy/determiner_noun_agreement_2": 0.964,
19
+ "blimp/accuracy/ellipsis_n_bar_1": 0.706,
20
+ "blimp/accuracy/tough_vs_raising_2": 0.836,
21
+ "blimp/accuracy/tough_vs_raising_1": 0.504,
22
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.896,
23
+ "blimp/accuracy/principle_A_reconstruction": 0.461,
24
+ "blimp/accuracy/wh_vs_that_with_gap": 0.39,
25
+ "blimp/accuracy/principle_A_domain_2": 0.732,
26
+ "blimp/accuracy/determiner_noun_agreement_1": 0.969,
27
+ "blimp/accuracy/ellipsis_n_bar_2": 0.872,
28
+ "blimp/accuracy/principle_A_domain_3": 0.537,
29
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.844,
30
+ "blimp/accuracy/animate_subject_trans": 0.876,
31
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.743,
32
+ "blimp/accuracy/distractor_agreement_relative_clause": 0.406,
33
+ "blimp/accuracy/transitive": 0.817,
34
+ "blimp/accuracy/sentential_subject_island": 0.407,
35
+ "blimp/accuracy/adjunct_island": 0.709,
36
+ "blimp/accuracy/intransitive": 0.729,
37
+ "blimp/accuracy/existential_there_subject_raising": 0.813,
38
+ "blimp/accuracy/irregular_past_participle_adjectives": 0.968,
39
+ "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.206,
40
+ "blimp/accuracy/principle_A_case_1": 1.0,
41
+ "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.145,
42
+ "blimp/accuracy/only_npi_scope": 0.64,
43
+ "blimp/accuracy/superlative_quantifiers_2": 0.636,
44
+ "blimp/accuracy/passive_1": 0.89,
45
+ "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.886,
46
+ "blimp/accuracy/inchoative": 0.584,
47
+ "blimp/accuracy/anaphor_gender_agreement": 0.938,
48
+ "blimp/accuracy/principle_A_c_command": 0.549,
49
+ "blimp/accuracy/only_npi_licensor_present": 0.555,
50
+ "blimp/accuracy/expletive_it_object_raising": 0.767,
51
+ "blimp/accuracy/left_branch_island_simple_question": 0.205,
52
+ "blimp/accuracy/wh_questions_subject_gap": 0.862,
53
+ "blimp/accuracy/existential_there_quantifiers_2": 0.353,
54
+ "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.886,
55
+ "blimp/accuracy/sentential_negation_npi_scope": 0.449,
56
+ "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.763,
57
+ "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.816,
58
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.817,
59
+ "blimp/accuracy/principle_A_case_2": 0.945,
60
+ "blimp/accuracy/distractor_agreement_relational_noun": 0.717,
61
+ "blimp/accuracy/sentential_negation_npi_licensor_present": 0.985,
62
+ "blimp/accuracy/superlative_quantifiers_1": 0.678,
63
+ "blimp/accuracy/wh_island": 0.763,
64
+ "blimp/accuracy/principle_A_domain_1": 0.969,
65
+ "blimp/accuracy/complex_NP_island": 0.527,
66
+ "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.928,
67
+ "blimp/accuracy/irregular_past_participle_verbs": 0.851,
68
+ "blimp/accuracy/drop_argument": 0.73,
69
+ "blimp/accuracy/wh_questions_object_gap": 0.707,
70
+ "blimp/accuracy/animate_subject_passive": 0.741,
71
+ "blimp/accuracy/existential_there_quantifiers_1": 0.987,
72
+ "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.855,
73
+ "blimp/accuracy/npi_present_2": 0.602,
74
+ "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.812,
75
+ "blimp/accuracy/anaphor_number_agreement": 0.978,
76
+ "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.911,
77
+ "blimp/accuracy/existential_there_object_raising": 0.765,
78
+ "blimp/accuracy/matrix_question_npi_licensor_present": 0.165,
79
+ "blimp/accuracy/npi_present_1": 0.633,
80
+ "blimp/accuracy/wh_vs_that_no_gap": 0.94,
81
+ "blimp/accuracy/left_branch_island_echo_question": 0.301,
82
+ "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.961,
83
+ "blimp/accuracy/causative": 0.654,
84
+ "blimp/accuracy/group_average": 0.7181044776119401,
85
+ "blimp/accuracy/seq_average": 0.7181044776119403,
86
+ "cbt/accuracy/NE": 0.6838942307692307,
87
+ "cbt/accuracy/V": 0.8648,
88
+ "cbt/accuracy/CN": 0.7356,
89
+ "cbt/accuracy/P": 0.8372,
90
+ "cbt/accuracy/group_average": 0.7803735576923077,
91
+ "cbt/accuracy/seq_average": 0.7804121648659463,
92
+ "hellaswag/accuracy/val": 0.2727544313881697,
93
+ "hellaswag/accuracy/group_average": 0.2727544313881697,
94
+ "hellaswag/accuracy/seq_average": 0.2727544313881697,
95
+ "piqa/accuracy/val": 0.5571273122959739,
96
+ "piqa/accuracy/group_average": 0.5571273122959739,
97
+ "piqa/accuracy/seq_average": 0.5571273122959739,
98
+ "ai2arc/accuracy/ARC-Easy": 0.3090909090909091,
99
+ "ai2arc/accuracy/ARC-Challenge": 0.20429184549356222,
100
+ "ai2arc/accuracy/group_average": 0.2566913772922357,
101
+ "ai2arc/accuracy/seq_average": 0.2745042492917847,
102
+ "race/accuracy/test/high": 0.25128644939965694,
103
+ "race/accuracy/test/middle": 0.32103064066852366,
104
+ "race/accuracy/group_average": 0.28615854503409033,
105
+ "race/accuracy/seq_average": 0.2715849209566275,
106
+ "siqa/accuracy/dev": 0.3587512794268168,
107
+ "siqa/accuracy/group_average": 0.3587512794268168,
108
+ "siqa/accuracy/seq_average": 0.3587512794268168,
109
+ "commonsenseqa/accuracy/dev_rand_split": 0.23177723177723178,
110
+ "commonsenseqa/accuracy/group_average": 0.23177723177723178,
111
+ "commonsenseqa/accuracy/seq_average": 0.23177723177723178
112
+ }
Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_tcmoe/export/result-model-100000.pth.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "val/loss": 2.603516593812004,
3
+ "val/accuracy": 0.48022557818700395,
4
+ "val/perplexity": 13.51116787973311,
5
+ "val/time_since_best_loss": 0,
6
+ "val/time_since_best_accuracy": 0,
7
+ "lambada/loss": 2.669328582953222,
8
+ "lambada/accuracy/total": 0.25601708074534163,
9
+ "lambada/accuracy/openai_last_token": 0.7635869565217391,
10
+ "lambada/perplexity": 11.557441177211011,
11
+ "lambada/lm_loss": 3.1691284005766103,
12
+ "lambada/lm_perplexity": 23.78674280726548,
13
+ "lambada/time_since_best_loss": 0,
14
+ "lambada/time_since_best_accuracy": 0,
15
+ "mean_accuracy": 0.3681213294661728,
16
+ "mean_loss": 2.636422588382613,
17
+ "blimp/accuracy/passive_2": 0.904,
18
+ "blimp/accuracy/determiner_noun_agreement_2": 0.972,
19
+ "blimp/accuracy/ellipsis_n_bar_1": 0.794,
20
+ "blimp/accuracy/tough_vs_raising_2": 0.858,
21
+ "blimp/accuracy/tough_vs_raising_1": 0.567,
22
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.905,
23
+ "blimp/accuracy/principle_A_reconstruction": 0.353,
24
+ "blimp/accuracy/wh_vs_that_with_gap": 0.512,
25
+ "blimp/accuracy/principle_A_domain_2": 0.849,
26
+ "blimp/accuracy/determiner_noun_agreement_1": 0.986,
27
+ "blimp/accuracy/ellipsis_n_bar_2": 0.903,
28
+ "blimp/accuracy/principle_A_domain_3": 0.603,
29
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.907,
30
+ "blimp/accuracy/animate_subject_trans": 0.893,
31
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.884,
32
+ "blimp/accuracy/distractor_agreement_relative_clause": 0.544,
33
+ "blimp/accuracy/transitive": 0.876,
34
+ "blimp/accuracy/sentential_subject_island": 0.285,
35
+ "blimp/accuracy/adjunct_island": 0.78,
36
+ "blimp/accuracy/intransitive": 0.759,
37
+ "blimp/accuracy/existential_there_subject_raising": 0.88,
38
+ "blimp/accuracy/irregular_past_participle_adjectives": 0.978,
39
+ "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.49,
40
+ "blimp/accuracy/principle_A_case_1": 1.0,
41
+ "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.202,
42
+ "blimp/accuracy/only_npi_scope": 0.703,
43
+ "blimp/accuracy/superlative_quantifiers_2": 0.812,
44
+ "blimp/accuracy/passive_1": 0.888,
45
+ "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.885,
46
+ "blimp/accuracy/inchoative": 0.626,
47
+ "blimp/accuracy/anaphor_gender_agreement": 0.95,
48
+ "blimp/accuracy/principle_A_c_command": 0.603,
49
+ "blimp/accuracy/only_npi_licensor_present": 0.581,
50
+ "blimp/accuracy/expletive_it_object_raising": 0.801,
51
+ "blimp/accuracy/left_branch_island_simple_question": 0.556,
52
+ "blimp/accuracy/wh_questions_subject_gap": 0.909,
53
+ "blimp/accuracy/existential_there_quantifiers_2": 0.388,
54
+ "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.947,
55
+ "blimp/accuracy/sentential_negation_npi_scope": 0.664,
56
+ "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.828,
57
+ "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.844,
58
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.894,
59
+ "blimp/accuracy/principle_A_case_2": 0.963,
60
+ "blimp/accuracy/distractor_agreement_relational_noun": 0.743,
61
+ "blimp/accuracy/sentential_negation_npi_licensor_present": 0.985,
62
+ "blimp/accuracy/superlative_quantifiers_1": 0.68,
63
+ "blimp/accuracy/wh_island": 0.723,
64
+ "blimp/accuracy/principle_A_domain_1": 0.977,
65
+ "blimp/accuracy/complex_NP_island": 0.529,
66
+ "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.972,
67
+ "blimp/accuracy/irregular_past_participle_verbs": 0.912,
68
+ "blimp/accuracy/drop_argument": 0.732,
69
+ "blimp/accuracy/wh_questions_object_gap": 0.784,
70
+ "blimp/accuracy/animate_subject_passive": 0.794,
71
+ "blimp/accuracy/existential_there_quantifiers_1": 0.977,
72
+ "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.877,
73
+ "blimp/accuracy/npi_present_2": 0.589,
74
+ "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.93,
75
+ "blimp/accuracy/anaphor_number_agreement": 0.986,
76
+ "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.957,
77
+ "blimp/accuracy/existential_there_object_raising": 0.839,
78
+ "blimp/accuracy/matrix_question_npi_licensor_present": 0.263,
79
+ "blimp/accuracy/npi_present_1": 0.622,
80
+ "blimp/accuracy/wh_vs_that_no_gap": 0.975,
81
+ "blimp/accuracy/left_branch_island_echo_question": 0.464,
82
+ "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.976,
83
+ "blimp/accuracy/causative": 0.717,
84
+ "blimp/accuracy/group_average": 0.7690895522388058,
85
+ "blimp/accuracy/seq_average": 0.7690895522388059,
86
+ "cbt/accuracy/NE": 0.7636217948717948,
87
+ "cbt/accuracy/V": 0.9096,
88
+ "cbt/accuracy/CN": 0.8248,
89
+ "cbt/accuracy/P": 0.8892,
90
+ "cbt/accuracy/group_average": 0.8468054487179486,
91
+ "cbt/accuracy/seq_average": 0.8468387354941976,
92
+ "hellaswag/accuracy/val": 0.29267078271260705,
93
+ "hellaswag/accuracy/group_average": 0.29267078271260705,
94
+ "hellaswag/accuracy/seq_average": 0.29267078271260705,
95
+ "piqa/accuracy/val": 0.5903155603917302,
96
+ "piqa/accuracy/group_average": 0.5903155603917302,
97
+ "piqa/accuracy/seq_average": 0.5903155603917302,
98
+ "ai2arc/accuracy/ARC-Easy": 0.3302325581395349,
99
+ "ai2arc/accuracy/ARC-Challenge": 0.20257510729613734,
100
+ "ai2arc/accuracy/group_average": 0.26640383271783613,
101
+ "ai2arc/accuracy/seq_average": 0.28810198300283285,
102
+ "race/accuracy/test/high": 0.27072612921669525,
103
+ "race/accuracy/test/middle": 0.34192200557103064,
104
+ "race/accuracy/group_average": 0.306324067393863,
105
+ "race/accuracy/seq_average": 0.2914471017430077,
106
+ "siqa/accuracy/dev": 0.3602865916069601,
107
+ "siqa/accuracy/group_average": 0.3602865916069601,
108
+ "siqa/accuracy/seq_average": 0.3602865916069601,
109
+ "commonsenseqa/accuracy/dev_rand_split": 0.26371826371826373,
110
+ "commonsenseqa/accuracy/group_average": 0.26371826371826373,
111
+ "commonsenseqa/accuracy/seq_average": 0.26371826371826373
112
+ }
Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_tcmoe/export/result-model-20000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.877354939778646, "val/accuracy": 0.4417182074652778, "val/perplexity": 17.76721561673222, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.687665856402853, "lambada/accuracy/total": 0.18303571428571427, "lambada/accuracy/openai_last_token": 0.7333074534161491, "lambada/perplexity": 18.255754615329064, "lambada/lm_loss": 3.405477641680216, "lambada/lm_perplexity": 30.128683002434276, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.31237696087549605, "mean_loss": 2.7825103980907495, "blimp/accuracy/passive_2": 0.87, "blimp/accuracy/determiner_noun_agreement_2": 0.947, "blimp/accuracy/ellipsis_n_bar_1": 0.765, "blimp/accuracy/tough_vs_raising_2": 0.857, "blimp/accuracy/tough_vs_raising_1": 0.497, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.87, "blimp/accuracy/principle_A_reconstruction": 0.47, "blimp/accuracy/wh_vs_that_with_gap": 0.428, "blimp/accuracy/principle_A_domain_2": 0.816, "blimp/accuracy/determiner_noun_agreement_1": 0.976, "blimp/accuracy/ellipsis_n_bar_2": 0.867, "blimp/accuracy/principle_A_domain_3": 0.577, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.866, "blimp/accuracy/animate_subject_trans": 0.877, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.83, "blimp/accuracy/distractor_agreement_relative_clause": 0.475, "blimp/accuracy/transitive": 0.845, "blimp/accuracy/sentential_subject_island": 0.458, "blimp/accuracy/adjunct_island": 0.686, "blimp/accuracy/intransitive": 0.694, "blimp/accuracy/existential_there_subject_raising": 0.851, "blimp/accuracy/irregular_past_participle_adjectives": 0.903, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.404, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.191, "blimp/accuracy/only_npi_scope": 0.641, "blimp/accuracy/superlative_quantifiers_2": 0.873, "blimp/accuracy/passive_1": 0.873, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.897, "blimp/accuracy/inchoative": 0.554, "blimp/accuracy/anaphor_gender_agreement": 0.862, "blimp/accuracy/principle_A_c_command": 0.572, "blimp/accuracy/only_npi_licensor_present": 0.471, "blimp/accuracy/expletive_it_object_raising": 0.78, "blimp/accuracy/left_branch_island_simple_question": 0.476, "blimp/accuracy/wh_questions_subject_gap": 0.895, "blimp/accuracy/existential_there_quantifiers_2": 0.502, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.885, "blimp/accuracy/sentential_negation_npi_scope": 0.467, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.789, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.849, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.863, "blimp/accuracy/principle_A_case_2": 0.965, "blimp/accuracy/distractor_agreement_relational_noun": 0.765, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.986, "blimp/accuracy/superlative_quantifiers_1": 0.631, "blimp/accuracy/wh_island": 0.828, "blimp/accuracy/principle_A_domain_1": 0.977, "blimp/accuracy/complex_NP_island": 0.492, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.925, "blimp/accuracy/irregular_past_participle_verbs": 0.858, "blimp/accuracy/drop_argument": 0.689, "blimp/accuracy/wh_questions_object_gap": 0.76, "blimp/accuracy/animate_subject_passive": 0.769, "blimp/accuracy/existential_there_quantifiers_1": 0.978, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.845, "blimp/accuracy/npi_present_2": 0.619, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.896, "blimp/accuracy/anaphor_number_agreement": 0.972, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.934, "blimp/accuracy/existential_there_object_raising": 0.76, "blimp/accuracy/matrix_question_npi_licensor_present": 0.201, "blimp/accuracy/npi_present_1": 0.58, "blimp/accuracy/wh_vs_that_no_gap": 0.947, "blimp/accuracy/left_branch_island_echo_question": 0.366, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.973, "blimp/accuracy/causative": 0.677, "blimp/accuracy/group_average": 0.7412238805970146, "blimp/accuracy/seq_average": 0.7412238805970149, "cbt/accuracy/NE": 0.7059294871794872, "cbt/accuracy/V": 0.8772, "cbt/accuracy/CN": 0.7644, "cbt/accuracy/P": 0.864, "cbt/accuracy/group_average": 0.8028823717948718, "cbt/accuracy/seq_average": 0.8029211684673869, "hellaswag/accuracy/val": 0.2758414658434575, "hellaswag/accuracy/group_average": 0.2758414658434575, "hellaswag/accuracy/seq_average": 0.2758414658434575, "piqa/accuracy/val": 0.5767138193688792, "piqa/accuracy/group_average": 0.5767138193688792, "piqa/accuracy/seq_average": 0.5767138193688792, "ai2arc/accuracy/ARC-Easy": 0.3099365750528541, "ai2arc/accuracy/ARC-Challenge": 0.20772532188841203, "ai2arc/accuracy/group_average": 0.25883094847063304, "ai2arc/accuracy/seq_average": 0.2762039660056657, "race/accuracy/test/high": 0.2552887364208119, "race/accuracy/test/middle": 0.32590529247910865, "race/accuracy/group_average": 0.2905970144499603, "race/accuracy/seq_average": 0.275841102553709, "siqa/accuracy/dev": 0.3694984646878199, "siqa/accuracy/group_average": 0.3694984646878199, "siqa/accuracy/seq_average": 0.3694984646878199, "commonsenseqa/accuracy/dev_rand_split": 0.2457002457002457, "commonsenseqa/accuracy/group_average": 0.2457002457002457, "commonsenseqa/accuracy/seq_average": 0.2457002457002457}
Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_tcmoe/export/result-model-30000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.8010045611669145, "val/accuracy": 0.4515448676215278, "val/perplexity": 16.461174724927293, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.6126781013441382, "lambada/accuracy/total": 0.19934006211180125, "lambada/accuracy/openai_last_token": 0.7377717391304348, "lambada/perplexity": 15.94638679663136, "lambada/lm_loss": 3.3561931141990313, "lambada/lm_perplexity": 28.679802064734133, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3254424648666645, "mean_loss": 2.7068413312555264, "blimp/accuracy/passive_2": 0.873, "blimp/accuracy/determiner_noun_agreement_2": 0.966, "blimp/accuracy/ellipsis_n_bar_1": 0.741, "blimp/accuracy/tough_vs_raising_2": 0.845, "blimp/accuracy/tough_vs_raising_1": 0.582, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.897, "blimp/accuracy/principle_A_reconstruction": 0.471, "blimp/accuracy/wh_vs_that_with_gap": 0.524, "blimp/accuracy/principle_A_domain_2": 0.797, "blimp/accuracy/determiner_noun_agreement_1": 0.975, "blimp/accuracy/ellipsis_n_bar_2": 0.847, "blimp/accuracy/principle_A_domain_3": 0.546, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.874, "blimp/accuracy/animate_subject_trans": 0.876, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.825, "blimp/accuracy/distractor_agreement_relative_clause": 0.487, "blimp/accuracy/transitive": 0.857, "blimp/accuracy/sentential_subject_island": 0.388, "blimp/accuracy/adjunct_island": 0.71, "blimp/accuracy/intransitive": 0.746, "blimp/accuracy/existential_there_subject_raising": 0.86, "blimp/accuracy/irregular_past_participle_adjectives": 0.978, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.346, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.209, "blimp/accuracy/only_npi_scope": 0.765, "blimp/accuracy/superlative_quantifiers_2": 0.585, "blimp/accuracy/passive_1": 0.873, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.873, "blimp/accuracy/inchoative": 0.62, "blimp/accuracy/anaphor_gender_agreement": 0.952, "blimp/accuracy/principle_A_c_command": 0.567, "blimp/accuracy/only_npi_licensor_present": 0.541, "blimp/accuracy/expletive_it_object_raising": 0.798, "blimp/accuracy/left_branch_island_simple_question": 0.363, "blimp/accuracy/wh_questions_subject_gap": 0.851, "blimp/accuracy/existential_there_quantifiers_2": 0.296, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.904, "blimp/accuracy/sentential_negation_npi_scope": 0.644, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.811, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.839, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.849, "blimp/accuracy/principle_A_case_2": 0.95, "blimp/accuracy/distractor_agreement_relational_noun": 0.715, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.986, "blimp/accuracy/superlative_quantifiers_1": 0.718, "blimp/accuracy/wh_island": 0.709, "blimp/accuracy/principle_A_domain_1": 0.944, "blimp/accuracy/complex_NP_island": 0.497, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.923, "blimp/accuracy/irregular_past_participle_verbs": 0.881, "blimp/accuracy/drop_argument": 0.763, "blimp/accuracy/wh_questions_object_gap": 0.636, "blimp/accuracy/animate_subject_passive": 0.787, "blimp/accuracy/existential_there_quantifiers_1": 0.973, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.859, "blimp/accuracy/npi_present_2": 0.586, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.891, "blimp/accuracy/anaphor_number_agreement": 0.979, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.922, "blimp/accuracy/existential_there_object_raising": 0.796, "blimp/accuracy/matrix_question_npi_licensor_present": 0.255, "blimp/accuracy/npi_present_1": 0.569, "blimp/accuracy/wh_vs_that_no_gap": 0.923, "blimp/accuracy/left_branch_island_echo_question": 0.325, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.957, "blimp/accuracy/causative": 0.714, "blimp/accuracy/group_average": 0.7404328358208955, "blimp/accuracy/seq_average": 0.7404328358208955, "cbt/accuracy/NE": 0.7307692307692307, "cbt/accuracy/V": 0.886, "cbt/accuracy/CN": 0.7864, "cbt/accuracy/P": 0.8704, "cbt/accuracy/group_average": 0.8183923076923076, "cbt/accuracy/seq_average": 0.8184273709483794, "hellaswag/accuracy/val": 0.28231428002389963, "hellaswag/accuracy/group_average": 0.28231428002389963, "hellaswag/accuracy/seq_average": 0.28231428002389963, "piqa/accuracy/val": 0.5788900979325353, "piqa/accuracy/group_average": 0.5788900979325353, "piqa/accuracy/seq_average": 0.5788900979325353, "ai2arc/accuracy/ARC-Easy": 0.32600422832980974, "ai2arc/accuracy/ARC-Challenge": 0.20772532188841203, "ai2arc/accuracy/group_average": 0.2668647751091109, "ai2arc/accuracy/seq_average": 0.2869688385269122, "race/accuracy/test/high": 0.259576901086335, "race/accuracy/test/middle": 0.32729805013927576, "race/accuracy/group_average": 0.2934374756128054, "race/accuracy/seq_average": 0.2792865828942035, "siqa/accuracy/dev": 0.3679631525076766, "siqa/accuracy/group_average": 0.3679631525076766, "siqa/accuracy/seq_average": 0.3679631525076766, "commonsenseqa/accuracy/dev_rand_split": 0.24488124488124488, "commonsenseqa/accuracy/group_average": 0.24488124488124488, "commonsenseqa/accuracy/seq_average": 0.24488124488124488}
Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_tcmoe/export/result-model-40000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.7482169015066966, "val/accuracy": 0.4594251844618056, "val/perplexity": 15.61476438347703, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.5934531644264363, "lambada/accuracy/total": 0.2080745341614907, "lambada/accuracy/openai_last_token": 0.7422360248447205, "lambada/perplexity": 15.670183870221386, "lambada/lm_loss": 3.2963399277560805, "lambada/lm_perplexity": 27.01358608433046, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.33374985931164813, "mean_loss": 2.6708350329665667, "blimp/accuracy/passive_2": 0.878, "blimp/accuracy/determiner_noun_agreement_2": 0.961, "blimp/accuracy/ellipsis_n_bar_1": 0.789, "blimp/accuracy/tough_vs_raising_2": 0.853, "blimp/accuracy/tough_vs_raising_1": 0.569, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.898, "blimp/accuracy/principle_A_reconstruction": 0.271, "blimp/accuracy/wh_vs_that_with_gap": 0.506, "blimp/accuracy/principle_A_domain_2": 0.805, "blimp/accuracy/determiner_noun_agreement_1": 0.988, "blimp/accuracy/ellipsis_n_bar_2": 0.887, "blimp/accuracy/principle_A_domain_3": 0.584, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.888, "blimp/accuracy/animate_subject_trans": 0.883, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.873, "blimp/accuracy/distractor_agreement_relative_clause": 0.516, "blimp/accuracy/transitive": 0.858, "blimp/accuracy/sentential_subject_island": 0.299, "blimp/accuracy/adjunct_island": 0.77, "blimp/accuracy/intransitive": 0.749, "blimp/accuracy/existential_there_subject_raising": 0.882, "blimp/accuracy/irregular_past_participle_adjectives": 0.913, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.46, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.23, "blimp/accuracy/only_npi_scope": 0.606, "blimp/accuracy/superlative_quantifiers_2": 0.734, "blimp/accuracy/passive_1": 0.876, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.857, "blimp/accuracy/inchoative": 0.607, "blimp/accuracy/anaphor_gender_agreement": 0.951, "blimp/accuracy/principle_A_c_command": 0.59, "blimp/accuracy/only_npi_licensor_present": 0.711, "blimp/accuracy/expletive_it_object_raising": 0.795, "blimp/accuracy/left_branch_island_simple_question": 0.523, "blimp/accuracy/wh_questions_subject_gap": 0.883, "blimp/accuracy/existential_there_quantifiers_2": 0.33, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.922, "blimp/accuracy/sentential_negation_npi_scope": 0.617, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.801, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.848, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.88, "blimp/accuracy/principle_A_case_2": 0.964, "blimp/accuracy/distractor_agreement_relational_noun": 0.707, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.988, "blimp/accuracy/superlative_quantifiers_1": 0.716, "blimp/accuracy/wh_island": 0.693, "blimp/accuracy/principle_A_domain_1": 0.966, "blimp/accuracy/complex_NP_island": 0.544, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.95, "blimp/accuracy/irregular_past_participle_verbs": 0.909, "blimp/accuracy/drop_argument": 0.736, "blimp/accuracy/wh_questions_object_gap": 0.714, "blimp/accuracy/animate_subject_passive": 0.759, "blimp/accuracy/existential_there_quantifiers_1": 0.982, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.857, "blimp/accuracy/npi_present_2": 0.527, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.915, "blimp/accuracy/anaphor_number_agreement": 0.978, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.949, "blimp/accuracy/existential_there_object_raising": 0.788, "blimp/accuracy/matrix_question_npi_licensor_present": 0.253, "blimp/accuracy/npi_present_1": 0.566, "blimp/accuracy/wh_vs_that_no_gap": 0.949, "blimp/accuracy/left_branch_island_echo_question": 0.359, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.973, "blimp/accuracy/causative": 0.686, "blimp/accuracy/group_average": 0.750283582089552, "blimp/accuracy/seq_average": 0.7502835820895523, "cbt/accuracy/NE": 0.7311698717948718, "cbt/accuracy/V": 0.8968, "cbt/accuracy/CN": 0.788, "cbt/accuracy/P": 0.8764, "cbt/accuracy/group_average": 0.8230924679487179, "cbt/accuracy/seq_average": 0.8231292517006803, "hellaswag/accuracy/val": 0.2847042421828321, "hellaswag/accuracy/group_average": 0.2847042421828321, "hellaswag/accuracy/seq_average": 0.2847042421828321, "piqa/accuracy/val": 0.5788900979325353, "piqa/accuracy/group_average": 0.5788900979325353, "piqa/accuracy/seq_average": 0.5788900979325353, "ai2arc/accuracy/ARC-Easy": 0.32642706131078225, "ai2arc/accuracy/ARC-Challenge": 0.19914163090128756, "ai2arc/accuracy/group_average": 0.2627843461060349, "ai2arc/accuracy/seq_average": 0.28441926345609064, "race/accuracy/test/high": 0.2655803316180675, "race/accuracy/test/middle": 0.33774373259052926, "race/accuracy/group_average": 0.30166203210429837, "race/accuracy/seq_average": 0.28658289420348604, "siqa/accuracy/dev": 0.372057318321392, "siqa/accuracy/group_average": 0.372057318321392, "siqa/accuracy/seq_average": 0.372057318321392, "commonsenseqa/accuracy/dev_rand_split": 0.25225225225225223, "commonsenseqa/accuracy/group_average": 0.25225225225225223, "commonsenseqa/accuracy/seq_average": 0.25225225225225223}
Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_tcmoe/export/result-model-50000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.7065531412760415, "val/accuracy": 0.4652952163938492, "val/perplexity": 14.977560903133552, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.7031426281662463, "lambada/accuracy/total": 0.22593167701863354, "lambada/accuracy/openai_last_token": 0.7534937888198758, "lambada/perplexity": 14.005009783680974, "lambada/lm_loss": 3.280149474513485, "lambada/lm_perplexity": 26.57974539746989, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3456134467062414, "mean_loss": 2.704847884721144, "blimp/accuracy/passive_2": 0.878, "blimp/accuracy/determiner_noun_agreement_2": 0.971, "blimp/accuracy/ellipsis_n_bar_1": 0.767, "blimp/accuracy/tough_vs_raising_2": 0.884, "blimp/accuracy/tough_vs_raising_1": 0.519, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.901, "blimp/accuracy/principle_A_reconstruction": 0.396, "blimp/accuracy/wh_vs_that_with_gap": 0.502, "blimp/accuracy/principle_A_domain_2": 0.848, "blimp/accuracy/determiner_noun_agreement_1": 0.981, "blimp/accuracy/ellipsis_n_bar_2": 0.895, "blimp/accuracy/principle_A_domain_3": 0.588, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.896, "blimp/accuracy/animate_subject_trans": 0.876, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.849, "blimp/accuracy/distractor_agreement_relative_clause": 0.53, "blimp/accuracy/transitive": 0.852, "blimp/accuracy/sentential_subject_island": 0.366, "blimp/accuracy/adjunct_island": 0.788, "blimp/accuracy/intransitive": 0.745, "blimp/accuracy/existential_there_subject_raising": 0.883, "blimp/accuracy/irregular_past_participle_adjectives": 0.977, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.305, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.19, "blimp/accuracy/only_npi_scope": 0.73, "blimp/accuracy/superlative_quantifiers_2": 0.731, "blimp/accuracy/passive_1": 0.883, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.857, "blimp/accuracy/inchoative": 0.602, "blimp/accuracy/anaphor_gender_agreement": 0.946, "blimp/accuracy/principle_A_c_command": 0.622, "blimp/accuracy/only_npi_licensor_present": 0.612, "blimp/accuracy/expletive_it_object_raising": 0.798, "blimp/accuracy/left_branch_island_simple_question": 0.37, "blimp/accuracy/wh_questions_subject_gap": 0.915, "blimp/accuracy/existential_there_quantifiers_2": 0.42, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.904, "blimp/accuracy/sentential_negation_npi_scope": 0.596, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.827, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.884, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.864, "blimp/accuracy/principle_A_case_2": 0.969, "blimp/accuracy/distractor_agreement_relational_noun": 0.732, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.987, "blimp/accuracy/superlative_quantifiers_1": 0.685, "blimp/accuracy/wh_island": 0.72, "blimp/accuracy/principle_A_domain_1": 0.98, "blimp/accuracy/complex_NP_island": 0.54, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.959, "blimp/accuracy/irregular_past_participle_verbs": 0.901, "blimp/accuracy/drop_argument": 0.736, "blimp/accuracy/wh_questions_object_gap": 0.808, "blimp/accuracy/animate_subject_passive": 0.789, "blimp/accuracy/existential_there_quantifiers_1": 0.994, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.884, "blimp/accuracy/npi_present_2": 0.576, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.931, "blimp/accuracy/anaphor_number_agreement": 0.979, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.94, "blimp/accuracy/existential_there_object_raising": 0.816, "blimp/accuracy/matrix_question_npi_licensor_present": 0.271, "blimp/accuracy/npi_present_1": 0.584, "blimp/accuracy/wh_vs_that_no_gap": 0.964, "blimp/accuracy/left_branch_island_echo_question": 0.407, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.974, "blimp/accuracy/causative": 0.686, "blimp/accuracy/group_average": 0.7576119402985075, "blimp/accuracy/seq_average": 0.7576119402985074, "cbt/accuracy/NE": 0.7375801282051282, "cbt/accuracy/V": 0.8992, "cbt/accuracy/CN": 0.798, "cbt/accuracy/P": 0.8744, "cbt/accuracy/group_average": 0.8272950320512821, "cbt/accuracy/seq_average": 0.8273309323729492, "hellaswag/accuracy/val": 0.28560047799243177, "hellaswag/accuracy/group_average": 0.28560047799243177, "hellaswag/accuracy/seq_average": 0.28560047799243177, "piqa/accuracy/val": 0.5826985854189336, "piqa/accuracy/group_average": 0.5826985854189336, "piqa/accuracy/seq_average": 0.5826985854189336, "ai2arc/accuracy/ARC-Easy": 0.321353065539112, "ai2arc/accuracy/ARC-Challenge": 0.2128755364806867, "ai2arc/accuracy/group_average": 0.26711430100989936, "ai2arc/accuracy/seq_average": 0.2855524079320113, "race/accuracy/test/high": 0.26786735277301316, "race/accuracy/test/middle": 0.3279944289693593, "race/accuracy/group_average": 0.29793089087118624, "race/accuracy/seq_average": 0.2853668423186056, "siqa/accuracy/dev": 0.36284544524053225, "siqa/accuracy/group_average": 0.36284544524053225, "siqa/accuracy/seq_average": 0.36284544524053225, "commonsenseqa/accuracy/dev_rand_split": 0.25307125307125306, "commonsenseqa/accuracy/group_average": 0.25307125307125306, "commonsenseqa/accuracy/seq_average": 0.25307125307125306}
Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_tcmoe/export/result-model-60000.pth.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "val/loss": 2.671809605189732,
3
+ "val/accuracy": 0.4705403645833333,
4
+ "val/perplexity": 14.466123493335513,
5
+ "val/time_since_best_loss": 0,
6
+ "val/time_since_best_accuracy": 0,
7
+ "lambada/loss": 2.6152883967997864,
8
+ "lambada/accuracy/total": 0.22496118012422361,
9
+ "lambada/accuracy/openai_last_token": 0.7525232919254659,
10
+ "lambada/perplexity": 13.588685271191228,
11
+ "lambada/lm_loss": 3.222259005347056,
12
+ "lambada/lm_perplexity": 25.084722747333092,
13
+ "lambada/time_since_best_loss": 0,
14
+ "lambada/time_since_best_accuracy": 0,
15
+ "mean_accuracy": 0.3477507723537785,
16
+ "mean_loss": 2.643549000994759,
17
+ "blimp/accuracy/passive_2": 0.888,
18
+ "blimp/accuracy/determiner_noun_agreement_2": 0.974,
19
+ "blimp/accuracy/ellipsis_n_bar_1": 0.769,
20
+ "blimp/accuracy/tough_vs_raising_2": 0.861,
21
+ "blimp/accuracy/tough_vs_raising_1": 0.55,
22
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.879,
23
+ "blimp/accuracy/principle_A_reconstruction": 0.335,
24
+ "blimp/accuracy/wh_vs_that_with_gap": 0.491,
25
+ "blimp/accuracy/principle_A_domain_2": 0.831,
26
+ "blimp/accuracy/determiner_noun_agreement_1": 0.989,
27
+ "blimp/accuracy/ellipsis_n_bar_2": 0.893,
28
+ "blimp/accuracy/principle_A_domain_3": 0.579,
29
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.904,
30
+ "blimp/accuracy/animate_subject_trans": 0.884,
31
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.881,
32
+ "blimp/accuracy/distractor_agreement_relative_clause": 0.583,
33
+ "blimp/accuracy/transitive": 0.864,
34
+ "blimp/accuracy/sentential_subject_island": 0.338,
35
+ "blimp/accuracy/adjunct_island": 0.729,
36
+ "blimp/accuracy/intransitive": 0.743,
37
+ "blimp/accuracy/existential_there_subject_raising": 0.888,
38
+ "blimp/accuracy/irregular_past_participle_adjectives": 0.953,
39
+ "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.402,
40
+ "blimp/accuracy/principle_A_case_1": 1.0,
41
+ "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.208,
42
+ "blimp/accuracy/only_npi_scope": 0.719,
43
+ "blimp/accuracy/superlative_quantifiers_2": 0.658,
44
+ "blimp/accuracy/passive_1": 0.876,
45
+ "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.867,
46
+ "blimp/accuracy/inchoative": 0.646,
47
+ "blimp/accuracy/anaphor_gender_agreement": 0.954,
48
+ "blimp/accuracy/principle_A_c_command": 0.609,
49
+ "blimp/accuracy/only_npi_licensor_present": 0.471,
50
+ "blimp/accuracy/expletive_it_object_raising": 0.775,
51
+ "blimp/accuracy/left_branch_island_simple_question": 0.467,
52
+ "blimp/accuracy/wh_questions_subject_gap": 0.918,
53
+ "blimp/accuracy/existential_there_quantifiers_2": 0.366,
54
+ "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.933,
55
+ "blimp/accuracy/sentential_negation_npi_scope": 0.684,
56
+ "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.807,
57
+ "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.857,
58
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.862,
59
+ "blimp/accuracy/principle_A_case_2": 0.966,
60
+ "blimp/accuracy/distractor_agreement_relational_noun": 0.712,
61
+ "blimp/accuracy/sentential_negation_npi_licensor_present": 0.99,
62
+ "blimp/accuracy/superlative_quantifiers_1": 0.691,
63
+ "blimp/accuracy/wh_island": 0.782,
64
+ "blimp/accuracy/principle_A_domain_1": 0.988,
65
+ "blimp/accuracy/complex_NP_island": 0.528,
66
+ "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.953,
67
+ "blimp/accuracy/irregular_past_participle_verbs": 0.902,
68
+ "blimp/accuracy/drop_argument": 0.741,
69
+ "blimp/accuracy/wh_questions_object_gap": 0.757,
70
+ "blimp/accuracy/animate_subject_passive": 0.803,
71
+ "blimp/accuracy/existential_there_quantifiers_1": 0.985,
72
+ "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.878,
73
+ "blimp/accuracy/npi_present_2": 0.603,
74
+ "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.911,
75
+ "blimp/accuracy/anaphor_number_agreement": 0.98,
76
+ "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.948,
77
+ "blimp/accuracy/existential_there_object_raising": 0.819,
78
+ "blimp/accuracy/matrix_question_npi_licensor_present": 0.256,
79
+ "blimp/accuracy/npi_present_1": 0.62,
80
+ "blimp/accuracy/wh_vs_that_no_gap": 0.962,
81
+ "blimp/accuracy/left_branch_island_echo_question": 0.365,
82
+ "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.973,
83
+ "blimp/accuracy/causative": 0.72,
84
+ "blimp/accuracy/group_average": 0.7569850746268658,
85
+ "blimp/accuracy/seq_average": 0.7569850746268657,
86
+ "cbt/accuracy/NE": 0.7487980769230769,
87
+ "cbt/accuracy/V": 0.8972,
88
+ "cbt/accuracy/CN": 0.816,
89
+ "cbt/accuracy/P": 0.8852,
90
+ "cbt/accuracy/group_average": 0.8367995192307691,
91
+ "cbt/accuracy/seq_average": 0.8368347338935574,
92
+ "hellaswag/accuracy/val": 0.29067914758016333,
93
+ "hellaswag/accuracy/group_average": 0.29067914758016333,
94
+ "hellaswag/accuracy/seq_average": 0.29067914758016333,
95
+ "piqa/accuracy/val": 0.5816104461371056,
96
+ "piqa/accuracy/group_average": 0.5816104461371056,
97
+ "piqa/accuracy/seq_average": 0.5816104461371056,
98
+ "ai2arc/accuracy/ARC-Easy": 0.3276955602536998,
99
+ "ai2arc/accuracy/ARC-Challenge": 0.21373390557939914,
100
+ "ai2arc/accuracy/group_average": 0.27071473291654946,
101
+ "ai2arc/accuracy/seq_average": 0.29008498583569403,
102
+ "race/accuracy/test/high": 0.26500857632933106,
103
+ "race/accuracy/test/middle": 0.3293871866295265,
104
+ "race/accuracy/group_average": 0.29719788147942877,
105
+ "race/accuracy/seq_average": 0.2837454398054317,
106
+ "siqa/accuracy/dev": 0.3623336745138178,
107
+ "siqa/accuracy/group_average": 0.3623336745138178,
108
+ "siqa/accuracy/seq_average": 0.3623336745138178,
109
+ "commonsenseqa/accuracy/dev_rand_split": 0.25552825552825553,
110
+ "commonsenseqa/accuracy/group_average": 0.25552825552825553,
111
+ "commonsenseqa/accuracy/seq_average": 0.25552825552825553
112
+ }
Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_tcmoe/export/result-model-70000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.6450330946180554, "val/accuracy": 0.4744311135912698, "val/perplexity": 14.083911177072899, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.6619717615731755, "lambada/accuracy/total": 0.24495341614906832, "lambada/accuracy/openai_last_token": 0.7575698757763976, "lambada/perplexity": 12.76574531095774, "lambada/lm_loss": 3.211762370304564, "lambada/lm_perplexity": 24.82279465423036, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3596922648701691, "mean_loss": 2.6535024280956154, "blimp/accuracy/passive_2": 0.898, "blimp/accuracy/determiner_noun_agreement_2": 0.966, "blimp/accuracy/ellipsis_n_bar_1": 0.803, "blimp/accuracy/tough_vs_raising_2": 0.87, "blimp/accuracy/tough_vs_raising_1": 0.577, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.904, "blimp/accuracy/principle_A_reconstruction": 0.301, "blimp/accuracy/wh_vs_that_with_gap": 0.505, "blimp/accuracy/principle_A_domain_2": 0.841, "blimp/accuracy/determiner_noun_agreement_1": 0.986, "blimp/accuracy/ellipsis_n_bar_2": 0.903, "blimp/accuracy/principle_A_domain_3": 0.611, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.893, "blimp/accuracy/animate_subject_trans": 0.889, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.871, "blimp/accuracy/distractor_agreement_relative_clause": 0.579, "blimp/accuracy/transitive": 0.883, "blimp/accuracy/sentential_subject_island": 0.303, "blimp/accuracy/adjunct_island": 0.782, "blimp/accuracy/intransitive": 0.761, "blimp/accuracy/existential_there_subject_raising": 0.874, "blimp/accuracy/irregular_past_participle_adjectives": 0.979, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.472, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.22, "blimp/accuracy/only_npi_scope": 0.79, "blimp/accuracy/superlative_quantifiers_2": 0.759, "blimp/accuracy/passive_1": 0.887, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.859, "blimp/accuracy/inchoative": 0.635, "blimp/accuracy/anaphor_gender_agreement": 0.959, "blimp/accuracy/principle_A_c_command": 0.593, "blimp/accuracy/only_npi_licensor_present": 0.671, "blimp/accuracy/expletive_it_object_raising": 0.799, "blimp/accuracy/left_branch_island_simple_question": 0.54, "blimp/accuracy/wh_questions_subject_gap": 0.926, "blimp/accuracy/existential_there_quantifiers_2": 0.348, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.929, "blimp/accuracy/sentential_negation_npi_scope": 0.657, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.85, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.858, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.892, "blimp/accuracy/principle_A_case_2": 0.959, "blimp/accuracy/distractor_agreement_relational_noun": 0.792, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.988, "blimp/accuracy/superlative_quantifiers_1": 0.736, "blimp/accuracy/wh_island": 0.72, "blimp/accuracy/principle_A_domain_1": 0.975, "blimp/accuracy/complex_NP_island": 0.517, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.955, "blimp/accuracy/irregular_past_participle_verbs": 0.911, "blimp/accuracy/drop_argument": 0.742, "blimp/accuracy/wh_questions_object_gap": 0.772, "blimp/accuracy/animate_subject_passive": 0.784, "blimp/accuracy/existential_there_quantifiers_1": 0.972, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.886, "blimp/accuracy/npi_present_2": 0.589, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.921, "blimp/accuracy/anaphor_number_agreement": 0.98, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.947, "blimp/accuracy/existential_there_object_raising": 0.839, "blimp/accuracy/matrix_question_npi_licensor_present": 0.236, "blimp/accuracy/npi_present_1": 0.603, "blimp/accuracy/wh_vs_that_no_gap": 0.968, "blimp/accuracy/left_branch_island_echo_question": 0.451, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.977, "blimp/accuracy/causative": 0.726, "blimp/accuracy/group_average": 0.7696865671641789, "blimp/accuracy/seq_average": 0.7696865671641792, "cbt/accuracy/NE": 0.7524038461538461, "cbt/accuracy/V": 0.9036, "cbt/accuracy/CN": 0.8124, "cbt/accuracy/P": 0.8828, "cbt/accuracy/group_average": 0.8378009615384615, "cbt/accuracy/seq_average": 0.8378351340536214, "hellaswag/accuracy/val": 0.29396534554869547, "hellaswag/accuracy/group_average": 0.29396534554869547, "hellaswag/accuracy/seq_average": 0.29396534554869547, "piqa/accuracy/val": 0.5875952121871599, "piqa/accuracy/group_average": 0.5875952121871599, "piqa/accuracy/seq_average": 0.5875952121871599, "ai2arc/accuracy/ARC-Easy": 0.3293868921775899, "ai2arc/accuracy/ARC-Challenge": 0.20257510729613734, "ai2arc/accuracy/group_average": 0.2659809997368636, "ai2arc/accuracy/seq_average": 0.28753541076487255, "race/accuracy/test/high": 0.2644368210405946, "race/accuracy/test/middle": 0.334958217270195, "race/accuracy/group_average": 0.2996975191553948, "race/accuracy/seq_average": 0.2849614916903121, "siqa/accuracy/dev": 0.36284544524053225, "siqa/accuracy/group_average": 0.36284544524053225, "siqa/accuracy/seq_average": 0.36284544524053225, "commonsenseqa/accuracy/dev_rand_split": 0.2628992628992629, "commonsenseqa/accuracy/group_average": 0.2628992628992629, "commonsenseqa/accuracy/seq_average": 0.2628992628992629}
Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_tcmoe/export/result-model-80000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.6233917720734126, "val/accuracy": 0.4771544441344246, "val/perplexity": 13.782391126835273, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.60029289292993, "lambada/accuracy/total": 0.2521350931677019, "lambada/accuracy/openai_last_token": 0.7616459627329193, "lambada/perplexity": 12.105005054481381, "lambada/lm_loss": 3.1827236355788733, "lambada/lm_perplexity": 24.112337418328277, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3646447686510632, "mean_loss": 2.6118423325016713, "blimp/accuracy/passive_2": 0.908, "blimp/accuracy/determiner_noun_agreement_2": 0.974, "blimp/accuracy/ellipsis_n_bar_1": 0.787, "blimp/accuracy/tough_vs_raising_2": 0.856, "blimp/accuracy/tough_vs_raising_1": 0.584, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.896, "blimp/accuracy/principle_A_reconstruction": 0.35, "blimp/accuracy/wh_vs_that_with_gap": 0.492, "blimp/accuracy/principle_A_domain_2": 0.845, "blimp/accuracy/determiner_noun_agreement_1": 0.993, "blimp/accuracy/ellipsis_n_bar_2": 0.892, "blimp/accuracy/principle_A_domain_3": 0.605, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.897, "blimp/accuracy/animate_subject_trans": 0.883, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.895, "blimp/accuracy/distractor_agreement_relative_clause": 0.583, "blimp/accuracy/transitive": 0.877, "blimp/accuracy/sentential_subject_island": 0.321, "blimp/accuracy/adjunct_island": 0.791, "blimp/accuracy/intransitive": 0.781, "blimp/accuracy/existential_there_subject_raising": 0.875, "blimp/accuracy/irregular_past_participle_adjectives": 0.979, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.409, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.232, "blimp/accuracy/only_npi_scope": 0.681, "blimp/accuracy/superlative_quantifiers_2": 0.797, "blimp/accuracy/passive_1": 0.887, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.878, "blimp/accuracy/inchoative": 0.624, "blimp/accuracy/anaphor_gender_agreement": 0.954, "blimp/accuracy/principle_A_c_command": 0.622, "blimp/accuracy/only_npi_licensor_present": 0.531, "blimp/accuracy/expletive_it_object_raising": 0.785, "blimp/accuracy/left_branch_island_simple_question": 0.485, "blimp/accuracy/wh_questions_subject_gap": 0.924, "blimp/accuracy/existential_there_quantifiers_2": 0.35, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.929, "blimp/accuracy/sentential_negation_npi_scope": 0.68, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.835, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.831, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.891, "blimp/accuracy/principle_A_case_2": 0.962, "blimp/accuracy/distractor_agreement_relational_noun": 0.727, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.987, "blimp/accuracy/superlative_quantifiers_1": 0.792, "blimp/accuracy/wh_island": 0.758, "blimp/accuracy/principle_A_domain_1": 0.967, "blimp/accuracy/complex_NP_island": 0.562, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.963, "blimp/accuracy/irregular_past_participle_verbs": 0.909, "blimp/accuracy/drop_argument": 0.759, "blimp/accuracy/wh_questions_object_gap": 0.783, "blimp/accuracy/animate_subject_passive": 0.794, "blimp/accuracy/existential_there_quantifiers_1": 0.978, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.87, "blimp/accuracy/npi_present_2": 0.6, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.933, "blimp/accuracy/anaphor_number_agreement": 0.984, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.958, "blimp/accuracy/existential_there_object_raising": 0.824, "blimp/accuracy/matrix_question_npi_licensor_present": 0.324, "blimp/accuracy/npi_present_1": 0.62, "blimp/accuracy/wh_vs_that_no_gap": 0.963, "blimp/accuracy/left_branch_island_echo_question": 0.396, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.976, "blimp/accuracy/causative": 0.712, "blimp/accuracy/group_average": 0.7685074626865671, "blimp/accuracy/seq_average": 0.7685074626865671, "cbt/accuracy/NE": 0.7604166666666666, "cbt/accuracy/V": 0.908, "cbt/accuracy/CN": 0.8172, "cbt/accuracy/P": 0.8824, "cbt/accuracy/group_average": 0.8420041666666667, "cbt/accuracy/seq_average": 0.8420368147258903, "hellaswag/accuracy/val": 0.2953594901414061, "hellaswag/accuracy/group_average": 0.2953594901414061, "hellaswag/accuracy/seq_average": 0.2953594901414061, "piqa/accuracy/val": 0.588683351468988, "piqa/accuracy/group_average": 0.588683351468988, "piqa/accuracy/seq_average": 0.588683351468988, "ai2arc/accuracy/ARC-Easy": 0.33403805496828753, "ai2arc/accuracy/ARC-Challenge": 0.2094420600858369, "ai2arc/accuracy/group_average": 0.2717400575270622, "ai2arc/accuracy/seq_average": 0.2929178470254957, "race/accuracy/test/high": 0.2655803316180675, "race/accuracy/test/middle": 0.33356545961002787, "race/accuracy/group_average": 0.29957289561404765, "race/accuracy/seq_average": 0.2853668423186056, "siqa/accuracy/dev": 0.3623336745138178, "siqa/accuracy/group_average": 0.3623336745138178, "siqa/accuracy/seq_average": 0.3623336745138178, "commonsenseqa/accuracy/dev_rand_split": 0.26044226044226043, "commonsenseqa/accuracy/group_average": 0.26044226044226043, "commonsenseqa/accuracy/seq_average": 0.26044226044226043}
Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_tcmoe/export/result-model-90000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.6112525576636907, "val/accuracy": 0.47884889632936506, "val/perplexity": 13.616095119429911, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.662150317837733, "lambada/accuracy/total": 0.24611801242236025, "lambada/accuracy/openai_last_token": 0.7597049689440993, "lambada/perplexity": 11.89158931127403, "lambada/lm_loss": 3.1628039147488223, "lambada/lm_perplexity": 23.63677861257303, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.36248345437586266, "mean_loss": 2.6367014377507116, "blimp/accuracy/passive_2": 0.894, "blimp/accuracy/determiner_noun_agreement_2": 0.972, "blimp/accuracy/ellipsis_n_bar_1": 0.796, "blimp/accuracy/tough_vs_raising_2": 0.871, "blimp/accuracy/tough_vs_raising_1": 0.571, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.9, "blimp/accuracy/principle_A_reconstruction": 0.326, "blimp/accuracy/wh_vs_that_with_gap": 0.497, "blimp/accuracy/principle_A_domain_2": 0.858, "blimp/accuracy/determiner_noun_agreement_1": 0.992, "blimp/accuracy/ellipsis_n_bar_2": 0.897, "blimp/accuracy/principle_A_domain_3": 0.597, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.896, "blimp/accuracy/animate_subject_trans": 0.876, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.89, "blimp/accuracy/distractor_agreement_relative_clause": 0.578, "blimp/accuracy/transitive": 0.869, "blimp/accuracy/sentential_subject_island": 0.325, "blimp/accuracy/adjunct_island": 0.783, "blimp/accuracy/intransitive": 0.768, "blimp/accuracy/existential_there_subject_raising": 0.883, "blimp/accuracy/irregular_past_participle_adjectives": 0.981, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.396, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.211, "blimp/accuracy/only_npi_scope": 0.742, "blimp/accuracy/superlative_quantifiers_2": 0.625, "blimp/accuracy/passive_1": 0.883, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.887, "blimp/accuracy/inchoative": 0.624, "blimp/accuracy/anaphor_gender_agreement": 0.958, "blimp/accuracy/principle_A_c_command": 0.615, "blimp/accuracy/only_npi_licensor_present": 0.549, "blimp/accuracy/expletive_it_object_raising": 0.793, "blimp/accuracy/left_branch_island_simple_question": 0.449, "blimp/accuracy/wh_questions_subject_gap": 0.927, "blimp/accuracy/existential_there_quantifiers_2": 0.373, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.942, "blimp/accuracy/sentential_negation_npi_scope": 0.622, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.836, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.859, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.889, "blimp/accuracy/principle_A_case_2": 0.952, "blimp/accuracy/distractor_agreement_relational_noun": 0.775, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.982, "blimp/accuracy/superlative_quantifiers_1": 0.756, "blimp/accuracy/wh_island": 0.759, "blimp/accuracy/principle_A_domain_1": 0.976, "blimp/accuracy/complex_NP_island": 0.537, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.962, "blimp/accuracy/irregular_past_participle_verbs": 0.912, "blimp/accuracy/drop_argument": 0.74, "blimp/accuracy/wh_questions_object_gap": 0.78, "blimp/accuracy/animate_subject_passive": 0.797, "blimp/accuracy/existential_there_quantifiers_1": 0.985, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.869, "blimp/accuracy/npi_present_2": 0.629, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.943, "blimp/accuracy/anaphor_number_agreement": 0.983, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.952, "blimp/accuracy/existential_there_object_raising": 0.836, "blimp/accuracy/matrix_question_npi_licensor_present": 0.294, "blimp/accuracy/npi_present_1": 0.609, "blimp/accuracy/wh_vs_that_no_gap": 0.965, "blimp/accuracy/left_branch_island_echo_question": 0.43, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.976, "blimp/accuracy/causative": 0.73, "blimp/accuracy/group_average": 0.76610447761194, "blimp/accuracy/seq_average": 0.7661044776119403, "cbt/accuracy/NE": 0.7640224358974359, "cbt/accuracy/V": 0.9112, "cbt/accuracy/CN": 0.824, "cbt/accuracy/P": 0.8868, "cbt/accuracy/group_average": 0.846505608974359, "cbt/accuracy/seq_average": 0.8465386154461785, "hellaswag/accuracy/val": 0.29416450906193986, "hellaswag/accuracy/group_average": 0.29416450906193986, "hellaswag/accuracy/seq_average": 0.29416450906193986, "piqa/accuracy/val": 0.5875952121871599, "piqa/accuracy/group_average": 0.5875952121871599, "piqa/accuracy/seq_average": 0.5875952121871599, "ai2arc/accuracy/ARC-Easy": 0.33657505285412265, "ai2arc/accuracy/ARC-Challenge": 0.20772532188841203, "ai2arc/accuracy/group_average": 0.27215018737126734, "ai2arc/accuracy/seq_average": 0.29405099150141645, "race/accuracy/test/high": 0.274442538593482, "race/accuracy/test/middle": 0.33008356545961004, "race/accuracy/group_average": 0.302263052026546, "race/accuracy/seq_average": 0.29063640048642075, "siqa/accuracy/dev": 0.36591606960081885, "siqa/accuracy/group_average": 0.36591606960081885, "siqa/accuracy/seq_average": 0.36591606960081885, "commonsenseqa/accuracy/dev_rand_split": 0.266994266994267, "commonsenseqa/accuracy/group_average": 0.266994266994267, "commonsenseqa/accuracy/seq_average": 0.266994266994267}