Upload folder using huggingface_hub

#272
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb_v2/export/result-model-10000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 3.027585953000992, "val/accuracy": 0.42340862940228174, "val/perplexity": 20.647328756503384, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.7719552176339284, "lambada/accuracy/total": 0.15838509316770186, "lambada/accuracy/openai_last_token": 0.718361801242236, "lambada/perplexity": 24.03160614755204, "lambada/lm_loss": 3.5617760941350007, "lambada/lm_perplexity": 35.225705787508474, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.2908968612849918, "mean_loss": 2.8997705853174605, "blimp/accuracy/passive_2": 0.861, "blimp/accuracy/determiner_noun_agreement_2": 0.955, "blimp/accuracy/ellipsis_n_bar_1": 0.74, "blimp/accuracy/tough_vs_raising_2": 0.798, "blimp/accuracy/tough_vs_raising_1": 0.565, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.895, "blimp/accuracy/principle_A_reconstruction": 0.288, "blimp/accuracy/wh_vs_that_with_gap": 0.432, "blimp/accuracy/principle_A_domain_2": 0.828, "blimp/accuracy/determiner_noun_agreement_1": 0.968, "blimp/accuracy/ellipsis_n_bar_2": 0.872, "blimp/accuracy/principle_A_domain_3": 0.536, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.865, "blimp/accuracy/animate_subject_trans": 0.884, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.726, "blimp/accuracy/distractor_agreement_relative_clause": 0.37, "blimp/accuracy/transitive": 0.791, "blimp/accuracy/sentential_subject_island": 0.431, "blimp/accuracy/adjunct_island": 0.706, "blimp/accuracy/intransitive": 0.713, "blimp/accuracy/existential_there_subject_raising": 0.823, "blimp/accuracy/irregular_past_participle_adjectives": 0.889, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.184, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.174, "blimp/accuracy/only_npi_scope": 0.586, "blimp/accuracy/superlative_quantifiers_2": 0.683, "blimp/accuracy/passive_1": 0.876, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.879, "blimp/accuracy/inchoative": 0.528, "blimp/accuracy/anaphor_gender_agreement": 0.883, "blimp/accuracy/principle_A_c_command": 0.453, "blimp/accuracy/only_npi_licensor_present": 0.462, "blimp/accuracy/expletive_it_object_raising": 0.738, "blimp/accuracy/left_branch_island_simple_question": 0.26, "blimp/accuracy/wh_questions_subject_gap": 0.901, "blimp/accuracy/existential_there_quantifiers_2": 0.366, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.891, "blimp/accuracy/sentential_negation_npi_scope": 0.431, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.759, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.865, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.833, "blimp/accuracy/principle_A_case_2": 0.914, "blimp/accuracy/distractor_agreement_relational_noun": 0.694, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.986, "blimp/accuracy/superlative_quantifiers_1": 0.616, "blimp/accuracy/wh_island": 0.774, "blimp/accuracy/principle_A_domain_1": 0.983, "blimp/accuracy/complex_NP_island": 0.521, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.922, "blimp/accuracy/irregular_past_participle_verbs": 0.814, "blimp/accuracy/drop_argument": 0.766, "blimp/accuracy/wh_questions_object_gap": 0.716, "blimp/accuracy/animate_subject_passive": 0.764, "blimp/accuracy/existential_there_quantifiers_1": 0.975, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.876, "blimp/accuracy/npi_present_2": 0.617, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.821, "blimp/accuracy/anaphor_number_agreement": 0.969, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.911, "blimp/accuracy/existential_there_object_raising": 0.79, "blimp/accuracy/matrix_question_npi_licensor_present": 0.074, "blimp/accuracy/npi_present_1": 0.58, "blimp/accuracy/wh_vs_that_no_gap": 0.957, "blimp/accuracy/left_branch_island_echo_question": 0.463, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.969, "blimp/accuracy/causative": 0.636, "blimp/accuracy/group_average": 0.7133731343283581, "blimp/accuracy/seq_average": 0.7133731343283582, "cbt/accuracy/NE": 0.6846955128205128, "cbt/accuracy/V": 0.86, "cbt/accuracy/CN": 0.7304, "cbt/accuracy/P": 0.83, "cbt/accuracy/group_average": 0.7762738782051282, "cbt/accuracy/seq_average": 0.7763105242096838, "hellaswag/accuracy/val": 0.2727544313881697, "hellaswag/accuracy/group_average": 0.2727544313881697, "hellaswag/accuracy/seq_average": 0.2727544313881697, "piqa/accuracy/val": 0.5467899891186072, "piqa/accuracy/group_average": 0.5467899891186072, "piqa/accuracy/seq_average": 0.5467899891186072, "ai2arc/accuracy/ARC-Easy": 0.30021141649048627, "ai2arc/accuracy/ARC-Challenge": 0.21030042918454936, "ai2arc/accuracy/group_average": 0.2552559228375178, "ai2arc/accuracy/seq_average": 0.2705382436260623, "mmlu/accuracy/MMLU": 0.26235252055774044, "mmlu/accuracy/group_average": 0.26235252055774044, "mmlu/accuracy/seq_average": 0.26235252055774044, "openbookqa/accuracy/test": 0.26, "openbookqa/accuracy/group_average": 0.26, "openbookqa/accuracy/seq_average": 0.26, "race/accuracy/test/high": 0.25443110348770726, "race/accuracy/test/middle": 0.30571030640668523, "race/accuracy/group_average": 0.2800707049471962, "race/accuracy/seq_average": 0.26935549250101337, "siqa/accuracy/dev": 0.35363357215967245, "siqa/accuracy/group_average": 0.35363357215967245, "siqa/accuracy/seq_average": 0.35363357215967245, "winogrande/accuracy/dev": 0.5098658247829518, "winogrande/accuracy/group_average": 0.5098658247829518, "winogrande/accuracy/seq_average": 0.5098658247829518, "commonsenseqa/accuracy/dev_rand_split": 0.23996723996723995, "commonsenseqa/accuracy/group_average": 0.23996723996723995, "commonsenseqa/accuracy/seq_average": 0.23996723996723995}
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb_v2/export/result-model-100000.pth.json ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "val/loss": 2.6105455671037947,
3
+ "val/accuracy": 0.47938077411954366,
4
+ "val/perplexity": 13.606472070820919,
5
+ "val/time_since_best_loss": 0,
6
+ "val/time_since_best_accuracy": 0,
7
+ "lambada/loss": 2.5885574625145575,
8
+ "lambada/accuracy/total": 0.24922360248447206,
9
+ "lambada/accuracy/openai_last_token": 0.7593167701863354,
10
+ "lambada/perplexity": 12.0800010668659,
11
+ "lambada/lm_loss": 3.176875031428619,
12
+ "lambada/lm_perplexity": 23.97172549418872,
13
+ "lambada/time_since_best_loss": 0,
14
+ "lambada/time_since_best_accuracy": 0,
15
+ "mean_accuracy": 0.3643021883020079,
16
+ "mean_loss": 2.599551514809176,
17
+ "blimp/accuracy/passive_2": 0.911,
18
+ "blimp/accuracy/determiner_noun_agreement_2": 0.983,
19
+ "blimp/accuracy/ellipsis_n_bar_1": 0.779,
20
+ "blimp/accuracy/tough_vs_raising_2": 0.846,
21
+ "blimp/accuracy/tough_vs_raising_1": 0.628,
22
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.923,
23
+ "blimp/accuracy/principle_A_reconstruction": 0.244,
24
+ "blimp/accuracy/wh_vs_that_with_gap": 0.542,
25
+ "blimp/accuracy/principle_A_domain_2": 0.833,
26
+ "blimp/accuracy/determiner_noun_agreement_1": 0.983,
27
+ "blimp/accuracy/ellipsis_n_bar_2": 0.93,
28
+ "blimp/accuracy/principle_A_domain_3": 0.504,
29
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.91,
30
+ "blimp/accuracy/animate_subject_trans": 0.907,
31
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.886,
32
+ "blimp/accuracy/distractor_agreement_relative_clause": 0.601,
33
+ "blimp/accuracy/transitive": 0.869,
34
+ "blimp/accuracy/sentential_subject_island": 0.268,
35
+ "blimp/accuracy/adjunct_island": 0.768,
36
+ "blimp/accuracy/intransitive": 0.814,
37
+ "blimp/accuracy/existential_there_subject_raising": 0.867,
38
+ "blimp/accuracy/irregular_past_participle_adjectives": 0.956,
39
+ "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.38,
40
+ "blimp/accuracy/principle_A_case_1": 1.0,
41
+ "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.268,
42
+ "blimp/accuracy/only_npi_scope": 0.61,
43
+ "blimp/accuracy/superlative_quantifiers_2": 0.872,
44
+ "blimp/accuracy/passive_1": 0.893,
45
+ "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.921,
46
+ "blimp/accuracy/inchoative": 0.642,
47
+ "blimp/accuracy/anaphor_gender_agreement": 0.933,
48
+ "blimp/accuracy/principle_A_c_command": 0.6,
49
+ "blimp/accuracy/only_npi_licensor_present": 0.57,
50
+ "blimp/accuracy/expletive_it_object_raising": 0.747,
51
+ "blimp/accuracy/left_branch_island_simple_question": 0.444,
52
+ "blimp/accuracy/wh_questions_subject_gap": 0.901,
53
+ "blimp/accuracy/existential_there_quantifiers_2": 0.344,
54
+ "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.939,
55
+ "blimp/accuracy/sentential_negation_npi_scope": 0.659,
56
+ "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.793,
57
+ "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.827,
58
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.914,
59
+ "blimp/accuracy/principle_A_case_2": 0.952,
60
+ "blimp/accuracy/distractor_agreement_relational_noun": 0.783,
61
+ "blimp/accuracy/sentential_negation_npi_licensor_present": 0.991,
62
+ "blimp/accuracy/superlative_quantifiers_1": 0.67,
63
+ "blimp/accuracy/wh_island": 0.795,
64
+ "blimp/accuracy/principle_A_domain_1": 0.985,
65
+ "blimp/accuracy/complex_NP_island": 0.472,
66
+ "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.962,
67
+ "blimp/accuracy/irregular_past_participle_verbs": 0.909,
68
+ "blimp/accuracy/drop_argument": 0.778,
69
+ "blimp/accuracy/wh_questions_object_gap": 0.757,
70
+ "blimp/accuracy/animate_subject_passive": 0.798,
71
+ "blimp/accuracy/existential_there_quantifiers_1": 0.977,
72
+ "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.895,
73
+ "blimp/accuracy/npi_present_2": 0.612,
74
+ "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.92,
75
+ "blimp/accuracy/anaphor_number_agreement": 0.981,
76
+ "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.947,
77
+ "blimp/accuracy/existential_there_object_raising": 0.843,
78
+ "blimp/accuracy/matrix_question_npi_licensor_present": 0.284,
79
+ "blimp/accuracy/npi_present_1": 0.637,
80
+ "blimp/accuracy/wh_vs_that_no_gap": 0.963,
81
+ "blimp/accuracy/left_branch_island_echo_question": 0.395,
82
+ "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.968,
83
+ "blimp/accuracy/causative": 0.703,
84
+ "blimp/accuracy/group_average": 0.7639701492537316,
85
+ "blimp/accuracy/seq_average": 0.7639701492537313,
86
+ "cbt/accuracy/NE": 0.7672275641025641,
87
+ "cbt/accuracy/V": 0.9072,
88
+ "cbt/accuracy/CN": 0.8216,
89
+ "cbt/accuracy/P": 0.8908,
90
+ "cbt/accuracy/group_average": 0.846706891025641,
91
+ "cbt/accuracy/seq_average": 0.8467386954781913,
92
+ "hellaswag/accuracy/val": 0.29187412865962953,
93
+ "hellaswag/accuracy/group_average": 0.29187412865962953,
94
+ "hellaswag/accuracy/seq_average": 0.29187412865962953,
95
+ "piqa/accuracy/val": 0.5772578890097932,
96
+ "piqa/accuracy/group_average": 0.5772578890097932,
97
+ "piqa/accuracy/seq_average": 0.5772578890097932,
98
+ "ai2arc/accuracy/ARC-Easy": 0.32515856236786467,
99
+ "ai2arc/accuracy/ARC-Challenge": 0.20772532188841203,
100
+ "ai2arc/accuracy/group_average": 0.2664419421281383,
101
+ "ai2arc/accuracy/seq_average": 0.2864022662889518,
102
+ "mmlu/accuracy/MMLU": 0.2582052198784412,
103
+ "mmlu/accuracy/group_average": 0.2582052198784412,
104
+ "mmlu/accuracy/seq_average": 0.2582052198784412,
105
+ "openbookqa/accuracy/test": 0.258,
106
+ "openbookqa/accuracy/group_average": 0.258,
107
+ "openbookqa/accuracy/seq_average": 0.258,
108
+ "race/accuracy/test/high": 0.26758147512864494,
109
+ "race/accuracy/test/middle": 0.3342618384401114,
110
+ "race/accuracy/group_average": 0.3009216567843782,
111
+ "race/accuracy/seq_average": 0.2869882448317795,
112
+ "siqa/accuracy/dev": 0.3526100307062436,
113
+ "siqa/accuracy/group_average": 0.3526100307062436,
114
+ "siqa/accuracy/seq_average": 0.3526100307062436,
115
+ "winogrande/accuracy/dev": 0.5122336227308603,
116
+ "winogrande/accuracy/group_average": 0.5122336227308603,
117
+ "winogrande/accuracy/seq_average": 0.5122336227308603,
118
+ "commonsenseqa/accuracy/dev_rand_split": 0.2538902538902539,
119
+ "commonsenseqa/accuracy/group_average": 0.2538902538902539,
120
+ "commonsenseqa/accuracy/seq_average": 0.2538902538902539
121
+ }
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb_v2/export/result-model-20000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.880667308020213, "val/accuracy": 0.4415166945684524, "val/perplexity": 17.826164754209497, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.6459052992163237, "lambada/accuracy/total": 0.19701086956521738, "lambada/accuracy/openai_last_token": 0.7393245341614907, "lambada/perplexity": 17.4742443200684, "lambada/lm_loss": 3.414981394257363, "lambada/lm_perplexity": 30.416383502897077, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.31926378206683487, "mean_loss": 2.763286303618268, "blimp/accuracy/passive_2": 0.866, "blimp/accuracy/determiner_noun_agreement_2": 0.96, "blimp/accuracy/ellipsis_n_bar_1": 0.77, "blimp/accuracy/tough_vs_raising_2": 0.837, "blimp/accuracy/tough_vs_raising_1": 0.605, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.894, "blimp/accuracy/principle_A_reconstruction": 0.312, "blimp/accuracy/wh_vs_that_with_gap": 0.493, "blimp/accuracy/principle_A_domain_2": 0.801, "blimp/accuracy/determiner_noun_agreement_1": 0.98, "blimp/accuracy/ellipsis_n_bar_2": 0.898, "blimp/accuracy/principle_A_domain_3": 0.548, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.871, "blimp/accuracy/animate_subject_trans": 0.889, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.825, "blimp/accuracy/distractor_agreement_relative_clause": 0.45, "blimp/accuracy/transitive": 0.839, "blimp/accuracy/sentential_subject_island": 0.376, "blimp/accuracy/adjunct_island": 0.727, "blimp/accuracy/intransitive": 0.708, "blimp/accuracy/existential_there_subject_raising": 0.82, "blimp/accuracy/irregular_past_participle_adjectives": 0.852, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.238, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.179, "blimp/accuracy/only_npi_scope": 0.645, "blimp/accuracy/superlative_quantifiers_2": 0.828, "blimp/accuracy/passive_1": 0.878, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.895, "blimp/accuracy/inchoative": 0.537, "blimp/accuracy/anaphor_gender_agreement": 0.871, "blimp/accuracy/principle_A_c_command": 0.526, "blimp/accuracy/only_npi_licensor_present": 0.562, "blimp/accuracy/expletive_it_object_raising": 0.733, "blimp/accuracy/left_branch_island_simple_question": 0.334, "blimp/accuracy/wh_questions_subject_gap": 0.884, "blimp/accuracy/existential_there_quantifiers_2": 0.308, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.889, "blimp/accuracy/sentential_negation_npi_scope": 0.499, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.79, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.879, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.881, "blimp/accuracy/principle_A_case_2": 0.923, "blimp/accuracy/distractor_agreement_relational_noun": 0.786, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.994, "blimp/accuracy/superlative_quantifiers_1": 0.521, "blimp/accuracy/wh_island": 0.832, "blimp/accuracy/principle_A_domain_1": 0.987, "blimp/accuracy/complex_NP_island": 0.539, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.945, "blimp/accuracy/irregular_past_participle_verbs": 0.881, "blimp/accuracy/drop_argument": 0.755, "blimp/accuracy/wh_questions_object_gap": 0.748, "blimp/accuracy/animate_subject_passive": 0.761, "blimp/accuracy/existential_there_quantifiers_1": 0.994, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.874, "blimp/accuracy/npi_present_2": 0.602, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.898, "blimp/accuracy/anaphor_number_agreement": 0.969, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.933, "blimp/accuracy/existential_there_object_raising": 0.77, "blimp/accuracy/matrix_question_npi_licensor_present": 0.089, "blimp/accuracy/npi_present_1": 0.534, "blimp/accuracy/wh_vs_that_no_gap": 0.942, "blimp/accuracy/left_branch_island_echo_question": 0.389, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.982, "blimp/accuracy/causative": 0.691, "blimp/accuracy/group_average": 0.731582089552239, "blimp/accuracy/seq_average": 0.7315820895522388, "cbt/accuracy/NE": 0.703125, "cbt/accuracy/V": 0.8844, "cbt/accuracy/CN": 0.7648, "cbt/accuracy/P": 0.8572, "cbt/accuracy/group_average": 0.80238125, "cbt/accuracy/seq_average": 0.802420968387355, "hellaswag/accuracy/val": 0.2751443935471022, "hellaswag/accuracy/group_average": 0.2751443935471022, "hellaswag/accuracy/seq_average": 0.2751443935471022, "piqa/accuracy/val": 0.5609357997823722, "piqa/accuracy/group_average": 0.5609357997823722, "piqa/accuracy/seq_average": 0.5609357997823722, "ai2arc/accuracy/ARC-Easy": 0.3116279069767442, "ai2arc/accuracy/ARC-Challenge": 0.19570815450643778, "ai2arc/accuracy/group_average": 0.253668030741591, "ai2arc/accuracy/seq_average": 0.273371104815864, "mmlu/accuracy/MMLU": 0.262567036110118, "mmlu/accuracy/group_average": 0.262567036110118, "mmlu/accuracy/seq_average": 0.262567036110118, "openbookqa/accuracy/test": 0.266, "openbookqa/accuracy/group_average": 0.266, "openbookqa/accuracy/seq_average": 0.266, "race/accuracy/test/high": 0.2584333905088622, "race/accuracy/test/middle": 0.3363509749303621, "race/accuracy/group_average": 0.29739218271961215, "race/accuracy/seq_average": 0.28111066072152413, "siqa/accuracy/dev": 0.3577277379733879, "siqa/accuracy/group_average": 0.3577277379733879, "siqa/accuracy/seq_average": 0.3577277379733879, "winogrande/accuracy/dev": 0.516179952644041, "winogrande/accuracy/group_average": 0.516179952644041, "winogrande/accuracy/seq_average": 0.516179952644041, "commonsenseqa/accuracy/dev_rand_split": 0.23013923013923013, "commonsenseqa/accuracy/group_average": 0.23013923013923013, "commonsenseqa/accuracy/seq_average": 0.23013923013923013}
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb_v2/export/result-model-30000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.8055223737444197, "val/accuracy": 0.45195079985119047, "val/perplexity": 16.535711471888074, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.7081181307016693, "lambada/accuracy/total": 0.21777950310559005, "lambada/accuracy/openai_last_token": 0.7402950310559007, "lambada/perplexity": 16.4259478671092, "lambada/lm_loss": 3.363486649122513, "lambada/lm_perplexity": 28.88974388101112, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.33486515147839024, "mean_loss": 2.7568202522230445, "blimp/accuracy/passive_2": 0.873, "blimp/accuracy/determiner_noun_agreement_2": 0.978, "blimp/accuracy/ellipsis_n_bar_1": 0.757, "blimp/accuracy/tough_vs_raising_2": 0.827, "blimp/accuracy/tough_vs_raising_1": 0.621, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.928, "blimp/accuracy/principle_A_reconstruction": 0.259, "blimp/accuracy/wh_vs_that_with_gap": 0.531, "blimp/accuracy/principle_A_domain_2": 0.805, "blimp/accuracy/determiner_noun_agreement_1": 0.976, "blimp/accuracy/ellipsis_n_bar_2": 0.889, "blimp/accuracy/principle_A_domain_3": 0.538, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.892, "blimp/accuracy/animate_subject_trans": 0.886, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.809, "blimp/accuracy/distractor_agreement_relative_clause": 0.526, "blimp/accuracy/transitive": 0.837, "blimp/accuracy/sentential_subject_island": 0.334, "blimp/accuracy/adjunct_island": 0.76, "blimp/accuracy/intransitive": 0.796, "blimp/accuracy/existential_there_subject_raising": 0.837, "blimp/accuracy/irregular_past_participle_adjectives": 0.865, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.294, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.272, "blimp/accuracy/only_npi_scope": 0.685, "blimp/accuracy/superlative_quantifiers_2": 0.852, "blimp/accuracy/passive_1": 0.874, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.881, "blimp/accuracy/inchoative": 0.641, "blimp/accuracy/anaphor_gender_agreement": 0.927, "blimp/accuracy/principle_A_c_command": 0.563, "blimp/accuracy/only_npi_licensor_present": 0.452, "blimp/accuracy/expletive_it_object_raising": 0.784, "blimp/accuracy/left_branch_island_simple_question": 0.38, "blimp/accuracy/wh_questions_subject_gap": 0.87, "blimp/accuracy/existential_there_quantifiers_2": 0.286, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.925, "blimp/accuracy/sentential_negation_npi_scope": 0.546, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.764, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.854, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.868, "blimp/accuracy/principle_A_case_2": 0.956, "blimp/accuracy/distractor_agreement_relational_noun": 0.785, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.98, "blimp/accuracy/superlative_quantifiers_1": 0.604, "blimp/accuracy/wh_island": 0.755, "blimp/accuracy/principle_A_domain_1": 0.956, "blimp/accuracy/complex_NP_island": 0.517, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.93, "blimp/accuracy/irregular_past_participle_verbs": 0.873, "blimp/accuracy/drop_argument": 0.785, "blimp/accuracy/wh_questions_object_gap": 0.678, "blimp/accuracy/animate_subject_passive": 0.81, "blimp/accuracy/existential_there_quantifiers_1": 0.967, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.888, "blimp/accuracy/npi_present_2": 0.591, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.889, "blimp/accuracy/anaphor_number_agreement": 0.975, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.935, "blimp/accuracy/existential_there_object_raising": 0.787, "blimp/accuracy/matrix_question_npi_licensor_present": 0.171, "blimp/accuracy/npi_present_1": 0.504, "blimp/accuracy/wh_vs_that_no_gap": 0.943, "blimp/accuracy/left_branch_island_echo_question": 0.354, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.949, "blimp/accuracy/causative": 0.676, "blimp/accuracy/group_average": 0.7388059701492535, "blimp/accuracy/seq_average": 0.7388059701492538, "cbt/accuracy/NE": 0.734375, "cbt/accuracy/V": 0.8856, "cbt/accuracy/CN": 0.7848, "cbt/accuracy/P": 0.8616, "cbt/accuracy/group_average": 0.8165937500000001, "cbt/accuracy/seq_average": 0.8166266506602641, "hellaswag/accuracy/val": 0.2771360286795459, "hellaswag/accuracy/group_average": 0.2771360286795459, "hellaswag/accuracy/seq_average": 0.2771360286795459, "piqa/accuracy/val": 0.5854189336235038, "piqa/accuracy/group_average": 0.5854189336235038, "piqa/accuracy/seq_average": 0.5854189336235038, "ai2arc/accuracy/ARC-Easy": 0.3099365750528541, "ai2arc/accuracy/ARC-Challenge": 0.20772532188841203, "ai2arc/accuracy/group_average": 0.25883094847063304, "ai2arc/accuracy/seq_average": 0.2762039660056657, "mmlu/accuracy/MMLU": 0.25727565248480516, "mmlu/accuracy/group_average": 0.25727565248480516, "mmlu/accuracy/seq_average": 0.25727565248480516, "openbookqa/accuracy/test": 0.258, "openbookqa/accuracy/group_average": 0.258, "openbookqa/accuracy/seq_average": 0.258, "race/accuracy/test/high": 0.258147512864494, "race/accuracy/test/middle": 0.33356545961002787, "race/accuracy/group_average": 0.2958564862372609, "race/accuracy/seq_average": 0.28009728415079044, "siqa/accuracy/dev": 0.34698055271238487, "siqa/accuracy/group_average": 0.34698055271238487, "siqa/accuracy/seq_average": 0.34698055271238487, "winogrande/accuracy/dev": 0.5090765588003157, "winogrande/accuracy/group_average": 0.5090765588003157, "winogrande/accuracy/seq_average": 0.5090765588003157, "commonsenseqa/accuracy/dev_rand_split": 0.22604422604422605, "commonsenseqa/accuracy/group_average": 0.22604422604422605, "commonsenseqa/accuracy/seq_average": 0.22604422604422605}
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb_v2/export/result-model-40000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.754936823769221, "val/accuracy": 0.4590299091641865, "val/perplexity": 15.72004773806797, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.6254700844332297, "lambada/accuracy/total": 0.21777950310559005, "lambada/accuracy/openai_last_token": 0.7476708074534162, "lambada/perplexity": 15.749754975663437, "lambada/lm_loss": 3.2934883043631307, "lambada/lm_perplexity": 26.93666323994265, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.33840470613488827, "mean_loss": 2.6902034541012254, "blimp/accuracy/passive_2": 0.874, "blimp/accuracy/determiner_noun_agreement_2": 0.974, "blimp/accuracy/ellipsis_n_bar_1": 0.79, "blimp/accuracy/tough_vs_raising_2": 0.838, "blimp/accuracy/tough_vs_raising_1": 0.608, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.898, "blimp/accuracy/principle_A_reconstruction": 0.279, "blimp/accuracy/wh_vs_that_with_gap": 0.499, "blimp/accuracy/principle_A_domain_2": 0.796, "blimp/accuracy/determiner_noun_agreement_1": 0.982, "blimp/accuracy/ellipsis_n_bar_2": 0.904, "blimp/accuracy/principle_A_domain_3": 0.562, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.927, "blimp/accuracy/animate_subject_trans": 0.906, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.858, "blimp/accuracy/distractor_agreement_relative_clause": 0.6, "blimp/accuracy/transitive": 0.845, "blimp/accuracy/sentential_subject_island": 0.29, "blimp/accuracy/adjunct_island": 0.824, "blimp/accuracy/intransitive": 0.79, "blimp/accuracy/existential_there_subject_raising": 0.844, "blimp/accuracy/irregular_past_participle_adjectives": 0.78, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.391, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.262, "blimp/accuracy/only_npi_scope": 0.514, "blimp/accuracy/superlative_quantifiers_2": 0.746, "blimp/accuracy/passive_1": 0.87, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.892, "blimp/accuracy/inchoative": 0.62, "blimp/accuracy/anaphor_gender_agreement": 0.94, "blimp/accuracy/principle_A_c_command": 0.645, "blimp/accuracy/only_npi_licensor_present": 0.54, "blimp/accuracy/expletive_it_object_raising": 0.762, "blimp/accuracy/left_branch_island_simple_question": 0.491, "blimp/accuracy/wh_questions_subject_gap": 0.893, "blimp/accuracy/existential_there_quantifiers_2": 0.351, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.928, "blimp/accuracy/sentential_negation_npi_scope": 0.581, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.794, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.84, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.892, "blimp/accuracy/principle_A_case_2": 0.941, "blimp/accuracy/distractor_agreement_relational_noun": 0.825, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.997, "blimp/accuracy/superlative_quantifiers_1": 0.726, "blimp/accuracy/wh_island": 0.792, "blimp/accuracy/principle_A_domain_1": 0.989, "blimp/accuracy/complex_NP_island": 0.477, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.947, "blimp/accuracy/irregular_past_participle_verbs": 0.898, "blimp/accuracy/drop_argument": 0.768, "blimp/accuracy/wh_questions_object_gap": 0.714, "blimp/accuracy/animate_subject_passive": 0.779, "blimp/accuracy/existential_there_quantifiers_1": 0.986, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.877, "blimp/accuracy/npi_present_2": 0.519, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.91, "blimp/accuracy/anaphor_number_agreement": 0.977, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.95, "blimp/accuracy/existential_there_object_raising": 0.807, "blimp/accuracy/matrix_question_npi_licensor_present": 0.157, "blimp/accuracy/npi_present_1": 0.491, "blimp/accuracy/wh_vs_that_no_gap": 0.968, "blimp/accuracy/left_branch_island_echo_question": 0.408, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.966, "blimp/accuracy/causative": 0.668, "blimp/accuracy/group_average": 0.7486119402985074, "blimp/accuracy/seq_average": 0.7486119402985074, "cbt/accuracy/NE": 0.7395833333333334, "cbt/accuracy/V": 0.8892, "cbt/accuracy/CN": 0.7916, "cbt/accuracy/P": 0.868, "cbt/accuracy/group_average": 0.8220958333333332, "cbt/accuracy/seq_average": 0.8221288515406162, "hellaswag/accuracy/val": 0.28211511651065524, "hellaswag/accuracy/group_average": 0.28211511651065524, "hellaswag/accuracy/seq_average": 0.28211511651065524, "piqa/accuracy/val": 0.5658324265505985, "piqa/accuracy/group_average": 0.5658324265505985, "piqa/accuracy/seq_average": 0.5658324265505985, "ai2arc/accuracy/ARC-Easy": 0.3145877378435518, "ai2arc/accuracy/ARC-Challenge": 0.19914163090128756, "ai2arc/accuracy/group_average": 0.2568646843724197, "ai2arc/accuracy/seq_average": 0.27648725212464587, "mmlu/accuracy/MMLU": 0.2591347872720772, "mmlu/accuracy/group_average": 0.2591347872720772, "mmlu/accuracy/seq_average": 0.2591347872720772, "openbookqa/accuracy/test": 0.25, "openbookqa/accuracy/group_average": 0.25, "openbookqa/accuracy/seq_average": 0.25, "race/accuracy/test/high": 0.2584333905088622, "race/accuracy/test/middle": 0.3307799442896936, "race/accuracy/group_average": 0.2946066673992779, "race/accuracy/seq_average": 0.27948925820835024, "siqa/accuracy/dev": 0.35516888433981575, "siqa/accuracy/group_average": 0.35516888433981575, "siqa/accuracy/seq_average": 0.35516888433981575, "winogrande/accuracy/dev": 0.5059194948697711, "winogrande/accuracy/group_average": 0.5059194948697711, "winogrande/accuracy/seq_average": 0.5059194948697711, "commonsenseqa/accuracy/dev_rand_split": 0.257985257985258, "commonsenseqa/accuracy/group_average": 0.257985257985258, "commonsenseqa/accuracy/seq_average": 0.257985257985258}
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb_v2/export/result-model-60000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.677303738064236, "val/accuracy": 0.46979147290426587, "val/perplexity": 14.545821031944616, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.575543137070555, "lambada/accuracy/total": 0.23466614906832298, "lambada/accuracy/openai_last_token": 0.7548524844720497, "lambada/perplexity": 13.650760529750542, "lambada/lm_loss": 3.229637397542701, "lambada/lm_perplexity": 25.270492166916192, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.35222881098629444, "mean_loss": 2.6264234375673956, "blimp/accuracy/passive_2": 0.898, "blimp/accuracy/determiner_noun_agreement_2": 0.987, "blimp/accuracy/ellipsis_n_bar_1": 0.785, "blimp/accuracy/tough_vs_raising_2": 0.865, "blimp/accuracy/tough_vs_raising_1": 0.577, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.923, "blimp/accuracy/principle_A_reconstruction": 0.22, "blimp/accuracy/wh_vs_that_with_gap": 0.527, "blimp/accuracy/principle_A_domain_2": 0.798, "blimp/accuracy/determiner_noun_agreement_1": 0.985, "blimp/accuracy/ellipsis_n_bar_2": 0.909, "blimp/accuracy/principle_A_domain_3": 0.539, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.93, "blimp/accuracy/animate_subject_trans": 0.907, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.87, "blimp/accuracy/distractor_agreement_relative_clause": 0.579, "blimp/accuracy/transitive": 0.87, "blimp/accuracy/sentential_subject_island": 0.278, "blimp/accuracy/adjunct_island": 0.753, "blimp/accuracy/intransitive": 0.795, "blimp/accuracy/existential_there_subject_raising": 0.845, "blimp/accuracy/irregular_past_participle_adjectives": 0.937, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.322, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.293, "blimp/accuracy/only_npi_scope": 0.716, "blimp/accuracy/superlative_quantifiers_2": 0.761, "blimp/accuracy/passive_1": 0.88, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.914, "blimp/accuracy/inchoative": 0.671, "blimp/accuracy/anaphor_gender_agreement": 0.938, "blimp/accuracy/principle_A_c_command": 0.627, "blimp/accuracy/only_npi_licensor_present": 0.569, "blimp/accuracy/expletive_it_object_raising": 0.746, "blimp/accuracy/left_branch_island_simple_question": 0.36, "blimp/accuracy/wh_questions_subject_gap": 0.909, "blimp/accuracy/existential_there_quantifiers_2": 0.252, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.938, "blimp/accuracy/sentential_negation_npi_scope": 0.57, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.757, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.85, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.887, "blimp/accuracy/principle_A_case_2": 0.948, "blimp/accuracy/distractor_agreement_relational_noun": 0.759, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.993, "blimp/accuracy/superlative_quantifiers_1": 0.753, "blimp/accuracy/wh_island": 0.814, "blimp/accuracy/principle_A_domain_1": 0.986, "blimp/accuracy/complex_NP_island": 0.492, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.954, "blimp/accuracy/irregular_past_participle_verbs": 0.892, "blimp/accuracy/drop_argument": 0.765, "blimp/accuracy/wh_questions_object_gap": 0.772, "blimp/accuracy/animate_subject_passive": 0.792, "blimp/accuracy/existential_there_quantifiers_1": 0.963, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.887, "blimp/accuracy/npi_present_2": 0.608, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.907, "blimp/accuracy/anaphor_number_agreement": 0.985, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.945, "blimp/accuracy/existential_there_object_raising": 0.82, "blimp/accuracy/matrix_question_npi_licensor_present": 0.241, "blimp/accuracy/npi_present_1": 0.583, "blimp/accuracy/wh_vs_that_no_gap": 0.968, "blimp/accuracy/left_branch_island_echo_question": 0.411, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.96, "blimp/accuracy/causative": 0.689, "blimp/accuracy/group_average": 0.7555820895522387, "blimp/accuracy/seq_average": 0.7555820895522388, "cbt/accuracy/NE": 0.7504006410256411, "cbt/accuracy/V": 0.8984, "cbt/accuracy/CN": 0.8096, "cbt/accuracy/P": 0.8812, "cbt/accuracy/group_average": 0.8349001602564103, "cbt/accuracy/seq_average": 0.8349339735894358, "hellaswag/accuracy/val": 0.2849034056960765, "hellaswag/accuracy/group_average": 0.2849034056960765, "hellaswag/accuracy/seq_average": 0.2849034056960765, "piqa/accuracy/val": 0.5674646354733406, "piqa/accuracy/group_average": 0.5674646354733406, "piqa/accuracy/seq_average": 0.5674646354733406, "ai2arc/accuracy/ARC-Easy": 0.32346723044397463, "ai2arc/accuracy/ARC-Challenge": 0.20257510729613734, "ai2arc/accuracy/group_average": 0.263021168870056, "ai2arc/accuracy/seq_average": 0.28356940509915013, "mmlu/accuracy/MMLU": 0.2619234894529853, "mmlu/accuracy/group_average": 0.2619234894529853, "mmlu/accuracy/seq_average": 0.2619234894529853, "openbookqa/accuracy/test": 0.264, "openbookqa/accuracy/group_average": 0.264, "openbookqa/accuracy/seq_average": 0.264, "race/accuracy/test/high": 0.2658662092624357, "race/accuracy/test/middle": 0.3231197771587744, "race/accuracy/group_average": 0.29449299321060507, "race/accuracy/seq_average": 0.28252938792055127, "siqa/accuracy/dev": 0.35056294779938585, "siqa/accuracy/group_average": 0.35056294779938585, "siqa/accuracy/seq_average": 0.35056294779938585, "winogrande/accuracy/dev": 0.510655090765588, "winogrande/accuracy/group_average": 0.510655090765588, "winogrande/accuracy/seq_average": 0.510655090765588, "commonsenseqa/accuracy/dev_rand_split": 0.24897624897624898, "commonsenseqa/accuracy/group_average": 0.24897624897624898, "commonsenseqa/accuracy/seq_average": 0.24897624897624898}
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb_v2/export/result-model-70000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.6513274662078374, "val/accuracy": 0.4739176432291667, "val/perplexity": 14.172840129788627, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.6206831843216225, "lambada/accuracy/total": 0.23563664596273293, "lambada/accuracy/openai_last_token": 0.7548524844720497, "lambada/perplexity": 12.653600410893773, "lambada/lm_loss": 3.216387615447795, "lambada/lm_perplexity": 24.9378720901689, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3547771445959498, "mean_loss": 2.6360053252647297, "blimp/accuracy/passive_2": 0.897, "blimp/accuracy/determiner_noun_agreement_2": 0.979, "blimp/accuracy/ellipsis_n_bar_1": 0.794, "blimp/accuracy/tough_vs_raising_2": 0.816, "blimp/accuracy/tough_vs_raising_1": 0.627, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.904, "blimp/accuracy/principle_A_reconstruction": 0.3, "blimp/accuracy/wh_vs_that_with_gap": 0.519, "blimp/accuracy/principle_A_domain_2": 0.825, "blimp/accuracy/determiner_noun_agreement_1": 0.982, "blimp/accuracy/ellipsis_n_bar_2": 0.923, "blimp/accuracy/principle_A_domain_3": 0.55, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.913, "blimp/accuracy/animate_subject_trans": 0.905, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.87, "blimp/accuracy/distractor_agreement_relative_clause": 0.602, "blimp/accuracy/transitive": 0.873, "blimp/accuracy/sentential_subject_island": 0.3, "blimp/accuracy/adjunct_island": 0.749, "blimp/accuracy/intransitive": 0.788, "blimp/accuracy/existential_there_subject_raising": 0.852, "blimp/accuracy/irregular_past_participle_adjectives": 0.883, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.431, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.281, "blimp/accuracy/only_npi_scope": 0.695, "blimp/accuracy/superlative_quantifiers_2": 0.843, "blimp/accuracy/passive_1": 0.884, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.903, "blimp/accuracy/inchoative": 0.629, "blimp/accuracy/anaphor_gender_agreement": 0.956, "blimp/accuracy/principle_A_c_command": 0.595, "blimp/accuracy/only_npi_licensor_present": 0.594, "blimp/accuracy/expletive_it_object_raising": 0.737, "blimp/accuracy/left_branch_island_simple_question": 0.504, "blimp/accuracy/wh_questions_subject_gap": 0.897, "blimp/accuracy/existential_there_quantifiers_2": 0.341, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.937, "blimp/accuracy/sentential_negation_npi_scope": 0.631, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.814, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.836, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.886, "blimp/accuracy/principle_A_case_2": 0.949, "blimp/accuracy/distractor_agreement_relational_noun": 0.806, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.995, "blimp/accuracy/superlative_quantifiers_1": 0.688, "blimp/accuracy/wh_island": 0.818, "blimp/accuracy/principle_A_domain_1": 0.994, "blimp/accuracy/complex_NP_island": 0.489, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.964, "blimp/accuracy/irregular_past_participle_verbs": 0.906, "blimp/accuracy/drop_argument": 0.748, "blimp/accuracy/wh_questions_object_gap": 0.767, "blimp/accuracy/animate_subject_passive": 0.771, "blimp/accuracy/existential_there_quantifiers_1": 0.983, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.874, "blimp/accuracy/npi_present_2": 0.516, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.914, "blimp/accuracy/anaphor_number_agreement": 0.982, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.938, "blimp/accuracy/existential_there_object_raising": 0.83, "blimp/accuracy/matrix_question_npi_licensor_present": 0.222, "blimp/accuracy/npi_present_1": 0.526, "blimp/accuracy/wh_vs_that_no_gap": 0.967, "blimp/accuracy/left_branch_island_echo_question": 0.416, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.966, "blimp/accuracy/causative": 0.691, "blimp/accuracy/group_average": 0.7606716417910446, "blimp/accuracy/seq_average": 0.7606716417910447, "cbt/accuracy/NE": 0.7536057692307693, "cbt/accuracy/V": 0.9036, "cbt/accuracy/CN": 0.8116, "cbt/accuracy/P": 0.8812, "cbt/accuracy/group_average": 0.8375014423076923, "cbt/accuracy/seq_average": 0.8375350140056023, "hellaswag/accuracy/val": 0.2885879306910974, "hellaswag/accuracy/group_average": 0.2885879306910974, "hellaswag/accuracy/seq_average": 0.2885879306910974, "piqa/accuracy/val": 0.5685527747551686, "piqa/accuracy/group_average": 0.5685527747551686, "piqa/accuracy/seq_average": 0.5685527747551686, "ai2arc/accuracy/ARC-Easy": 0.3310782241014799, "ai2arc/accuracy/ARC-Challenge": 0.2111587982832618, "ai2arc/accuracy/group_average": 0.27111851119237085, "ai2arc/accuracy/seq_average": 0.2915014164305949, "mmlu/accuracy/MMLU": 0.25841973543081875, "mmlu/accuracy/group_average": 0.25841973543081875, "mmlu/accuracy/seq_average": 0.25841973543081875, "openbookqa/accuracy/test": 0.264, "openbookqa/accuracy/group_average": 0.264, "openbookqa/accuracy/seq_average": 0.264, "race/accuracy/test/high": 0.2612921669525443, "race/accuracy/test/middle": 0.3286908077994429, "race/accuracy/group_average": 0.29499148737599357, "race/accuracy/seq_average": 0.2809079854073774, "siqa/accuracy/dev": 0.3577277379733879, "siqa/accuracy/group_average": 0.3577277379733879, "siqa/accuracy/seq_average": 0.3577277379733879, "winogrande/accuracy/dev": 0.5035516969218626, "winogrande/accuracy/group_average": 0.5035516969218626, "winogrande/accuracy/seq_average": 0.5035516969218626, "commonsenseqa/accuracy/dev_rand_split": 0.2457002457002457, "commonsenseqa/accuracy/group_average": 0.2457002457002457, "commonsenseqa/accuracy/seq_average": 0.2457002457002457}