Upload folder using huggingface_hub

#266
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_sigmoid_standard_lb_v2/export/result-model-10000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 3.042975773887029, "val/accuracy": 0.4206940181671627, "val/perplexity": 20.967545164265886, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.762588406201475, "lambada/accuracy/total": 0.1686723602484472, "lambada/accuracy/openai_last_token": 0.7135093167701864, "lambada/perplexity": 25.19556823747764, "lambada/lm_loss": 3.579254335054225, "lambada/lm_perplexity": 35.846801175829, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.29468318920780495, "mean_loss": 2.9027820900442523, "blimp/accuracy/passive_2": 0.874, "blimp/accuracy/determiner_noun_agreement_2": 0.978, "blimp/accuracy/ellipsis_n_bar_1": 0.688, "blimp/accuracy/tough_vs_raising_2": 0.807, "blimp/accuracy/tough_vs_raising_1": 0.573, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.893, "blimp/accuracy/principle_A_reconstruction": 0.403, "blimp/accuracy/wh_vs_that_with_gap": 0.446, "blimp/accuracy/principle_A_domain_2": 0.77, "blimp/accuracy/determiner_noun_agreement_1": 0.972, "blimp/accuracy/ellipsis_n_bar_2": 0.864, "blimp/accuracy/principle_A_domain_3": 0.515, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.889, "blimp/accuracy/animate_subject_trans": 0.876, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.754, "blimp/accuracy/distractor_agreement_relative_clause": 0.433, "blimp/accuracy/transitive": 0.81, "blimp/accuracy/sentential_subject_island": 0.347, "blimp/accuracy/adjunct_island": 0.721, "blimp/accuracy/intransitive": 0.7, "blimp/accuracy/existential_there_subject_raising": 0.805, "blimp/accuracy/irregular_past_participle_adjectives": 0.975, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.196, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.209, "blimp/accuracy/only_npi_scope": 0.676, "blimp/accuracy/superlative_quantifiers_2": 0.86, "blimp/accuracy/passive_1": 0.889, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.88, "blimp/accuracy/inchoative": 0.573, "blimp/accuracy/anaphor_gender_agreement": 0.932, "blimp/accuracy/principle_A_c_command": 0.422, "blimp/accuracy/only_npi_licensor_present": 0.41, "blimp/accuracy/expletive_it_object_raising": 0.767, "blimp/accuracy/left_branch_island_simple_question": 0.194, "blimp/accuracy/wh_questions_subject_gap": 0.883, "blimp/accuracy/existential_there_quantifiers_2": 0.369, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.912, "blimp/accuracy/sentential_negation_npi_scope": 0.458, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.759, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.927, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.803, "blimp/accuracy/principle_A_case_2": 0.924, "blimp/accuracy/distractor_agreement_relational_noun": 0.794, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.995, "blimp/accuracy/superlative_quantifiers_1": 0.684, "blimp/accuracy/wh_island": 0.801, "blimp/accuracy/principle_A_domain_1": 0.988, "blimp/accuracy/complex_NP_island": 0.549, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.937, "blimp/accuracy/irregular_past_participle_verbs": 0.798, "blimp/accuracy/drop_argument": 0.755, "blimp/accuracy/wh_questions_object_gap": 0.731, "blimp/accuracy/animate_subject_passive": 0.71, "blimp/accuracy/existential_there_quantifiers_1": 0.972, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.867, "blimp/accuracy/npi_present_2": 0.608, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.808, "blimp/accuracy/anaphor_number_agreement": 0.962, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.92, "blimp/accuracy/existential_there_object_raising": 0.767, "blimp/accuracy/matrix_question_npi_licensor_present": 0.03, "blimp/accuracy/npi_present_1": 0.575, "blimp/accuracy/wh_vs_that_no_gap": 0.94, "blimp/accuracy/left_branch_island_echo_question": 0.489, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.953, "blimp/accuracy/causative": 0.69, "blimp/accuracy/group_average": 0.7232686567164178, "blimp/accuracy/seq_average": 0.7232686567164179, "cbt/accuracy/NE": 0.6822916666666666, "cbt/accuracy/V": 0.8536, "cbt/accuracy/CN": 0.7352, "cbt/accuracy/P": 0.8316, "cbt/accuracy/group_average": 0.7756729166666666, "cbt/accuracy/seq_average": 0.7757102841136455, "hellaswag/accuracy/val": 0.2711611232822147, "hellaswag/accuracy/group_average": 0.2711611232822147, "hellaswag/accuracy/seq_average": 0.2711611232822147, "piqa/accuracy/val": 0.5321001088139282, "piqa/accuracy/group_average": 0.5321001088139282, "piqa/accuracy/seq_average": 0.5321001088139282, "ai2arc/accuracy/ARC-Easy": 0.3107822410147992, "ai2arc/accuracy/ARC-Challenge": 0.20858369098712445, "ai2arc/accuracy/group_average": 0.2596829660009618, "ai2arc/accuracy/seq_average": 0.2770538243626062, "mmlu/accuracy/MMLU": 0.26821594565606005, "mmlu/accuracy/group_average": 0.26821594565606005, "mmlu/accuracy/seq_average": 0.26821594565606005, "openbookqa/accuracy/test": 0.272, "openbookqa/accuracy/group_average": 0.272, "openbookqa/accuracy/seq_average": 0.272, "race/accuracy/test/high": 0.24213836477987422, "race/accuracy/test/middle": 0.3161559888579387, "race/accuracy/group_average": 0.27914717681890644, "race/accuracy/seq_average": 0.26368058370490477, "siqa/accuracy/dev": 0.35516888433981575, "siqa/accuracy/group_average": 0.35516888433981575, "siqa/accuracy/seq_average": 0.35516888433981575, "winogrande/accuracy/dev": 0.5153906866614049, "winogrande/accuracy/group_average": 0.5153906866614049, "winogrande/accuracy/seq_average": 0.5153906866614049, "commonsenseqa/accuracy/dev_rand_split": 0.23177723177723178, "commonsenseqa/accuracy/group_average": 0.23177723177723178, "commonsenseqa/accuracy/seq_average": 0.23177723177723178}
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_sigmoid_standard_lb_v2/export/result-model-100000.pth.json ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "val/loss": 2.6009427025204612,
3
+ "val/accuracy": 0.4792877681671627,
4
+ "val/perplexity": 13.476436319193972,
5
+ "val/time_since_best_loss": 0,
6
+ "val/time_since_best_accuracy": 0,
7
+ "lambada/loss": 2.7328426763878104,
8
+ "lambada/accuracy/total": 0.26766304347826086,
9
+ "lambada/accuracy/openai_last_token": 0.765527950310559,
10
+ "lambada/perplexity": 11.978991374036408,
11
+ "lambada/lm_loss": 3.177715485090263,
12
+ "lambada/lm_perplexity": 23.991881087390862,
13
+ "lambada/time_since_best_loss": 0,
14
+ "lambada/time_since_best_accuracy": 0,
15
+ "mean_accuracy": 0.37347540582271177,
16
+ "mean_loss": 2.666892689454136,
17
+ "blimp/accuracy/passive_2": 0.903,
18
+ "blimp/accuracy/determiner_noun_agreement_2": 0.991,
19
+ "blimp/accuracy/ellipsis_n_bar_1": 0.828,
20
+ "blimp/accuracy/tough_vs_raising_2": 0.876,
21
+ "blimp/accuracy/tough_vs_raising_1": 0.582,
22
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.892,
23
+ "blimp/accuracy/principle_A_reconstruction": 0.405,
24
+ "blimp/accuracy/wh_vs_that_with_gap": 0.536,
25
+ "blimp/accuracy/principle_A_domain_2": 0.782,
26
+ "blimp/accuracy/determiner_noun_agreement_1": 0.991,
27
+ "blimp/accuracy/ellipsis_n_bar_2": 0.9,
28
+ "blimp/accuracy/principle_A_domain_3": 0.55,
29
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.933,
30
+ "blimp/accuracy/animate_subject_trans": 0.897,
31
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.881,
32
+ "blimp/accuracy/distractor_agreement_relative_clause": 0.625,
33
+ "blimp/accuracy/transitive": 0.876,
34
+ "blimp/accuracy/sentential_subject_island": 0.3,
35
+ "blimp/accuracy/adjunct_island": 0.816,
36
+ "blimp/accuracy/intransitive": 0.808,
37
+ "blimp/accuracy/existential_there_subject_raising": 0.885,
38
+ "blimp/accuracy/irregular_past_participle_adjectives": 0.984,
39
+ "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.41,
40
+ "blimp/accuracy/principle_A_case_1": 1.0,
41
+ "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.227,
42
+ "blimp/accuracy/only_npi_scope": 0.713,
43
+ "blimp/accuracy/superlative_quantifiers_2": 0.749,
44
+ "blimp/accuracy/passive_1": 0.893,
45
+ "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.913,
46
+ "blimp/accuracy/inchoative": 0.656,
47
+ "blimp/accuracy/anaphor_gender_agreement": 0.962,
48
+ "blimp/accuracy/principle_A_c_command": 0.626,
49
+ "blimp/accuracy/only_npi_licensor_present": 0.504,
50
+ "blimp/accuracy/expletive_it_object_raising": 0.751,
51
+ "blimp/accuracy/left_branch_island_simple_question": 0.413,
52
+ "blimp/accuracy/wh_questions_subject_gap": 0.924,
53
+ "blimp/accuracy/existential_there_quantifiers_2": 0.458,
54
+ "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.955,
55
+ "blimp/accuracy/sentential_negation_npi_scope": 0.691,
56
+ "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.791,
57
+ "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.888,
58
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.881,
59
+ "blimp/accuracy/principle_A_case_2": 0.964,
60
+ "blimp/accuracy/distractor_agreement_relational_noun": 0.809,
61
+ "blimp/accuracy/sentential_negation_npi_licensor_present": 0.989,
62
+ "blimp/accuracy/superlative_quantifiers_1": 0.627,
63
+ "blimp/accuracy/wh_island": 0.743,
64
+ "blimp/accuracy/principle_A_domain_1": 0.99,
65
+ "blimp/accuracy/complex_NP_island": 0.543,
66
+ "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.97,
67
+ "blimp/accuracy/irregular_past_participle_verbs": 0.871,
68
+ "blimp/accuracy/drop_argument": 0.767,
69
+ "blimp/accuracy/wh_questions_object_gap": 0.778,
70
+ "blimp/accuracy/animate_subject_passive": 0.776,
71
+ "blimp/accuracy/existential_there_quantifiers_1": 0.975,
72
+ "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.88,
73
+ "blimp/accuracy/npi_present_2": 0.536,
74
+ "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.933,
75
+ "blimp/accuracy/anaphor_number_agreement": 0.983,
76
+ "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.96,
77
+ "blimp/accuracy/existential_there_object_raising": 0.801,
78
+ "blimp/accuracy/matrix_question_npi_licensor_present": 0.193,
79
+ "blimp/accuracy/npi_present_1": 0.543,
80
+ "blimp/accuracy/wh_vs_that_no_gap": 0.966,
81
+ "blimp/accuracy/left_branch_island_echo_question": 0.466,
82
+ "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.976,
83
+ "blimp/accuracy/causative": 0.74,
84
+ "blimp/accuracy/group_average": 0.767537313432836,
85
+ "blimp/accuracy/seq_average": 0.7675373134328358,
86
+ "cbt/accuracy/NE": 0.7764423076923077,
87
+ "cbt/accuracy/V": 0.91,
88
+ "cbt/accuracy/CN": 0.8236,
89
+ "cbt/accuracy/P": 0.8872,
90
+ "cbt/accuracy/group_average": 0.8493105769230769,
91
+ "cbt/accuracy/seq_average": 0.8493397358943577,
92
+ "hellaswag/accuracy/val": 0.2914758016331408,
93
+ "hellaswag/accuracy/group_average": 0.2914758016331408,
94
+ "hellaswag/accuracy/seq_average": 0.2914758016331408,
95
+ "piqa/accuracy/val": 0.5963003264417845,
96
+ "piqa/accuracy/group_average": 0.5963003264417845,
97
+ "piqa/accuracy/seq_average": 0.5963003264417845,
98
+ "ai2arc/accuracy/ARC-Easy": 0.3386892177589852,
99
+ "ai2arc/accuracy/ARC-Challenge": 0.21888412017167383,
100
+ "ai2arc/accuracy/group_average": 0.2787866689653295,
101
+ "ai2arc/accuracy/seq_average": 0.2991501416430595,
102
+ "mmlu/accuracy/MMLU": 0.26106542724347515,
103
+ "mmlu/accuracy/group_average": 0.26106542724347515,
104
+ "mmlu/accuracy/seq_average": 0.26106542724347515,
105
+ "openbookqa/accuracy/test": 0.274,
106
+ "openbookqa/accuracy/group_average": 0.274,
107
+ "openbookqa/accuracy/seq_average": 0.274,
108
+ "race/accuracy/test/high": 0.2701543739279588,
109
+ "race/accuracy/test/middle": 0.3286908077994429,
110
+ "race/accuracy/group_average": 0.2994225908637008,
111
+ "race/accuracy/seq_average": 0.28719092014592623,
112
+ "siqa/accuracy/dev": 0.3607983623336745,
113
+ "siqa/accuracy/group_average": 0.3607983623336745,
114
+ "siqa/accuracy/seq_average": 0.3607983623336745,
115
+ "winogrande/accuracy/dev": 0.5130228887134964,
116
+ "winogrande/accuracy/group_average": 0.5130228887134964,
117
+ "winogrande/accuracy/seq_average": 0.5130228887134964,
118
+ "commonsenseqa/accuracy/dev_rand_split": 0.26371826371826373,
119
+ "commonsenseqa/accuracy/group_average": 0.26371826371826373,
120
+ "commonsenseqa/accuracy/seq_average": 0.26371826371826373
121
+ }
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_sigmoid_standard_lb_v2/export/result-model-20000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.8857591417100696, "val/accuracy": 0.4406961108010913, "val/perplexity": 17.917164100624984, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.66965347195264, "lambada/accuracy/total": 0.18963509316770186, "lambada/accuracy/openai_last_token": 0.7280667701863354, "lambada/perplexity": 19.209678329730828, "lambada/lm_loss": 3.425484315023718, "lambada/lm_perplexity": 30.73752789372241, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3151656019843966, "mean_loss": 2.7777063068313548, "blimp/accuracy/passive_2": 0.878, "blimp/accuracy/determiner_noun_agreement_2": 0.983, "blimp/accuracy/ellipsis_n_bar_1": 0.757, "blimp/accuracy/tough_vs_raising_2": 0.848, "blimp/accuracy/tough_vs_raising_1": 0.556, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.888, "blimp/accuracy/principle_A_reconstruction": 0.433, "blimp/accuracy/wh_vs_that_with_gap": 0.501, "blimp/accuracy/principle_A_domain_2": 0.79, "blimp/accuracy/determiner_noun_agreement_1": 0.982, "blimp/accuracy/ellipsis_n_bar_2": 0.879, "blimp/accuracy/principle_A_domain_3": 0.539, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.903, "blimp/accuracy/animate_subject_trans": 0.882, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.827, "blimp/accuracy/distractor_agreement_relative_clause": 0.525, "blimp/accuracy/transitive": 0.838, "blimp/accuracy/sentential_subject_island": 0.324, "blimp/accuracy/adjunct_island": 0.814, "blimp/accuracy/intransitive": 0.736, "blimp/accuracy/existential_there_subject_raising": 0.803, "blimp/accuracy/irregular_past_participle_adjectives": 0.916, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.23, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.127, "blimp/accuracy/only_npi_scope": 0.731, "blimp/accuracy/superlative_quantifiers_2": 0.648, "blimp/accuracy/passive_1": 0.904, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.884, "blimp/accuracy/inchoative": 0.557, "blimp/accuracy/anaphor_gender_agreement": 0.903, "blimp/accuracy/principle_A_c_command": 0.522, "blimp/accuracy/only_npi_licensor_present": 0.494, "blimp/accuracy/expletive_it_object_raising": 0.735, "blimp/accuracy/left_branch_island_simple_question": 0.253, "blimp/accuracy/wh_questions_subject_gap": 0.922, "blimp/accuracy/existential_there_quantifiers_2": 0.231, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.918, "blimp/accuracy/sentential_negation_npi_scope": 0.406, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.78, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.922, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.827, "blimp/accuracy/principle_A_case_2": 0.936, "blimp/accuracy/distractor_agreement_relational_noun": 0.8, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.995, "blimp/accuracy/superlative_quantifiers_1": 0.545, "blimp/accuracy/wh_island": 0.824, "blimp/accuracy/principle_A_domain_1": 0.995, "blimp/accuracy/complex_NP_island": 0.6, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.955, "blimp/accuracy/irregular_past_participle_verbs": 0.826, "blimp/accuracy/drop_argument": 0.747, "blimp/accuracy/wh_questions_object_gap": 0.774, "blimp/accuracy/animate_subject_passive": 0.745, "blimp/accuracy/existential_there_quantifiers_1": 0.982, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.836, "blimp/accuracy/npi_present_2": 0.52, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.921, "blimp/accuracy/anaphor_number_agreement": 0.959, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.961, "blimp/accuracy/existential_there_object_raising": 0.754, "blimp/accuracy/matrix_question_npi_licensor_present": 0.068, "blimp/accuracy/npi_present_1": 0.46, "blimp/accuracy/wh_vs_that_no_gap": 0.974, "blimp/accuracy/left_branch_island_echo_question": 0.36, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.991, "blimp/accuracy/causative": 0.715, "blimp/accuracy/group_average": 0.7289402985074626, "blimp/accuracy/seq_average": 0.7289402985074627, "cbt/accuracy/NE": 0.6935096153846154, "cbt/accuracy/V": 0.8816, "cbt/accuracy/CN": 0.77, "cbt/accuracy/P": 0.8524, "cbt/accuracy/group_average": 0.7993774038461539, "cbt/accuracy/seq_average": 0.7994197679071628, "hellaswag/accuracy/val": 0.27524397530372435, "hellaswag/accuracy/group_average": 0.27524397530372435, "hellaswag/accuracy/seq_average": 0.27524397530372435, "piqa/accuracy/val": 0.5565832426550599, "piqa/accuracy/group_average": 0.5565832426550599, "piqa/accuracy/seq_average": 0.5565832426550599, "ai2arc/accuracy/ARC-Easy": 0.31670190274841437, "ai2arc/accuracy/ARC-Challenge": 0.20686695278969958, "ai2arc/accuracy/group_average": 0.26178442776905697, "ai2arc/accuracy/seq_average": 0.2804532577903683, "mmlu/accuracy/MMLU": 0.26349660350375403, "mmlu/accuracy/group_average": 0.26349660350375403, "mmlu/accuracy/seq_average": 0.26349660350375403, "openbookqa/accuracy/test": 0.258, "openbookqa/accuracy/group_average": 0.258, "openbookqa/accuracy/seq_average": 0.258, "race/accuracy/test/high": 0.2567181246426529, "race/accuracy/test/middle": 0.32103064066852366, "race/accuracy/group_average": 0.2888743826555883, "race/accuracy/seq_average": 0.2754357519254155, "siqa/accuracy/dev": 0.35516888433981575, "siqa/accuracy/group_average": 0.35516888433981575, "siqa/accuracy/seq_average": 0.35516888433981575, "winogrande/accuracy/dev": 0.5090765588003157, "winogrande/accuracy/group_average": 0.5090765588003157, "winogrande/accuracy/seq_average": 0.5090765588003157, "commonsenseqa/accuracy/dev_rand_split": 0.2416052416052416, "commonsenseqa/accuracy/group_average": 0.2416052416052416, "commonsenseqa/accuracy/seq_average": 0.2416052416052416}
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_sigmoid_standard_lb_v2/export/result-model-30000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.802791534908234, "val/accuracy": 0.45174444289434523, "val/perplexity": 16.490616710104515, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.683407990828804, "lambada/accuracy/total": 0.20710403726708074, "lambada/accuracy/openai_last_token": 0.7379658385093167, "lambada/perplexity": 17.315911245384207, "lambada/lm_loss": 3.3728853777300993, "lambada/lm_perplexity": 29.16255075395031, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.32942424008071297, "mean_loss": 2.7430997628685194, "blimp/accuracy/passive_2": 0.875, "blimp/accuracy/determiner_noun_agreement_2": 0.977, "blimp/accuracy/ellipsis_n_bar_1": 0.76, "blimp/accuracy/tough_vs_raising_2": 0.815, "blimp/accuracy/tough_vs_raising_1": 0.635, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.877, "blimp/accuracy/principle_A_reconstruction": 0.539, "blimp/accuracy/wh_vs_that_with_gap": 0.545, "blimp/accuracy/principle_A_domain_2": 0.809, "blimp/accuracy/determiner_noun_agreement_1": 0.977, "blimp/accuracy/ellipsis_n_bar_2": 0.885, "blimp/accuracy/principle_A_domain_3": 0.556, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.917, "blimp/accuracy/animate_subject_trans": 0.882, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.837, "blimp/accuracy/distractor_agreement_relative_clause": 0.524, "blimp/accuracy/transitive": 0.839, "blimp/accuracy/sentential_subject_island": 0.287, "blimp/accuracy/adjunct_island": 0.796, "blimp/accuracy/intransitive": 0.798, "blimp/accuracy/existential_there_subject_raising": 0.83, "blimp/accuracy/irregular_past_participle_adjectives": 0.987, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.322, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.22, "blimp/accuracy/only_npi_scope": 0.747, "blimp/accuracy/superlative_quantifiers_2": 0.689, "blimp/accuracy/passive_1": 0.866, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.897, "blimp/accuracy/inchoative": 0.627, "blimp/accuracy/anaphor_gender_agreement": 0.939, "blimp/accuracy/principle_A_c_command": 0.56, "blimp/accuracy/only_npi_licensor_present": 0.313, "blimp/accuracy/expletive_it_object_raising": 0.739, "blimp/accuracy/left_branch_island_simple_question": 0.356, "blimp/accuracy/wh_questions_subject_gap": 0.887, "blimp/accuracy/existential_there_quantifiers_2": 0.317, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.926, "blimp/accuracy/sentential_negation_npi_scope": 0.505, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.769, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.881, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.818, "blimp/accuracy/principle_A_case_2": 0.946, "blimp/accuracy/distractor_agreement_relational_noun": 0.784, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.984, "blimp/accuracy/superlative_quantifiers_1": 0.68, "blimp/accuracy/wh_island": 0.768, "blimp/accuracy/principle_A_domain_1": 0.983, "blimp/accuracy/complex_NP_island": 0.538, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.955, "blimp/accuracy/irregular_past_participle_verbs": 0.842, "blimp/accuracy/drop_argument": 0.79, "blimp/accuracy/wh_questions_object_gap": 0.649, "blimp/accuracy/animate_subject_passive": 0.767, "blimp/accuracy/existential_there_quantifiers_1": 0.975, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.848, "blimp/accuracy/npi_present_2": 0.508, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.912, "blimp/accuracy/anaphor_number_agreement": 0.974, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.95, "blimp/accuracy/existential_there_object_raising": 0.758, "blimp/accuracy/matrix_question_npi_licensor_present": 0.142, "blimp/accuracy/npi_present_1": 0.462, "blimp/accuracy/wh_vs_that_no_gap": 0.956, "blimp/accuracy/left_branch_island_echo_question": 0.435, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.97, "blimp/accuracy/causative": 0.707, "blimp/accuracy/group_average": 0.7404179104477613, "blimp/accuracy/seq_average": 0.7404179104477612, "cbt/accuracy/NE": 0.7319711538461539, "cbt/accuracy/V": 0.8888, "cbt/accuracy/CN": 0.7832, "cbt/accuracy/P": 0.8632, "cbt/accuracy/group_average": 0.8167927884615385, "cbt/accuracy/seq_average": 0.816826730692277, "hellaswag/accuracy/val": 0.27703644692292373, "hellaswag/accuracy/group_average": 0.27703644692292373, "hellaswag/accuracy/seq_average": 0.27703644692292373, "piqa/accuracy/val": 0.5783460282916213, "piqa/accuracy/group_average": 0.5783460282916213, "piqa/accuracy/seq_average": 0.5783460282916213, "ai2arc/accuracy/ARC-Easy": 0.32473572938689216, "ai2arc/accuracy/ARC-Challenge": 0.2111587982832618, "ai2arc/accuracy/group_average": 0.267947263835077, "ai2arc/accuracy/seq_average": 0.28725212464589234, "mmlu/accuracy/MMLU": 0.2619234894529853, "mmlu/accuracy/group_average": 0.2619234894529853, "mmlu/accuracy/seq_average": 0.2619234894529853, "openbookqa/accuracy/test": 0.268, "openbookqa/accuracy/group_average": 0.268, "openbookqa/accuracy/seq_average": 0.268, "race/accuracy/test/high": 0.269010863350486, "race/accuracy/test/middle": 0.3342618384401114, "race/accuracy/group_average": 0.3016363508952987, "race/accuracy/seq_average": 0.2880016214025132, "siqa/accuracy/dev": 0.3526100307062436, "siqa/accuracy/group_average": 0.3526100307062436, "siqa/accuracy/seq_average": 0.3526100307062436, "winogrande/accuracy/dev": 0.5082872928176796, "winogrande/accuracy/group_average": 0.5082872928176796, "winogrande/accuracy/seq_average": 0.5082872928176796, "commonsenseqa/accuracy/dev_rand_split": 0.24815724815724816, "commonsenseqa/accuracy/group_average": 0.24815724815724816, "commonsenseqa/accuracy/seq_average": 0.24815724815724816}
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_sigmoid_standard_lb_v2/export/result-model-40000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.751065451001364, "val/accuracy": 0.45872376457093256, "val/perplexity": 15.65930722379432, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.6325202136306287, "lambada/accuracy/total": 0.20535714285714285, "lambada/accuracy/openai_last_token": 0.7408773291925466, "lambada/perplexity": 16.818405195016727, "lambada/lm_loss": 3.305533322717444, "lambada/lm_perplexity": 27.263077730930483, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3320404537140377, "mean_loss": 2.691792832315996, "blimp/accuracy/passive_2": 0.888, "blimp/accuracy/determiner_noun_agreement_2": 0.985, "blimp/accuracy/ellipsis_n_bar_1": 0.804, "blimp/accuracy/tough_vs_raising_2": 0.851, "blimp/accuracy/tough_vs_raising_1": 0.571, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.888, "blimp/accuracy/principle_A_reconstruction": 0.372, "blimp/accuracy/wh_vs_that_with_gap": 0.514, "blimp/accuracy/principle_A_domain_2": 0.757, "blimp/accuracy/determiner_noun_agreement_1": 0.986, "blimp/accuracy/ellipsis_n_bar_2": 0.88, "blimp/accuracy/principle_A_domain_3": 0.538, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.944, "blimp/accuracy/animate_subject_trans": 0.891, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.88, "blimp/accuracy/distractor_agreement_relative_clause": 0.636, "blimp/accuracy/transitive": 0.853, "blimp/accuracy/sentential_subject_island": 0.262, "blimp/accuracy/adjunct_island": 0.821, "blimp/accuracy/intransitive": 0.758, "blimp/accuracy/existential_there_subject_raising": 0.854, "blimp/accuracy/irregular_past_participle_adjectives": 0.957, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.331, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.164, "blimp/accuracy/only_npi_scope": 0.685, "blimp/accuracy/superlative_quantifiers_2": 0.673, "blimp/accuracy/passive_1": 0.904, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.859, "blimp/accuracy/inchoative": 0.606, "blimp/accuracy/anaphor_gender_agreement": 0.93, "blimp/accuracy/principle_A_c_command": 0.586, "blimp/accuracy/only_npi_licensor_present": 0.318, "blimp/accuracy/expletive_it_object_raising": 0.74, "blimp/accuracy/left_branch_island_simple_question": 0.345, "blimp/accuracy/wh_questions_subject_gap": 0.931, "blimp/accuracy/existential_there_quantifiers_2": 0.352, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.946, "blimp/accuracy/sentential_negation_npi_scope": 0.644, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.776, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.909, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.848, "blimp/accuracy/principle_A_case_2": 0.961, "blimp/accuracy/distractor_agreement_relational_noun": 0.844, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.997, "blimp/accuracy/superlative_quantifiers_1": 0.661, "blimp/accuracy/wh_island": 0.701, "blimp/accuracy/principle_A_domain_1": 0.991, "blimp/accuracy/complex_NP_island": 0.561, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.964, "blimp/accuracy/irregular_past_participle_verbs": 0.856, "blimp/accuracy/drop_argument": 0.756, "blimp/accuracy/wh_questions_object_gap": 0.745, "blimp/accuracy/animate_subject_passive": 0.765, "blimp/accuracy/existential_there_quantifiers_1": 0.972, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.862, "blimp/accuracy/npi_present_2": 0.474, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.929, "blimp/accuracy/anaphor_number_agreement": 0.974, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.955, "blimp/accuracy/existential_there_object_raising": 0.792, "blimp/accuracy/matrix_question_npi_licensor_present": 0.159, "blimp/accuracy/npi_present_1": 0.443, "blimp/accuracy/wh_vs_that_no_gap": 0.973, "blimp/accuracy/left_branch_island_echo_question": 0.407, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.989, "blimp/accuracy/causative": 0.7, "blimp/accuracy/group_average": 0.7442985074626863, "blimp/accuracy/seq_average": 0.7442985074626866, "cbt/accuracy/NE": 0.7367788461538461, "cbt/accuracy/V": 0.89, "cbt/accuracy/CN": 0.7932, "cbt/accuracy/P": 0.8708, "cbt/accuracy/group_average": 0.8226947115384615, "cbt/accuracy/seq_average": 0.8227290916366546, "hellaswag/accuracy/val": 0.28141804421429994, "hellaswag/accuracy/group_average": 0.28141804421429994, "hellaswag/accuracy/seq_average": 0.28141804421429994, "piqa/accuracy/val": 0.5805223068552775, "piqa/accuracy/group_average": 0.5805223068552775, "piqa/accuracy/seq_average": 0.5805223068552775, "ai2arc/accuracy/ARC-Easy": 0.3285412262156448, "ai2arc/accuracy/ARC-Challenge": 0.2034334763948498, "ai2arc/accuracy/group_average": 0.2659873513052473, "ai2arc/accuracy/seq_average": 0.28725212464589234, "mmlu/accuracy/MMLU": 0.25863425098319626, "mmlu/accuracy/group_average": 0.25863425098319626, "mmlu/accuracy/seq_average": 0.25863425098319626, "openbookqa/accuracy/test": 0.274, "openbookqa/accuracy/group_average": 0.274, "openbookqa/accuracy/seq_average": 0.274, "race/accuracy/test/high": 0.2672955974842767, "race/accuracy/test/middle": 0.318941504178273, "race/accuracy/group_average": 0.29311855083127486, "race/accuracy/seq_average": 0.2823267126064045, "siqa/accuracy/dev": 0.3572159672466735, "siqa/accuracy/group_average": 0.3572159672466735, "siqa/accuracy/seq_average": 0.3572159672466735, "winogrande/accuracy/dev": 0.5082872928176796, "winogrande/accuracy/group_average": 0.5082872928176796, "winogrande/accuracy/seq_average": 0.5082872928176796, "commonsenseqa/accuracy/dev_rand_split": 0.26535626535626533, "commonsenseqa/accuracy/group_average": 0.26535626535626533, "commonsenseqa/accuracy/seq_average": 0.26535626535626533}
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_sigmoid_standard_lb_v2/export/result-model-50000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.7056061275421626, "val/accuracy": 0.46512276785714285, "val/perplexity": 14.963383661339174, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.8863205050829777, "lambada/accuracy/total": 0.23078416149068323, "lambada/accuracy/openai_last_token": 0.7532996894409938, "lambada/perplexity": 14.597399808302038, "lambada/lm_loss": 3.288504249149332, "lambada/lm_perplexity": 26.802743432160263, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.347953464673913, "mean_loss": 2.7959633163125703, "blimp/accuracy/passive_2": 0.89, "blimp/accuracy/determiner_noun_agreement_2": 0.984, "blimp/accuracy/ellipsis_n_bar_1": 0.827, "blimp/accuracy/tough_vs_raising_2": 0.869, "blimp/accuracy/tough_vs_raising_1": 0.54, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.892, "blimp/accuracy/principle_A_reconstruction": 0.472, "blimp/accuracy/wh_vs_that_with_gap": 0.445, "blimp/accuracy/principle_A_domain_2": 0.815, "blimp/accuracy/determiner_noun_agreement_1": 0.984, "blimp/accuracy/ellipsis_n_bar_2": 0.885, "blimp/accuracy/principle_A_domain_3": 0.51, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.927, "blimp/accuracy/animate_subject_trans": 0.9, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.857, "blimp/accuracy/distractor_agreement_relative_clause": 0.615, "blimp/accuracy/transitive": 0.855, "blimp/accuracy/sentential_subject_island": 0.317, "blimp/accuracy/adjunct_island": 0.77, "blimp/accuracy/intransitive": 0.795, "blimp/accuracy/existential_there_subject_raising": 0.856, "blimp/accuracy/irregular_past_participle_adjectives": 0.921, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.317, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.145, "blimp/accuracy/only_npi_scope": 0.773, "blimp/accuracy/superlative_quantifiers_2": 0.653, "blimp/accuracy/passive_1": 0.894, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.886, "blimp/accuracy/inchoative": 0.611, "blimp/accuracy/anaphor_gender_agreement": 0.932, "blimp/accuracy/principle_A_c_command": 0.633, "blimp/accuracy/only_npi_licensor_present": 0.535, "blimp/accuracy/expletive_it_object_raising": 0.768, "blimp/accuracy/left_branch_island_simple_question": 0.305, "blimp/accuracy/wh_questions_subject_gap": 0.937, "blimp/accuracy/existential_there_quantifiers_2": 0.384, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.934, "blimp/accuracy/sentential_negation_npi_scope": 0.657, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.794, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.902, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.843, "blimp/accuracy/principle_A_case_2": 0.962, "blimp/accuracy/distractor_agreement_relational_noun": 0.789, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.993, "blimp/accuracy/superlative_quantifiers_1": 0.548, "blimp/accuracy/wh_island": 0.706, "blimp/accuracy/principle_A_domain_1": 0.99, "blimp/accuracy/complex_NP_island": 0.555, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.959, "blimp/accuracy/irregular_past_participle_verbs": 0.853, "blimp/accuracy/drop_argument": 0.747, "blimp/accuracy/wh_questions_object_gap": 0.792, "blimp/accuracy/animate_subject_passive": 0.796, "blimp/accuracy/existential_there_quantifiers_1": 0.977, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.868, "blimp/accuracy/npi_present_2": 0.546, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.932, "blimp/accuracy/anaphor_number_agreement": 0.979, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.957, "blimp/accuracy/existential_there_object_raising": 0.816, "blimp/accuracy/matrix_question_npi_licensor_present": 0.155, "blimp/accuracy/npi_present_1": 0.507, "blimp/accuracy/wh_vs_that_no_gap": 0.969, "blimp/accuracy/left_branch_island_echo_question": 0.43, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.984, "blimp/accuracy/causative": 0.705, "blimp/accuracy/group_average": 0.7514029850746271, "blimp/accuracy/seq_average": 0.7514029850746269, "cbt/accuracy/NE": 0.7495993589743589, "cbt/accuracy/V": 0.9036, "cbt/accuracy/CN": 0.7964, "cbt/accuracy/P": 0.87, "cbt/accuracy/group_average": 0.8298998397435897, "cbt/accuracy/seq_average": 0.8299319727891157, "hellaswag/accuracy/val": 0.2818163712407887, "hellaswag/accuracy/group_average": 0.2818163712407887, "hellaswag/accuracy/seq_average": 0.2818163712407887, "piqa/accuracy/val": 0.5783460282916213, "piqa/accuracy/group_average": 0.5783460282916213, "piqa/accuracy/seq_average": 0.5783460282916213, "ai2arc/accuracy/ARC-Easy": 0.33276955602537, "ai2arc/accuracy/ARC-Challenge": 0.21974248927038625, "ai2arc/accuracy/group_average": 0.27625602264787813, "ai2arc/accuracy/seq_average": 0.29546742209631727, "mmlu/accuracy/MMLU": 0.26006435466571326, "mmlu/accuracy/group_average": 0.26006435466571326, "mmlu/accuracy/seq_average": 0.26006435466571326, "openbookqa/accuracy/test": 0.266, "openbookqa/accuracy/group_average": 0.266, "openbookqa/accuracy/seq_average": 0.266, "race/accuracy/test/high": 0.2638650657518582, "race/accuracy/test/middle": 0.33147632311977715, "race/accuracy/group_average": 0.2976706944358177, "race/accuracy/seq_average": 0.28354276449128496, "siqa/accuracy/dev": 0.3510747185261003, "siqa/accuracy/group_average": 0.3510747185261003, "siqa/accuracy/seq_average": 0.3510747185261003, "winogrande/accuracy/dev": 0.5138121546961326, "winogrande/accuracy/group_average": 0.5138121546961326, "winogrande/accuracy/seq_average": 0.5138121546961326, "commonsenseqa/accuracy/dev_rand_split": 0.25143325143325146, "commonsenseqa/accuracy/group_average": 0.25143325143325146, "commonsenseqa/accuracy/seq_average": 0.25143325143325146}
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_sigmoid_standard_lb_v2/export/result-model-60000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.6712130591982888, "val/accuracy": 0.46965099516369047, "val/perplexity": 14.457496358850928, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.872034866617333, "lambada/accuracy/total": 0.22496118012422361, "lambada/accuracy/openai_last_token": 0.7480590062111802, "lambada/perplexity": 14.393959125887662, "lambada/lm_loss": 3.234705262408019, "lambada/lm_perplexity": 25.398884669429773, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.34730608764395704, "mean_loss": 2.771623962907811, "blimp/accuracy/passive_2": 0.887, "blimp/accuracy/determiner_noun_agreement_2": 0.99, "blimp/accuracy/ellipsis_n_bar_1": 0.813, "blimp/accuracy/tough_vs_raising_2": 0.885, "blimp/accuracy/tough_vs_raising_1": 0.539, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.874, "blimp/accuracy/principle_A_reconstruction": 0.312, "blimp/accuracy/wh_vs_that_with_gap": 0.481, "blimp/accuracy/principle_A_domain_2": 0.778, "blimp/accuracy/determiner_noun_agreement_1": 0.988, "blimp/accuracy/ellipsis_n_bar_2": 0.898, "blimp/accuracy/principle_A_domain_3": 0.544, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.926, "blimp/accuracy/animate_subject_trans": 0.895, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.872, "blimp/accuracy/distractor_agreement_relative_clause": 0.619, "blimp/accuracy/transitive": 0.866, "blimp/accuracy/sentential_subject_island": 0.295, "blimp/accuracy/adjunct_island": 0.849, "blimp/accuracy/intransitive": 0.795, "blimp/accuracy/existential_there_subject_raising": 0.886, "blimp/accuracy/irregular_past_participle_adjectives": 0.952, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.322, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.167, "blimp/accuracy/only_npi_scope": 0.804, "blimp/accuracy/superlative_quantifiers_2": 0.775, "blimp/accuracy/passive_1": 0.88, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.9, "blimp/accuracy/inchoative": 0.655, "blimp/accuracy/anaphor_gender_agreement": 0.939, "blimp/accuracy/principle_A_c_command": 0.572, "blimp/accuracy/only_npi_licensor_present": 0.572, "blimp/accuracy/expletive_it_object_raising": 0.746, "blimp/accuracy/left_branch_island_simple_question": 0.32, "blimp/accuracy/wh_questions_subject_gap": 0.925, "blimp/accuracy/existential_there_quantifiers_2": 0.293, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.946, "blimp/accuracy/sentential_negation_npi_scope": 0.58, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.774, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.915, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.842, "blimp/accuracy/principle_A_case_2": 0.955, "blimp/accuracy/distractor_agreement_relational_noun": 0.785, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.991, "blimp/accuracy/superlative_quantifiers_1": 0.643, "blimp/accuracy/wh_island": 0.778, "blimp/accuracy/principle_A_domain_1": 0.994, "blimp/accuracy/complex_NP_island": 0.586, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.963, "blimp/accuracy/irregular_past_participle_verbs": 0.867, "blimp/accuracy/drop_argument": 0.766, "blimp/accuracy/wh_questions_object_gap": 0.749, "blimp/accuracy/animate_subject_passive": 0.791, "blimp/accuracy/existential_there_quantifiers_1": 0.96, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.874, "blimp/accuracy/npi_present_2": 0.53, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.92, "blimp/accuracy/anaphor_number_agreement": 0.97, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.956, "blimp/accuracy/existential_there_object_raising": 0.817, "blimp/accuracy/matrix_question_npi_licensor_present": 0.154, "blimp/accuracy/npi_present_1": 0.527, "blimp/accuracy/wh_vs_that_no_gap": 0.965, "blimp/accuracy/left_branch_island_echo_question": 0.423, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.978, "blimp/accuracy/causative": 0.734, "blimp/accuracy/group_average": 0.7544328358208955, "blimp/accuracy/seq_average": 0.7544328358208955, "cbt/accuracy/NE": 0.7556089743589743, "cbt/accuracy/V": 0.9024, "cbt/accuracy/CN": 0.814, "cbt/accuracy/P": 0.8804, "cbt/accuracy/group_average": 0.8381022435897435, "cbt/accuracy/seq_average": 0.8381352541016407, "hellaswag/accuracy/val": 0.2865962955586537, "hellaswag/accuracy/group_average": 0.2865962955586537, "hellaswag/accuracy/seq_average": 0.2865962955586537, "piqa/accuracy/val": 0.5821545157780196, "piqa/accuracy/group_average": 0.5821545157780196, "piqa/accuracy/seq_average": 0.5821545157780196, "ai2arc/accuracy/ARC-Easy": 0.3293868921775899, "ai2arc/accuracy/ARC-Challenge": 0.21888412017167383, "ai2arc/accuracy/group_average": 0.27413550617463184, "ai2arc/accuracy/seq_average": 0.2929178470254957, "mmlu/accuracy/MMLU": 0.2624240257418663, "mmlu/accuracy/group_average": 0.2624240257418663, "mmlu/accuracy/seq_average": 0.2624240257418663, "openbookqa/accuracy/test": 0.262, "openbookqa/accuracy/group_average": 0.262, "openbookqa/accuracy/seq_average": 0.262, "race/accuracy/test/high": 0.258147512864494, "race/accuracy/test/middle": 0.3245125348189415, "race/accuracy/group_average": 0.29133002384171774, "race/accuracy/seq_average": 0.27746250506688286, "siqa/accuracy/dev": 0.36131013306038895, "siqa/accuracy/group_average": 0.36131013306038895, "siqa/accuracy/seq_average": 0.36131013306038895, "winogrande/accuracy/dev": 0.5114443567482242, "winogrande/accuracy/group_average": 0.5114443567482242, "winogrande/accuracy/seq_average": 0.5114443567482242, "commonsenseqa/accuracy/dev_rand_split": 0.2588042588042588, "commonsenseqa/accuracy/group_average": 0.2588042588042588, "commonsenseqa/accuracy/seq_average": 0.2588042588042588}
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_sigmoid_standard_lb_v2/export/result-model-70000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.64217776343936, "val/accuracy": 0.47384498232886907, "val/perplexity": 14.043754304239512, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.737154753311821, "lambada/accuracy/total": 0.25038819875776397, "lambada/accuracy/openai_last_token": 0.7562111801242236, "lambada/perplexity": 13.327692371599486, "lambada/lm_loss": 3.218262594495382, "lambada/lm_perplexity": 24.984673940362445, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3621165905433165, "mean_loss": 2.68966625837559, "blimp/accuracy/passive_2": 0.907, "blimp/accuracy/determiner_noun_agreement_2": 0.988, "blimp/accuracy/ellipsis_n_bar_1": 0.815, "blimp/accuracy/tough_vs_raising_2": 0.861, "blimp/accuracy/tough_vs_raising_1": 0.552, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.876, "blimp/accuracy/principle_A_reconstruction": 0.47, "blimp/accuracy/wh_vs_that_with_gap": 0.477, "blimp/accuracy/principle_A_domain_2": 0.79, "blimp/accuracy/determiner_noun_agreement_1": 0.986, "blimp/accuracy/ellipsis_n_bar_2": 0.902, "blimp/accuracy/principle_A_domain_3": 0.548, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.934, "blimp/accuracy/animate_subject_trans": 0.897, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.873, "blimp/accuracy/distractor_agreement_relative_clause": 0.601, "blimp/accuracy/transitive": 0.868, "blimp/accuracy/sentential_subject_island": 0.3, "blimp/accuracy/adjunct_island": 0.805, "blimp/accuracy/intransitive": 0.794, "blimp/accuracy/existential_there_subject_raising": 0.885, "blimp/accuracy/irregular_past_participle_adjectives": 0.954, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.387, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.199, "blimp/accuracy/only_npi_scope": 0.74, "blimp/accuracy/superlative_quantifiers_2": 0.737, "blimp/accuracy/passive_1": 0.891, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.906, "blimp/accuracy/inchoative": 0.629, "blimp/accuracy/anaphor_gender_agreement": 0.963, "blimp/accuracy/principle_A_c_command": 0.597, "blimp/accuracy/only_npi_licensor_present": 0.607, "blimp/accuracy/expletive_it_object_raising": 0.742, "blimp/accuracy/left_branch_island_simple_question": 0.391, "blimp/accuracy/wh_questions_subject_gap": 0.925, "blimp/accuracy/existential_there_quantifiers_2": 0.389, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.945, "blimp/accuracy/sentential_negation_npi_scope": 0.665, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.78, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.908, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.858, "blimp/accuracy/principle_A_case_2": 0.964, "blimp/accuracy/distractor_agreement_relational_noun": 0.804, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.996, "blimp/accuracy/superlative_quantifiers_1": 0.722, "blimp/accuracy/wh_island": 0.788, "blimp/accuracy/principle_A_domain_1": 0.987, "blimp/accuracy/complex_NP_island": 0.542, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.969, "blimp/accuracy/irregular_past_participle_verbs": 0.861, "blimp/accuracy/drop_argument": 0.75, "blimp/accuracy/wh_questions_object_gap": 0.773, "blimp/accuracy/animate_subject_passive": 0.769, "blimp/accuracy/existential_there_quantifiers_1": 0.973, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.859, "blimp/accuracy/npi_present_2": 0.53, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.934, "blimp/accuracy/anaphor_number_agreement": 0.981, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.956, "blimp/accuracy/existential_there_object_raising": 0.806, "blimp/accuracy/matrix_question_npi_licensor_present": 0.157, "blimp/accuracy/npi_present_1": 0.51, "blimp/accuracy/wh_vs_that_no_gap": 0.967, "blimp/accuracy/left_branch_island_echo_question": 0.449, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.975, "blimp/accuracy/causative": 0.722, "blimp/accuracy/group_average": 0.7624776119402986, "blimp/accuracy/seq_average": 0.7624776119402985, "cbt/accuracy/NE": 0.7636217948717948, "cbt/accuracy/V": 0.9064, "cbt/accuracy/CN": 0.8172, "cbt/accuracy/P": 0.8824, "cbt/accuracy/group_average": 0.8424054487179488, "cbt/accuracy/seq_average": 0.842436974789916, "hellaswag/accuracy/val": 0.2889862577175861, "hellaswag/accuracy/group_average": 0.2889862577175861, "hellaswag/accuracy/seq_average": 0.2889862577175861, "piqa/accuracy/val": 0.5859630032644179, "piqa/accuracy/group_average": 0.5859630032644179, "piqa/accuracy/seq_average": 0.5859630032644179, "ai2arc/accuracy/ARC-Easy": 0.34334038054968286, "ai2arc/accuracy/ARC-Challenge": 0.21802575107296138, "ai2arc/accuracy/group_average": 0.28068306581132213, "ai2arc/accuracy/seq_average": 0.30198300283286117, "mmlu/accuracy/MMLU": 0.2619949946371112, "mmlu/accuracy/group_average": 0.2619949946371112, "mmlu/accuracy/seq_average": 0.2619949946371112, "openbookqa/accuracy/test": 0.258, "openbookqa/accuracy/group_average": 0.258, "openbookqa/accuracy/seq_average": 0.258, "race/accuracy/test/high": 0.26300743281875355, "race/accuracy/test/middle": 0.3363509749303621, "race/accuracy/group_average": 0.2996792038745578, "race/accuracy/seq_average": 0.2843534657478719, "siqa/accuracy/dev": 0.36284544524053225, "siqa/accuracy/group_average": 0.36284544524053225, "siqa/accuracy/seq_average": 0.36284544524053225, "winogrande/accuracy/dev": 0.5185477505919495, "winogrande/accuracy/group_average": 0.5185477505919495, "winogrande/accuracy/seq_average": 0.5185477505919495, "commonsenseqa/accuracy/dev_rand_split": 0.26535626535626533, "commonsenseqa/accuracy/group_average": 0.26535626535626533, "commonsenseqa/accuracy/seq_average": 0.26535626535626533}
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_sigmoid_standard_lb_v2/export/result-model-80000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.621066138857887, "val/accuracy": 0.47705078125, "val/perplexity": 13.750375582877417, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.84641128445264, "lambada/accuracy/total": 0.25562888198757766, "lambada/accuracy/openai_last_token": 0.7595108695652174, "lambada/perplexity": 12.501188778590125, "lambada/lm_loss": 3.199625161333579, "lambada/lm_perplexity": 24.52333617945064, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3663398316187888, "mean_loss": 2.7337387116552634, "blimp/accuracy/passive_2": 0.91, "blimp/accuracy/determiner_noun_agreement_2": 0.991, "blimp/accuracy/ellipsis_n_bar_1": 0.832, "blimp/accuracy/tough_vs_raising_2": 0.866, "blimp/accuracy/tough_vs_raising_1": 0.586, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.898, "blimp/accuracy/principle_A_reconstruction": 0.413, "blimp/accuracy/wh_vs_that_with_gap": 0.484, "blimp/accuracy/principle_A_domain_2": 0.789, "blimp/accuracy/determiner_noun_agreement_1": 0.99, "blimp/accuracy/ellipsis_n_bar_2": 0.888, "blimp/accuracy/principle_A_domain_3": 0.541, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.928, "blimp/accuracy/animate_subject_trans": 0.895, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.888, "blimp/accuracy/distractor_agreement_relative_clause": 0.652, "blimp/accuracy/transitive": 0.881, "blimp/accuracy/sentential_subject_island": 0.307, "blimp/accuracy/adjunct_island": 0.811, "blimp/accuracy/intransitive": 0.808, "blimp/accuracy/existential_there_subject_raising": 0.882, "blimp/accuracy/irregular_past_participle_adjectives": 0.989, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.364, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.174, "blimp/accuracy/only_npi_scope": 0.779, "blimp/accuracy/superlative_quantifiers_2": 0.745, "blimp/accuracy/passive_1": 0.908, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.913, "blimp/accuracy/inchoative": 0.659, "blimp/accuracy/anaphor_gender_agreement": 0.951, "blimp/accuracy/principle_A_c_command": 0.604, "blimp/accuracy/only_npi_licensor_present": 0.532, "blimp/accuracy/expletive_it_object_raising": 0.737, "blimp/accuracy/left_branch_island_simple_question": 0.342, "blimp/accuracy/wh_questions_subject_gap": 0.912, "blimp/accuracy/existential_there_quantifiers_2": 0.433, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.944, "blimp/accuracy/sentential_negation_npi_scope": 0.715, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.784, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.9, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.873, "blimp/accuracy/principle_A_case_2": 0.968, "blimp/accuracy/distractor_agreement_relational_noun": 0.824, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.994, "blimp/accuracy/superlative_quantifiers_1": 0.619, "blimp/accuracy/wh_island": 0.78, "blimp/accuracy/principle_A_domain_1": 0.983, "blimp/accuracy/complex_NP_island": 0.528, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.962, "blimp/accuracy/irregular_past_participle_verbs": 0.863, "blimp/accuracy/drop_argument": 0.772, "blimp/accuracy/wh_questions_object_gap": 0.782, "blimp/accuracy/animate_subject_passive": 0.785, "blimp/accuracy/existential_there_quantifiers_1": 0.968, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.871, "blimp/accuracy/npi_present_2": 0.543, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.925, "blimp/accuracy/anaphor_number_agreement": 0.987, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.957, "blimp/accuracy/existential_there_object_raising": 0.791, "blimp/accuracy/matrix_question_npi_licensor_present": 0.226, "blimp/accuracy/npi_present_1": 0.512, "blimp/accuracy/wh_vs_that_no_gap": 0.963, "blimp/accuracy/left_branch_island_echo_question": 0.445, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.977, "blimp/accuracy/causative": 0.72, "blimp/accuracy/group_average": 0.7648208955223879, "blimp/accuracy/seq_average": 0.7648208955223881, "cbt/accuracy/NE": 0.7684294871794872, "cbt/accuracy/V": 0.902, "cbt/accuracy/CN": 0.8112, "cbt/accuracy/P": 0.8852, "cbt/accuracy/group_average": 0.8417073717948718, "cbt/accuracy/seq_average": 0.8417366946778712, "hellaswag/accuracy/val": 0.28818960366460866, "hellaswag/accuracy/group_average": 0.28818960366460866, "hellaswag/accuracy/seq_average": 0.28818960366460866, "piqa/accuracy/val": 0.5957562568008705, "piqa/accuracy/group_average": 0.5957562568008705, "piqa/accuracy/seq_average": 0.5957562568008705, "ai2arc/accuracy/ARC-Easy": 0.3412262156448203, "ai2arc/accuracy/ARC-Challenge": 0.2206008583690987, "ai2arc/accuracy/group_average": 0.28091353700695953, "ai2arc/accuracy/seq_average": 0.30141643059490086, "mmlu/accuracy/MMLU": 0.26056489095459423, "mmlu/accuracy/group_average": 0.26056489095459423, "mmlu/accuracy/seq_average": 0.26056489095459423, "openbookqa/accuracy/test": 0.276, "openbookqa/accuracy/group_average": 0.276, "openbookqa/accuracy/seq_average": 0.276, "race/accuracy/test/high": 0.2730131503716409, "race/accuracy/test/middle": 0.3321727019498607, "race/accuracy/group_average": 0.30259292616075084, "race/accuracy/seq_average": 0.29023104985812725, "siqa/accuracy/dev": 0.3607983623336745, "siqa/accuracy/group_average": 0.3607983623336745, "siqa/accuracy/seq_average": 0.3607983623336745, "winogrande/accuracy/dev": 0.5193370165745856, "winogrande/accuracy/group_average": 0.5193370165745856, "winogrande/accuracy/seq_average": 0.5193370165745856, "commonsenseqa/accuracy/dev_rand_split": 0.26371826371826373, "commonsenseqa/accuracy/group_average": 0.26371826371826373, "commonsenseqa/accuracy/seq_average": 0.26371826371826373}
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_sigmoid_standard_lb_v2/export/result-model-90000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.6081475151909723, "val/accuracy": 0.4784361824156746, "val/perplexity": 13.573882136239984, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.986832328464674, "lambada/accuracy/total": 0.2542701863354037, "lambada/accuracy/openai_last_token": 0.7618400621118012, "lambada/perplexity": 12.343393051930292, "lambada/lm_loss": 3.171699785447292, "lambada/lm_perplexity": 23.847986384501535, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.36635318437553915, "mean_loss": 2.797489921827823, "blimp/accuracy/passive_2": 0.902, "blimp/accuracy/determiner_noun_agreement_2": 0.987, "blimp/accuracy/ellipsis_n_bar_1": 0.79, "blimp/accuracy/tough_vs_raising_2": 0.888, "blimp/accuracy/tough_vs_raising_1": 0.57, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.911, "blimp/accuracy/principle_A_reconstruction": 0.417, "blimp/accuracy/wh_vs_that_with_gap": 0.476, "blimp/accuracy/principle_A_domain_2": 0.785, "blimp/accuracy/determiner_noun_agreement_1": 0.991, "blimp/accuracy/ellipsis_n_bar_2": 0.906, "blimp/accuracy/principle_A_domain_3": 0.554, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.936, "blimp/accuracy/animate_subject_trans": 0.895, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.893, "blimp/accuracy/distractor_agreement_relative_clause": 0.649, "blimp/accuracy/transitive": 0.882, "blimp/accuracy/sentential_subject_island": 0.304, "blimp/accuracy/adjunct_island": 0.818, "blimp/accuracy/intransitive": 0.796, "blimp/accuracy/existential_there_subject_raising": 0.885, "blimp/accuracy/irregular_past_participle_adjectives": 0.976, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.374, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.193, "blimp/accuracy/only_npi_scope": 0.724, "blimp/accuracy/superlative_quantifiers_2": 0.62, "blimp/accuracy/passive_1": 0.896, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.908, "blimp/accuracy/inchoative": 0.656, "blimp/accuracy/anaphor_gender_agreement": 0.961, "blimp/accuracy/principle_A_c_command": 0.624, "blimp/accuracy/only_npi_licensor_present": 0.492, "blimp/accuracy/expletive_it_object_raising": 0.768, "blimp/accuracy/left_branch_island_simple_question": 0.356, "blimp/accuracy/wh_questions_subject_gap": 0.913, "blimp/accuracy/existential_there_quantifiers_2": 0.439, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.955, "blimp/accuracy/sentential_negation_npi_scope": 0.649, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.791, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.882, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.843, "blimp/accuracy/principle_A_case_2": 0.968, "blimp/accuracy/distractor_agreement_relational_noun": 0.84, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.994, "blimp/accuracy/superlative_quantifiers_1": 0.621, "blimp/accuracy/wh_island": 0.764, "blimp/accuracy/principle_A_domain_1": 0.993, "blimp/accuracy/complex_NP_island": 0.558, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.968, "blimp/accuracy/irregular_past_participle_verbs": 0.867, "blimp/accuracy/drop_argument": 0.763, "blimp/accuracy/wh_questions_object_gap": 0.776, "blimp/accuracy/animate_subject_passive": 0.779, "blimp/accuracy/existential_there_quantifiers_1": 0.971, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.879, "blimp/accuracy/npi_present_2": 0.557, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.939, "blimp/accuracy/anaphor_number_agreement": 0.986, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.966, "blimp/accuracy/existential_there_object_raising": 0.805, "blimp/accuracy/matrix_question_npi_licensor_present": 0.223, "blimp/accuracy/npi_present_1": 0.554, "blimp/accuracy/wh_vs_that_no_gap": 0.968, "blimp/accuracy/left_branch_island_echo_question": 0.433, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.98, "blimp/accuracy/causative": 0.728, "blimp/accuracy/group_average": 0.7632089552238808, "blimp/accuracy/seq_average": 0.7632089552238805, "cbt/accuracy/NE": 0.7700320512820513, "cbt/accuracy/V": 0.9112, "cbt/accuracy/CN": 0.8216, "cbt/accuracy/P": 0.884, "cbt/accuracy/group_average": 0.8467080128205128, "cbt/accuracy/seq_average": 0.8467386954781913, "hellaswag/accuracy/val": 0.2891854212308305, "hellaswag/accuracy/group_average": 0.2891854212308305, "hellaswag/accuracy/seq_average": 0.2891854212308305, "piqa/accuracy/val": 0.5892274211099021, "piqa/accuracy/group_average": 0.5892274211099021, "piqa/accuracy/seq_average": 0.5892274211099021, "ai2arc/accuracy/ARC-Easy": 0.346723044397463, "ai2arc/accuracy/ARC-Challenge": 0.2223175965665236, "ai2arc/accuracy/group_average": 0.28452032048199327, "ai2arc/accuracy/seq_average": 0.3056657223796034, "mmlu/accuracy/MMLU": 0.2639256346085091, "mmlu/accuracy/group_average": 0.2639256346085091, "mmlu/accuracy/seq_average": 0.2639256346085091, "openbookqa/accuracy/test": 0.278, "openbookqa/accuracy/group_average": 0.278, "openbookqa/accuracy/seq_average": 0.278, "race/accuracy/test/high": 0.274442538593482, "race/accuracy/test/middle": 0.3279944289693593, "race/accuracy/group_average": 0.3012184837814207, "race/accuracy/seq_average": 0.29002837454398056, "siqa/accuracy/dev": 0.3592630501535312, "siqa/accuracy/group_average": 0.3592630501535312, "siqa/accuracy/seq_average": 0.3592630501535312, "winogrande/accuracy/dev": 0.5169692186266772, "winogrande/accuracy/group_average": 0.5169692186266772, "winogrande/accuracy/seq_average": 0.5169692186266772, "commonsenseqa/accuracy/dev_rand_split": 0.2628992628992629, "commonsenseqa/accuracy/group_average": 0.2628992628992629, "commonsenseqa/accuracy/seq_average": 0.2628992628992629}