Upload folder using huggingface_hub

#4192
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_sig/export/ol/result-model-120000.pth.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Logger: syncing
2
+ Logger process terminating...
3
+ {"val/loss": 2.425018310546875, "val/accuracy": 0.5030798533606151, "val/perplexity": 11.30243637117585, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.563258579799107, "lambada/accuracy/total": 0.3511257763975155, "lambada/accuracy/openai_last_token": 0.7936723602484472, "lambada/perplexity": 7.55521450469618, "lambada/lm_loss": 3.006063742632469, "lambada/lm_perplexity": 20.207700459527782, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4271028148790653, "mean_loss": 2.4941384451729913, "blimp/accuracy/passive_2": 0.915, "blimp/accuracy/determiner_noun_agreement_2": 0.982, "blimp/accuracy/ellipsis_n_bar_1": 0.837, "blimp/accuracy/tough_vs_raising_2": 0.914, "blimp/accuracy/tough_vs_raising_1": 0.604, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.916, "blimp/accuracy/principle_A_reconstruction": 0.444, "blimp/accuracy/wh_vs_that_with_gap": 0.551, "blimp/accuracy/principle_A_domain_2": 0.89, "blimp/accuracy/determiner_noun_agreement_1": 0.992, "blimp/accuracy/ellipsis_n_bar_2": 0.921, "blimp/accuracy/principle_A_domain_3": 0.59, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.933, "blimp/accuracy/animate_subject_trans": 0.907, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.911, "blimp/accuracy/distractor_agreement_relative_clause": 0.644, "blimp/accuracy/transitive": 0.858, "blimp/accuracy/sentential_subject_island": 0.323, "blimp/accuracy/adjunct_island": 0.899, "blimp/accuracy/intransitive": 0.769, "blimp/accuracy/existential_there_subject_raising": 0.868, "blimp/accuracy/irregular_past_participle_adjectives": 0.906, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.578, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.336, "blimp/accuracy/only_npi_scope": 0.66, "blimp/accuracy/superlative_quantifiers_2": 0.646, "blimp/accuracy/passive_1": 0.897, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.923, "blimp/accuracy/inchoative": 0.624, "blimp/accuracy/anaphor_gender_agreement": 0.952, "blimp/accuracy/principle_A_c_command": 0.678, "blimp/accuracy/only_npi_licensor_present": 0.465, "blimp/accuracy/expletive_it_object_raising": 0.782, "blimp/accuracy/left_branch_island_simple_question": 0.69, "blimp/accuracy/wh_questions_subject_gap": 0.942, "blimp/accuracy/existential_there_quantifiers_2": 0.441, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.944, "blimp/accuracy/sentential_negation_npi_scope": 0.644, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.804, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.902, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.924, "blimp/accuracy/principle_A_case_2": 0.925, "blimp/accuracy/distractor_agreement_relational_noun": 0.833, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.988, "blimp/accuracy/superlative_quantifiers_1": 0.546, "blimp/accuracy/wh_island": 0.78, "blimp/accuracy/principle_A_domain_1": 0.991, "blimp/accuracy/complex_NP_island": 0.621, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.978, "blimp/accuracy/irregular_past_participle_verbs": 0.878, "blimp/accuracy/drop_argument": 0.727, "blimp/accuracy/wh_questions_object_gap": 0.825, "blimp/accuracy/animate_subject_passive": 0.796, "blimp/accuracy/existential_there_quantifiers_1": 0.968, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.906, "blimp/accuracy/npi_present_2": 0.627, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.943, "blimp/accuracy/anaphor_number_agreement": 0.991, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.971, "blimp/accuracy/existential_there_object_raising": 0.853, "blimp/accuracy/matrix_question_npi_licensor_present": 0.32, "blimp/accuracy/npi_present_1": 0.595, "blimp/accuracy/wh_vs_that_no_gap": 0.985, "blimp/accuracy/left_branch_island_echo_question": 0.442, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.974, "blimp/accuracy/causative": 0.716, "blimp/accuracy/group_average": 0.7848507462686566, "blimp/accuracy/seq_average": 0.7848507462686567, "cbt/accuracy/NE": 0.7948717948717948, "cbt/accuracy/V": 0.924, "cbt/accuracy/CN": 0.8672, "cbt/accuracy/P": 0.9128, "cbt/accuracy/group_average": 0.8747179487179486, "cbt/accuracy/seq_average": 0.874749899959984, "hellaswag/accuracy/val": 0.3301135232025493, "hellaswag/accuracy/group_average": 0.3301135232025493, "hellaswag/accuracy/seq_average": 0.3301135232025493, "piqa/accuracy/val": 0.6196953210010882, "piqa/accuracy/group_average": 0.6196953210010882, "piqa/accuracy/seq_average": 0.6196953210010882, "ai2arc/accuracy/ARC-Easy": 0.3674418604651163, "ai2arc/accuracy/ARC-Challenge": 0.22660944206008585, "ai2arc/accuracy/group_average": 0.29702565126260105, "ai2arc/accuracy/seq_average": 0.32096317280453257}
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_sig/export/ol/result-model-160000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.387673756432912, "val/accuracy": 0.507689460875496, "val/perplexity": 10.888136003769251, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.426500024262422, "lambada/accuracy/total": 0.3220108695652174, "lambada/accuracy/openai_last_token": 0.7892080745341615, "lambada/perplexity": 7.855458924510323, "lambada/lm_loss": 2.974261046844193, "lambada/lm_perplexity": 19.575152782831637, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4148501652203567, "mean_loss": 2.407086890347667, "blimp/accuracy/passive_2": 0.89, "blimp/accuracy/determiner_noun_agreement_2": 0.986, "blimp/accuracy/ellipsis_n_bar_1": 0.856, "blimp/accuracy/tough_vs_raising_2": 0.867, "blimp/accuracy/tough_vs_raising_1": 0.575, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.903, "blimp/accuracy/principle_A_reconstruction": 0.417, "blimp/accuracy/wh_vs_that_with_gap": 0.462, "blimp/accuracy/principle_A_domain_2": 0.907, "blimp/accuracy/determiner_noun_agreement_1": 0.992, "blimp/accuracy/ellipsis_n_bar_2": 0.915, "blimp/accuracy/principle_A_domain_3": 0.61, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.939, "blimp/accuracy/animate_subject_trans": 0.91, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.897, "blimp/accuracy/distractor_agreement_relative_clause": 0.608, "blimp/accuracy/transitive": 0.88, "blimp/accuracy/sentential_subject_island": 0.374, "blimp/accuracy/adjunct_island": 0.897, "blimp/accuracy/intransitive": 0.755, "blimp/accuracy/existential_there_subject_raising": 0.882, "blimp/accuracy/irregular_past_participle_adjectives": 0.993, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.699, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.304, "blimp/accuracy/only_npi_scope": 0.772, "blimp/accuracy/superlative_quantifiers_2": 0.763, "blimp/accuracy/passive_1": 0.888, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.927, "blimp/accuracy/inchoative": 0.622, "blimp/accuracy/anaphor_gender_agreement": 0.951, "blimp/accuracy/principle_A_c_command": 0.594, "blimp/accuracy/only_npi_licensor_present": 0.593, "blimp/accuracy/expletive_it_object_raising": 0.779, "blimp/accuracy/left_branch_island_simple_question": 0.802, "blimp/accuracy/wh_questions_subject_gap": 0.929, "blimp/accuracy/existential_there_quantifiers_2": 0.552, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.949, "blimp/accuracy/sentential_negation_npi_scope": 0.645, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.828, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.888, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.908, "blimp/accuracy/principle_A_case_2": 0.947, "blimp/accuracy/distractor_agreement_relational_noun": 0.851, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.993, "blimp/accuracy/superlative_quantifiers_1": 0.636, "blimp/accuracy/wh_island": 0.856, "blimp/accuracy/principle_A_domain_1": 0.988, "blimp/accuracy/complex_NP_island": 0.635, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.977, "blimp/accuracy/irregular_past_participle_verbs": 0.889, "blimp/accuracy/drop_argument": 0.708, "blimp/accuracy/wh_questions_object_gap": 0.841, "blimp/accuracy/animate_subject_passive": 0.781, "blimp/accuracy/existential_there_quantifiers_1": 0.969, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.906, "blimp/accuracy/npi_present_2": 0.553, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.967, "blimp/accuracy/anaphor_number_agreement": 0.988, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.958, "blimp/accuracy/existential_there_object_raising": 0.841, "blimp/accuracy/matrix_question_npi_licensor_present": 0.342, "blimp/accuracy/npi_present_1": 0.522, "blimp/accuracy/wh_vs_that_no_gap": 0.979, "blimp/accuracy/left_branch_island_echo_question": 0.471, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.964, "blimp/accuracy/causative": 0.747, "blimp/accuracy/group_average": 0.7942835820895521, "blimp/accuracy/seq_average": 0.7942835820895522, "cbt/accuracy/NE": 0.8048878205128205, "cbt/accuracy/V": 0.9248, "cbt/accuracy/CN": 0.8784, "cbt/accuracy/P": 0.9168, "cbt/accuracy/group_average": 0.8812219551282051, "cbt/accuracy/seq_average": 0.8812525010004002, "hellaswag/accuracy/val": 0.3419637522405895, "hellaswag/accuracy/group_average": 0.3419637522405895, "hellaswag/accuracy/seq_average": 0.3419637522405895, "piqa/accuracy/val": 0.6218715995647442, "piqa/accuracy/group_average": 0.6218715995647442, "piqa/accuracy/seq_average": 0.6218715995647442, "ai2arc/accuracy/ARC-Easy": 0.3691331923890063, "ai2arc/accuracy/ARC-Challenge": 0.22489270386266094, "ai2arc/accuracy/group_average": 0.2970129481258336, "ai2arc/accuracy/seq_average": 0.32152974504249293}
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_sig/export/ol/result-model-180000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.3662293449280756, "val/accuracy": 0.5103691948784722, "val/perplexity": 10.657132059050621, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.4370655509996118, "lambada/accuracy/total": 0.3423913043478261, "lambada/accuracy/openai_last_token": 0.7928959627329193, "lambada/perplexity": 7.478640994082503, "lambada/lm_loss": 2.9656701336579623, "lambada/lm_perplexity": 19.407704640750723, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.42638024961314913, "mean_loss": 2.4016474479638434, "blimp/accuracy/passive_2": 0.884, "blimp/accuracy/determiner_noun_agreement_2": 0.983, "blimp/accuracy/ellipsis_n_bar_1": 0.831, "blimp/accuracy/tough_vs_raising_2": 0.886, "blimp/accuracy/tough_vs_raising_1": 0.618, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.905, "blimp/accuracy/principle_A_reconstruction": 0.39, "blimp/accuracy/wh_vs_that_with_gap": 0.436, "blimp/accuracy/principle_A_domain_2": 0.912, "blimp/accuracy/determiner_noun_agreement_1": 0.987, "blimp/accuracy/ellipsis_n_bar_2": 0.893, "blimp/accuracy/principle_A_domain_3": 0.59, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.936, "blimp/accuracy/animate_subject_trans": 0.909, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.907, "blimp/accuracy/distractor_agreement_relative_clause": 0.666, "blimp/accuracy/transitive": 0.88, "blimp/accuracy/sentential_subject_island": 0.34, "blimp/accuracy/adjunct_island": 0.87, "blimp/accuracy/intransitive": 0.751, "blimp/accuracy/existential_there_subject_raising": 0.883, "blimp/accuracy/irregular_past_participle_adjectives": 0.939, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.642, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.322, "blimp/accuracy/only_npi_scope": 0.667, "blimp/accuracy/superlative_quantifiers_2": 0.782, "blimp/accuracy/passive_1": 0.896, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.934, "blimp/accuracy/inchoative": 0.606, "blimp/accuracy/anaphor_gender_agreement": 0.943, "blimp/accuracy/principle_A_c_command": 0.681, "blimp/accuracy/only_npi_licensor_present": 0.681, "blimp/accuracy/expletive_it_object_raising": 0.779, "blimp/accuracy/left_branch_island_simple_question": 0.77, "blimp/accuracy/wh_questions_subject_gap": 0.945, "blimp/accuracy/existential_there_quantifiers_2": 0.441, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.935, "blimp/accuracy/sentential_negation_npi_scope": 0.662, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.81, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.929, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.891, "blimp/accuracy/principle_A_case_2": 0.915, "blimp/accuracy/distractor_agreement_relational_noun": 0.822, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.995, "blimp/accuracy/superlative_quantifiers_1": 0.585, "blimp/accuracy/wh_island": 0.77, "blimp/accuracy/principle_A_domain_1": 0.993, "blimp/accuracy/complex_NP_island": 0.62, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.982, "blimp/accuracy/irregular_past_participle_verbs": 0.875, "blimp/accuracy/drop_argument": 0.725, "blimp/accuracy/wh_questions_object_gap": 0.851, "blimp/accuracy/animate_subject_passive": 0.79, "blimp/accuracy/existential_there_quantifiers_1": 0.984, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.881, "blimp/accuracy/npi_present_2": 0.517, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.938, "blimp/accuracy/anaphor_number_agreement": 0.984, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.96, "blimp/accuracy/existential_there_object_raising": 0.841, "blimp/accuracy/matrix_question_npi_licensor_present": 0.375, "blimp/accuracy/npi_present_1": 0.531, "blimp/accuracy/wh_vs_that_no_gap": 0.98, "blimp/accuracy/left_branch_island_echo_question": 0.48, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.979, "blimp/accuracy/causative": 0.741, "blimp/accuracy/group_average": 0.7884477611940299, "blimp/accuracy/seq_average": 0.7884477611940298, "cbt/accuracy/NE": 0.7988782051282052, "cbt/accuracy/V": 0.9292, "cbt/accuracy/CN": 0.8668, "cbt/accuracy/P": 0.916, "cbt/accuracy/group_average": 0.8777195512820513, "cbt/accuracy/seq_average": 0.8777511004401761, "hellaswag/accuracy/val": 0.35132443736307506, "hellaswag/accuracy/group_average": 0.35132443736307506, "hellaswag/accuracy/seq_average": 0.35132443736307506, "piqa/accuracy/val": 0.6322089227421109, "piqa/accuracy/group_average": 0.6322089227421109, "piqa/accuracy/seq_average": 0.6322089227421109, "ai2arc/accuracy/ARC-Easy": 0.37209302325581395, "ai2arc/accuracy/ARC-Challenge": 0.22660944206008585, "ai2arc/accuracy/group_average": 0.2993512326579499, "ai2arc/accuracy/seq_average": 0.3240793201133145}
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_sig/export/ol/result-model-200000.pth.json ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+
2
+ {"val/loss": 2.351534888857887, "val/accuracy": 0.5128057570684523, "val/perplexity": 10.501676266580374, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.3900749253930513, "lambada/accuracy/total": 0.3499611801242236, "lambada/accuracy/openai_last_token": 0.7944487577639752, "lambada/perplexity": 7.482863901451054, "lambada/lm_loss": 2.956547542968971, "lambada/lm_perplexity": 19.231461215685417, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.43138346859633797, "mean_loss": 2.370804907125469, "blimp/accuracy/passive_2": 0.903, "blimp/accuracy/determiner_noun_agreement_2": 0.983, "blimp/accuracy/ellipsis_n_bar_1": 0.819, "blimp/accuracy/tough_vs_raising_2": 0.868, "blimp/accuracy/tough_vs_raising_1": 0.592, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.925, "blimp/accuracy/principle_A_reconstruction": 0.481, "blimp/accuracy/wh_vs_that_with_gap": 0.489, "blimp/accuracy/principle_A_domain_2": 0.912, "blimp/accuracy/determiner_noun_agreement_1": 0.992, "blimp/accuracy/ellipsis_n_bar_2": 0.912, "blimp/accuracy/principle_A_domain_3": 0.629, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.948, "blimp/accuracy/animate_subject_trans": 0.916, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.919, "blimp/accuracy/distractor_agreement_relative_clause": 0.675, "blimp/accuracy/transitive": 0.878, "blimp/accuracy/sentential_subject_island": 0.39, "blimp/accuracy/adjunct_island": 0.891, "blimp/accuracy/intransitive": 0.757, "blimp/accuracy/existential_there_subject_raising": 0.886, "blimp/accuracy/irregular_past_participle_adjectives": 0.975, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.684, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.336, "blimp/accuracy/only_npi_scope": 0.708, "blimp/accuracy/superlative_quantifiers_2": 0.735, "blimp/accuracy/passive_1": 0.899, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.938, "blimp/accuracy/inchoative": 0.628, "blimp/accuracy/anaphor_gender_agreement": 0.974, "blimp/accuracy/principle_A_c_command": 0.652, "blimp/accuracy/only_npi_licensor_present": 0.81, "blimp/accuracy/expletive_it_object_raising": 0.785, "blimp/accuracy/left_branch_island_simple_question": 0.803, "blimp/accuracy/wh_questions_subject_gap": 0.928, "blimp/accuracy/existential_there_quantifiers_2": 0.465, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.937, "blimp/accuracy/sentential_negation_npi_scope": 0.65, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.843, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.873, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.91, "blimp/accuracy/principle_A_case_2": 0.934, "blimp/accuracy/distractor_agreement_relational_noun": 0.845, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.992, "blimp/accuracy/superlative_quantifiers_1": 0.55, "blimp/accuracy/wh_island": 0.81, "blimp/accuracy/principle_A_domain_1": 0.994, "blimp/accuracy/complex_NP_island": 0.643, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.971, "blimp/accuracy/irregular_past_participle_verbs": 0.908, "blimp/accuracy/drop_argument": 0.734, "blimp/accuracy/wh_questions_object_gap": 0.823, "blimp/accuracy/animate_subject_passive": 0.8, "blimp/accuracy/existential_there_quantifiers_1": 0.976, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.888, "blimp/accuracy/npi_present_2": 0.562, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.95, "blimp/accuracy/anaphor_number_agreement": 0.989, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.963, "blimp/accuracy/existential_there_object_raising": 0.862, "blimp/accuracy/matrix_question_npi_licensor_present": 0.369, "blimp/accuracy/npi_present_1": 0.581, "blimp/accuracy/wh_vs_that_no_gap": 0.972, "blimp/accuracy/left_branch_island_echo_question": 0.6, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.964, "blimp/accuracy/causative": 0.757, "blimp/accuracy/group_average": 0.8020149253731341, "blimp/accuracy/seq_average": 0.8020149253731343, "cbt/accuracy/NE": 0.813301282051282, "cbt/accuracy/V": 0.9344, "cbt/accuracy/CN": 0.8776, "cbt/accuracy/P": 0.9232, "cbt/accuracy/group_average": 0.8871253205128206, "cbt/accuracy/seq_average": 0.8871548619447779, "hellaswag/accuracy/val": 0.35152360087631945, "hellaswag/accuracy/group_average": 0.35152360087631945, "hellaswag/accuracy/seq_average": 0.35152360087631945, "piqa/accuracy/val": 0.6365614798694232, "piqa/accuracy/group_average": 0.6365614798694232, "piqa/accuracy/seq_average": 0.6365614798694232, "ai2arc/accuracy/ARC-Easy": 0.37167019027484144, "ai2arc/accuracy/ARC-Challenge": 0.23090128755364808, "ai2arc/accuracy/group_average": 0.30128573891424476, "ai2arc/accuracy/seq_average": 0.32521246458923514, "mmlu/accuracy/MMLU": 0.26335359313550233, "mmlu/accuracy/group_average": 0.26335359313550233, "mmlu/accuracy/seq_average": 0.26335359313550233, "openbookqa/accuracy/test": 0.27, "openbookqa/accuracy/group_average": 0.27, "openbookqa/accuracy/seq_average": 0.27, "race/accuracy/test/high": 0.2890222984562607, "race/accuracy/test/middle": 0.3607242339832869, "race/accuracy/group_average": 0.3248732662197738, "race/accuracy/seq_average": 0.30989055533036075, "siqa/accuracy/dev": 0.3694984646878199, "siqa/accuracy/group_average": 0.3694984646878199, "siqa/accuracy/seq_average": 0.3694984646878199, "winogrande/accuracy/dev": 0.505130228887135, "winogrande/accuracy/group_average": 0.505130228887135, "winogrande/accuracy/seq_average": 0.505130228887135, "commonsenseqa/accuracy/dev_rand_split": 0.2784602784602785, "commonsenseqa/accuracy/group_average": 0.2784602784602785, "commonsenseqa/accuracy/seq_average": 0.2784602784602785}
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_sig/export/ol/result-model-205000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.3490779816158236, "val/accuracy": 0.5128919813368056, "val/perplexity": 10.475906292288581, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.3582519152149652, "lambada/accuracy/total": 0.35384316770186336, "lambada/accuracy/openai_last_token": 0.796777950310559, "lambada/perplexity": 7.179454384541888, "lambada/lm_loss": 2.9343860198674525, "lambada/lm_perplexity": 18.809950653055125, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4333675745193345, "mean_loss": 2.3536649484153944, "blimp/accuracy/passive_2": 0.896, "blimp/accuracy/determiner_noun_agreement_2": 0.979, "blimp/accuracy/ellipsis_n_bar_1": 0.837, "blimp/accuracy/tough_vs_raising_2": 0.875, "blimp/accuracy/tough_vs_raising_1": 0.57, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.928, "blimp/accuracy/principle_A_reconstruction": 0.451, "blimp/accuracy/wh_vs_that_with_gap": 0.45, "blimp/accuracy/principle_A_domain_2": 0.903, "blimp/accuracy/determiner_noun_agreement_1": 0.99, "blimp/accuracy/ellipsis_n_bar_2": 0.917, "blimp/accuracy/principle_A_domain_3": 0.653, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.937, "blimp/accuracy/animate_subject_trans": 0.908, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.93, "blimp/accuracy/distractor_agreement_relative_clause": 0.658, "blimp/accuracy/transitive": 0.889, "blimp/accuracy/sentential_subject_island": 0.301, "blimp/accuracy/adjunct_island": 0.888, "blimp/accuracy/intransitive": 0.769, "blimp/accuracy/existential_there_subject_raising": 0.892, "blimp/accuracy/irregular_past_participle_adjectives": 0.93, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.694, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.279, "blimp/accuracy/only_npi_scope": 0.761, "blimp/accuracy/superlative_quantifiers_2": 0.782, "blimp/accuracy/passive_1": 0.88, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.924, "blimp/accuracy/inchoative": 0.624, "blimp/accuracy/anaphor_gender_agreement": 0.979, "blimp/accuracy/principle_A_c_command": 0.613, "blimp/accuracy/only_npi_licensor_present": 0.681, "blimp/accuracy/expletive_it_object_raising": 0.796, "blimp/accuracy/left_branch_island_simple_question": 0.789, "blimp/accuracy/wh_questions_subject_gap": 0.924, "blimp/accuracy/existential_there_quantifiers_2": 0.53, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.937, "blimp/accuracy/sentential_negation_npi_scope": 0.673, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.853, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.894, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.915, "blimp/accuracy/principle_A_case_2": 0.915, "blimp/accuracy/distractor_agreement_relational_noun": 0.811, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.99, "blimp/accuracy/superlative_quantifiers_1": 0.642, "blimp/accuracy/wh_island": 0.848, "blimp/accuracy/principle_A_domain_1": 0.994, "blimp/accuracy/complex_NP_island": 0.612, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.98, "blimp/accuracy/irregular_past_participle_verbs": 0.922, "blimp/accuracy/drop_argument": 0.727, "blimp/accuracy/wh_questions_object_gap": 0.796, "blimp/accuracy/animate_subject_passive": 0.797, "blimp/accuracy/existential_there_quantifiers_1": 0.967, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.882, "blimp/accuracy/npi_present_2": 0.612, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.952, "blimp/accuracy/anaphor_number_agreement": 0.985, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.97, "blimp/accuracy/existential_there_object_raising": 0.84, "blimp/accuracy/matrix_question_npi_licensor_present": 0.411, "blimp/accuracy/npi_present_1": 0.614, "blimp/accuracy/wh_vs_that_no_gap": 0.977, "blimp/accuracy/left_branch_island_echo_question": 0.547, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.973, "blimp/accuracy/causative": 0.746, "blimp/accuracy/group_average": 0.7998358208955221, "blimp/accuracy/seq_average": 0.7998358208955224, "cbt/accuracy/NE": 0.8169070512820513, "cbt/accuracy/V": 0.9324, "cbt/accuracy/CN": 0.8724, "cbt/accuracy/P": 0.9152, "cbt/accuracy/group_average": 0.8842267628205128, "cbt/accuracy/seq_average": 0.8842537014805922, "hellaswag/accuracy/val": 0.3480382393945429, "hellaswag/accuracy/group_average": 0.3480382393945429, "hellaswag/accuracy/seq_average": 0.3480382393945429, "piqa/accuracy/val": 0.6327529923830251, "piqa/accuracy/group_average": 0.6327529923830251, "piqa/accuracy/seq_average": 0.6327529923830251, "ai2arc/accuracy/ARC-Easy": 0.38097251585623676, "ai2arc/accuracy/ARC-Challenge": 0.22832618025751072, "ai2arc/accuracy/group_average": 0.3046493480568737, "ai2arc/accuracy/seq_average": 0.3305949008498584}
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_sig/export/ol/result-model-220000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.3369310167100696, "val/accuracy": 0.5158100430927579, "val/perplexity": 10.349425559878181, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.3790061429420613, "lambada/accuracy/total": 0.34937888198757766, "lambada/accuracy/openai_last_token": 0.7983307453416149, "lambada/perplexity": 7.157079401339057, "lambada/lm_loss": 2.935198203087187, "lambada/lm_perplexity": 18.82523398493229, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.43259446254016776, "mean_loss": 2.3579685798260654, "blimp/accuracy/passive_2": 0.896, "blimp/accuracy/determiner_noun_agreement_2": 0.984, "blimp/accuracy/ellipsis_n_bar_1": 0.859, "blimp/accuracy/tough_vs_raising_2": 0.865, "blimp/accuracy/tough_vs_raising_1": 0.619, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.917, "blimp/accuracy/principle_A_reconstruction": 0.365, "blimp/accuracy/wh_vs_that_with_gap": 0.484, "blimp/accuracy/principle_A_domain_2": 0.901, "blimp/accuracy/determiner_noun_agreement_1": 0.993, "blimp/accuracy/ellipsis_n_bar_2": 0.925, "blimp/accuracy/principle_A_domain_3": 0.629, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.929, "blimp/accuracy/animate_subject_trans": 0.92, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.908, "blimp/accuracy/distractor_agreement_relative_clause": 0.658, "blimp/accuracy/transitive": 0.884, "blimp/accuracy/sentential_subject_island": 0.338, "blimp/accuracy/adjunct_island": 0.866, "blimp/accuracy/intransitive": 0.761, "blimp/accuracy/existential_there_subject_raising": 0.908, "blimp/accuracy/irregular_past_participle_adjectives": 0.964, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.542, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.353, "blimp/accuracy/only_npi_scope": 0.767, "blimp/accuracy/superlative_quantifiers_2": 0.676, "blimp/accuracy/passive_1": 0.885, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.921, "blimp/accuracy/inchoative": 0.604, "blimp/accuracy/anaphor_gender_agreement": 0.975, "blimp/accuracy/principle_A_c_command": 0.663, "blimp/accuracy/only_npi_licensor_present": 0.608, "blimp/accuracy/expletive_it_object_raising": 0.801, "blimp/accuracy/left_branch_island_simple_question": 0.687, "blimp/accuracy/wh_questions_subject_gap": 0.938, "blimp/accuracy/existential_there_quantifiers_2": 0.476, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.952, "blimp/accuracy/sentential_negation_npi_scope": 0.624, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.83, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.892, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.922, "blimp/accuracy/principle_A_case_2": 0.937, "blimp/accuracy/distractor_agreement_relational_noun": 0.805, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.977, "blimp/accuracy/superlative_quantifiers_1": 0.671, "blimp/accuracy/wh_island": 0.845, "blimp/accuracy/principle_A_domain_1": 0.996, "blimp/accuracy/complex_NP_island": 0.609, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.979, "blimp/accuracy/irregular_past_participle_verbs": 0.867, "blimp/accuracy/drop_argument": 0.729, "blimp/accuracy/wh_questions_object_gap": 0.849, "blimp/accuracy/animate_subject_passive": 0.78, "blimp/accuracy/existential_there_quantifiers_1": 0.964, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.877, "blimp/accuracy/npi_present_2": 0.572, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.947, "blimp/accuracy/anaphor_number_agreement": 0.984, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.97, "blimp/accuracy/existential_there_object_raising": 0.865, "blimp/accuracy/matrix_question_npi_licensor_present": 0.348, "blimp/accuracy/npi_present_1": 0.613, "blimp/accuracy/wh_vs_that_no_gap": 0.973, "blimp/accuracy/left_branch_island_echo_question": 0.534, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.966, "blimp/accuracy/causative": 0.74, "blimp/accuracy/group_average": 0.7923283582089551, "blimp/accuracy/seq_average": 0.7923283582089552, "cbt/accuracy/NE": 0.8145032051282052, "cbt/accuracy/V": 0.9328, "cbt/accuracy/CN": 0.878, "cbt/accuracy/P": 0.9152, "cbt/accuracy/group_average": 0.8851258012820513, "cbt/accuracy/seq_average": 0.8851540616246498, "hellaswag/accuracy/val": 0.3545110535749851, "hellaswag/accuracy/group_average": 0.3545110535749851, "hellaswag/accuracy/seq_average": 0.3545110535749851, "piqa/accuracy/val": 0.6273122959738846, "piqa/accuracy/group_average": 0.6273122959738846, "piqa/accuracy/seq_average": 0.6273122959738846, "ai2arc/accuracy/ARC-Easy": 0.3775898520084567, "ai2arc/accuracy/ARC-Challenge": 0.22660944206008585, "ai2arc/accuracy/group_average": 0.3020996470342713, "ai2arc/accuracy/seq_average": 0.3277620396600567}
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_sig/export/ol/result-model-40000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.5960448734344, "val/accuracy": 0.47812325613839285, "val/perplexity": 13.410592415133511, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.653609186966227, "lambada/accuracy/total": 0.28998447204968947, "lambada/accuracy/openai_last_token": 0.765333850931677, "lambada/perplexity": 10.336835237555952, "lambada/lm_loss": 3.1596724447526836, "lambada/lm_perplexity": 23.56287652102451, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3840538640940412, "mean_loss": 2.624827030200313, "blimp/accuracy/passive_2": 0.866, "blimp/accuracy/determiner_noun_agreement_2": 0.985, "blimp/accuracy/ellipsis_n_bar_1": 0.803, "blimp/accuracy/tough_vs_raising_2": 0.827, "blimp/accuracy/tough_vs_raising_1": 0.638, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.934, "blimp/accuracy/principle_A_reconstruction": 0.3, "blimp/accuracy/wh_vs_that_with_gap": 0.453, "blimp/accuracy/principle_A_domain_2": 0.86, "blimp/accuracy/determiner_noun_agreement_1": 0.989, "blimp/accuracy/ellipsis_n_bar_2": 0.914, "blimp/accuracy/principle_A_domain_3": 0.649, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.949, "blimp/accuracy/animate_subject_trans": 0.904, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.916, "blimp/accuracy/distractor_agreement_relative_clause": 0.612, "blimp/accuracy/transitive": 0.861, "blimp/accuracy/sentential_subject_island": 0.345, "blimp/accuracy/adjunct_island": 0.85, "blimp/accuracy/intransitive": 0.779, "blimp/accuracy/existential_there_subject_raising": 0.874, "blimp/accuracy/irregular_past_participle_adjectives": 0.904, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.418, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.233, "blimp/accuracy/only_npi_scope": 0.68, "blimp/accuracy/superlative_quantifiers_2": 0.583, "blimp/accuracy/passive_1": 0.869, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.93, "blimp/accuracy/inchoative": 0.594, "blimp/accuracy/anaphor_gender_agreement": 0.961, "blimp/accuracy/principle_A_c_command": 0.628, "blimp/accuracy/only_npi_licensor_present": 0.616, "blimp/accuracy/expletive_it_object_raising": 0.748, "blimp/accuracy/left_branch_island_simple_question": 0.513, "blimp/accuracy/wh_questions_subject_gap": 0.935, "blimp/accuracy/existential_there_quantifiers_2": 0.463, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.946, "blimp/accuracy/sentential_negation_npi_scope": 0.647, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.819, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.896, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.911, "blimp/accuracy/principle_A_case_2": 0.918, "blimp/accuracy/distractor_agreement_relational_noun": 0.836, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.997, "blimp/accuracy/superlative_quantifiers_1": 0.624, "blimp/accuracy/wh_island": 0.811, "blimp/accuracy/principle_A_domain_1": 0.976, "blimp/accuracy/complex_NP_island": 0.598, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.971, "blimp/accuracy/irregular_past_participle_verbs": 0.91, "blimp/accuracy/drop_argument": 0.77, "blimp/accuracy/wh_questions_object_gap": 0.814, "blimp/accuracy/animate_subject_passive": 0.789, "blimp/accuracy/existential_there_quantifiers_1": 0.972, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.888, "blimp/accuracy/npi_present_2": 0.555, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.919, "blimp/accuracy/anaphor_number_agreement": 0.966, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.957, "blimp/accuracy/existential_there_object_raising": 0.81, "blimp/accuracy/matrix_question_npi_licensor_present": 0.266, "blimp/accuracy/npi_present_1": 0.502, "blimp/accuracy/wh_vs_that_no_gap": 0.982, "blimp/accuracy/left_branch_island_echo_question": 0.472, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.976, "blimp/accuracy/causative": 0.7, "blimp/accuracy/group_average": 0.7698656716417911, "blimp/accuracy/seq_average": 0.7698656716417911, "cbt/accuracy/NE": 0.7688301282051282, "cbt/accuracy/V": 0.9136, "cbt/accuracy/CN": 0.8332, "cbt/accuracy/P": 0.8876, "cbt/accuracy/group_average": 0.850807532051282, "cbt/accuracy/seq_average": 0.8508403361344538, "hellaswag/accuracy/val": 0.3016331408086039, "hellaswag/accuracy/group_average": 0.3016331408086039, "hellaswag/accuracy/seq_average": 0.3016331408086039, "piqa/accuracy/val": 0.6017410228509249, "piqa/accuracy/group_average": 0.6017410228509249, "piqa/accuracy/seq_average": 0.6017410228509249, "ai2arc/accuracy/ARC-Easy": 0.34418604651162793, "ai2arc/accuracy/ARC-Challenge": 0.2240343347639485, "ai2arc/accuracy/group_average": 0.2841101906377882, "ai2arc/accuracy/seq_average": 0.3045325779036827}
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_sig/export/result-model-200000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.3515828450520835, "val/accuracy": 0.5130092075892857, "val/perplexity": 10.502179899082856, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.3596811235321233, "lambada/accuracy/total": 0.3454968944099379, "lambada/accuracy/openai_last_token": 0.7948369565217391, "lambada/perplexity": 7.550164124295158, "lambada/lm_loss": 2.956487303645998, "lambada/lm_perplexity": 19.230302760374638, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4292530509996118, "mean_loss": 2.3556319842921036, "blimp/accuracy/passive_2": 0.908, "blimp/accuracy/determiner_noun_agreement_2": 0.985, "blimp/accuracy/ellipsis_n_bar_1": 0.815, "blimp/accuracy/tough_vs_raising_2": 0.873, "blimp/accuracy/tough_vs_raising_1": 0.59, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.908, "blimp/accuracy/principle_A_reconstruction": 0.437, "blimp/accuracy/wh_vs_that_with_gap": 0.505, "blimp/accuracy/principle_A_domain_2": 0.9, "blimp/accuracy/determiner_noun_agreement_1": 0.993, "blimp/accuracy/ellipsis_n_bar_2": 0.902, "blimp/accuracy/principle_A_domain_3": 0.633, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.945, "blimp/accuracy/animate_subject_trans": 0.915, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.913, "blimp/accuracy/distractor_agreement_relative_clause": 0.674, "blimp/accuracy/transitive": 0.877, "blimp/accuracy/sentential_subject_island": 0.395, "blimp/accuracy/adjunct_island": 0.885, "blimp/accuracy/intransitive": 0.763, "blimp/accuracy/existential_there_subject_raising": 0.891, "blimp/accuracy/irregular_past_participle_adjectives": 0.956, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.67, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.36, "blimp/accuracy/only_npi_scope": 0.663, "blimp/accuracy/superlative_quantifiers_2": 0.736, "blimp/accuracy/passive_1": 0.903, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.932, "blimp/accuracy/inchoative": 0.622, "blimp/accuracy/anaphor_gender_agreement": 0.974, "blimp/accuracy/principle_A_c_command": 0.65, "blimp/accuracy/only_npi_licensor_present": 0.754, "blimp/accuracy/expletive_it_object_raising": 0.794, "blimp/accuracy/left_branch_island_simple_question": 0.764, "blimp/accuracy/wh_questions_subject_gap": 0.938, "blimp/accuracy/existential_there_quantifiers_2": 0.467, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.95, "blimp/accuracy/sentential_negation_npi_scope": 0.671, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.857, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.883, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.913, "blimp/accuracy/principle_A_case_2": 0.933, "blimp/accuracy/distractor_agreement_relational_noun": 0.84, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.994, "blimp/accuracy/superlative_quantifiers_1": 0.619, "blimp/accuracy/wh_island": 0.815, "blimp/accuracy/principle_A_domain_1": 0.993, "blimp/accuracy/complex_NP_island": 0.646, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.984, "blimp/accuracy/irregular_past_participle_verbs": 0.913, "blimp/accuracy/drop_argument": 0.736, "blimp/accuracy/wh_questions_object_gap": 0.836, "blimp/accuracy/animate_subject_passive": 0.792, "blimp/accuracy/existential_there_quantifiers_1": 0.969, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.898, "blimp/accuracy/npi_present_2": 0.568, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.962, "blimp/accuracy/anaphor_number_agreement": 0.987, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.968, "blimp/accuracy/existential_there_object_raising": 0.876, "blimp/accuracy/matrix_question_npi_licensor_present": 0.372, "blimp/accuracy/npi_present_1": 0.577, "blimp/accuracy/wh_vs_that_no_gap": 0.977, "blimp/accuracy/left_branch_island_echo_question": 0.599, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.967, "blimp/accuracy/causative": 0.747, "blimp/accuracy/group_average": 0.8019701492537312, "blimp/accuracy/seq_average": 0.8019701492537313, "cbt/accuracy/NE": 0.813301282051282, "cbt/accuracy/V": 0.9336, "cbt/accuracy/CN": 0.872, "cbt/accuracy/P": 0.9208, "cbt/accuracy/group_average": 0.8849253205128205, "cbt/accuracy/seq_average": 0.884953981592637, "hellaswag/accuracy/val": 0.35232025492929697, "hellaswag/accuracy/group_average": 0.35232025492929697, "hellaswag/accuracy/seq_average": 0.35232025492929697, "piqa/accuracy/val": 0.6278563656147987, "piqa/accuracy/group_average": 0.6278563656147987, "piqa/accuracy/seq_average": 0.6278563656147987, "ai2arc/accuracy/ARC-Easy": 0.3691331923890063, "ai2arc/accuracy/ARC-Challenge": 0.2240343347639485, "ai2arc/accuracy/group_average": 0.2965837635764774, "ai2arc/accuracy/seq_average": 0.3212464589235127, "mmlu/accuracy/MMLU": 0.2657847693957812, "mmlu/accuracy/group_average": 0.2657847693957812, "mmlu/accuracy/seq_average": 0.2657847693957812, "openbookqa/accuracy/test": 0.274, "openbookqa/accuracy/group_average": 0.274, "openbookqa/accuracy/seq_average": 0.274, "race/accuracy/test/high": 0.2887364208118925, "race/accuracy/test/middle": 0.3579387186629526, "race/accuracy/group_average": 0.32333756973742256, "race/accuracy/seq_average": 0.30887717875962706, "siqa/accuracy/dev": 0.3694984646878199, "siqa/accuracy/group_average": 0.3694984646878199, "siqa/accuracy/seq_average": 0.3694984646878199, "winogrande/accuracy/dev": 0.5011838989739542, "winogrande/accuracy/group_average": 0.5011838989739542, "winogrande/accuracy/seq_average": 0.5011838989739542, "commonsenseqa/accuracy/dev_rand_split": 0.2809172809172809, "commonsenseqa/accuracy/group_average": 0.2809172809172809, "commonsenseqa/accuracy/seq_average": 0.2809172809172809}