2b7e51846cf1b478ffc54f40f741f2090762ba4099f1fd4d79a8d9e246712cec
Browse files- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-100000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-120000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-140000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-160000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-180000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-20000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-200000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-220000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-240000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-260000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-280000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-300000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-320000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-340000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-360000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-380000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-40000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-400000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-60000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-80000.pth.json +1 -0
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-100000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.4356536865234375, "val/accuracy": 0.5009087456597222, "val/perplexity": 11.423283519769734, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.4433760554153725, "lambada/accuracy/total": 0.31191770186335405, "lambada/accuracy/openai_last_token": 0.7824145962732919, "lambada/perplexity": 8.00391465729314, "lambada/lm_loss": 3.027271912970417, "lambada/lm_perplexity": 20.640845686774384, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4064132237615381, "mean_loss": 2.439514870969405, "blimp/accuracy/passive_2": 0.895, "blimp/accuracy/determiner_noun_agreement_2": 0.987, "blimp/accuracy/ellipsis_n_bar_1": 0.84, "blimp/accuracy/tough_vs_raising_2": 0.869, "blimp/accuracy/tough_vs_raising_1": 0.616, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.929, "blimp/accuracy/principle_A_reconstruction": 0.392, "blimp/accuracy/wh_vs_that_with_gap": 0.509, "blimp/accuracy/principle_A_domain_2": 0.862, "blimp/accuracy/determiner_noun_agreement_1": 0.992, "blimp/accuracy/ellipsis_n_bar_2": 0.905, "blimp/accuracy/principle_A_domain_3": 0.585, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.929, "blimp/accuracy/animate_subject_trans": 0.904, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.875, "blimp/accuracy/distractor_agreement_relative_clause": 0.678, "blimp/accuracy/transitive": 0.861, "blimp/accuracy/sentential_subject_island": 0.334, "blimp/accuracy/adjunct_island": 0.828, "blimp/accuracy/intransitive": 0.792, "blimp/accuracy/existential_there_subject_raising": 0.872, "blimp/accuracy/irregular_past_participle_adjectives": 0.975, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.606, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.327, "blimp/accuracy/only_npi_scope": 0.702, "blimp/accuracy/superlative_quantifiers_2": 0.827, "blimp/accuracy/passive_1": 0.881, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.925, "blimp/accuracy/inchoative": 0.636, "blimp/accuracy/anaphor_gender_agreement": 0.971, "blimp/accuracy/principle_A_c_command": 0.647, "blimp/accuracy/only_npi_licensor_present": 0.457, "blimp/accuracy/expletive_it_object_raising": 0.828, "blimp/accuracy/left_branch_island_simple_question": 0.681, "blimp/accuracy/wh_questions_subject_gap": 0.947, "blimp/accuracy/existential_there_quantifiers_2": 0.588, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.952, "blimp/accuracy/sentential_negation_npi_scope": 0.717, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.827, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.905, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.914, "blimp/accuracy/principle_A_case_2": 0.966, "blimp/accuracy/distractor_agreement_relational_noun": 0.821, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.98, "blimp/accuracy/superlative_quantifiers_1": 0.745, "blimp/accuracy/wh_island": 0.731, "blimp/accuracy/principle_A_domain_1": 0.972, "blimp/accuracy/complex_NP_island": 0.544, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.978, "blimp/accuracy/irregular_past_participle_verbs": 0.903, "blimp/accuracy/drop_argument": 0.729, "blimp/accuracy/wh_questions_object_gap": 0.851, "blimp/accuracy/animate_subject_passive": 0.797, "blimp/accuracy/existential_there_quantifiers_1": 0.982, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.908, "blimp/accuracy/npi_present_2": 0.614, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.969, "blimp/accuracy/anaphor_number_agreement": 0.996, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.967, "blimp/accuracy/existential_there_object_raising": 0.838, "blimp/accuracy/matrix_question_npi_licensor_present": 0.26, "blimp/accuracy/npi_present_1": 0.595, "blimp/accuracy/wh_vs_that_no_gap": 0.975, "blimp/accuracy/left_branch_island_echo_question": 0.423, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.962, "blimp/accuracy/causative": 0.758, "blimp/accuracy/group_average": 0.7915074626865671, "blimp/accuracy/seq_average": 0.7915074626865671, "cbt/accuracy/NE": 0.7972756410256411, "cbt/accuracy/V": 0.9292, "cbt/accuracy/CN": 0.8624, "cbt/accuracy/P": 0.908, "cbt/accuracy/group_average": 0.8742189102564103, "cbt/accuracy/seq_average": 0.8742496998799519, "hellaswag/accuracy/val": 0.32812188807010556, "hellaswag/accuracy/group_average": 0.32812188807010556, "hellaswag/accuracy/seq_average": 0.32812188807010556, "piqa/accuracy/val": 0.6196953210010882, "piqa/accuracy/group_average": 0.6196953210010882, "piqa/accuracy/seq_average": 0.6196953210010882, "ai2arc/accuracy/ARC-Easy": 0.3602536997885835, "ai2arc/accuracy/ARC-Challenge": 0.2206008583690987, "ai2arc/accuracy/group_average": 0.29042727907884114, "ai2arc/accuracy/seq_average": 0.3141643059490085, "mmlu/accuracy/MMLU": 0.26285305684662136, "mmlu/accuracy/group_average": 0.26285305684662136, "mmlu/accuracy/seq_average": 0.26285305684662136, "openbookqa/accuracy/test": 0.298, "openbookqa/accuracy/group_average": 0.298, "openbookqa/accuracy/seq_average": 0.298, "race/accuracy/test/high": 0.2787307032590051, "race/accuracy/test/middle": 0.34818941504178275, "race/accuracy/group_average": 0.31346005915039393, "race/accuracy/seq_average": 0.298946088366437, "siqa/accuracy/dev": 0.3638689866939611, "siqa/accuracy/group_average": 0.3638689866939611, "siqa/accuracy/seq_average": 0.3638689866939611, "winogrande/accuracy/dev": 0.5074980268350434, "winogrande/accuracy/group_average": 0.5074980268350434, "winogrande/accuracy/seq_average": 0.5074980268350434, "commonsenseqa/accuracy/dev_rand_split": 0.26453726453726456, "commonsenseqa/accuracy/group_average": 0.26453726453726456, "commonsenseqa/accuracy/seq_average": 0.26453726453726456}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-120000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.411532931857639, "val/accuracy": 0.5051317971850199, "val/perplexity": 11.151041837230075, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.5710930676193713, "lambada/accuracy/total": 0.34006211180124224, "lambada/accuracy/openai_last_token": 0.7911490683229814, "lambada/perplexity": 7.553013350587308, "lambada/lm_loss": 2.9961420642287377, "lambada/lm_perplexity": 20.008197493008314, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.42259695449313106, "mean_loss": 2.491312999738505, "blimp/accuracy/passive_2": 0.911, "blimp/accuracy/determiner_noun_agreement_2": 0.981, "blimp/accuracy/ellipsis_n_bar_1": 0.82, "blimp/accuracy/tough_vs_raising_2": 0.878, "blimp/accuracy/tough_vs_raising_1": 0.624, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.921, "blimp/accuracy/principle_A_reconstruction": 0.406, "blimp/accuracy/wh_vs_that_with_gap": 0.506, "blimp/accuracy/principle_A_domain_2": 0.853, "blimp/accuracy/determiner_noun_agreement_1": 0.996, "blimp/accuracy/ellipsis_n_bar_2": 0.913, "blimp/accuracy/principle_A_domain_3": 0.566, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.921, "blimp/accuracy/animate_subject_trans": 0.905, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.883, "blimp/accuracy/distractor_agreement_relative_clause": 0.662, "blimp/accuracy/transitive": 0.866, "blimp/accuracy/sentential_subject_island": 0.349, "blimp/accuracy/adjunct_island": 0.849, "blimp/accuracy/intransitive": 0.775, "blimp/accuracy/existential_there_subject_raising": 0.862, "blimp/accuracy/irregular_past_participle_adjectives": 0.937, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.645, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.323, "blimp/accuracy/only_npi_scope": 0.709, "blimp/accuracy/superlative_quantifiers_2": 0.707, "blimp/accuracy/passive_1": 0.879, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.926, "blimp/accuracy/inchoative": 0.64, "blimp/accuracy/anaphor_gender_agreement": 0.962, "blimp/accuracy/principle_A_c_command": 0.619, "blimp/accuracy/only_npi_licensor_present": 0.701, "blimp/accuracy/expletive_it_object_raising": 0.801, "blimp/accuracy/left_branch_island_simple_question": 0.704, "blimp/accuracy/wh_questions_subject_gap": 0.943, "blimp/accuracy/existential_there_quantifiers_2": 0.503, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.936, "blimp/accuracy/sentential_negation_npi_scope": 0.706, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.832, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.913, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.915, "blimp/accuracy/principle_A_case_2": 0.949, "blimp/accuracy/distractor_agreement_relational_noun": 0.852, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.987, "blimp/accuracy/superlative_quantifiers_1": 0.843, "blimp/accuracy/wh_island": 0.776, "blimp/accuracy/principle_A_domain_1": 0.985, "blimp/accuracy/complex_NP_island": 0.551, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.974, "blimp/accuracy/irregular_past_participle_verbs": 0.947, "blimp/accuracy/drop_argument": 0.735, "blimp/accuracy/wh_questions_object_gap": 0.811, "blimp/accuracy/animate_subject_passive": 0.802, "blimp/accuracy/existential_there_quantifiers_1": 0.992, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.909, "blimp/accuracy/npi_present_2": 0.586, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.959, "blimp/accuracy/anaphor_number_agreement": 0.986, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.971, "blimp/accuracy/existential_there_object_raising": 0.874, "blimp/accuracy/matrix_question_npi_licensor_present": 0.234, "blimp/accuracy/npi_present_1": 0.598, "blimp/accuracy/wh_vs_that_no_gap": 0.981, "blimp/accuracy/left_branch_island_echo_question": 0.408, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.969, "blimp/accuracy/causative": 0.785, "blimp/accuracy/group_average": 0.7942089552238807, "blimp/accuracy/seq_average": 0.7942089552238806, "cbt/accuracy/NE": 0.8024839743589743, "cbt/accuracy/V": 0.9332, "cbt/accuracy/CN": 0.864, "cbt/accuracy/P": 0.9052, "cbt/accuracy/group_average": 0.8762209935897436, "cbt/accuracy/seq_average": 0.87625050020008, "hellaswag/accuracy/val": 0.3323043218482374, "hellaswag/accuracy/group_average": 0.3323043218482374, "hellaswag/accuracy/seq_average": 0.3323043218482374, "piqa/accuracy/val": 0.6262241566920566, "piqa/accuracy/group_average": 0.6262241566920566, "piqa/accuracy/seq_average": 0.6262241566920566, "ai2arc/accuracy/ARC-Easy": 0.36659619450317127, "ai2arc/accuracy/ARC-Challenge": 0.21802575107296138, "ai2arc/accuracy/group_average": 0.29231097278806634, "ai2arc/accuracy/seq_average": 0.3175637393767705, "mmlu/accuracy/MMLU": 0.26313907758312477, "mmlu/accuracy/group_average": 0.26313907758312477, "mmlu/accuracy/seq_average": 0.26313907758312477, "openbookqa/accuracy/test": 0.298, "openbookqa/accuracy/group_average": 0.298, "openbookqa/accuracy/seq_average": 0.298, "race/accuracy/test/high": 0.28416237850200116, "race/accuracy/test/middle": 0.34192200557103064, "race/accuracy/group_average": 0.3130421920365159, "race/accuracy/seq_average": 0.3009728415079043, "siqa/accuracy/dev": 0.3654042988741044, "siqa/accuracy/group_average": 0.3654042988741044, "siqa/accuracy/seq_average": 0.3654042988741044, "winogrande/accuracy/dev": 0.5035516969218626, "winogrande/accuracy/group_average": 0.5035516969218626, "winogrande/accuracy/seq_average": 0.5035516969218626, "commonsenseqa/accuracy/dev_rand_split": 0.26371826371826373, "commonsenseqa/accuracy/group_average": 0.26371826371826373, "commonsenseqa/accuracy/seq_average": 0.26371826371826373}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-140000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.390654790969122, "val/accuracy": 0.507262214781746, "val/perplexity": 10.920642340415057, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.3997605602193324, "lambada/accuracy/total": 0.33656832298136646, "lambada/accuracy/openai_last_token": 0.7897903726708074, "lambada/perplexity": 7.638020823055553, "lambada/lm_loss": 2.9876965111479414, "lambada/lm_perplexity": 19.8399287604656, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4219152688815563, "mean_loss": 2.395207675594227, "blimp/accuracy/passive_2": 0.911, "blimp/accuracy/determiner_noun_agreement_2": 0.986, "blimp/accuracy/ellipsis_n_bar_1": 0.836, "blimp/accuracy/tough_vs_raising_2": 0.905, "blimp/accuracy/tough_vs_raising_1": 0.626, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.934, "blimp/accuracy/principle_A_reconstruction": 0.479, "blimp/accuracy/wh_vs_that_with_gap": 0.554, "blimp/accuracy/principle_A_domain_2": 0.869, "blimp/accuracy/determiner_noun_agreement_1": 0.993, "blimp/accuracy/ellipsis_n_bar_2": 0.919, "blimp/accuracy/principle_A_domain_3": 0.576, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.933, "blimp/accuracy/animate_subject_trans": 0.916, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.909, "blimp/accuracy/distractor_agreement_relative_clause": 0.659, "blimp/accuracy/transitive": 0.884, "blimp/accuracy/sentential_subject_island": 0.354, "blimp/accuracy/adjunct_island": 0.891, "blimp/accuracy/intransitive": 0.784, "blimp/accuracy/existential_there_subject_raising": 0.89, "blimp/accuracy/irregular_past_participle_adjectives": 0.95, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.748, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.419, "blimp/accuracy/only_npi_scope": 0.764, "blimp/accuracy/superlative_quantifiers_2": 0.685, "blimp/accuracy/passive_1": 0.883, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.913, "blimp/accuracy/inchoative": 0.635, "blimp/accuracy/anaphor_gender_agreement": 0.965, "blimp/accuracy/principle_A_c_command": 0.623, "blimp/accuracy/only_npi_licensor_present": 0.695, "blimp/accuracy/expletive_it_object_raising": 0.785, "blimp/accuracy/left_branch_island_simple_question": 0.785, "blimp/accuracy/wh_questions_subject_gap": 0.94, "blimp/accuracy/existential_there_quantifiers_2": 0.472, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.94, "blimp/accuracy/sentential_negation_npi_scope": 0.671, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.819, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.909, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.9, "blimp/accuracy/principle_A_case_2": 0.958, "blimp/accuracy/distractor_agreement_relational_noun": 0.866, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.981, "blimp/accuracy/superlative_quantifiers_1": 0.802, "blimp/accuracy/wh_island": 0.785, "blimp/accuracy/principle_A_domain_1": 0.975, "blimp/accuracy/complex_NP_island": 0.596, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.982, "blimp/accuracy/irregular_past_participle_verbs": 0.914, "blimp/accuracy/drop_argument": 0.759, "blimp/accuracy/wh_questions_object_gap": 0.839, "blimp/accuracy/animate_subject_passive": 0.799, "blimp/accuracy/existential_there_quantifiers_1": 0.987, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.898, "blimp/accuracy/npi_present_2": 0.569, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.971, "blimp/accuracy/anaphor_number_agreement": 0.986, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.971, "blimp/accuracy/existential_there_object_raising": 0.823, "blimp/accuracy/matrix_question_npi_licensor_present": 0.358, "blimp/accuracy/npi_present_1": 0.593, "blimp/accuracy/wh_vs_that_no_gap": 0.987, "blimp/accuracy/left_branch_island_echo_question": 0.539, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.959, "blimp/accuracy/causative": 0.753, "blimp/accuracy/group_average": 0.8053582089552238, "blimp/accuracy/seq_average": 0.8053582089552239, "cbt/accuracy/NE": 0.8024839743589743, "cbt/accuracy/V": 0.9384, "cbt/accuracy/CN": 0.8728, "cbt/accuracy/P": 0.916, "cbt/accuracy/group_average": 0.8824209935897436, "cbt/accuracy/seq_average": 0.882452981192477, "hellaswag/accuracy/val": 0.3390758812985461, "hellaswag/accuracy/group_average": 0.3390758812985461, "hellaswag/accuracy/seq_average": 0.3390758812985461, "piqa/accuracy/val": 0.6251360174102285, "piqa/accuracy/group_average": 0.6251360174102285, "piqa/accuracy/seq_average": 0.6251360174102285, "ai2arc/accuracy/ARC-Easy": 0.36448202959830867, "ai2arc/accuracy/ARC-Challenge": 0.22317596566523606, "ai2arc/accuracy/group_average": 0.2938289976317724, "ai2arc/accuracy/seq_average": 0.3178470254957507, "mmlu/accuracy/MMLU": 0.26378262424025745, "mmlu/accuracy/group_average": 0.26378262424025745, "mmlu/accuracy/seq_average": 0.26378262424025745, "openbookqa/accuracy/test": 0.296, "openbookqa/accuracy/group_average": 0.296, "openbookqa/accuracy/seq_average": 0.296, "race/accuracy/test/high": 0.279874213836478, "race/accuracy/test/middle": 0.35236768802228413, "race/accuracy/group_average": 0.3161209509293811, "race/accuracy/seq_average": 0.3009728415079043, "siqa/accuracy/dev": 0.3741044012282497, "siqa/accuracy/group_average": 0.3741044012282497, "siqa/accuracy/seq_average": 0.3741044012282497, "winogrande/accuracy/dev": 0.5067087608524072, "winogrande/accuracy/group_average": 0.5067087608524072, "winogrande/accuracy/seq_average": 0.5067087608524072, "commonsenseqa/accuracy/dev_rand_split": 0.2628992628992629, "commonsenseqa/accuracy/group_average": 0.2628992628992629, "commonsenseqa/accuracy/seq_average": 0.2628992628992629}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-160000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.3742549835689486, "val/accuracy": 0.5096764942956349, "val/perplexity": 10.743006487533608, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.4370856433181287, "lambada/accuracy/total": 0.3103649068322981, "lambada/accuracy/openai_last_token": 0.7853260869565217, "lambada/perplexity": 7.7168954328166715, "lambada/lm_loss": 2.9652141481740677, "lambada/lm_perplexity": 19.39885702650396, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4100207005639665, "mean_loss": 2.4056703134435384, "blimp/accuracy/passive_2": 0.917, "blimp/accuracy/determiner_noun_agreement_2": 0.987, "blimp/accuracy/ellipsis_n_bar_1": 0.873, "blimp/accuracy/tough_vs_raising_2": 0.89, "blimp/accuracy/tough_vs_raising_1": 0.584, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.924, "blimp/accuracy/principle_A_reconstruction": 0.439, "blimp/accuracy/wh_vs_that_with_gap": 0.5, "blimp/accuracy/principle_A_domain_2": 0.868, "blimp/accuracy/determiner_noun_agreement_1": 0.995, "blimp/accuracy/ellipsis_n_bar_2": 0.915, "blimp/accuracy/principle_A_domain_3": 0.553, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.936, "blimp/accuracy/animate_subject_trans": 0.908, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.888, "blimp/accuracy/distractor_agreement_relative_clause": 0.684, "blimp/accuracy/transitive": 0.871, "blimp/accuracy/sentential_subject_island": 0.327, "blimp/accuracy/adjunct_island": 0.873, "blimp/accuracy/intransitive": 0.753, "blimp/accuracy/existential_there_subject_raising": 0.898, "blimp/accuracy/irregular_past_participle_adjectives": 0.956, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.738, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.376, "blimp/accuracy/only_npi_scope": 0.791, "blimp/accuracy/superlative_quantifiers_2": 0.839, "blimp/accuracy/passive_1": 0.893, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.93, "blimp/accuracy/inchoative": 0.627, "blimp/accuracy/anaphor_gender_agreement": 0.963, "blimp/accuracy/principle_A_c_command": 0.632, "blimp/accuracy/only_npi_licensor_present": 0.689, "blimp/accuracy/expletive_it_object_raising": 0.775, "blimp/accuracy/left_branch_island_simple_question": 0.788, "blimp/accuracy/wh_questions_subject_gap": 0.929, "blimp/accuracy/existential_there_quantifiers_2": 0.435, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.938, "blimp/accuracy/sentential_negation_npi_scope": 0.71, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.846, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.914, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.914, "blimp/accuracy/principle_A_case_2": 0.945, "blimp/accuracy/distractor_agreement_relational_noun": 0.868, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.983, "blimp/accuracy/superlative_quantifiers_1": 0.823, "blimp/accuracy/wh_island": 0.861, "blimp/accuracy/principle_A_domain_1": 0.985, "blimp/accuracy/complex_NP_island": 0.592, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.971, "blimp/accuracy/irregular_past_participle_verbs": 0.896, "blimp/accuracy/drop_argument": 0.689, "blimp/accuracy/wh_questions_object_gap": 0.835, "blimp/accuracy/animate_subject_passive": 0.783, "blimp/accuracy/existential_there_quantifiers_1": 0.987, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.915, "blimp/accuracy/npi_present_2": 0.577, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.965, "blimp/accuracy/anaphor_number_agreement": 0.988, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.976, "blimp/accuracy/existential_there_object_raising": 0.862, "blimp/accuracy/matrix_question_npi_licensor_present": 0.29, "blimp/accuracy/npi_present_1": 0.512, "blimp/accuracy/wh_vs_that_no_gap": 0.978, "blimp/accuracy/left_branch_island_echo_question": 0.489, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.96, "blimp/accuracy/causative": 0.768, "blimp/accuracy/group_average": 0.8024477611940298, "blimp/accuracy/seq_average": 0.8024477611940298, "cbt/accuracy/NE": 0.8084935897435898, "cbt/accuracy/V": 0.9356, "cbt/accuracy/CN": 0.8724, "cbt/accuracy/P": 0.9172, "cbt/accuracy/group_average": 0.8834233974358974, "cbt/accuracy/seq_average": 0.883453381352541, "hellaswag/accuracy/val": 0.34176458872734516, "hellaswag/accuracy/group_average": 0.34176458872734516, "hellaswag/accuracy/seq_average": 0.34176458872734516, "piqa/accuracy/val": 0.6284004352557128, "piqa/accuracy/group_average": 0.6284004352557128, "piqa/accuracy/seq_average": 0.6284004352557128, "ai2arc/accuracy/ARC-Easy": 0.37293868921775897, "ai2arc/accuracy/ARC-Challenge": 0.23605150214592274, "ai2arc/accuracy/group_average": 0.30449509568184085, "ai2arc/accuracy/seq_average": 0.3277620396600567, "mmlu/accuracy/MMLU": 0.267286378262424, "mmlu/accuracy/group_average": 0.267286378262424, "mmlu/accuracy/seq_average": 0.267286378262424, "openbookqa/accuracy/test": 0.3, "openbookqa/accuracy/group_average": 0.3, "openbookqa/accuracy/seq_average": 0.3, "race/accuracy/test/high": 0.28187535734705543, "race/accuracy/test/middle": 0.35236768802228413, "race/accuracy/group_average": 0.31712152268466975, "race/accuracy/seq_average": 0.3023915687069315, "siqa/accuracy/dev": 0.3781985670419652, "siqa/accuracy/group_average": 0.3781985670419652, "siqa/accuracy/seq_average": 0.3781985670419652, "winogrande/accuracy/dev": 0.505130228887135, "winogrande/accuracy/group_average": 0.505130228887135, "winogrande/accuracy/seq_average": 0.505130228887135, "commonsenseqa/accuracy/dev_rand_split": 0.26535626535626533, "commonsenseqa/accuracy/group_average": 0.26535626535626533, "commonsenseqa/accuracy/seq_average": 0.26535626535626533}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-180000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.354521736266121, "val/accuracy": 0.5131496853298612, "val/perplexity": 10.533090061871638, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.4604196489227483, "lambada/accuracy/total": 0.3466614906832298, "lambada/accuracy/openai_last_token": 0.796777950310559, "lambada/perplexity": 7.297201948316286, "lambada/lm_loss": 2.9618438890087857, "lambada/lm_perplexity": 19.33358789953182, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4299055880065455, "mean_loss": 2.4074706925944347, "blimp/accuracy/passive_2": 0.926, "blimp/accuracy/determiner_noun_agreement_2": 0.978, "blimp/accuracy/ellipsis_n_bar_1": 0.847, "blimp/accuracy/tough_vs_raising_2": 0.898, "blimp/accuracy/tough_vs_raising_1": 0.608, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.924, "blimp/accuracy/principle_A_reconstruction": 0.311, "blimp/accuracy/wh_vs_that_with_gap": 0.496, "blimp/accuracy/principle_A_domain_2": 0.882, "blimp/accuracy/determiner_noun_agreement_1": 0.991, "blimp/accuracy/ellipsis_n_bar_2": 0.903, "blimp/accuracy/principle_A_domain_3": 0.555, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.933, "blimp/accuracy/animate_subject_trans": 0.903, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.923, "blimp/accuracy/distractor_agreement_relative_clause": 0.682, "blimp/accuracy/transitive": 0.876, "blimp/accuracy/sentential_subject_island": 0.352, "blimp/accuracy/adjunct_island": 0.846, "blimp/accuracy/intransitive": 0.785, "blimp/accuracy/existential_there_subject_raising": 0.902, "blimp/accuracy/irregular_past_participle_adjectives": 0.989, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.778, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.396, "blimp/accuracy/only_npi_scope": 0.739, "blimp/accuracy/superlative_quantifiers_2": 0.853, "blimp/accuracy/passive_1": 0.889, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.925, "blimp/accuracy/inchoative": 0.623, "blimp/accuracy/anaphor_gender_agreement": 0.948, "blimp/accuracy/principle_A_c_command": 0.638, "blimp/accuracy/only_npi_licensor_present": 0.571, "blimp/accuracy/expletive_it_object_raising": 0.787, "blimp/accuracy/left_branch_island_simple_question": 0.831, "blimp/accuracy/wh_questions_subject_gap": 0.941, "blimp/accuracy/existential_there_quantifiers_2": 0.376, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.932, "blimp/accuracy/sentential_negation_npi_scope": 0.714, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.825, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.91, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.905, "blimp/accuracy/principle_A_case_2": 0.956, "blimp/accuracy/distractor_agreement_relational_noun": 0.844, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.981, "blimp/accuracy/superlative_quantifiers_1": 0.782, "blimp/accuracy/wh_island": 0.787, "blimp/accuracy/principle_A_domain_1": 0.982, "blimp/accuracy/complex_NP_island": 0.522, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.977, "blimp/accuracy/irregular_past_participle_verbs": 0.917, "blimp/accuracy/drop_argument": 0.748, "blimp/accuracy/wh_questions_object_gap": 0.854, "blimp/accuracy/animate_subject_passive": 0.791, "blimp/accuracy/existential_there_quantifiers_1": 0.981, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.883, "blimp/accuracy/npi_present_2": 0.534, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.948, "blimp/accuracy/anaphor_number_agreement": 0.99, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.965, "blimp/accuracy/existential_there_object_raising": 0.879, "blimp/accuracy/matrix_question_npi_licensor_present": 0.371, "blimp/accuracy/npi_present_1": 0.528, "blimp/accuracy/wh_vs_that_no_gap": 0.978, "blimp/accuracy/left_branch_island_echo_question": 0.463, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.968, "blimp/accuracy/causative": 0.754, "blimp/accuracy/group_average": 0.7981194029850748, "blimp/accuracy/seq_average": 0.7981194029850747, "cbt/accuracy/NE": 0.8173076923076923, "cbt/accuracy/V": 0.9368, "cbt/accuracy/CN": 0.8768, "cbt/accuracy/P": 0.9188, "cbt/accuracy/group_average": 0.8874269230769232, "cbt/accuracy/seq_average": 0.8874549819927972, "hellaswag/accuracy/val": 0.35152360087631945, "hellaswag/accuracy/group_average": 0.35152360087631945, "hellaswag/accuracy/seq_average": 0.35152360087631945, "piqa/accuracy/val": 0.6305767138193689, "piqa/accuracy/group_average": 0.6305767138193689, "piqa/accuracy/seq_average": 0.6305767138193689, "ai2arc/accuracy/ARC-Easy": 0.37378435517970404, "ai2arc/accuracy/ARC-Challenge": 0.2351931330472103, "ai2arc/accuracy/group_average": 0.30448874411345717, "ai2arc/accuracy/seq_average": 0.32804532577903683, "mmlu/accuracy/MMLU": 0.26206649982123703, "mmlu/accuracy/group_average": 0.26206649982123703, "mmlu/accuracy/seq_average": 0.26206649982123703, "openbookqa/accuracy/test": 0.284, "openbookqa/accuracy/group_average": 0.284, "openbookqa/accuracy/seq_average": 0.284, "race/accuracy/test/high": 0.28416237850200116, "race/accuracy/test/middle": 0.36142061281337046, "race/accuracy/group_average": 0.32279149565768583, "race/accuracy/seq_average": 0.306647750304013, "siqa/accuracy/dev": 0.37563971340839303, "siqa/accuracy/group_average": 0.37563971340839303, "siqa/accuracy/seq_average": 0.37563971340839303, "winogrande/accuracy/dev": 0.510655090765588, "winogrande/accuracy/group_average": 0.510655090765588, "winogrande/accuracy/seq_average": 0.510655090765588, "commonsenseqa/accuracy/dev_rand_split": 0.276003276003276, "commonsenseqa/accuracy/group_average": 0.276003276003276, "commonsenseqa/accuracy/seq_average": 0.276003276003276}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-20000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.7142518058655756, "val/accuracy": 0.46253894624255953, "val/perplexity": 15.093313117921188, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.621668276579484, "lambada/accuracy/total": 0.23039596273291926, "lambada/accuracy/openai_last_token": 0.750582298136646, "lambada/perplexity": 13.27529761951055, "lambada/lm_loss": 3.259138567205084, "lambada/lm_perplexity": 26.027106879400577, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3464674544877394, "mean_loss": 2.6679600412225297, "blimp/accuracy/passive_2": 0.902, "blimp/accuracy/determiner_noun_agreement_2": 0.988, "blimp/accuracy/ellipsis_n_bar_1": 0.799, "blimp/accuracy/tough_vs_raising_2": 0.871, "blimp/accuracy/tough_vs_raising_1": 0.582, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.847, "blimp/accuracy/principle_A_reconstruction": 0.318, "blimp/accuracy/wh_vs_that_with_gap": 0.45, "blimp/accuracy/principle_A_domain_2": 0.819, "blimp/accuracy/determiner_noun_agreement_1": 0.99, "blimp/accuracy/ellipsis_n_bar_2": 0.895, "blimp/accuracy/principle_A_domain_3": 0.547, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.92, "blimp/accuracy/animate_subject_trans": 0.884, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.887, "blimp/accuracy/distractor_agreement_relative_clause": 0.551, "blimp/accuracy/transitive": 0.854, "blimp/accuracy/sentential_subject_island": 0.316, "blimp/accuracy/adjunct_island": 0.748, "blimp/accuracy/intransitive": 0.754, "blimp/accuracy/existential_there_subject_raising": 0.855, "blimp/accuracy/irregular_past_participle_adjectives": 0.991, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.332, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.179, "blimp/accuracy/only_npi_scope": 0.68, "blimp/accuracy/superlative_quantifiers_2": 0.661, "blimp/accuracy/passive_1": 0.897, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.879, "blimp/accuracy/inchoative": 0.597, "blimp/accuracy/anaphor_gender_agreement": 0.964, "blimp/accuracy/principle_A_c_command": 0.45, "blimp/accuracy/only_npi_licensor_present": 0.669, "blimp/accuracy/expletive_it_object_raising": 0.775, "blimp/accuracy/left_branch_island_simple_question": 0.422, "blimp/accuracy/wh_questions_subject_gap": 0.939, "blimp/accuracy/existential_there_quantifiers_2": 0.236, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.944, "blimp/accuracy/sentential_negation_npi_scope": 0.619, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.807, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.924, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.859, "blimp/accuracy/principle_A_case_2": 0.962, "blimp/accuracy/distractor_agreement_relational_noun": 0.794, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.995, "blimp/accuracy/superlative_quantifiers_1": 0.621, "blimp/accuracy/wh_island": 0.777, "blimp/accuracy/principle_A_domain_1": 0.954, "blimp/accuracy/complex_NP_island": 0.426, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.968, "blimp/accuracy/irregular_past_participle_verbs": 0.795, "blimp/accuracy/drop_argument": 0.763, "blimp/accuracy/wh_questions_object_gap": 0.836, "blimp/accuracy/animate_subject_passive": 0.786, "blimp/accuracy/existential_there_quantifiers_1": 0.971, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.842, "blimp/accuracy/npi_present_2": 0.614, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.938, "blimp/accuracy/anaphor_number_agreement": 0.982, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.945, "blimp/accuracy/existential_there_object_raising": 0.786, "blimp/accuracy/matrix_question_npi_licensor_present": 0.176, "blimp/accuracy/npi_present_1": 0.628, "blimp/accuracy/wh_vs_that_no_gap": 0.983, "blimp/accuracy/left_branch_island_echo_question": 0.438, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.984, "blimp/accuracy/causative": 0.674, "blimp/accuracy/group_average": 0.7498358208955224, "blimp/accuracy/seq_average": 0.7498358208955224, "cbt/accuracy/NE": 0.7411858974358975, "cbt/accuracy/V": 0.8952, "cbt/accuracy/CN": 0.8012, "cbt/accuracy/P": 0.868, "cbt/accuracy/group_average": 0.8263964743589743, "cbt/accuracy/seq_average": 0.8264305722288916, "hellaswag/accuracy/val": 0.28908583947420835, "hellaswag/accuracy/group_average": 0.28908583947420835, "hellaswag/accuracy/seq_average": 0.28908583947420835, "piqa/accuracy/val": 0.573449401523395, "piqa/accuracy/group_average": 0.573449401523395, "piqa/accuracy/seq_average": 0.573449401523395, "ai2arc/accuracy/ARC-Easy": 0.3306553911205074, "ai2arc/accuracy/ARC-Challenge": 0.21888412017167383, "ai2arc/accuracy/group_average": 0.27476975564609063, "ai2arc/accuracy/seq_average": 0.29376770538243624, "mmlu/accuracy/MMLU": 0.2590632820879514, "mmlu/accuracy/group_average": 0.2590632820879514, "mmlu/accuracy/seq_average": 0.2590632820879514, "openbookqa/accuracy/test": 0.282, "openbookqa/accuracy/group_average": 0.282, "openbookqa/accuracy/seq_average": 0.282, "race/accuracy/test/high": 0.2715837621497999, "race/accuracy/test/middle": 0.34192200557103064, "race/accuracy/group_average": 0.30675288386041527, "race/accuracy/seq_average": 0.2920551276854479, "siqa/accuracy/dev": 0.3602865916069601, "siqa/accuracy/group_average": 0.3602865916069601, "siqa/accuracy/seq_average": 0.3602865916069601, "winogrande/accuracy/dev": 0.4996053670086819, "winogrande/accuracy/group_average": 0.4996053670086819, "winogrande/accuracy/seq_average": 0.4996053670086819, "commonsenseqa/accuracy/dev_rand_split": 0.24897624897624898, "commonsenseqa/accuracy/group_average": 0.24897624897624898, "commonsenseqa/accuracy/seq_average": 0.24897624897624898}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-200000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.3405279734778026, "val/accuracy": 0.5146561879960317, "val/perplexity": 10.386719027474394, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.2603206279114905, "lambada/accuracy/total": 0.3536490683229814, "lambada/accuracy/openai_last_token": 0.7940605590062112, "lambada/perplexity": 7.15045198793658, "lambada/lm_loss": 2.946139235599659, "lambada/lm_perplexity": 19.032332351086062, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.43415262815950656, "mean_loss": 2.3004243006946465, "blimp/accuracy/passive_2": 0.937, "blimp/accuracy/determiner_noun_agreement_2": 0.983, "blimp/accuracy/ellipsis_n_bar_1": 0.838, "blimp/accuracy/tough_vs_raising_2": 0.877, "blimp/accuracy/tough_vs_raising_1": 0.602, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.938, "blimp/accuracy/principle_A_reconstruction": 0.397, "blimp/accuracy/wh_vs_that_with_gap": 0.497, "blimp/accuracy/principle_A_domain_2": 0.856, "blimp/accuracy/determiner_noun_agreement_1": 0.994, "blimp/accuracy/ellipsis_n_bar_2": 0.91, "blimp/accuracy/principle_A_domain_3": 0.551, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.939, "blimp/accuracy/animate_subject_trans": 0.915, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.933, "blimp/accuracy/distractor_agreement_relative_clause": 0.715, "blimp/accuracy/transitive": 0.865, "blimp/accuracy/sentential_subject_island": 0.341, "blimp/accuracy/adjunct_island": 0.854, "blimp/accuracy/intransitive": 0.782, "blimp/accuracy/existential_there_subject_raising": 0.901, "blimp/accuracy/irregular_past_participle_adjectives": 0.955, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.745, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.405, "blimp/accuracy/only_npi_scope": 0.718, "blimp/accuracy/superlative_quantifiers_2": 0.824, "blimp/accuracy/passive_1": 0.903, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.919, "blimp/accuracy/inchoative": 0.607, "blimp/accuracy/anaphor_gender_agreement": 0.977, "blimp/accuracy/principle_A_c_command": 0.642, "blimp/accuracy/only_npi_licensor_present": 0.805, "blimp/accuracy/expletive_it_object_raising": 0.785, "blimp/accuracy/left_branch_island_simple_question": 0.818, "blimp/accuracy/wh_questions_subject_gap": 0.933, "blimp/accuracy/existential_there_quantifiers_2": 0.538, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.948, "blimp/accuracy/sentential_negation_npi_scope": 0.7, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.851, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.902, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.919, "blimp/accuracy/principle_A_case_2": 0.952, "blimp/accuracy/distractor_agreement_relational_noun": 0.87, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.99, "blimp/accuracy/superlative_quantifiers_1": 0.765, "blimp/accuracy/wh_island": 0.794, "blimp/accuracy/principle_A_domain_1": 0.983, "blimp/accuracy/complex_NP_island": 0.602, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.979, "blimp/accuracy/irregular_past_participle_verbs": 0.882, "blimp/accuracy/drop_argument": 0.762, "blimp/accuracy/wh_questions_object_gap": 0.855, "blimp/accuracy/animate_subject_passive": 0.798, "blimp/accuracy/existential_there_quantifiers_1": 0.991, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.902, "blimp/accuracy/npi_present_2": 0.547, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.953, "blimp/accuracy/anaphor_number_agreement": 0.993, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.97, "blimp/accuracy/existential_there_object_raising": 0.845, "blimp/accuracy/matrix_question_npi_licensor_present": 0.328, "blimp/accuracy/npi_present_1": 0.622, "blimp/accuracy/wh_vs_that_no_gap": 0.985, "blimp/accuracy/left_branch_island_echo_question": 0.501, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.967, "blimp/accuracy/causative": 0.794, "blimp/accuracy/group_average": 0.8081940298507458, "blimp/accuracy/seq_average": 0.8081940298507463, "cbt/accuracy/NE": 0.8161057692307693, "cbt/accuracy/V": 0.9412, "cbt/accuracy/CN": 0.874, "cbt/accuracy/P": 0.9172, "cbt/accuracy/group_average": 0.8871264423076923, "cbt/accuracy/seq_average": 0.8871548619447779, "hellaswag/accuracy/val": 0.3519219279028082, "hellaswag/accuracy/group_average": 0.3519219279028082, "hellaswag/accuracy/seq_average": 0.3519219279028082, "piqa/accuracy/val": 0.6376496191512514, "piqa/accuracy/group_average": 0.6376496191512514, "piqa/accuracy/seq_average": 0.6376496191512514, "ai2arc/accuracy/ARC-Easy": 0.3733615221987315, "ai2arc/accuracy/ARC-Challenge": 0.22660944206008585, "ai2arc/accuracy/group_average": 0.2999854821294087, "ai2arc/accuracy/seq_average": 0.32492917847025493, "mmlu/accuracy/MMLU": 0.26206649982123703, "mmlu/accuracy/group_average": 0.26206649982123703, "mmlu/accuracy/seq_average": 0.26206649982123703, "openbookqa/accuracy/test": 0.292, "openbookqa/accuracy/group_average": 0.292, "openbookqa/accuracy/seq_average": 0.292, "race/accuracy/test/high": 0.2875929102344197, "race/accuracy/test/middle": 0.3607242339832869, "race/accuracy/group_average": 0.3241585721088533, "race/accuracy/seq_average": 0.30887717875962706, "siqa/accuracy/dev": 0.38178096212896623, "siqa/accuracy/group_average": 0.38178096212896623, "siqa/accuracy/seq_average": 0.38178096212896623, "winogrande/accuracy/dev": 0.5153906866614049, "winogrande/accuracy/group_average": 0.5153906866614049, "winogrande/accuracy/seq_average": 0.5153906866614049, "commonsenseqa/accuracy/dev_rand_split": 0.27927927927927926, "commonsenseqa/accuracy/group_average": 0.27927927927927926, "commonsenseqa/accuracy/seq_average": 0.27927927927927926}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-220000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.32483152359251, "val/accuracy": 0.5170656234499008, "val/perplexity": 10.224957276600021, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.4504910107725157, "lambada/accuracy/total": 0.35248447204968947, "lambada/accuracy/openai_last_token": 0.796972049689441, "lambada/perplexity": 6.999316116727751, "lambada/lm_loss": 2.929496158302655, "lambada/lm_perplexity": 18.718197112202002, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.43477504774979514, "mean_loss": 2.387661267182513, "blimp/accuracy/passive_2": 0.934, "blimp/accuracy/determiner_noun_agreement_2": 0.992, "blimp/accuracy/ellipsis_n_bar_1": 0.863, "blimp/accuracy/tough_vs_raising_2": 0.886, "blimp/accuracy/tough_vs_raising_1": 0.609, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.909, "blimp/accuracy/principle_A_reconstruction": 0.392, "blimp/accuracy/wh_vs_that_with_gap": 0.493, "blimp/accuracy/principle_A_domain_2": 0.866, "blimp/accuracy/determiner_noun_agreement_1": 0.994, "blimp/accuracy/ellipsis_n_bar_2": 0.914, "blimp/accuracy/principle_A_domain_3": 0.551, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.942, "blimp/accuracy/animate_subject_trans": 0.913, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.89, "blimp/accuracy/distractor_agreement_relative_clause": 0.665, "blimp/accuracy/transitive": 0.879, "blimp/accuracy/sentential_subject_island": 0.351, "blimp/accuracy/adjunct_island": 0.863, "blimp/accuracy/intransitive": 0.792, "blimp/accuracy/existential_there_subject_raising": 0.912, "blimp/accuracy/irregular_past_participle_adjectives": 0.985, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.736, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.432, "blimp/accuracy/only_npi_scope": 0.755, "blimp/accuracy/superlative_quantifiers_2": 0.864, "blimp/accuracy/passive_1": 0.9, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.929, "blimp/accuracy/inchoative": 0.646, "blimp/accuracy/anaphor_gender_agreement": 0.98, "blimp/accuracy/principle_A_c_command": 0.643, "blimp/accuracy/only_npi_licensor_present": 0.7, "blimp/accuracy/expletive_it_object_raising": 0.783, "blimp/accuracy/left_branch_island_simple_question": 0.783, "blimp/accuracy/wh_questions_subject_gap": 0.922, "blimp/accuracy/existential_there_quantifiers_2": 0.391, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.951, "blimp/accuracy/sentential_negation_npi_scope": 0.765, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.822, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.87, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.906, "blimp/accuracy/principle_A_case_2": 0.962, "blimp/accuracy/distractor_agreement_relational_noun": 0.857, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.989, "blimp/accuracy/superlative_quantifiers_1": 0.857, "blimp/accuracy/wh_island": 0.825, "blimp/accuracy/principle_A_domain_1": 0.982, "blimp/accuracy/complex_NP_island": 0.581, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.977, "blimp/accuracy/irregular_past_participle_verbs": 0.89, "blimp/accuracy/drop_argument": 0.751, "blimp/accuracy/wh_questions_object_gap": 0.832, "blimp/accuracy/animate_subject_passive": 0.787, "blimp/accuracy/existential_there_quantifiers_1": 0.974, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.886, "blimp/accuracy/npi_present_2": 0.557, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.942, "blimp/accuracy/anaphor_number_agreement": 0.992, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.965, "blimp/accuracy/existential_there_object_raising": 0.865, "blimp/accuracy/matrix_question_npi_licensor_present": 0.298, "blimp/accuracy/npi_present_1": 0.611, "blimp/accuracy/wh_vs_that_no_gap": 0.98, "blimp/accuracy/left_branch_island_echo_question": 0.505, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.959, "blimp/accuracy/causative": 0.762, "blimp/accuracy/group_average": 0.805358208955224, "blimp/accuracy/seq_average": 0.8053582089552239, "cbt/accuracy/NE": 0.8145032051282052, "cbt/accuracy/V": 0.9368, "cbt/accuracy/CN": 0.874, "cbt/accuracy/P": 0.9176, "cbt/accuracy/group_average": 0.8857258012820513, "cbt/accuracy/seq_average": 0.8857543017206883, "hellaswag/accuracy/val": 0.35859390559649473, "hellaswag/accuracy/group_average": 0.35859390559649473, "hellaswag/accuracy/seq_average": 0.35859390559649473, "piqa/accuracy/val": 0.6409140369967355, "piqa/accuracy/group_average": 0.6409140369967355, "piqa/accuracy/seq_average": 0.6409140369967355, "ai2arc/accuracy/ARC-Easy": 0.3733615221987315, "ai2arc/accuracy/ARC-Challenge": 0.23261802575107296, "ai2arc/accuracy/group_average": 0.30298977397490223, "ai2arc/accuracy/seq_average": 0.32691218130311617, "mmlu/accuracy/MMLU": 0.2604933857704684, "mmlu/accuracy/group_average": 0.2604933857704684, "mmlu/accuracy/seq_average": 0.2604933857704684, "openbookqa/accuracy/test": 0.294, "openbookqa/accuracy/group_average": 0.294, "openbookqa/accuracy/seq_average": 0.294, "race/accuracy/test/high": 0.2890222984562607, "race/accuracy/test/middle": 0.36559888579387184, "race/accuracy/group_average": 0.3273105921250663, "race/accuracy/seq_average": 0.31130928252938794, "siqa/accuracy/dev": 0.37615148413510746, "siqa/accuracy/group_average": 0.37615148413510746, "siqa/accuracy/seq_average": 0.37615148413510746, "winogrande/accuracy/dev": 0.5059194948697711, "winogrande/accuracy/group_average": 0.5059194948697711, "winogrande/accuracy/seq_average": 0.5059194948697711, "commonsenseqa/accuracy/dev_rand_split": 0.2833742833742834, "commonsenseqa/accuracy/group_average": 0.2833742833742834, "commonsenseqa/accuracy/seq_average": 0.2833742833742834}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-240000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.3118973989335316, "val/accuracy": 0.5190352182539683, "val/perplexity": 10.09355800366742, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.4005498471467392, "lambada/accuracy/total": 0.358695652173913, "lambada/accuracy/openai_last_token": 0.7989130434782609, "lambada/perplexity": 6.867405409793414, "lambada/lm_loss": 2.913757980400743, "lambada/lm_perplexity": 18.425912839956364, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4388654352139406, "mean_loss": 2.3562236230401354, "blimp/accuracy/passive_2": 0.911, "blimp/accuracy/determiner_noun_agreement_2": 0.983, "blimp/accuracy/ellipsis_n_bar_1": 0.857, "blimp/accuracy/tough_vs_raising_2": 0.91, "blimp/accuracy/tough_vs_raising_1": 0.584, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.919, "blimp/accuracy/principle_A_reconstruction": 0.446, "blimp/accuracy/wh_vs_that_with_gap": 0.474, "blimp/accuracy/principle_A_domain_2": 0.877, "blimp/accuracy/determiner_noun_agreement_1": 0.992, "blimp/accuracy/ellipsis_n_bar_2": 0.904, "blimp/accuracy/principle_A_domain_3": 0.541, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.93, "blimp/accuracy/animate_subject_trans": 0.903, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.898, "blimp/accuracy/distractor_agreement_relative_clause": 0.71, "blimp/accuracy/transitive": 0.889, "blimp/accuracy/sentential_subject_island": 0.286, "blimp/accuracy/adjunct_island": 0.866, "blimp/accuracy/intransitive": 0.801, "blimp/accuracy/existential_there_subject_raising": 0.904, "blimp/accuracy/irregular_past_participle_adjectives": 0.935, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.731, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.391, "blimp/accuracy/only_npi_scope": 0.668, "blimp/accuracy/superlative_quantifiers_2": 0.805, "blimp/accuracy/passive_1": 0.895, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.914, "blimp/accuracy/inchoative": 0.635, "blimp/accuracy/anaphor_gender_agreement": 0.961, "blimp/accuracy/principle_A_c_command": 0.651, "blimp/accuracy/only_npi_licensor_present": 0.587, "blimp/accuracy/expletive_it_object_raising": 0.773, "blimp/accuracy/left_branch_island_simple_question": 0.795, "blimp/accuracy/wh_questions_subject_gap": 0.925, "blimp/accuracy/existential_there_quantifiers_2": 0.595, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.953, "blimp/accuracy/sentential_negation_npi_scope": 0.727, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.803, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.904, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.903, "blimp/accuracy/principle_A_case_2": 0.96, "blimp/accuracy/distractor_agreement_relational_noun": 0.873, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.988, "blimp/accuracy/superlative_quantifiers_1": 0.922, "blimp/accuracy/wh_island": 0.79, "blimp/accuracy/principle_A_domain_1": 0.98, "blimp/accuracy/complex_NP_island": 0.575, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.973, "blimp/accuracy/irregular_past_participle_verbs": 0.923, "blimp/accuracy/drop_argument": 0.755, "blimp/accuracy/wh_questions_object_gap": 0.846, "blimp/accuracy/animate_subject_passive": 0.78, "blimp/accuracy/existential_there_quantifiers_1": 0.991, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.897, "blimp/accuracy/npi_present_2": 0.568, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.948, "blimp/accuracy/anaphor_number_agreement": 0.991, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.964, "blimp/accuracy/existential_there_object_raising": 0.895, "blimp/accuracy/matrix_question_npi_licensor_present": 0.296, "blimp/accuracy/npi_present_1": 0.592, "blimp/accuracy/wh_vs_that_no_gap": 0.984, "blimp/accuracy/left_branch_island_echo_question": 0.491, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.975, "blimp/accuracy/causative": 0.767, "blimp/accuracy/group_average": 0.8038805970149253, "blimp/accuracy/seq_average": 0.8038805970149254, "cbt/accuracy/NE": 0.8189102564102564, "cbt/accuracy/V": 0.9416, "cbt/accuracy/CN": 0.882, "cbt/accuracy/P": 0.9232, "cbt/accuracy/group_average": 0.8914275641025641, "cbt/accuracy/seq_average": 0.8914565826330533, "hellaswag/accuracy/val": 0.3558056164110735, "hellaswag/accuracy/group_average": 0.3558056164110735, "hellaswag/accuracy/seq_average": 0.3558056164110735, "piqa/accuracy/val": 0.6316648531011969, "piqa/accuracy/group_average": 0.6316648531011969, "piqa/accuracy/seq_average": 0.6316648531011969, "ai2arc/accuracy/ARC-Easy": 0.3792811839323467, "ai2arc/accuracy/ARC-Challenge": 0.2223175965665236, "ai2arc/accuracy/group_average": 0.30079939024943514, "ai2arc/accuracy/seq_average": 0.3274787535410765, "mmlu/accuracy/MMLU": 0.2621380050053629, "mmlu/accuracy/group_average": 0.2621380050053629, "mmlu/accuracy/seq_average": 0.2621380050053629, "openbookqa/accuracy/test": 0.28, "openbookqa/accuracy/group_average": 0.28, "openbookqa/accuracy/seq_average": 0.28, "race/accuracy/test/high": 0.2938822184105203, "race/accuracy/test/middle": 0.36768802228412256, "race/accuracy/group_average": 0.33078512034732144, "race/accuracy/seq_average": 0.31536278881232266, "siqa/accuracy/dev": 0.3781985670419652, "siqa/accuracy/group_average": 0.3781985670419652, "siqa/accuracy/seq_average": 0.3781985670419652, "winogrande/accuracy/dev": 0.510655090765588, "winogrande/accuracy/group_average": 0.510655090765588, "winogrande/accuracy/seq_average": 0.510655090765588, "commonsenseqa/accuracy/dev_rand_split": 0.276003276003276, "commonsenseqa/accuracy/group_average": 0.276003276003276, "commonsenseqa/accuracy/seq_average": 0.276003276003276}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-260000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.3007604205419145, "val/accuracy": 0.5207684229290674, "val/perplexity": 9.981769912505897, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.4085600479789404, "lambada/accuracy/total": 0.3532608695652174, "lambada/accuracy/openai_last_token": 0.7991071428571429, "lambada/perplexity": 6.9345331829936745, "lambada/lm_loss": 2.888929688662226, "lambada/lm_perplexity": 17.9740614608688, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4370146462471424, "mean_loss": 2.3546602342604275, "blimp/accuracy/passive_2": 0.913, "blimp/accuracy/determiner_noun_agreement_2": 0.986, "blimp/accuracy/ellipsis_n_bar_1": 0.853, "blimp/accuracy/tough_vs_raising_2": 0.878, "blimp/accuracy/tough_vs_raising_1": 0.605, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.927, "blimp/accuracy/principle_A_reconstruction": 0.435, "blimp/accuracy/wh_vs_that_with_gap": 0.496, "blimp/accuracy/principle_A_domain_2": 0.874, "blimp/accuracy/determiner_noun_agreement_1": 0.992, "blimp/accuracy/ellipsis_n_bar_2": 0.891, "blimp/accuracy/principle_A_domain_3": 0.543, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.933, "blimp/accuracy/animate_subject_trans": 0.916, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.923, "blimp/accuracy/distractor_agreement_relative_clause": 0.726, "blimp/accuracy/transitive": 0.871, "blimp/accuracy/sentential_subject_island": 0.276, "blimp/accuracy/adjunct_island": 0.848, "blimp/accuracy/intransitive": 0.766, "blimp/accuracy/existential_there_subject_raising": 0.903, "blimp/accuracy/irregular_past_participle_adjectives": 0.895, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.763, "blimp/accuracy/principle_A_case_1": 0.999, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.403, "blimp/accuracy/only_npi_scope": 0.731, "blimp/accuracy/superlative_quantifiers_2": 0.801, "blimp/accuracy/passive_1": 0.894, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.92, "blimp/accuracy/inchoative": 0.639, "blimp/accuracy/anaphor_gender_agreement": 0.975, "blimp/accuracy/principle_A_c_command": 0.683, "blimp/accuracy/only_npi_licensor_present": 0.621, "blimp/accuracy/expletive_it_object_raising": 0.776, "blimp/accuracy/left_branch_island_simple_question": 0.828, "blimp/accuracy/wh_questions_subject_gap": 0.928, "blimp/accuracy/existential_there_quantifiers_2": 0.448, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.957, "blimp/accuracy/sentential_negation_npi_scope": 0.749, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.824, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.903, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.904, "blimp/accuracy/principle_A_case_2": 0.954, "blimp/accuracy/distractor_agreement_relational_noun": 0.87, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.987, "blimp/accuracy/superlative_quantifiers_1": 0.874, "blimp/accuracy/wh_island": 0.805, "blimp/accuracy/principle_A_domain_1": 0.981, "blimp/accuracy/complex_NP_island": 0.566, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.978, "blimp/accuracy/irregular_past_participle_verbs": 0.888, "blimp/accuracy/drop_argument": 0.75, "blimp/accuracy/wh_questions_object_gap": 0.835, "blimp/accuracy/animate_subject_passive": 0.811, "blimp/accuracy/existential_there_quantifiers_1": 0.992, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.924, "blimp/accuracy/npi_present_2": 0.605, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.968, "blimp/accuracy/anaphor_number_agreement": 0.989, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.973, "blimp/accuracy/existential_there_object_raising": 0.836, "blimp/accuracy/matrix_question_npi_licensor_present": 0.346, "blimp/accuracy/npi_present_1": 0.566, "blimp/accuracy/wh_vs_that_no_gap": 0.979, "blimp/accuracy/left_branch_island_echo_question": 0.455, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.97, "blimp/accuracy/causative": 0.769, "blimp/accuracy/group_average": 0.8039850746268656, "blimp/accuracy/seq_average": 0.8039850746268656, "cbt/accuracy/NE": 0.8157051282051282, "cbt/accuracy/V": 0.94, "cbt/accuracy/CN": 0.8908, "cbt/accuracy/P": 0.9264, "cbt/accuracy/group_average": 0.8932262820512821, "cbt/accuracy/seq_average": 0.8932573029211685, "hellaswag/accuracy/val": 0.3602867954590719, "hellaswag/accuracy/group_average": 0.3602867954590719, "hellaswag/accuracy/seq_average": 0.3602867954590719, "piqa/accuracy/val": 0.6523394994559304, "piqa/accuracy/group_average": 0.6523394994559304, "piqa/accuracy/seq_average": 0.6523394994559304, "ai2arc/accuracy/ARC-Easy": 0.386046511627907, "ai2arc/accuracy/ARC-Challenge": 0.23261802575107296, "ai2arc/accuracy/group_average": 0.30933226868949, "ai2arc/accuracy/seq_average": 0.33541076487252125, "mmlu/accuracy/MMLU": 0.2640686449767608, "mmlu/accuracy/group_average": 0.2640686449767608, "mmlu/accuracy/seq_average": 0.2640686449767608, "openbookqa/accuracy/test": 0.278, "openbookqa/accuracy/group_average": 0.278, "openbookqa/accuracy/seq_average": 0.278, "race/accuracy/test/high": 0.2893081761006289, "race/accuracy/test/middle": 0.3607242339832869, "race/accuracy/group_average": 0.32501620504195794, "race/accuracy/seq_average": 0.3100932306445075, "siqa/accuracy/dev": 0.3812691914022518, "siqa/accuracy/group_average": 0.3812691914022518, "siqa/accuracy/seq_average": 0.3812691914022518, "winogrande/accuracy/dev": 0.516179952644041, "winogrande/accuracy/group_average": 0.516179952644041, "winogrande/accuracy/seq_average": 0.516179952644041, "commonsenseqa/accuracy/dev_rand_split": 0.27354627354627353, "commonsenseqa/accuracy/group_average": 0.27354627354627353, "commonsenseqa/accuracy/seq_average": 0.27354627354627353}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-280000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.2882167271205356, "val/accuracy": 0.5223466176835317, "val/perplexity": 9.857343664748617, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.41845703125, "lambada/accuracy/total": 0.36645962732919257, "lambada/accuracy/openai_last_token": 0.8026009316770186, "lambada/perplexity": 6.663270923589426, "lambada/lm_loss": 2.8813995113805175, "lambada/lm_perplexity": 17.839221911607776, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.44440312250636216, "mean_loss": 2.353336879185268, "blimp/accuracy/passive_2": 0.919, "blimp/accuracy/determiner_noun_agreement_2": 0.984, "blimp/accuracy/ellipsis_n_bar_1": 0.851, "blimp/accuracy/tough_vs_raising_2": 0.888, "blimp/accuracy/tough_vs_raising_1": 0.609, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.905, "blimp/accuracy/principle_A_reconstruction": 0.425, "blimp/accuracy/wh_vs_that_with_gap": 0.486, "blimp/accuracy/principle_A_domain_2": 0.877, "blimp/accuracy/determiner_noun_agreement_1": 0.991, "blimp/accuracy/ellipsis_n_bar_2": 0.912, "blimp/accuracy/principle_A_domain_3": 0.554, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.93, "blimp/accuracy/animate_subject_trans": 0.91, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.911, "blimp/accuracy/distractor_agreement_relative_clause": 0.726, "blimp/accuracy/transitive": 0.879, "blimp/accuracy/sentential_subject_island": 0.294, "blimp/accuracy/adjunct_island": 0.845, "blimp/accuracy/intransitive": 0.78, "blimp/accuracy/existential_there_subject_raising": 0.893, "blimp/accuracy/irregular_past_participle_adjectives": 0.943, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.767, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.375, "blimp/accuracy/only_npi_scope": 0.677, "blimp/accuracy/superlative_quantifiers_2": 0.809, "blimp/accuracy/passive_1": 0.892, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.914, "blimp/accuracy/inchoative": 0.63, "blimp/accuracy/anaphor_gender_agreement": 0.98, "blimp/accuracy/principle_A_c_command": 0.68, "blimp/accuracy/only_npi_licensor_present": 0.656, "blimp/accuracy/expletive_it_object_raising": 0.769, "blimp/accuracy/left_branch_island_simple_question": 0.823, "blimp/accuracy/wh_questions_subject_gap": 0.923, "blimp/accuracy/existential_there_quantifiers_2": 0.52, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.95, "blimp/accuracy/sentential_negation_npi_scope": 0.711, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.809, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.907, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.907, "blimp/accuracy/principle_A_case_2": 0.956, "blimp/accuracy/distractor_agreement_relational_noun": 0.898, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.99, "blimp/accuracy/superlative_quantifiers_1": 0.889, "blimp/accuracy/wh_island": 0.778, "blimp/accuracy/principle_A_domain_1": 0.989, "blimp/accuracy/complex_NP_island": 0.569, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.97, "blimp/accuracy/irregular_past_participle_verbs": 0.91, "blimp/accuracy/drop_argument": 0.731, "blimp/accuracy/wh_questions_object_gap": 0.83, "blimp/accuracy/animate_subject_passive": 0.792, "blimp/accuracy/existential_there_quantifiers_1": 0.992, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.899, "blimp/accuracy/npi_present_2": 0.553, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.967, "blimp/accuracy/anaphor_number_agreement": 0.993, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.974, "blimp/accuracy/existential_there_object_raising": 0.845, "blimp/accuracy/matrix_question_npi_licensor_present": 0.334, "blimp/accuracy/npi_present_1": 0.566, "blimp/accuracy/wh_vs_that_no_gap": 0.98, "blimp/accuracy/left_branch_island_echo_question": 0.502, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.972, "blimp/accuracy/causative": 0.753, "blimp/accuracy/group_average": 0.8036268656716419, "blimp/accuracy/seq_average": 0.8036268656716418, "cbt/accuracy/NE": 0.8173076923076923, "cbt/accuracy/V": 0.9428, "cbt/accuracy/CN": 0.8928, "cbt/accuracy/P": 0.9284, "cbt/accuracy/group_average": 0.895326923076923, "cbt/accuracy/seq_average": 0.8953581432573029, "hellaswag/accuracy/val": 0.360884285998805, "hellaswag/accuracy/group_average": 0.360884285998805, "hellaswag/accuracy/seq_average": 0.360884285998805, "piqa/accuracy/val": 0.6420021762785637, "piqa/accuracy/group_average": 0.6420021762785637, "piqa/accuracy/seq_average": 0.6420021762785637, "ai2arc/accuracy/ARC-Easy": 0.3839323467230444, "ai2arc/accuracy/ARC-Challenge": 0.23004291845493563, "ai2arc/accuracy/group_average": 0.30698763258899, "ai2arc/accuracy/seq_average": 0.33314447592067986, "mmlu/accuracy/MMLU": 0.262567036110118, "mmlu/accuracy/group_average": 0.262567036110118, "mmlu/accuracy/seq_average": 0.262567036110118, "openbookqa/accuracy/test": 0.284, "openbookqa/accuracy/group_average": 0.284, "openbookqa/accuracy/seq_average": 0.284, "race/accuracy/test/high": 0.29531160663236133, "race/accuracy/test/middle": 0.3711699164345404, "race/accuracy/group_average": 0.33324076153345084, "race/accuracy/seq_average": 0.31738954195379004, "siqa/accuracy/dev": 0.3843398157625384, "siqa/accuracy/group_average": 0.3843398157625384, "siqa/accuracy/seq_average": 0.3843398157625384, "winogrande/accuracy/dev": 0.4972375690607735, "winogrande/accuracy/group_average": 0.4972375690607735, "winogrande/accuracy/seq_average": 0.4972375690607735, "commonsenseqa/accuracy/dev_rand_split": 0.28665028665028663, "commonsenseqa/accuracy/group_average": 0.28665028665028663, "commonsenseqa/accuracy/seq_average": 0.28665028665028663}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-300000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.2784590948195684, "val/accuracy": 0.5241553896949405, "val/perplexity": 9.761627072899175, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.475020698879076, "lambada/accuracy/total": 0.35403726708074534, "lambada/accuracy/openai_last_token": 0.7994953416149069, "lambada/perplexity": 6.796557642654688, "lambada/lm_loss": 2.8831330943316367, "lambada/lm_perplexity": 17.87017450426659, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4390963283878429, "mean_loss": 2.3767398968493225, "blimp/accuracy/passive_2": 0.924, "blimp/accuracy/determiner_noun_agreement_2": 0.991, "blimp/accuracy/ellipsis_n_bar_1": 0.848, "blimp/accuracy/tough_vs_raising_2": 0.88, "blimp/accuracy/tough_vs_raising_1": 0.619, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.921, "blimp/accuracy/principle_A_reconstruction": 0.461, "blimp/accuracy/wh_vs_that_with_gap": 0.51, "blimp/accuracy/principle_A_domain_2": 0.854, "blimp/accuracy/determiner_noun_agreement_1": 0.992, "blimp/accuracy/ellipsis_n_bar_2": 0.895, "blimp/accuracy/principle_A_domain_3": 0.53, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.928, "blimp/accuracy/animate_subject_trans": 0.911, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.91, "blimp/accuracy/distractor_agreement_relative_clause": 0.687, "blimp/accuracy/transitive": 0.884, "blimp/accuracy/sentential_subject_island": 0.352, "blimp/accuracy/adjunct_island": 0.82, "blimp/accuracy/intransitive": 0.772, "blimp/accuracy/existential_there_subject_raising": 0.893, "blimp/accuracy/irregular_past_participle_adjectives": 0.957, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.793, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.435, "blimp/accuracy/only_npi_scope": 0.752, "blimp/accuracy/superlative_quantifiers_2": 0.858, "blimp/accuracy/passive_1": 0.888, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.93, "blimp/accuracy/inchoative": 0.622, "blimp/accuracy/anaphor_gender_agreement": 0.973, "blimp/accuracy/principle_A_c_command": 0.689, "blimp/accuracy/only_npi_licensor_present": 0.671, "blimp/accuracy/expletive_it_object_raising": 0.791, "blimp/accuracy/left_branch_island_simple_question": 0.848, "blimp/accuracy/wh_questions_subject_gap": 0.926, "blimp/accuracy/existential_there_quantifiers_2": 0.486, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.945, "blimp/accuracy/sentential_negation_npi_scope": 0.723, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.823, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.892, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.909, "blimp/accuracy/principle_A_case_2": 0.964, "blimp/accuracy/distractor_agreement_relational_noun": 0.854, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.984, "blimp/accuracy/superlative_quantifiers_1": 0.855, "blimp/accuracy/wh_island": 0.781, "blimp/accuracy/principle_A_domain_1": 0.992, "blimp/accuracy/complex_NP_island": 0.535, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.977, "blimp/accuracy/irregular_past_participle_verbs": 0.898, "blimp/accuracy/drop_argument": 0.746, "blimp/accuracy/wh_questions_object_gap": 0.828, "blimp/accuracy/animate_subject_passive": 0.796, "blimp/accuracy/existential_there_quantifiers_1": 0.99, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.9, "blimp/accuracy/npi_present_2": 0.549, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.962, "blimp/accuracy/anaphor_number_agreement": 0.99, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.971, "blimp/accuracy/existential_there_object_raising": 0.847, "blimp/accuracy/matrix_question_npi_licensor_present": 0.323, "blimp/accuracy/npi_present_1": 0.554, "blimp/accuracy/wh_vs_that_no_gap": 0.982, "blimp/accuracy/left_branch_island_echo_question": 0.488, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.96, "blimp/accuracy/causative": 0.732, "blimp/accuracy/group_average": 0.8052388059701492, "blimp/accuracy/seq_average": 0.8052388059701493, "cbt/accuracy/NE": 0.8181089743589743, "cbt/accuracy/V": 0.9436, "cbt/accuracy/CN": 0.8816, "cbt/accuracy/P": 0.9252, "cbt/accuracy/group_average": 0.8921272435897436, "cbt/accuracy/seq_average": 0.8921568627450981, "hellaswag/accuracy/val": 0.3655646285600478, "hellaswag/accuracy/group_average": 0.3655646285600478, "hellaswag/accuracy/seq_average": 0.3655646285600478, "piqa/accuracy/val": 0.6479869423286181, "piqa/accuracy/group_average": 0.6479869423286181, "piqa/accuracy/seq_average": 0.6479869423286181, "ai2arc/accuracy/ARC-Easy": 0.3898520084566596, "ai2arc/accuracy/ARC-Challenge": 0.21888412017167383, "ai2arc/accuracy/group_average": 0.30436806431416674, "ai2arc/accuracy/seq_average": 0.33342776203966007, "mmlu/accuracy/MMLU": 0.26035037540221667, "mmlu/accuracy/group_average": 0.26035037540221667, "mmlu/accuracy/seq_average": 0.26035037540221667, "openbookqa/accuracy/test": 0.29, "openbookqa/accuracy/group_average": 0.29, "openbookqa/accuracy/seq_average": 0.29, "race/accuracy/test/high": 0.2918810748999428, "race/accuracy/test/middle": 0.3607242339832869, "race/accuracy/group_average": 0.3263026544416149, "race/accuracy/seq_average": 0.31191730847182814, "siqa/accuracy/dev": 0.3797338792221085, "siqa/accuracy/group_average": 0.3797338792221085, "siqa/accuracy/seq_average": 0.3797338792221085, "winogrande/accuracy/dev": 0.5019731649565904, "winogrande/accuracy/group_average": 0.5019731649565904, "winogrande/accuracy/seq_average": 0.5019731649565904, "commonsenseqa/accuracy/dev_rand_split": 0.276003276003276, "commonsenseqa/accuracy/group_average": 0.276003276003276, "commonsenseqa/accuracy/seq_average": 0.276003276003276}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-320000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.269898914155506, "val/accuracy": 0.5251920185391865, "val/perplexity": 9.67842241311943, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.36659856760724, "lambada/accuracy/total": 0.36898291925465837, "lambada/accuracy/openai_last_token": 0.8041537267080745, "lambada/perplexity": 6.61053520771611, "lambada/lm_loss": 2.878546524662503, "lambada/lm_perplexity": 17.788399380909407, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4470874688969224, "mean_loss": 2.318248740881373, "blimp/accuracy/passive_2": 0.93, "blimp/accuracy/determiner_noun_agreement_2": 0.99, "blimp/accuracy/ellipsis_n_bar_1": 0.85, "blimp/accuracy/tough_vs_raising_2": 0.901, "blimp/accuracy/tough_vs_raising_1": 0.615, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.916, "blimp/accuracy/principle_A_reconstruction": 0.482, "blimp/accuracy/wh_vs_that_with_gap": 0.482, "blimp/accuracy/principle_A_domain_2": 0.875, "blimp/accuracy/determiner_noun_agreement_1": 0.992, "blimp/accuracy/ellipsis_n_bar_2": 0.905, "blimp/accuracy/principle_A_domain_3": 0.567, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.934, "blimp/accuracy/animate_subject_trans": 0.918, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.913, "blimp/accuracy/distractor_agreement_relative_clause": 0.677, "blimp/accuracy/transitive": 0.882, "blimp/accuracy/sentential_subject_island": 0.343, "blimp/accuracy/adjunct_island": 0.862, "blimp/accuracy/intransitive": 0.776, "blimp/accuracy/existential_there_subject_raising": 0.897, "blimp/accuracy/irregular_past_participle_adjectives": 0.96, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.762, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.406, "blimp/accuracy/only_npi_scope": 0.71, "blimp/accuracy/superlative_quantifiers_2": 0.837, "blimp/accuracy/passive_1": 0.897, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.916, "blimp/accuracy/inchoative": 0.642, "blimp/accuracy/anaphor_gender_agreement": 0.972, "blimp/accuracy/principle_A_c_command": 0.679, "blimp/accuracy/only_npi_licensor_present": 0.667, "blimp/accuracy/expletive_it_object_raising": 0.785, "blimp/accuracy/left_branch_island_simple_question": 0.824, "blimp/accuracy/wh_questions_subject_gap": 0.94, "blimp/accuracy/existential_there_quantifiers_2": 0.46, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.947, "blimp/accuracy/sentential_negation_npi_scope": 0.729, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.819, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.916, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.907, "blimp/accuracy/principle_A_case_2": 0.959, "blimp/accuracy/distractor_agreement_relational_noun": 0.867, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.985, "blimp/accuracy/superlative_quantifiers_1": 0.883, "blimp/accuracy/wh_island": 0.829, "blimp/accuracy/principle_A_domain_1": 0.989, "blimp/accuracy/complex_NP_island": 0.576, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.978, "blimp/accuracy/irregular_past_participle_verbs": 0.922, "blimp/accuracy/drop_argument": 0.736, "blimp/accuracy/wh_questions_object_gap": 0.853, "blimp/accuracy/animate_subject_passive": 0.793, "blimp/accuracy/existential_there_quantifiers_1": 0.99, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.891, "blimp/accuracy/npi_present_2": 0.555, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.965, "blimp/accuracy/anaphor_number_agreement": 0.987, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.971, "blimp/accuracy/existential_there_object_raising": 0.847, "blimp/accuracy/matrix_question_npi_licensor_present": 0.311, "blimp/accuracy/npi_present_1": 0.561, "blimp/accuracy/wh_vs_that_no_gap": 0.982, "blimp/accuracy/left_branch_island_echo_question": 0.485, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.975, "blimp/accuracy/causative": 0.75, "blimp/accuracy/group_average": 0.8077910447761194, "blimp/accuracy/seq_average": 0.8077910447761194, "cbt/accuracy/NE": 0.8209134615384616, "cbt/accuracy/V": 0.9452, "cbt/accuracy/CN": 0.8928, "cbt/accuracy/P": 0.9244, "cbt/accuracy/group_average": 0.8958283653846154, "cbt/accuracy/seq_average": 0.8958583433373349, "hellaswag/accuracy/val": 0.3639713204540928, "hellaswag/accuracy/group_average": 0.3639713204540928, "hellaswag/accuracy/seq_average": 0.3639713204540928, "piqa/accuracy/val": 0.64689880304679, "piqa/accuracy/group_average": 0.64689880304679, "piqa/accuracy/seq_average": 0.64689880304679, "ai2arc/accuracy/ARC-Easy": 0.38520084566596197, "ai2arc/accuracy/ARC-Challenge": 0.2257510729613734, "ai2arc/accuracy/group_average": 0.30547595931366767, "ai2arc/accuracy/seq_average": 0.33257790368271956, "mmlu/accuracy/MMLU": 0.2593493028244548, "mmlu/accuracy/group_average": 0.2593493028244548, "mmlu/accuracy/seq_average": 0.2593493028244548, "openbookqa/accuracy/test": 0.29, "openbookqa/accuracy/group_average": 0.29, "openbookqa/accuracy/seq_average": 0.29, "race/accuracy/test/high": 0.296169239565466, "race/accuracy/test/middle": 0.36629526462395545, "race/accuracy/group_average": 0.33123225209471074, "race/accuracy/seq_average": 0.3165788406972031, "siqa/accuracy/dev": 0.38075742067553736, "siqa/accuracy/group_average": 0.38075742067553736, "siqa/accuracy/seq_average": 0.38075742067553736, "winogrande/accuracy/dev": 0.5059194948697711, "winogrande/accuracy/group_average": 0.5059194948697711, "winogrande/accuracy/seq_average": 0.5059194948697711, "commonsenseqa/accuracy/dev_rand_split": 0.27436527436527436, "commonsenseqa/accuracy/group_average": 0.27436527436527436, "commonsenseqa/accuracy/seq_average": 0.27436527436527436}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-340000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.261232164171007, "val/accuracy": 0.526702396453373, "val/perplexity": 9.594904383531768, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.49296882582007, "lambada/accuracy/total": 0.3924689440993789, "lambada/accuracy/openai_last_token": 0.8055124223602484, "lambada/perplexity": 6.247134449737527, "lambada/lm_loss": 2.8688472079335714, "lambada/lm_perplexity": 17.616698099856446, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.459585670276376, "mean_loss": 2.3771004949955383, "blimp/accuracy/passive_2": 0.924, "blimp/accuracy/determiner_noun_agreement_2": 0.989, "blimp/accuracy/ellipsis_n_bar_1": 0.844, "blimp/accuracy/tough_vs_raising_2": 0.911, "blimp/accuracy/tough_vs_raising_1": 0.623, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.925, "blimp/accuracy/principle_A_reconstruction": 0.47, "blimp/accuracy/wh_vs_that_with_gap": 0.476, "blimp/accuracy/principle_A_domain_2": 0.874, "blimp/accuracy/determiner_noun_agreement_1": 0.995, "blimp/accuracy/ellipsis_n_bar_2": 0.903, "blimp/accuracy/principle_A_domain_3": 0.567, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.93, "blimp/accuracy/animate_subject_trans": 0.923, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.923, "blimp/accuracy/distractor_agreement_relative_clause": 0.697, "blimp/accuracy/transitive": 0.884, "blimp/accuracy/sentential_subject_island": 0.329, "blimp/accuracy/adjunct_island": 0.848, "blimp/accuracy/intransitive": 0.784, "blimp/accuracy/existential_there_subject_raising": 0.9, "blimp/accuracy/irregular_past_participle_adjectives": 0.945, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.767, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.398, "blimp/accuracy/only_npi_scope": 0.75, "blimp/accuracy/superlative_quantifiers_2": 0.897, "blimp/accuracy/passive_1": 0.894, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.924, "blimp/accuracy/inchoative": 0.62, "blimp/accuracy/anaphor_gender_agreement": 0.985, "blimp/accuracy/principle_A_c_command": 0.668, "blimp/accuracy/only_npi_licensor_present": 0.629, "blimp/accuracy/expletive_it_object_raising": 0.785, "blimp/accuracy/left_branch_island_simple_question": 0.845, "blimp/accuracy/wh_questions_subject_gap": 0.931, "blimp/accuracy/existential_there_quantifiers_2": 0.645, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.946, "blimp/accuracy/sentential_negation_npi_scope": 0.74, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.829, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.911, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.91, "blimp/accuracy/principle_A_case_2": 0.96, "blimp/accuracy/distractor_agreement_relational_noun": 0.879, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.983, "blimp/accuracy/superlative_quantifiers_1": 0.86, "blimp/accuracy/wh_island": 0.802, "blimp/accuracy/principle_A_domain_1": 0.983, "blimp/accuracy/complex_NP_island": 0.577, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.974, "blimp/accuracy/irregular_past_participle_verbs": 0.915, "blimp/accuracy/drop_argument": 0.735, "blimp/accuracy/wh_questions_object_gap": 0.852, "blimp/accuracy/animate_subject_passive": 0.804, "blimp/accuracy/existential_there_quantifiers_1": 0.987, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.912, "blimp/accuracy/npi_present_2": 0.563, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.959, "blimp/accuracy/anaphor_number_agreement": 0.993, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.974, "blimp/accuracy/existential_there_object_raising": 0.832, "blimp/accuracy/matrix_question_npi_licensor_present": 0.332, "blimp/accuracy/npi_present_1": 0.556, "blimp/accuracy/wh_vs_that_no_gap": 0.982, "blimp/accuracy/left_branch_island_echo_question": 0.524, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.972, "blimp/accuracy/causative": 0.771, "blimp/accuracy/group_average": 0.8122238805970147, "blimp/accuracy/seq_average": 0.8122238805970149, "cbt/accuracy/NE": 0.8261217948717948, "cbt/accuracy/V": 0.9488, "cbt/accuracy/CN": 0.8916, "cbt/accuracy/P": 0.926, "cbt/accuracy/group_average": 0.8981304487179487, "cbt/accuracy/seq_average": 0.8981592637054822, "hellaswag/accuracy/val": 0.3723361880103565, "hellaswag/accuracy/group_average": 0.3723361880103565, "hellaswag/accuracy/seq_average": 0.3723361880103565, "piqa/accuracy/val": 0.6556039173014145, "piqa/accuracy/group_average": 0.6556039173014145, "piqa/accuracy/seq_average": 0.6556039173014145, "ai2arc/accuracy/ARC-Easy": 0.3885835095137421, "ai2arc/accuracy/ARC-Challenge": 0.23261802575107296, "ai2arc/accuracy/group_average": 0.3106007676324075, "ai2arc/accuracy/seq_average": 0.3371104815864023, "mmlu/accuracy/MMLU": 0.26006435466571326, "mmlu/accuracy/group_average": 0.26006435466571326, "mmlu/accuracy/seq_average": 0.26006435466571326, "openbookqa/accuracy/test": 0.288, "openbookqa/accuracy/group_average": 0.288, "openbookqa/accuracy/seq_average": 0.288, "race/accuracy/test/high": 0.2938822184105203, "race/accuracy/test/middle": 0.366991643454039, "race/accuracy/group_average": 0.33043693093227966, "race/accuracy/seq_average": 0.3151601134981759, "siqa/accuracy/dev": 0.38945752302968273, "siqa/accuracy/group_average": 0.38945752302968273, "siqa/accuracy/seq_average": 0.38945752302968273, "winogrande/accuracy/dev": 0.5035516969218626, "winogrande/accuracy/group_average": 0.5035516969218626, "winogrande/accuracy/seq_average": 0.5035516969218626, "commonsenseqa/accuracy/dev_rand_split": 0.2833742833742834, "commonsenseqa/accuracy/group_average": 0.2833742833742834, "commonsenseqa/accuracy/seq_average": 0.2833742833742834}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-360000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.255616203187004, "val/accuracy": 0.5276683020213294, "val/perplexity": 9.54117079895118, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.514542099851999, "lambada/accuracy/total": 0.3815993788819876, "lambada/accuracy/openai_last_token": 0.8076475155279503, "lambada/perplexity": 6.378070666226518, "lambada/lm_loss": 2.858989959414411, "lambada/lm_perplexity": 17.443898989657246, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4546338404516585, "mean_loss": 2.3850791515195016, "blimp/accuracy/passive_2": 0.925, "blimp/accuracy/determiner_noun_agreement_2": 0.987, "blimp/accuracy/ellipsis_n_bar_1": 0.844, "blimp/accuracy/tough_vs_raising_2": 0.88, "blimp/accuracy/tough_vs_raising_1": 0.616, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.92, "blimp/accuracy/principle_A_reconstruction": 0.495, "blimp/accuracy/wh_vs_that_with_gap": 0.503, "blimp/accuracy/principle_A_domain_2": 0.874, "blimp/accuracy/determiner_noun_agreement_1": 0.99, "blimp/accuracy/ellipsis_n_bar_2": 0.907, "blimp/accuracy/principle_A_domain_3": 0.567, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.938, "blimp/accuracy/animate_subject_trans": 0.911, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.925, "blimp/accuracy/distractor_agreement_relative_clause": 0.662, "blimp/accuracy/transitive": 0.879, "blimp/accuracy/sentential_subject_island": 0.338, "blimp/accuracy/adjunct_island": 0.831, "blimp/accuracy/intransitive": 0.779, "blimp/accuracy/existential_there_subject_raising": 0.897, "blimp/accuracy/irregular_past_participle_adjectives": 0.967, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.785, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.429, "blimp/accuracy/only_npi_scope": 0.744, "blimp/accuracy/superlative_quantifiers_2": 0.886, "blimp/accuracy/passive_1": 0.888, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.914, "blimp/accuracy/inchoative": 0.652, "blimp/accuracy/anaphor_gender_agreement": 0.975, "blimp/accuracy/principle_A_c_command": 0.69, "blimp/accuracy/only_npi_licensor_present": 0.577, "blimp/accuracy/expletive_it_object_raising": 0.75, "blimp/accuracy/left_branch_island_simple_question": 0.838, "blimp/accuracy/wh_questions_subject_gap": 0.927, "blimp/accuracy/existential_there_quantifiers_2": 0.534, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.952, "blimp/accuracy/sentential_negation_npi_scope": 0.719, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.848, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.907, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.903, "blimp/accuracy/principle_A_case_2": 0.959, "blimp/accuracy/distractor_agreement_relational_noun": 0.846, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.98, "blimp/accuracy/superlative_quantifiers_1": 0.898, "blimp/accuracy/wh_island": 0.785, "blimp/accuracy/principle_A_domain_1": 0.99, "blimp/accuracy/complex_NP_island": 0.569, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.978, "blimp/accuracy/irregular_past_participle_verbs": 0.904, "blimp/accuracy/drop_argument": 0.749, "blimp/accuracy/wh_questions_object_gap": 0.855, "blimp/accuracy/animate_subject_passive": 0.802, "blimp/accuracy/existential_there_quantifiers_1": 0.99, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.902, "blimp/accuracy/npi_present_2": 0.564, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.957, "blimp/accuracy/anaphor_number_agreement": 0.991, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.979, "blimp/accuracy/existential_there_object_raising": 0.837, "blimp/accuracy/matrix_question_npi_licensor_present": 0.315, "blimp/accuracy/npi_present_1": 0.602, "blimp/accuracy/wh_vs_that_no_gap": 0.979, "blimp/accuracy/left_branch_island_echo_question": 0.508, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.966, "blimp/accuracy/causative": 0.765, "blimp/accuracy/group_average": 0.8092985074626867, "blimp/accuracy/seq_average": 0.8092985074626866, "cbt/accuracy/NE": 0.8269230769230769, "cbt/accuracy/V": 0.9408, "cbt/accuracy/CN": 0.894, "cbt/accuracy/P": 0.926, "cbt/accuracy/group_average": 0.8969307692307693, "cbt/accuracy/seq_average": 0.8969587835134054, "hellaswag/accuracy/val": 0.37114120693089026, "hellaswag/accuracy/group_average": 0.37114120693089026, "hellaswag/accuracy/seq_average": 0.37114120693089026, "piqa/accuracy/val": 0.6588683351468988, "piqa/accuracy/group_average": 0.6588683351468988, "piqa/accuracy/seq_average": 0.6588683351468988, "ai2arc/accuracy/ARC-Easy": 0.3873150105708245, "ai2arc/accuracy/ARC-Challenge": 0.23433476394849787, "ai2arc/accuracy/group_average": 0.3108248872596612, "ai2arc/accuracy/seq_average": 0.33682719546742207, "mmlu/accuracy/MMLU": 0.2627815516624955, "mmlu/accuracy/group_average": 0.2627815516624955, "mmlu/accuracy/seq_average": 0.2627815516624955, "openbookqa/accuracy/test": 0.288, "openbookqa/accuracy/group_average": 0.288, "openbookqa/accuracy/seq_average": 0.288, "race/accuracy/test/high": 0.2990280160091481, "race/accuracy/test/middle": 0.3635097493036212, "race/accuracy/group_average": 0.33126888265638466, "race/accuracy/seq_average": 0.3177948925820835, "siqa/accuracy/dev": 0.38485158648925283, "siqa/accuracy/group_average": 0.38485158648925283, "siqa/accuracy/seq_average": 0.38485158648925283, "winogrande/accuracy/dev": 0.5090765588003157, "winogrande/accuracy/group_average": 0.5090765588003157, "winogrande/accuracy/seq_average": 0.5090765588003157, "commonsenseqa/accuracy/dev_rand_split": 0.28255528255528256, "commonsenseqa/accuracy/group_average": 0.28255528255528256, "commonsenseqa/accuracy/seq_average": 0.28255528255528256}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-380000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.2520538814484126, "val/accuracy": 0.5282989986359127, "val/perplexity": 9.50724254635785, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.401569816636743, "lambada/accuracy/total": 0.38955745341614906, "lambada/accuracy/openai_last_token": 0.8072593167701864, "lambada/perplexity": 6.139135607985434, "lambada/lm_loss": 2.8556100095754333, "lambada/lm_perplexity": 17.38503901399363, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4589282260260309, "mean_loss": 2.3268118490425778, "blimp/accuracy/passive_2": 0.926, "blimp/accuracy/determiner_noun_agreement_2": 0.988, "blimp/accuracy/ellipsis_n_bar_1": 0.854, "blimp/accuracy/tough_vs_raising_2": 0.894, "blimp/accuracy/tough_vs_raising_1": 0.607, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.92, "blimp/accuracy/principle_A_reconstruction": 0.432, "blimp/accuracy/wh_vs_that_with_gap": 0.472, "blimp/accuracy/principle_A_domain_2": 0.882, "blimp/accuracy/determiner_noun_agreement_1": 0.993, "blimp/accuracy/ellipsis_n_bar_2": 0.907, "blimp/accuracy/principle_A_domain_3": 0.58, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.941, "blimp/accuracy/animate_subject_trans": 0.918, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.918, "blimp/accuracy/distractor_agreement_relative_clause": 0.703, "blimp/accuracy/transitive": 0.878, "blimp/accuracy/sentential_subject_island": 0.329, "blimp/accuracy/adjunct_island": 0.856, "blimp/accuracy/intransitive": 0.775, "blimp/accuracy/existential_there_subject_raising": 0.883, "blimp/accuracy/irregular_past_participle_adjectives": 0.97, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.785, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.386, "blimp/accuracy/only_npi_scope": 0.73, "blimp/accuracy/superlative_quantifiers_2": 0.863, "blimp/accuracy/passive_1": 0.889, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.915, "blimp/accuracy/inchoative": 0.643, "blimp/accuracy/anaphor_gender_agreement": 0.978, "blimp/accuracy/principle_A_c_command": 0.669, "blimp/accuracy/only_npi_licensor_present": 0.641, "blimp/accuracy/expletive_it_object_raising": 0.798, "blimp/accuracy/left_branch_island_simple_question": 0.839, "blimp/accuracy/wh_questions_subject_gap": 0.93, "blimp/accuracy/existential_there_quantifiers_2": 0.556, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.947, "blimp/accuracy/sentential_negation_npi_scope": 0.722, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.831, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.906, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.904, "blimp/accuracy/principle_A_case_2": 0.947, "blimp/accuracy/distractor_agreement_relational_noun": 0.851, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.971, "blimp/accuracy/superlative_quantifiers_1": 0.891, "blimp/accuracy/wh_island": 0.81, "blimp/accuracy/principle_A_domain_1": 0.989, "blimp/accuracy/complex_NP_island": 0.552, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.972, "blimp/accuracy/irregular_past_participle_verbs": 0.913, "blimp/accuracy/drop_argument": 0.739, "blimp/accuracy/wh_questions_object_gap": 0.852, "blimp/accuracy/animate_subject_passive": 0.798, "blimp/accuracy/existential_there_quantifiers_1": 0.991, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.896, "blimp/accuracy/npi_present_2": 0.562, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.965, "blimp/accuracy/anaphor_number_agreement": 0.992, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.978, "blimp/accuracy/existential_there_object_raising": 0.846, "blimp/accuracy/matrix_question_npi_licensor_present": 0.343, "blimp/accuracy/npi_present_1": 0.592, "blimp/accuracy/wh_vs_that_no_gap": 0.983, "blimp/accuracy/left_branch_island_echo_question": 0.508, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.974, "blimp/accuracy/causative": 0.763, "blimp/accuracy/group_average": 0.8094925373134327, "blimp/accuracy/seq_average": 0.8094925373134328, "cbt/accuracy/NE": 0.8293269230769231, "cbt/accuracy/V": 0.9476, "cbt/accuracy/CN": 0.8944, "cbt/accuracy/P": 0.9288, "cbt/accuracy/group_average": 0.9000317307692307, "cbt/accuracy/seq_average": 0.9000600240096038, "hellaswag/accuracy/val": 0.3730332603067118, "hellaswag/accuracy/group_average": 0.3730332603067118, "hellaswag/accuracy/seq_average": 0.3730332603067118, "piqa/accuracy/val": 0.6572361262241567, "piqa/accuracy/group_average": 0.6572361262241567, "piqa/accuracy/seq_average": 0.6572361262241567, "ai2arc/accuracy/ARC-Easy": 0.38477801268498946, "ai2arc/accuracy/ARC-Challenge": 0.23605150214592274, "ai2arc/accuracy/group_average": 0.3104147574154561, "ai2arc/accuracy/seq_average": 0.3356940509915014, "mmlu/accuracy/MMLU": 0.2584912406149446, "mmlu/accuracy/group_average": 0.2584912406149446, "mmlu/accuracy/seq_average": 0.2584912406149446, "openbookqa/accuracy/test": 0.282, "openbookqa/accuracy/group_average": 0.282, "openbookqa/accuracy/seq_average": 0.282, "race/accuracy/test/high": 0.2918810748999428, "race/accuracy/test/middle": 0.366991643454039, "race/accuracy/group_average": 0.3294363591769909, "race/accuracy/seq_average": 0.3137413862991488, "siqa/accuracy/dev": 0.387410440122825, "siqa/accuracy/group_average": 0.387410440122825, "siqa/accuracy/seq_average": 0.387410440122825, "winogrande/accuracy/dev": 0.5090765588003157, "winogrande/accuracy/group_average": 0.5090765588003157, "winogrande/accuracy/seq_average": 0.5090765588003157, "commonsenseqa/accuracy/dev_rand_split": 0.27682227682227684, "commonsenseqa/accuracy/group_average": 0.27682227682227684, "commonsenseqa/accuracy/seq_average": 0.27682227682227684}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-40000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.583678230406746, "val/accuracy": 0.4798448350694444, "val/perplexity": 13.245769658618856, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.5969825886791535, "lambada/accuracy/total": 0.297554347826087, "lambada/accuracy/openai_last_token": 0.7692158385093167, "lambada/perplexity": 9.538701337382118, "lambada/lm_loss": 3.1489490595332135, "lambada/lm_perplexity": 23.311552648885787, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.38869959144776567, "mean_loss": 2.5903304095429496, "blimp/accuracy/passive_2": 0.897, "blimp/accuracy/determiner_noun_agreement_2": 0.983, "blimp/accuracy/ellipsis_n_bar_1": 0.823, "blimp/accuracy/tough_vs_raising_2": 0.837, "blimp/accuracy/tough_vs_raising_1": 0.605, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.874, "blimp/accuracy/principle_A_reconstruction": 0.251, "blimp/accuracy/wh_vs_that_with_gap": 0.407, "blimp/accuracy/principle_A_domain_2": 0.843, "blimp/accuracy/determiner_noun_agreement_1": 0.99, "blimp/accuracy/ellipsis_n_bar_2": 0.916, "blimp/accuracy/principle_A_domain_3": 0.59, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.914, "blimp/accuracy/animate_subject_trans": 0.9, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.871, "blimp/accuracy/distractor_agreement_relative_clause": 0.599, "blimp/accuracy/transitive": 0.866, "blimp/accuracy/sentential_subject_island": 0.382, "blimp/accuracy/adjunct_island": 0.793, "blimp/accuracy/intransitive": 0.764, "blimp/accuracy/existential_there_subject_raising": 0.872, "blimp/accuracy/irregular_past_participle_adjectives": 0.992, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.437, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.179, "blimp/accuracy/only_npi_scope": 0.72, "blimp/accuracy/superlative_quantifiers_2": 0.665, "blimp/accuracy/passive_1": 0.878, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.896, "blimp/accuracy/inchoative": 0.601, "blimp/accuracy/anaphor_gender_agreement": 0.959, "blimp/accuracy/principle_A_c_command": 0.592, "blimp/accuracy/only_npi_licensor_present": 0.624, "blimp/accuracy/expletive_it_object_raising": 0.782, "blimp/accuracy/left_branch_island_simple_question": 0.453, "blimp/accuracy/wh_questions_subject_gap": 0.945, "blimp/accuracy/existential_there_quantifiers_2": 0.429, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.954, "blimp/accuracy/sentential_negation_npi_scope": 0.633, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.83, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.889, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.88, "blimp/accuracy/principle_A_case_2": 0.972, "blimp/accuracy/distractor_agreement_relational_noun": 0.795, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.993, "blimp/accuracy/superlative_quantifiers_1": 0.783, "blimp/accuracy/wh_island": 0.827, "blimp/accuracy/principle_A_domain_1": 0.968, "blimp/accuracy/complex_NP_island": 0.549, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.961, "blimp/accuracy/irregular_past_participle_verbs": 0.867, "blimp/accuracy/drop_argument": 0.741, "blimp/accuracy/wh_questions_object_gap": 0.824, "blimp/accuracy/animate_subject_passive": 0.799, "blimp/accuracy/existential_there_quantifiers_1": 0.971, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.893, "blimp/accuracy/npi_present_2": 0.505, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.926, "blimp/accuracy/anaphor_number_agreement": 0.989, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.97, "blimp/accuracy/existential_there_object_raising": 0.835, "blimp/accuracy/matrix_question_npi_licensor_present": 0.277, "blimp/accuracy/npi_present_1": 0.46, "blimp/accuracy/wh_vs_that_no_gap": 0.98, "blimp/accuracy/left_branch_island_echo_question": 0.423, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.989, "blimp/accuracy/causative": 0.699, "blimp/accuracy/group_average": 0.765835820895522, "blimp/accuracy/seq_average": 0.7658358208955224, "cbt/accuracy/NE": 0.7816506410256411, "cbt/accuracy/V": 0.9192, "cbt/accuracy/CN": 0.838, "cbt/accuracy/P": 0.8892, "cbt/accuracy/group_average": 0.8570126602564103, "cbt/accuracy/seq_average": 0.8570428171268507, "hellaswag/accuracy/val": 0.3028281218880701, "hellaswag/accuracy/group_average": 0.3028281218880701, "hellaswag/accuracy/seq_average": 0.3028281218880701, "piqa/accuracy/val": 0.6022850924918389, "piqa/accuracy/group_average": 0.6022850924918389, "piqa/accuracy/seq_average": 0.6022850924918389, "ai2arc/accuracy/ARC-Easy": 0.33699788583509516, "ai2arc/accuracy/ARC-Challenge": 0.22317596566523606, "ai2arc/accuracy/group_average": 0.2800869257501656, "ai2arc/accuracy/seq_average": 0.2994334277620397, "mmlu/accuracy/MMLU": 0.2615659635323561, "mmlu/accuracy/group_average": 0.2615659635323561, "mmlu/accuracy/seq_average": 0.2615659635323561, "openbookqa/accuracy/test": 0.292, "openbookqa/accuracy/group_average": 0.292, "openbookqa/accuracy/seq_average": 0.292, "race/accuracy/test/high": 0.27101200686106347, "race/accuracy/test/middle": 0.34052924791086353, "race/accuracy/group_average": 0.3057706273859635, "race/accuracy/seq_average": 0.29124442642886095, "siqa/accuracy/dev": 0.36131013306038895, "siqa/accuracy/group_average": 0.36131013306038895, "siqa/accuracy/seq_average": 0.36131013306038895, "winogrande/accuracy/dev": 0.505130228887135, "winogrande/accuracy/group_average": 0.505130228887135, "winogrande/accuracy/seq_average": 0.505130228887135, "commonsenseqa/accuracy/dev_rand_split": 0.24651924651924653, "commonsenseqa/accuracy/group_average": 0.24651924651924653, "commonsenseqa/accuracy/seq_average": 0.24651924651924653}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-400000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.248143029591394, "val/accuracy": 0.5292891245039683, "val/perplexity": 9.470133740009928, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.510295417738257, "lambada/accuracy/total": 0.3812111801242236, "lambada/accuracy/openai_last_token": 0.8059006211180124, "lambada/perplexity": 6.323844152844343, "lambada/lm_loss": 2.850941985865236, "lambada/lm_perplexity": 17.3040743591591, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.45525015231409593, "mean_loss": 2.3792192236648253, "blimp/accuracy/passive_2": 0.922, "blimp/accuracy/determiner_noun_agreement_2": 0.987, "blimp/accuracy/ellipsis_n_bar_1": 0.835, "blimp/accuracy/tough_vs_raising_2": 0.892, "blimp/accuracy/tough_vs_raising_1": 0.608, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.911, "blimp/accuracy/principle_A_reconstruction": 0.441, "blimp/accuracy/wh_vs_that_with_gap": 0.475, "blimp/accuracy/principle_A_domain_2": 0.877, "blimp/accuracy/determiner_noun_agreement_1": 0.992, "blimp/accuracy/ellipsis_n_bar_2": 0.909, "blimp/accuracy/principle_A_domain_3": 0.578, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.938, "blimp/accuracy/animate_subject_trans": 0.912, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.929, "blimp/accuracy/distractor_agreement_relative_clause": 0.704, "blimp/accuracy/transitive": 0.876, "blimp/accuracy/sentential_subject_island": 0.313, "blimp/accuracy/adjunct_island": 0.843, "blimp/accuracy/intransitive": 0.772, "blimp/accuracy/existential_there_subject_raising": 0.901, "blimp/accuracy/irregular_past_participle_adjectives": 0.957, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.776, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.396, "blimp/accuracy/only_npi_scope": 0.682, "blimp/accuracy/superlative_quantifiers_2": 0.86, "blimp/accuracy/passive_1": 0.894, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.931, "blimp/accuracy/inchoative": 0.635, "blimp/accuracy/anaphor_gender_agreement": 0.981, "blimp/accuracy/principle_A_c_command": 0.661, "blimp/accuracy/only_npi_licensor_present": 0.527, "blimp/accuracy/expletive_it_object_raising": 0.787, "blimp/accuracy/left_branch_island_simple_question": 0.838, "blimp/accuracy/wh_questions_subject_gap": 0.927, "blimp/accuracy/existential_there_quantifiers_2": 0.566, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.955, "blimp/accuracy/sentential_negation_npi_scope": 0.753, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.82, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.905, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.904, "blimp/accuracy/principle_A_case_2": 0.969, "blimp/accuracy/distractor_agreement_relational_noun": 0.857, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.987, "blimp/accuracy/superlative_quantifiers_1": 0.834, "blimp/accuracy/wh_island": 0.801, "blimp/accuracy/principle_A_domain_1": 0.99, "blimp/accuracy/complex_NP_island": 0.569, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.969, "blimp/accuracy/irregular_past_participle_verbs": 0.907, "blimp/accuracy/drop_argument": 0.73, "blimp/accuracy/wh_questions_object_gap": 0.849, "blimp/accuracy/animate_subject_passive": 0.793, "blimp/accuracy/existential_there_quantifiers_1": 0.987, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.886, "blimp/accuracy/npi_present_2": 0.588, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.962, "blimp/accuracy/anaphor_number_agreement": 0.992, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.974, "blimp/accuracy/existential_there_object_raising": 0.858, "blimp/accuracy/matrix_question_npi_licensor_present": 0.316, "blimp/accuracy/npi_present_1": 0.601, "blimp/accuracy/wh_vs_that_no_gap": 0.984, "blimp/accuracy/left_branch_island_echo_question": 0.545, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.973, "blimp/accuracy/causative": 0.765, "blimp/accuracy/group_average": 0.8068059701492539, "blimp/accuracy/seq_average": 0.8068059701492537, "cbt/accuracy/NE": 0.828125, "cbt/accuracy/V": 0.944, "cbt/accuracy/CN": 0.8928, "cbt/accuracy/P": 0.9296, "cbt/accuracy/group_average": 0.89863125, "cbt/accuracy/seq_average": 0.8986594637855142, "hellaswag/accuracy/val": 0.371539533957379, "hellaswag/accuracy/group_average": 0.371539533957379, "hellaswag/accuracy/seq_average": 0.371539533957379, "piqa/accuracy/val": 0.6556039173014145, "piqa/accuracy/group_average": 0.6556039173014145, "piqa/accuracy/seq_average": 0.6556039173014145, "ai2arc/accuracy/ARC-Easy": 0.39027484143763214, "ai2arc/accuracy/ARC-Challenge": 0.2369098712446352, "ai2arc/accuracy/group_average": 0.3135923563411337, "ai2arc/accuracy/seq_average": 0.3396600566572238, "mmlu/accuracy/MMLU": 0.25963532356095814, "mmlu/accuracy/group_average": 0.25963532356095814, "mmlu/accuracy/seq_average": 0.25963532356095814, "openbookqa/accuracy/test": 0.298, "openbookqa/accuracy/group_average": 0.298, "openbookqa/accuracy/seq_average": 0.298, "race/accuracy/test/high": 0.29845626072041165, "race/accuracy/test/middle": 0.3649025069637883, "race/accuracy/group_average": 0.33167938384209994, "race/accuracy/seq_average": 0.3177948925820835, "siqa/accuracy/dev": 0.38485158648925283, "siqa/accuracy/group_average": 0.38485158648925283, "siqa/accuracy/seq_average": 0.38485158648925283, "winogrande/accuracy/dev": 0.5114443567482242, "winogrande/accuracy/group_average": 0.5114443567482242, "winogrande/accuracy/seq_average": 0.5114443567482242, "commonsenseqa/accuracy/dev_rand_split": 0.28746928746928746, "commonsenseqa/accuracy/group_average": 0.28746928746928746, "commonsenseqa/accuracy/seq_average": 0.28746928746928746}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-60000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.5153450133308533, "val/accuracy": 0.48922971695188494, "val/perplexity": 12.370876160107766, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.679591018961083, "lambada/accuracy/total": 0.2750388198757764, "lambada/accuracy/openai_last_token": 0.7676630434782609, "lambada/perplexity": 9.67711024154842, "lambada/lm_loss": 3.069904853298915, "lambada/lm_perplexity": 21.53985313153299, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3821342684138307, "mean_loss": 2.5974680161459682, "blimp/accuracy/passive_2": 0.914, "blimp/accuracy/determiner_noun_agreement_2": 0.981, "blimp/accuracy/ellipsis_n_bar_1": 0.841, "blimp/accuracy/tough_vs_raising_2": 0.856, "blimp/accuracy/tough_vs_raising_1": 0.574, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.91, "blimp/accuracy/principle_A_reconstruction": 0.318, "blimp/accuracy/wh_vs_that_with_gap": 0.496, "blimp/accuracy/principle_A_domain_2": 0.858, "blimp/accuracy/determiner_noun_agreement_1": 0.994, "blimp/accuracy/ellipsis_n_bar_2": 0.893, "blimp/accuracy/principle_A_domain_3": 0.559, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.939, "blimp/accuracy/animate_subject_trans": 0.906, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.892, "blimp/accuracy/distractor_agreement_relative_clause": 0.644, "blimp/accuracy/transitive": 0.867, "blimp/accuracy/sentential_subject_island": 0.352, "blimp/accuracy/adjunct_island": 0.81, "blimp/accuracy/intransitive": 0.714, "blimp/accuracy/existential_there_subject_raising": 0.873, "blimp/accuracy/irregular_past_participle_adjectives": 0.889, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.512, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.274, "blimp/accuracy/only_npi_scope": 0.679, "blimp/accuracy/superlative_quantifiers_2": 0.531, "blimp/accuracy/passive_1": 0.885, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.947, "blimp/accuracy/inchoative": 0.61, "blimp/accuracy/anaphor_gender_agreement": 0.975, "blimp/accuracy/principle_A_c_command": 0.582, "blimp/accuracy/only_npi_licensor_present": 0.918, "blimp/accuracy/expletive_it_object_raising": 0.799, "blimp/accuracy/left_branch_island_simple_question": 0.607, "blimp/accuracy/wh_questions_subject_gap": 0.945, "blimp/accuracy/existential_there_quantifiers_2": 0.489, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.945, "blimp/accuracy/sentential_negation_npi_scope": 0.62, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.787, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.909, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.92, "blimp/accuracy/principle_A_case_2": 0.948, "blimp/accuracy/distractor_agreement_relational_noun": 0.87, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.984, "blimp/accuracy/superlative_quantifiers_1": 0.818, "blimp/accuracy/wh_island": 0.832, "blimp/accuracy/principle_A_domain_1": 0.986, "blimp/accuracy/complex_NP_island": 0.53, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.968, "blimp/accuracy/irregular_past_participle_verbs": 0.871, "blimp/accuracy/drop_argument": 0.737, "blimp/accuracy/wh_questions_object_gap": 0.843, "blimp/accuracy/animate_subject_passive": 0.809, "blimp/accuracy/existential_there_quantifiers_1": 0.965, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.913, "blimp/accuracy/npi_present_2": 0.621, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.926, "blimp/accuracy/anaphor_number_agreement": 0.995, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.969, "blimp/accuracy/existential_there_object_raising": 0.847, "blimp/accuracy/matrix_question_npi_licensor_present": 0.248, "blimp/accuracy/npi_present_1": 0.612, "blimp/accuracy/wh_vs_that_no_gap": 0.986, "blimp/accuracy/left_branch_island_echo_question": 0.474, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.981, "blimp/accuracy/causative": 0.73, "blimp/accuracy/group_average": 0.783238805970149, "blimp/accuracy/seq_average": 0.7832388059701493, "cbt/accuracy/NE": 0.780448717948718, "cbt/accuracy/V": 0.9208, "cbt/accuracy/CN": 0.8448, "cbt/accuracy/P": 0.8988, "cbt/accuracy/group_average": 0.8612121794871795, "cbt/accuracy/seq_average": 0.8612444977991196, "hellaswag/accuracy/val": 0.31676956781517623, "hellaswag/accuracy/group_average": 0.31676956781517623, "hellaswag/accuracy/seq_average": 0.31676956781517623, "piqa/accuracy/val": 0.5990206746463548, "piqa/accuracy/group_average": 0.5990206746463548, "piqa/accuracy/seq_average": 0.5990206746463548, "ai2arc/accuracy/ARC-Easy": 0.3568710359408034, "ai2arc/accuracy/ARC-Challenge": 0.21802575107296138, "ai2arc/accuracy/group_average": 0.2874483935068824, "ai2arc/accuracy/seq_average": 0.3110481586402266, "mmlu/accuracy/MMLU": 0.262567036110118, "mmlu/accuracy/group_average": 0.262567036110118, "mmlu/accuracy/seq_average": 0.262567036110118, "openbookqa/accuracy/test": 0.28, "openbookqa/accuracy/group_average": 0.28, "openbookqa/accuracy/seq_average": 0.28, "race/accuracy/test/high": 0.27815894797026874, "race/accuracy/test/middle": 0.3426183844011142, "race/accuracy/group_average": 0.3103886661856915, "race/accuracy/seq_average": 0.2969193352249696, "siqa/accuracy/dev": 0.37001023541453426, "siqa/accuracy/group_average": 0.37001023541453426, "siqa/accuracy/seq_average": 0.37001023541453426, "winogrande/accuracy/dev": 0.5035516969218626, "winogrande/accuracy/group_average": 0.5035516969218626, "winogrande/accuracy/seq_average": 0.5035516969218626, "commonsenseqa/accuracy/dev_rand_split": 0.2620802620802621, "commonsenseqa/accuracy/group_average": 0.2620802620802621, "commonsenseqa/accuracy/seq_average": 0.2620802620802621}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_sigmoid_standardlb/export/result-model-80000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.4682539682539684, "val/accuracy": 0.49615672278025796, "val/perplexity": 11.801822494758214, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.6010745978503493, "lambada/accuracy/total": 0.28163819875776397, "lambada/accuracy/openai_last_token": 0.7717391304347826, "lambada/perplexity": 9.50189834987948, "lambada/lm_loss": 3.042646035833492, "lambada/lm_perplexity": 20.96063250648177, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.388897460769011, "mean_loss": 2.534664283052159, "blimp/accuracy/passive_2": 0.896, "blimp/accuracy/determiner_noun_agreement_2": 0.989, "blimp/accuracy/ellipsis_n_bar_1": 0.849, "blimp/accuracy/tough_vs_raising_2": 0.876, "blimp/accuracy/tough_vs_raising_1": 0.629, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.919, "blimp/accuracy/principle_A_reconstruction": 0.373, "blimp/accuracy/wh_vs_that_with_gap": 0.548, "blimp/accuracy/principle_A_domain_2": 0.887, "blimp/accuracy/determiner_noun_agreement_1": 0.993, "blimp/accuracy/ellipsis_n_bar_2": 0.886, "blimp/accuracy/principle_A_domain_3": 0.583, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.925, "blimp/accuracy/animate_subject_trans": 0.907, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.881, "blimp/accuracy/distractor_agreement_relative_clause": 0.655, "blimp/accuracy/transitive": 0.873, "blimp/accuracy/sentential_subject_island": 0.384, "blimp/accuracy/adjunct_island": 0.806, "blimp/accuracy/intransitive": 0.769, "blimp/accuracy/existential_there_subject_raising": 0.874, "blimp/accuracy/irregular_past_participle_adjectives": 0.932, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.474, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.348, "blimp/accuracy/only_npi_scope": 0.682, "blimp/accuracy/superlative_quantifiers_2": 0.761, "blimp/accuracy/passive_1": 0.891, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.933, "blimp/accuracy/inchoative": 0.654, "blimp/accuracy/anaphor_gender_agreement": 0.982, "blimp/accuracy/principle_A_c_command": 0.616, "blimp/accuracy/only_npi_licensor_present": 0.56, "blimp/accuracy/expletive_it_object_raising": 0.765, "blimp/accuracy/left_branch_island_simple_question": 0.508, "blimp/accuracy/wh_questions_subject_gap": 0.932, "blimp/accuracy/existential_there_quantifiers_2": 0.524, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.946, "blimp/accuracy/sentential_negation_npi_scope": 0.682, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.826, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.872, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.919, "blimp/accuracy/principle_A_case_2": 0.943, "blimp/accuracy/distractor_agreement_relational_noun": 0.836, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.965, "blimp/accuracy/superlative_quantifiers_1": 0.81, "blimp/accuracy/wh_island": 0.773, "blimp/accuracy/principle_A_domain_1": 0.985, "blimp/accuracy/complex_NP_island": 0.537, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.975, "blimp/accuracy/irregular_past_participle_verbs": 0.885, "blimp/accuracy/drop_argument": 0.752, "blimp/accuracy/wh_questions_object_gap": 0.822, "blimp/accuracy/animate_subject_passive": 0.817, "blimp/accuracy/existential_there_quantifiers_1": 0.985, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.92, "blimp/accuracy/npi_present_2": 0.642, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.95, "blimp/accuracy/anaphor_number_agreement": 0.992, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.968, "blimp/accuracy/existential_there_object_raising": 0.854, "blimp/accuracy/matrix_question_npi_licensor_present": 0.266, "blimp/accuracy/npi_present_1": 0.624, "blimp/accuracy/wh_vs_that_no_gap": 0.971, "blimp/accuracy/left_branch_island_echo_question": 0.386, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.963, "blimp/accuracy/causative": 0.739, "blimp/accuracy/group_average": 0.7861044776119402, "blimp/accuracy/seq_average": 0.7861044776119402, "cbt/accuracy/NE": 0.8000801282051282, "cbt/accuracy/V": 0.9268, "cbt/accuracy/CN": 0.8544, "cbt/accuracy/P": 0.8992, "cbt/accuracy/group_average": 0.8701200320512821, "cbt/accuracy/seq_average": 0.8701480592236894, "hellaswag/accuracy/val": 0.32045409281019716, "hellaswag/accuracy/group_average": 0.32045409281019716, "hellaswag/accuracy/seq_average": 0.32045409281019716, "piqa/accuracy/val": 0.6196953210010882, "piqa/accuracy/group_average": 0.6196953210010882, "piqa/accuracy/seq_average": 0.6196953210010882, "ai2arc/accuracy/ARC-Easy": 0.3568710359408034, "ai2arc/accuracy/ARC-Challenge": 0.2223175965665236, "ai2arc/accuracy/group_average": 0.2895943162536635, "ai2arc/accuracy/seq_average": 0.3124645892351275, "mmlu/accuracy/MMLU": 0.2624240257418663, "mmlu/accuracy/group_average": 0.2624240257418663, "mmlu/accuracy/seq_average": 0.2624240257418663, "openbookqa/accuracy/test": 0.3, "openbookqa/accuracy/group_average": 0.3, "openbookqa/accuracy/seq_average": 0.3, "race/accuracy/test/high": 0.27815894797026874, "race/accuracy/test/middle": 0.3544568245125348, "race/accuracy/group_average": 0.31630788624140177, "race/accuracy/seq_average": 0.3003648155654641, "siqa/accuracy/dev": 0.37001023541453426, "siqa/accuracy/group_average": 0.37001023541453426, "siqa/accuracy/seq_average": 0.37001023541453426, "winogrande/accuracy/dev": 0.5082872928176796, "winogrande/accuracy/group_average": 0.5082872928176796, "winogrande/accuracy/seq_average": 0.5082872928176796, "commonsenseqa/accuracy/dev_rand_split": 0.25552825552825553, "commonsenseqa/accuracy/group_average": 0.25552825552825553, "commonsenseqa/accuracy/seq_average": 0.25552825552825553}
|