Upload folder using huggingface_hub
#4448
by
DavidNguyen
- opened
- Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-120000.pth.json +1 -0
- Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-120001.pth.json +1 -0
- Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-160000.pth.json +1 -1
- Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-180000.pth.json +1 -0
- Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-200000.pth.json +1 -0
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-120000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.4369583129882812, "val/accuracy": 0.5002707204511089, "val/perplexity": 11.438196363495134, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.3129389982045807, "lambada/accuracy/total": 0.34627329192546585, "lambada/accuracy/openai_last_token": 0.7878493788819876, "lambada/perplexity": 7.96804092333307, "lambada/lm_loss": 3.0043084211430595, "lambada/lm_perplexity": 20.172260561966155, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.42327200618828736, "mean_loss": 2.374948655596431, "blimp/accuracy/passive_2": 0.918, "blimp/accuracy/determiner_noun_agreement_2": 0.984, "blimp/accuracy/ellipsis_n_bar_1": 0.836, "blimp/accuracy/tough_vs_raising_2": 0.881, "blimp/accuracy/tough_vs_raising_1": 0.6, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.896, "blimp/accuracy/principle_A_reconstruction": 0.41, "blimp/accuracy/wh_vs_that_with_gap": 0.468, "blimp/accuracy/principle_A_domain_2": 0.883, "blimp/accuracy/determiner_noun_agreement_1": 0.993, "blimp/accuracy/ellipsis_n_bar_2": 0.918, "blimp/accuracy/principle_A_domain_3": 0.581, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.916, "blimp/accuracy/animate_subject_trans": 0.891, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.9, "blimp/accuracy/distractor_agreement_relative_clause": 0.68, "blimp/accuracy/transitive": 0.866, "blimp/accuracy/sentential_subject_island": 0.399, "blimp/accuracy/adjunct_island": 0.862, "blimp/accuracy/intransitive": 0.764, "blimp/accuracy/existential_there_subject_raising": 0.869, "blimp/accuracy/irregular_past_participle_adjectives": 0.895, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.67, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.31, "blimp/accuracy/only_npi_scope": 0.691, "blimp/accuracy/superlative_quantifiers_2": 0.759, "blimp/accuracy/passive_1": 0.887, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.923, "blimp/accuracy/inchoative": 0.612, "blimp/accuracy/anaphor_gender_agreement": 0.969, "blimp/accuracy/principle_A_c_command": 0.682, "blimp/accuracy/only_npi_licensor_present": 0.65, "blimp/accuracy/expletive_it_object_raising": 0.784, "blimp/accuracy/left_branch_island_simple_question": 0.768, "blimp/accuracy/wh_questions_subject_gap": 0.929, "blimp/accuracy/existential_there_quantifiers_2": 0.411, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.95, "blimp/accuracy/sentential_negation_npi_scope": 0.749, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.792, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.918, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.918, "blimp/accuracy/principle_A_case_2": 0.92, "blimp/accuracy/distractor_agreement_relational_noun": 0.857, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.99, "blimp/accuracy/superlative_quantifiers_1": 0.822, "blimp/accuracy/wh_island": 0.789, "blimp/accuracy/principle_A_domain_1": 0.994, "blimp/accuracy/complex_NP_island": 0.595, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.973, "blimp/accuracy/irregular_past_participle_verbs": 0.877, "blimp/accuracy/drop_argument": 0.736, "blimp/accuracy/wh_questions_object_gap": 0.809, "blimp/accuracy/animate_subject_passive": 0.819, "blimp/accuracy/existential_there_quantifiers_1": 0.976, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.888, "blimp/accuracy/npi_present_2": 0.612, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.964, "blimp/accuracy/anaphor_number_agreement": 0.986, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.955, "blimp/accuracy/existential_there_object_raising": 0.843, "blimp/accuracy/matrix_question_npi_licensor_present": 0.295, "blimp/accuracy/npi_present_1": 0.646, "blimp/accuracy/wh_vs_that_no_gap": 0.979, "blimp/accuracy/left_branch_island_echo_question": 0.473, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.957, "blimp/accuracy/causative": 0.755, "blimp/accuracy/group_average": 0.7954029850746269, "blimp/accuracy/seq_average": 0.7954029850746268, "cbt/accuracy/NE": 0.8020833333333334, "cbt/accuracy/V": 0.93, "cbt/accuracy/CN": 0.8656, "cbt/accuracy/P": 0.904, "cbt/accuracy/group_average": 0.8754208333333333, "cbt/accuracy/seq_average": 0.8754501800720288, "hellaswag/accuracy/val": 0.3337980481975702, "hellaswag/accuracy/group_average": 0.3337980481975702, "hellaswag/accuracy/seq_average": 0.3337980481975702, "piqa/accuracy/val": 0.6218715995647442, "piqa/accuracy/group_average": 0.6218715995647442, "piqa/accuracy/seq_average": 0.6218715995647442, "ai2arc/accuracy/ARC-Easy": 0.37251585623678646, "ai2arc/accuracy/ARC-Challenge": 0.21716738197424892, "ai2arc/accuracy/group_average": 0.2948416191055177, "ai2arc/accuracy/seq_average": 0.3212464589235127, "mmlu/accuracy/MMLU": 0.2661422953164104, "mmlu/accuracy/group_average": 0.2661422953164104, "mmlu/accuracy/seq_average": 0.2661422953164104, "openbookqa/accuracy/test": 0.29, "openbookqa/accuracy/group_average": 0.29, "openbookqa/accuracy/seq_average": 0.29, "race/accuracy/test/high": 0.2804459691252144, "race/accuracy/test/middle": 0.346100278551532, "race/accuracy/group_average": 0.31327312383837325, "race/accuracy/seq_average": 0.2995541143088772, "siqa/accuracy/dev": 0.3607983623336745, "siqa/accuracy/group_average": 0.3607983623336745, "siqa/accuracy/seq_average": 0.3607983623336745, "winogrande/accuracy/dev": 0.5019731649565904, "winogrande/accuracy/group_average": 0.5019731649565904, "winogrande/accuracy/seq_average": 0.5019731649565904, "commonsenseqa/accuracy/dev_rand_split": 0.25307125307125306, "commonsenseqa/accuracy/group_average": 0.25307125307125306, "commonsenseqa/accuracy/seq_average": 0.25307125307125306}
|
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-120001.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.42450443390877, "val/accuracy": 0.5012768160912299, "val/perplexity": 11.296629805228108, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.5750654706303377, "lambada/accuracy/total": 0.3513198757763975, "lambada/accuracy/openai_last_token": 0.7905667701863354, "lambada/perplexity": 7.749698948866573, "lambada/lm_loss": 2.9992504132941376, "lambada/lm_perplexity": 20.07048671315332, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.42629834593381366, "mean_loss": 2.499784952269554, "blimp/accuracy/passive_2": 0.898, "blimp/accuracy/determiner_noun_agreement_2": 0.987, "blimp/accuracy/ellipsis_n_bar_1": 0.846, "blimp/accuracy/tough_vs_raising_2": 0.894, "blimp/accuracy/tough_vs_raising_1": 0.587, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.915, "blimp/accuracy/principle_A_reconstruction": 0.457, "blimp/accuracy/wh_vs_that_with_gap": 0.442, "blimp/accuracy/principle_A_domain_2": 0.859, "blimp/accuracy/determiner_noun_agreement_1": 0.995, "blimp/accuracy/ellipsis_n_bar_2": 0.925, "blimp/accuracy/principle_A_domain_3": 0.572, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.914, "blimp/accuracy/animate_subject_trans": 0.905, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.906, "blimp/accuracy/distractor_agreement_relative_clause": 0.662, "blimp/accuracy/transitive": 0.866, "blimp/accuracy/sentential_subject_island": 0.327, "blimp/accuracy/adjunct_island": 0.873, "blimp/accuracy/intransitive": 0.763, "blimp/accuracy/existential_there_subject_raising": 0.893, "blimp/accuracy/irregular_past_participle_adjectives": 0.86, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.759, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.326, "blimp/accuracy/only_npi_scope": 0.66, "blimp/accuracy/superlative_quantifiers_2": 0.871, "blimp/accuracy/passive_1": 0.88, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.898, "blimp/accuracy/inchoative": 0.611, "blimp/accuracy/anaphor_gender_agreement": 0.971, "blimp/accuracy/principle_A_c_command": 0.631, "blimp/accuracy/only_npi_licensor_present": 0.723, "blimp/accuracy/expletive_it_object_raising": 0.767, "blimp/accuracy/left_branch_island_simple_question": 0.864, "blimp/accuracy/wh_questions_subject_gap": 0.952, "blimp/accuracy/existential_there_quantifiers_2": 0.508, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.945, "blimp/accuracy/sentential_negation_npi_scope": 0.709, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.838, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.912, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.908, "blimp/accuracy/principle_A_case_2": 0.931, "blimp/accuracy/distractor_agreement_relational_noun": 0.848, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.975, "blimp/accuracy/superlative_quantifiers_1": 0.924, "blimp/accuracy/wh_island": 0.769, "blimp/accuracy/principle_A_domain_1": 0.994, "blimp/accuracy/complex_NP_island": 0.586, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.969, "blimp/accuracy/irregular_past_participle_verbs": 0.885, "blimp/accuracy/drop_argument": 0.731, "blimp/accuracy/wh_questions_object_gap": 0.859, "blimp/accuracy/animate_subject_passive": 0.791, "blimp/accuracy/existential_there_quantifiers_1": 0.976, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.898, "blimp/accuracy/npi_present_2": 0.528, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.962, "blimp/accuracy/anaphor_number_agreement": 0.987, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.955, "blimp/accuracy/existential_there_object_raising": 0.825, "blimp/accuracy/matrix_question_npi_licensor_present": 0.256, "blimp/accuracy/npi_present_1": 0.572, "blimp/accuracy/wh_vs_that_no_gap": 0.983, "blimp/accuracy/left_branch_island_echo_question": 0.515, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.957, "blimp/accuracy/causative": 0.729, "blimp/accuracy/group_average": 0.7982686567164178, "blimp/accuracy/seq_average": 0.7982686567164179, "cbt/accuracy/NE": 0.797676282051282, "cbt/accuracy/V": 0.934, "cbt/accuracy/CN": 0.8664, "cbt/accuracy/P": 0.9016, "cbt/accuracy/group_average": 0.8749190705128205, "cbt/accuracy/seq_average": 0.8749499799919968, "hellaswag/accuracy/val": 0.3333001394144593, "hellaswag/accuracy/group_average": 0.3333001394144593, "hellaswag/accuracy/seq_average": 0.3333001394144593, "piqa/accuracy/val": 0.6180631120783461, "piqa/accuracy/group_average": 0.6180631120783461, "piqa/accuracy/seq_average": 0.6180631120783461, "ai2arc/accuracy/ARC-Easy": 0.3602536997885835, "ai2arc/accuracy/ARC-Challenge": 0.2257510729613734, "ai2arc/accuracy/group_average": 0.29300238637497844, "ai2arc/accuracy/seq_average": 0.31586402266288954, "mmlu/accuracy/MMLU": 0.26642831605291384, "mmlu/accuracy/group_average": 0.26642831605291384, "mmlu/accuracy/seq_average": 0.26642831605291384, "openbookqa/accuracy/test": 0.28, "openbookqa/accuracy/group_average": 0.28, "openbookqa/accuracy/seq_average": 0.28, "race/accuracy/test/high": 0.2804459691252144, "race/accuracy/test/middle": 0.362116991643454, "race/accuracy/group_average": 0.32128148038433424, "race/accuracy/seq_average": 0.30421564653425215, "siqa/accuracy/dev": 0.3694984646878199, "siqa/accuracy/group_average": 0.3694984646878199, "siqa/accuracy/seq_average": 0.3694984646878199, "winogrande/accuracy/dev": 0.5114443567482242, "winogrande/accuracy/group_average": 0.5114443567482242, "winogrande/accuracy/seq_average": 0.5114443567482242, "commonsenseqa/accuracy/dev_rand_split": 0.26371826371826373, "commonsenseqa/accuracy/group_average": 0.26371826371826373, "commonsenseqa/accuracy/seq_average": 0.26371826371826373}
|
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-160000.pth.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"val/loss": 2.
|
|
|
|
| 1 |
+
{"val/loss": 2.3851849955897175, "val/accuracy": 0.5081767420614919, "val/perplexity": 10.861071729450302, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.21494440114276, "lambada/accuracy/total": 0.33482142857142855, "lambada/accuracy/openai_last_token": 0.7936723602484472, "lambada/perplexity": 7.655349224839091, "lambada/lm_loss": 2.9612883776869356, "lambada/lm_perplexity": 19.32285085511263, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4214990853164602, "mean_loss": 2.3000646983662385, "blimp/accuracy/passive_2": 0.903, "blimp/accuracy/determiner_noun_agreement_2": 0.986, "blimp/accuracy/ellipsis_n_bar_1": 0.85, "blimp/accuracy/tough_vs_raising_2": 0.857, "blimp/accuracy/tough_vs_raising_1": 0.605, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.899, "blimp/accuracy/principle_A_reconstruction": 0.383, "blimp/accuracy/wh_vs_that_with_gap": 0.486, "blimp/accuracy/principle_A_domain_2": 0.905, "blimp/accuracy/determiner_noun_agreement_1": 0.99, "blimp/accuracy/ellipsis_n_bar_2": 0.908, "blimp/accuracy/principle_A_domain_3": 0.604, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.914, "blimp/accuracy/animate_subject_trans": 0.904, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.905, "blimp/accuracy/distractor_agreement_relative_clause": 0.666, "blimp/accuracy/transitive": 0.869, "blimp/accuracy/sentential_subject_island": 0.41, "blimp/accuracy/adjunct_island": 0.875, "blimp/accuracy/intransitive": 0.772, "blimp/accuracy/existential_there_subject_raising": 0.898, "blimp/accuracy/irregular_past_participle_adjectives": 0.971, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.674, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.373, "blimp/accuracy/only_npi_scope": 0.613, "blimp/accuracy/superlative_quantifiers_2": 0.705, "blimp/accuracy/passive_1": 0.876, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.921, "blimp/accuracy/inchoative": 0.614, "blimp/accuracy/anaphor_gender_agreement": 0.97, "blimp/accuracy/principle_A_c_command": 0.698, "blimp/accuracy/only_npi_licensor_present": 0.717, "blimp/accuracy/expletive_it_object_raising": 0.782, "blimp/accuracy/left_branch_island_simple_question": 0.784, "blimp/accuracy/wh_questions_subject_gap": 0.929, "blimp/accuracy/existential_there_quantifiers_2": 0.365, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.939, "blimp/accuracy/sentential_negation_npi_scope": 0.747, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.826, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.9, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.897, "blimp/accuracy/principle_A_case_2": 0.955, "blimp/accuracy/distractor_agreement_relational_noun": 0.822, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.983, "blimp/accuracy/superlative_quantifiers_1": 0.775, "blimp/accuracy/wh_island": 0.74, "blimp/accuracy/principle_A_domain_1": 0.994, "blimp/accuracy/complex_NP_island": 0.609, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.975, "blimp/accuracy/irregular_past_participle_verbs": 0.889, "blimp/accuracy/drop_argument": 0.741, "blimp/accuracy/wh_questions_object_gap": 0.835, "blimp/accuracy/animate_subject_passive": 0.8, "blimp/accuracy/existential_there_quantifiers_1": 0.961, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.907, "blimp/accuracy/npi_present_2": 0.573, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.958, "blimp/accuracy/anaphor_number_agreement": 0.989, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.953, "blimp/accuracy/existential_there_object_raising": 0.829, "blimp/accuracy/matrix_question_npi_licensor_present": 0.312, "blimp/accuracy/npi_present_1": 0.547, "blimp/accuracy/wh_vs_that_no_gap": 0.976, "blimp/accuracy/left_branch_island_echo_question": 0.551, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.96, "blimp/accuracy/causative": 0.729, "blimp/accuracy/group_average": 0.7948208955223879, "blimp/accuracy/seq_average": 0.7948208955223881, "cbt/accuracy/NE": 0.8064903846153846, "cbt/accuracy/V": 0.9376, "cbt/accuracy/CN": 0.8764, "cbt/accuracy/P": 0.9164, "cbt/accuracy/group_average": 0.8842225961538461, "cbt/accuracy/seq_average": 0.8842537014805922, "hellaswag/accuracy/val": 0.34644493128858794, "hellaswag/accuracy/group_average": 0.34644493128858794, "hellaswag/accuracy/seq_average": 0.34644493128858794, "piqa/accuracy/val": 0.6082698585418934, "piqa/accuracy/group_average": 0.6082698585418934, "piqa/accuracy/seq_average": 0.6082698585418934, "ai2arc/accuracy/ARC-Easy": 0.3564482029598309, "ai2arc/accuracy/ARC-Challenge": 0.21888412017167383, "ai2arc/accuracy/group_average": 0.28766616156575237, "ai2arc/accuracy/seq_average": 0.3110481586402266, "mmlu/accuracy/MMLU": 0.267786914551305, "mmlu/accuracy/group_average": 0.267786914551305, "mmlu/accuracy/seq_average": 0.267786914551305, "openbookqa/accuracy/test": 0.288, "openbookqa/accuracy/group_average": 0.288, "openbookqa/accuracy/seq_average": 0.288, "race/accuracy/test/high": 0.2833047455688965, "race/accuracy/test/middle": 0.366991643454039, "race/accuracy/group_average": 0.32514819451146776, "race/accuracy/seq_average": 0.30766112687474667, "siqa/accuracy/dev": 0.36745138178096215, "siqa/accuracy/group_average": 0.36745138178096215, "siqa/accuracy/seq_average": 0.36745138178096215, "winogrande/accuracy/dev": 0.5090765588003157, "winogrande/accuracy/group_average": 0.5090765588003157, "winogrande/accuracy/seq_average": 0.5090765588003157, "commonsenseqa/accuracy/dev_rand_split": 0.2588042588042588, "commonsenseqa/accuracy/group_average": 0.2588042588042588, "commonsenseqa/accuracy/seq_average": 0.2588042588042588}
|
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-180000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.378108855216734, "val/accuracy": 0.5088865218623992, "val/perplexity": 10.784488537554052, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.2516739175926825, "lambada/accuracy/total": 0.34646739130434784, "lambada/accuracy/openai_last_token": 0.796001552795031, "lambada/perplexity": 7.44671847155186, "lambada/lm_loss": 2.9623079937057764, "lambada/lm_perplexity": 19.342562790969215, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.42767695658337357, "mean_loss": 2.3148913864047085, "blimp/accuracy/passive_2": 0.912, "blimp/accuracy/determiner_noun_agreement_2": 0.98, "blimp/accuracy/ellipsis_n_bar_1": 0.857, "blimp/accuracy/tough_vs_raising_2": 0.865, "blimp/accuracy/tough_vs_raising_1": 0.603, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.909, "blimp/accuracy/principle_A_reconstruction": 0.329, "blimp/accuracy/wh_vs_that_with_gap": 0.42, "blimp/accuracy/principle_A_domain_2": 0.882, "blimp/accuracy/determiner_noun_agreement_1": 0.988, "blimp/accuracy/ellipsis_n_bar_2": 0.908, "blimp/accuracy/principle_A_domain_3": 0.592, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.921, "blimp/accuracy/animate_subject_trans": 0.9, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.914, "blimp/accuracy/distractor_agreement_relative_clause": 0.681, "blimp/accuracy/transitive": 0.892, "blimp/accuracy/sentential_subject_island": 0.378, "blimp/accuracy/adjunct_island": 0.886, "blimp/accuracy/intransitive": 0.743, "blimp/accuracy/existential_there_subject_raising": 0.883, "blimp/accuracy/irregular_past_participle_adjectives": 0.947, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.733, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.336, "blimp/accuracy/only_npi_scope": 0.719, "blimp/accuracy/superlative_quantifiers_2": 0.818, "blimp/accuracy/passive_1": 0.896, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.938, "blimp/accuracy/inchoative": 0.591, "blimp/accuracy/anaphor_gender_agreement": 0.965, "blimp/accuracy/principle_A_c_command": 0.683, "blimp/accuracy/only_npi_licensor_present": 0.671, "blimp/accuracy/expletive_it_object_raising": 0.764, "blimp/accuracy/left_branch_island_simple_question": 0.844, "blimp/accuracy/wh_questions_subject_gap": 0.951, "blimp/accuracy/existential_there_quantifiers_2": 0.518, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.943, "blimp/accuracy/sentential_negation_npi_scope": 0.756, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.821, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.927, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.903, "blimp/accuracy/principle_A_case_2": 0.905, "blimp/accuracy/distractor_agreement_relational_noun": 0.827, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.984, "blimp/accuracy/superlative_quantifiers_1": 0.631, "blimp/accuracy/wh_island": 0.754, "blimp/accuracy/principle_A_domain_1": 0.996, "blimp/accuracy/complex_NP_island": 0.613, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.97, "blimp/accuracy/irregular_past_participle_verbs": 0.899, "blimp/accuracy/drop_argument": 0.724, "blimp/accuracy/wh_questions_object_gap": 0.853, "blimp/accuracy/animate_subject_passive": 0.797, "blimp/accuracy/existential_there_quantifiers_1": 0.985, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.882, "blimp/accuracy/npi_present_2": 0.546, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.94, "blimp/accuracy/anaphor_number_agreement": 0.99, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.959, "blimp/accuracy/existential_there_object_raising": 0.885, "blimp/accuracy/matrix_question_npi_licensor_present": 0.398, "blimp/accuracy/npi_present_1": 0.558, "blimp/accuracy/wh_vs_that_no_gap": 0.98, "blimp/accuracy/left_branch_island_echo_question": 0.485, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.972, "blimp/accuracy/causative": 0.759, "blimp/accuracy/group_average": 0.7978955223880596, "blimp/accuracy/seq_average": 0.7978955223880597, "cbt/accuracy/NE": 0.8084935897435898, "cbt/accuracy/V": 0.9356, "cbt/accuracy/CN": 0.8772, "cbt/accuracy/P": 0.92, "cbt/accuracy/group_average": 0.8853233974358974, "cbt/accuracy/seq_average": 0.8853541416566627, "hellaswag/accuracy/val": 0.34714200358494324, "hellaswag/accuracy/group_average": 0.34714200358494324, "hellaswag/accuracy/seq_average": 0.34714200358494324, "piqa/accuracy/val": 0.6305767138193689, "piqa/accuracy/group_average": 0.6305767138193689, "piqa/accuracy/seq_average": 0.6305767138193689, "ai2arc/accuracy/ARC-Easy": 0.36236786469344606, "ai2arc/accuracy/ARC-Challenge": 0.23261802575107296, "ai2arc/accuracy/group_average": 0.2974929452222595, "ai2arc/accuracy/seq_average": 0.31954674220963175, "mmlu/accuracy/MMLU": 0.262567036110118, "mmlu/accuracy/group_average": 0.262567036110118, "mmlu/accuracy/seq_average": 0.262567036110118, "openbookqa/accuracy/test": 0.276, "openbookqa/accuracy/group_average": 0.276, "openbookqa/accuracy/seq_average": 0.276, "race/accuracy/test/high": 0.2858776443682104, "race/accuracy/test/middle": 0.36629526462395545, "race/accuracy/group_average": 0.3260864544960829, "race/accuracy/seq_average": 0.30928252938792056, "siqa/accuracy/dev": 0.3710337768679631, "siqa/accuracy/group_average": 0.3710337768679631, "siqa/accuracy/seq_average": 0.3710337768679631, "winogrande/accuracy/dev": 0.4964483030781373, "winogrande/accuracy/group_average": 0.4964483030781373, "winogrande/accuracy/seq_average": 0.4964483030781373, "commonsenseqa/accuracy/dev_rand_split": 0.26371826371826373, "commonsenseqa/accuracy/group_average": 0.26371826371826373, "commonsenseqa/accuracy/seq_average": 0.26371826371826373}
|
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-200000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.3618279733965473, "val/accuracy": 0.511565177671371, "val/perplexity": 10.610329135481647, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.249894231002523, "lambada/accuracy/total": 0.34607919254658387, "lambada/accuracy/openai_last_token": 0.7901785714285714, "lambada/perplexity": 7.497578859131717, "lambada/lm_loss": 2.9572677957436464, "lambada/lm_perplexity": 19.245317718480347, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4288221851089774, "mean_loss": 2.305861102199535, "blimp/accuracy/passive_2": 0.904, "blimp/accuracy/determiner_noun_agreement_2": 0.983, "blimp/accuracy/ellipsis_n_bar_1": 0.858, "blimp/accuracy/tough_vs_raising_2": 0.857, "blimp/accuracy/tough_vs_raising_1": 0.572, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.911, "blimp/accuracy/principle_A_reconstruction": 0.429, "blimp/accuracy/wh_vs_that_with_gap": 0.504, "blimp/accuracy/principle_A_domain_2": 0.894, "blimp/accuracy/determiner_noun_agreement_1": 0.994, "blimp/accuracy/ellipsis_n_bar_2": 0.896, "blimp/accuracy/principle_A_domain_3": 0.622, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.922, "blimp/accuracy/animate_subject_trans": 0.906, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.929, "blimp/accuracy/distractor_agreement_relative_clause": 0.705, "blimp/accuracy/transitive": 0.865, "blimp/accuracy/sentential_subject_island": 0.386, "blimp/accuracy/adjunct_island": 0.879, "blimp/accuracy/intransitive": 0.729, "blimp/accuracy/existential_there_subject_raising": 0.885, "blimp/accuracy/irregular_past_participle_adjectives": 0.973, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.756, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.369, "blimp/accuracy/only_npi_scope": 0.672, "blimp/accuracy/superlative_quantifiers_2": 0.788, "blimp/accuracy/passive_1": 0.895, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.915, "blimp/accuracy/inchoative": 0.588, "blimp/accuracy/anaphor_gender_agreement": 0.973, "blimp/accuracy/principle_A_c_command": 0.688, "blimp/accuracy/only_npi_licensor_present": 0.981, "blimp/accuracy/expletive_it_object_raising": 0.763, "blimp/accuracy/left_branch_island_simple_question": 0.863, "blimp/accuracy/wh_questions_subject_gap": 0.94, "blimp/accuracy/existential_there_quantifiers_2": 0.521, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.95, "blimp/accuracy/sentential_negation_npi_scope": 0.738, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.844, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.895, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.905, "blimp/accuracy/principle_A_case_2": 0.94, "blimp/accuracy/distractor_agreement_relational_noun": 0.857, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.969, "blimp/accuracy/superlative_quantifiers_1": 0.68, "blimp/accuracy/wh_island": 0.732, "blimp/accuracy/principle_A_domain_1": 0.995, "blimp/accuracy/complex_NP_island": 0.642, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.966, "blimp/accuracy/irregular_past_participle_verbs": 0.935, "blimp/accuracy/drop_argument": 0.719, "blimp/accuracy/wh_questions_object_gap": 0.864, "blimp/accuracy/animate_subject_passive": 0.784, "blimp/accuracy/existential_there_quantifiers_1": 0.979, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.888, "blimp/accuracy/npi_present_2": 0.573, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.946, "blimp/accuracy/anaphor_number_agreement": 0.991, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.962, "blimp/accuracy/existential_there_object_raising": 0.867, "blimp/accuracy/matrix_question_npi_licensor_present": 0.389, "blimp/accuracy/npi_present_1": 0.583, "blimp/accuracy/wh_vs_that_no_gap": 0.98, "blimp/accuracy/left_branch_island_echo_question": 0.556, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.956, "blimp/accuracy/causative": 0.766, "blimp/accuracy/group_average": 0.8084477611940298, "blimp/accuracy/seq_average": 0.8084477611940298, "cbt/accuracy/NE": 0.8161057692307693, "cbt/accuracy/V": 0.9356, "cbt/accuracy/CN": 0.878, "cbt/accuracy/P": 0.9184, "cbt/accuracy/group_average": 0.8870264423076923, "cbt/accuracy/seq_average": 0.8870548219287715, "hellaswag/accuracy/val": 0.3549093806014738, "hellaswag/accuracy/group_average": 0.3549093806014738, "hellaswag/accuracy/seq_average": 0.3549093806014738, "piqa/accuracy/val": 0.6245919477693145, "piqa/accuracy/group_average": 0.6245919477693145, "piqa/accuracy/seq_average": 0.6245919477693145, "ai2arc/accuracy/ARC-Easy": 0.3657505285412262, "ai2arc/accuracy/ARC-Challenge": 0.22832618025751072, "ai2arc/accuracy/group_average": 0.29703835439936843, "ai2arc/accuracy/seq_average": 0.32039660056657226, "mmlu/accuracy/MMLU": 0.26399713979263495, "mmlu/accuracy/group_average": 0.26399713979263495, "mmlu/accuracy/seq_average": 0.26399713979263495, "openbookqa/accuracy/test": 0.284, "openbookqa/accuracy/group_average": 0.284, "openbookqa/accuracy/seq_average": 0.284, "race/accuracy/test/high": 0.2833047455688965, "race/accuracy/test/middle": 0.35863509749303624, "race/accuracy/group_average": 0.3209699215309664, "race/accuracy/seq_average": 0.3052290231049858, "siqa/accuracy/dev": 0.3705220061412487, "siqa/accuracy/group_average": 0.3705220061412487, "siqa/accuracy/seq_average": 0.3705220061412487, "winogrande/accuracy/dev": 0.4996053670086819, "winogrande/accuracy/group_average": 0.4996053670086819, "winogrande/accuracy/seq_average": 0.4996053670086819, "commonsenseqa/accuracy/dev_rand_split": 0.266994266994267, "commonsenseqa/accuracy/group_average": 0.266994266994267, "commonsenseqa/accuracy/seq_average": 0.266994266994267}
|