Upload folder using huggingface_hub
#3190
by
DavidNguyen
- opened
Pretrain_language_model/save/slimpajama_xmoe_no_attmoe_660M_standardlb/export/result-model-240000.pth.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"val/loss": 2.3359447660900297, "val/accuracy": 0.5155319940476191, "val/perplexity": 10.339223464241224, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.379752781080163, "lambada/accuracy/total": 0.33676242236024845, "lambada/accuracy/openai_last_token": 0.7917313664596274, "lambada/perplexity": 7.822700963274279, "lambada/lm_loss": 2.932708747587808, "lambada/lm_perplexity": 18.77842768793121, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.42614720820393376, "mean_loss": 2.357848773585096, "blimp/accuracy/passive_2": 0.907, "blimp/accuracy/determiner_noun_agreement_2": 0.985, "blimp/accuracy/ellipsis_n_bar_1": 0.875, "blimp/accuracy/tough_vs_raising_2": 0.913, "blimp/accuracy/tough_vs_raising_1": 0.594, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.921, "blimp/accuracy/principle_A_reconstruction": 0.374, "blimp/accuracy/wh_vs_that_with_gap": 0.424, "blimp/accuracy/principle_A_domain_2": 0.917, "blimp/accuracy/determiner_noun_agreement_1": 0.987, "blimp/accuracy/ellipsis_n_bar_2": 0.909, "blimp/accuracy/principle_A_domain_3": 0.677, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.947, "blimp/accuracy/animate_subject_trans": 0.911, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.906, "blimp/accuracy/distractor_agreement_relative_clause": 0.674, "blimp/accuracy/transitive": 0.884, "blimp/accuracy/sentential_subject_island": 0.379, "blimp/accuracy/adjunct_island": 0.886, "blimp/accuracy/intransitive": 0.76, "blimp/accuracy/existential_there_subject_raising": 0.888, "blimp/accuracy/irregular_past_participle_adjectives": 0.947, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.466, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.336, "blimp/accuracy/only_npi_scope": 0.597, "blimp/accuracy/superlative_quantifiers_2": 0.76, "blimp/accuracy/passive_1": 0.891, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.904, "blimp/accuracy/inchoative": 0.607, "blimp/accuracy/anaphor_gender_agreement": 0.967, "blimp/accuracy/principle_A_c_command": 0.631, "blimp/accuracy/only_npi_licensor_present": 0.487, "blimp/accuracy/expletive_it_object_raising": 0.799, "blimp/accuracy/left_branch_island_simple_question": 0.56, "blimp/accuracy/wh_questions_subject_gap": 0.931, "blimp/accuracy/existential_there_quantifiers_2": 0.57, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.959, "blimp/accuracy/sentential_negation_npi_scope": 0.731, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.786, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.879, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.918, "blimp/accuracy/principle_A_case_2": 0.95, "blimp/accuracy/distractor_agreement_relational_noun": 0.827, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.988, "blimp/accuracy/superlative_quantifiers_1": 0.799, "blimp/accuracy/wh_island": 0.816, "blimp/accuracy/principle_A_domain_1": 0.989, "blimp/accuracy/complex_NP_island": 0.671, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.972, "blimp/accuracy/irregular_past_participle_verbs": 0.908, "blimp/accuracy/drop_argument": 0.753, "blimp/accuracy/wh_questions_object_gap": 0.852, "blimp/accuracy/animate_subject_passive": 0.792, "blimp/accuracy/existential_there_quantifiers_1": 0.992, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.919, "blimp/accuracy/npi_present_2": 0.584, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.93, "blimp/accuracy/anaphor_number_agreement": 0.987, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.961, "blimp/accuracy/existential_there_object_raising": 0.885, "blimp/accuracy/matrix_question_npi_licensor_present": 0.336, "blimp/accuracy/npi_present_1": 0.474, "blimp/accuracy/wh_vs_that_no_gap": 0.977, "blimp/accuracy/left_branch_island_echo_question": 0.372, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.959, "blimp/accuracy/causative": 0.775, "blimp/accuracy/group_average": 0.789731343283582, "blimp/accuracy/seq_average": 0.7897313432835821, "cbt/accuracy/NE": 0.8104967948717948, "cbt/accuracy/V": 0.9388, "cbt/accuracy/CN": 0.8764, "cbt/accuracy/P": 0.9136, "cbt/accuracy/group_average": 0.8848241987179486, "cbt/accuracy/seq_average": 0.8848539415766307, "hellaswag/accuracy/val": 0.35232025492929697, "hellaswag/accuracy/group_average": 0.35232025492929697, "hellaswag/accuracy/seq_average": 0.35232025492929697, "piqa/accuracy/val": 0.6284004352557128, "piqa/accuracy/group_average": 0.6284004352557128, "piqa/accuracy/seq_average": 0.6284004352557128, "ai2arc/accuracy/ARC-Easy": 0.38266384778012685, "ai2arc/accuracy/ARC-Challenge": 0.23948497854077253, "ai2arc/accuracy/group_average": 0.3110744131604497, "ai2arc/accuracy/seq_average": 0.33541076487252125, "race/accuracy/test/high": 0.29559748427672955, "race/accuracy/test/middle": 0.3607242339832869, "race/accuracy/group_average": 0.32816085913000825, "race/accuracy/seq_average": 0.3145520875557357, "siqa/accuracy/dev": 0.37922210849539406, "siqa/accuracy/group_average": 0.37922210849539406, "siqa/accuracy/seq_average": 0.37922210849539406, "commonsenseqa/accuracy/dev_rand_split": 0.2833742833742834, "commonsenseqa/accuracy/group_average": 0.2833742833742834, "commonsenseqa/accuracy/seq_average": 0.2833742833742834}
|
|
|
|
| 1 |
+
{"val/loss": 2.3359447660900297, "val/accuracy": 0.5155319940476191, "val/perplexity": 10.339223464241224, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.379752781080163, "lambada/accuracy/total": 0.33676242236024845, "lambada/accuracy/openai_last_token": 0.7917313664596274, "lambada/perplexity": 7.822700963274279, "lambada/lm_loss": 2.932708747587808, "lambada/lm_perplexity": 18.77842768793121, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.42614720820393376, "mean_loss": 2.357848773585096, "blimp/accuracy/passive_2": 0.907, "blimp/accuracy/determiner_noun_agreement_2": 0.985, "blimp/accuracy/ellipsis_n_bar_1": 0.875, "blimp/accuracy/tough_vs_raising_2": 0.913, "blimp/accuracy/tough_vs_raising_1": 0.594, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.921, "blimp/accuracy/principle_A_reconstruction": 0.374, "blimp/accuracy/wh_vs_that_with_gap": 0.424, "blimp/accuracy/principle_A_domain_2": 0.917, "blimp/accuracy/determiner_noun_agreement_1": 0.987, "blimp/accuracy/ellipsis_n_bar_2": 0.909, "blimp/accuracy/principle_A_domain_3": 0.677, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.947, "blimp/accuracy/animate_subject_trans": 0.911, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.906, "blimp/accuracy/distractor_agreement_relative_clause": 0.674, "blimp/accuracy/transitive": 0.884, "blimp/accuracy/sentential_subject_island": 0.379, "blimp/accuracy/adjunct_island": 0.886, "blimp/accuracy/intransitive": 0.76, "blimp/accuracy/existential_there_subject_raising": 0.888, "blimp/accuracy/irregular_past_participle_adjectives": 0.947, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.466, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.336, "blimp/accuracy/only_npi_scope": 0.597, "blimp/accuracy/superlative_quantifiers_2": 0.76, "blimp/accuracy/passive_1": 0.891, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.904, "blimp/accuracy/inchoative": 0.607, "blimp/accuracy/anaphor_gender_agreement": 0.967, "blimp/accuracy/principle_A_c_command": 0.631, "blimp/accuracy/only_npi_licensor_present": 0.487, "blimp/accuracy/expletive_it_object_raising": 0.799, "blimp/accuracy/left_branch_island_simple_question": 0.56, "blimp/accuracy/wh_questions_subject_gap": 0.931, "blimp/accuracy/existential_there_quantifiers_2": 0.57, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.959, "blimp/accuracy/sentential_negation_npi_scope": 0.731, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.786, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.879, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.918, "blimp/accuracy/principle_A_case_2": 0.95, "blimp/accuracy/distractor_agreement_relational_noun": 0.827, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.988, "blimp/accuracy/superlative_quantifiers_1": 0.799, "blimp/accuracy/wh_island": 0.816, "blimp/accuracy/principle_A_domain_1": 0.989, "blimp/accuracy/complex_NP_island": 0.671, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.972, "blimp/accuracy/irregular_past_participle_verbs": 0.908, "blimp/accuracy/drop_argument": 0.753, "blimp/accuracy/wh_questions_object_gap": 0.852, "blimp/accuracy/animate_subject_passive": 0.792, "blimp/accuracy/existential_there_quantifiers_1": 0.992, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.919, "blimp/accuracy/npi_present_2": 0.584, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.93, "blimp/accuracy/anaphor_number_agreement": 0.987, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.961, "blimp/accuracy/existential_there_object_raising": 0.885, "blimp/accuracy/matrix_question_npi_licensor_present": 0.336, "blimp/accuracy/npi_present_1": 0.474, "blimp/accuracy/wh_vs_that_no_gap": 0.977, "blimp/accuracy/left_branch_island_echo_question": 0.372, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.959, "blimp/accuracy/causative": 0.775, "blimp/accuracy/group_average": 0.789731343283582, "blimp/accuracy/seq_average": 0.7897313432835821, "cbt/accuracy/NE": 0.8104967948717948, "cbt/accuracy/V": 0.9388, "cbt/accuracy/CN": 0.8764, "cbt/accuracy/P": 0.9136, "cbt/accuracy/group_average": 0.8848241987179486, "cbt/accuracy/seq_average": 0.8848539415766307, "hellaswag/accuracy/val": 0.35232025492929697, "hellaswag/accuracy/group_average": 0.35232025492929697, "hellaswag/accuracy/seq_average": 0.35232025492929697, "piqa/accuracy/val": 0.6284004352557128, "piqa/accuracy/group_average": 0.6284004352557128, "piqa/accuracy/seq_average": 0.6284004352557128, "ai2arc/accuracy/ARC-Easy": 0.38266384778012685, "ai2arc/accuracy/ARC-Challenge": 0.23948497854077253, "ai2arc/accuracy/group_average": 0.3110744131604497, "ai2arc/accuracy/seq_average": 0.33541076487252125, "race/accuracy/test/high": 0.29559748427672955, "race/accuracy/test/middle": 0.3607242339832869, "race/accuracy/group_average": 0.32816085913000825, "race/accuracy/seq_average": 0.3145520875557357, "siqa/accuracy/dev": 0.37922210849539406, "siqa/accuracy/group_average": 0.37922210849539406, "siqa/accuracy/seq_average": 0.37922210849539406, "commonsenseqa/accuracy/dev_rand_split": 0.2833742833742834, "commonsenseqa/accuracy/group_average": 0.2833742833742834, "commonsenseqa/accuracy/seq_average": 0.2833742833742834}
|