Upload folder using huggingface_hub
#352
by
DavidNguyen
- opened
Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_zlossss/export/result-model-100000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.6115814693390376, "val/accuracy": 0.47894868396577384, "val/perplexity": 13.620574348682341, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.7239334390770575, "lambada/accuracy/total": 0.25485248447204967, "lambada/accuracy/openai_last_token": 0.7620341614906833, "lambada/perplexity": 12.067957867455958, "lambada/lm_loss": 3.1712823086206607, "lambada/lm_perplexity": 23.83803248073084, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.36690058421891175, "mean_loss": 2.6677574542080476, "blimp/accuracy/passive_2": 0.913, "blimp/accuracy/determiner_noun_agreement_2": 0.979, "blimp/accuracy/ellipsis_n_bar_1": 0.817, "blimp/accuracy/tough_vs_raising_2": 0.856, "blimp/accuracy/tough_vs_raising_1": 0.613, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.916, "blimp/accuracy/principle_A_reconstruction": 0.326, "blimp/accuracy/wh_vs_that_with_gap": 0.537, "blimp/accuracy/principle_A_domain_2": 0.838, "blimp/accuracy/determiner_noun_agreement_1": 0.984, "blimp/accuracy/ellipsis_n_bar_2": 0.918, "blimp/accuracy/principle_A_domain_3": 0.547, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.922, "blimp/accuracy/animate_subject_trans": 0.899, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.894, "blimp/accuracy/distractor_agreement_relative_clause": 0.603, "blimp/accuracy/transitive": 0.863, "blimp/accuracy/sentential_subject_island": 0.389, "blimp/accuracy/adjunct_island": 0.787, "blimp/accuracy/intransitive": 0.804, "blimp/accuracy/existential_there_subject_raising": 0.85, "blimp/accuracy/irregular_past_participle_adjectives": 0.91, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.394, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.329, "blimp/accuracy/only_npi_scope": 0.724, "blimp/accuracy/superlative_quantifiers_2": 0.833, "blimp/accuracy/passive_1": 0.9, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.912, "blimp/accuracy/inchoative": 0.635, "blimp/accuracy/anaphor_gender_agreement": 0.948, "blimp/accuracy/principle_A_c_command": 0.599, "blimp/accuracy/only_npi_licensor_present": 0.797, "blimp/accuracy/expletive_it_object_raising": 0.758, "blimp/accuracy/left_branch_island_simple_question": 0.412, "blimp/accuracy/wh_questions_subject_gap": 0.929, "blimp/accuracy/existential_there_quantifiers_2": 0.379, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.942, "blimp/accuracy/sentential_negation_npi_scope": 0.685, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.805, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.856, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.911, "blimp/accuracy/principle_A_case_2": 0.942, "blimp/accuracy/distractor_agreement_relational_noun": 0.83, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.976, "blimp/accuracy/superlative_quantifiers_1": 0.718, "blimp/accuracy/wh_island": 0.82, "blimp/accuracy/principle_A_domain_1": 0.98, "blimp/accuracy/complex_NP_island": 0.555, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.97, "blimp/accuracy/irregular_past_participle_verbs": 0.892, "blimp/accuracy/drop_argument": 0.749, "blimp/accuracy/wh_questions_object_gap": 0.779, "blimp/accuracy/animate_subject_passive": 0.796, "blimp/accuracy/existential_there_quantifiers_1": 0.965, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.886, "blimp/accuracy/npi_present_2": 0.628, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.928, "blimp/accuracy/anaphor_number_agreement": 0.984, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.955, "blimp/accuracy/existential_there_object_raising": 0.821, "blimp/accuracy/matrix_question_npi_licensor_present": 0.271, "blimp/accuracy/npi_present_1": 0.564, "blimp/accuracy/wh_vs_that_no_gap": 0.968, "blimp/accuracy/left_branch_island_echo_question": 0.417, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.959, "blimp/accuracy/causative": 0.691, "blimp/accuracy/group_average": 0.7754776119402986, "blimp/accuracy/seq_average": 0.7754776119402985, "cbt/accuracy/NE": 0.7584134615384616, "cbt/accuracy/V": 0.91, "cbt/accuracy/CN": 0.8188, "cbt/accuracy/P": 0.882, "cbt/accuracy/group_average": 0.8423033653846154, "cbt/accuracy/seq_average": 0.8423369347739096, "hellaswag/accuracy/val": 0.29107747460665206, "hellaswag/accuracy/group_average": 0.29107747460665206, "hellaswag/accuracy/seq_average": 0.29107747460665206, "piqa/accuracy/val": 0.5870511425462459, "piqa/accuracy/group_average": 0.5870511425462459, "piqa/accuracy/seq_average": 0.5870511425462459, "ai2arc/accuracy/ARC-Easy": 0.3276955602536998, "ai2arc/accuracy/ARC-Challenge": 0.21888412017167383, "ai2arc/accuracy/group_average": 0.2732898402126868, "ai2arc/accuracy/seq_average": 0.29178470254957506, "race/accuracy/test/high": 0.26186392224128074, "race/accuracy/test/middle": 0.3307799442896936, "race/accuracy/group_average": 0.29632193326548717, "race/accuracy/seq_average": 0.2819213619781111, "siqa/accuracy/dev": 0.35209825997952915, "siqa/accuracy/group_average": 0.35209825997952915, "siqa/accuracy/seq_average": 0.35209825997952915, "commonsenseqa/accuracy/dev_rand_split": 0.23914823914823916, "commonsenseqa/accuracy/group_average": 0.23914823914823916, "commonsenseqa/accuracy/seq_average": 0.23914823914823916}
|