Upload folder using huggingface_hub

#283
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_film_isolation_standardlb/export/result-model-100000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.598023914155506, "val/accuracy": 0.4809211852058532, "val/perplexity": 13.437158802928014, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.538381256672166, "lambada/accuracy/total": 0.2453416149068323, "lambada/accuracy/openai_last_token": 0.7606754658385093, "lambada/perplexity": 11.842925129763913, "lambada/lm_loss": 3.166239062840477, "lambada/lm_perplexity": 23.718114067201252, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.36313140005634276, "mean_loss": 2.568202585413836, "blimp/accuracy/passive_2": 0.906, "blimp/accuracy/determiner_noun_agreement_2": 0.992, "blimp/accuracy/ellipsis_n_bar_1": 0.823, "blimp/accuracy/tough_vs_raising_2": 0.862, "blimp/accuracy/tough_vs_raising_1": 0.647, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.91, "blimp/accuracy/principle_A_reconstruction": 0.307, "blimp/accuracy/wh_vs_that_with_gap": 0.527, "blimp/accuracy/principle_A_domain_2": 0.814, "blimp/accuracy/determiner_noun_agreement_1": 0.987, "blimp/accuracy/ellipsis_n_bar_2": 0.908, "blimp/accuracy/principle_A_domain_3": 0.558, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.916, "blimp/accuracy/animate_subject_trans": 0.901, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.879, "blimp/accuracy/distractor_agreement_relative_clause": 0.65, "blimp/accuracy/transitive": 0.869, "blimp/accuracy/sentential_subject_island": 0.343, "blimp/accuracy/adjunct_island": 0.859, "blimp/accuracy/intransitive": 0.777, "blimp/accuracy/existential_there_subject_raising": 0.874, "blimp/accuracy/irregular_past_participle_adjectives": 0.963, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.375, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.275, "blimp/accuracy/only_npi_scope": 0.696, "blimp/accuracy/superlative_quantifiers_2": 0.861, "blimp/accuracy/passive_1": 0.894, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.908, "blimp/accuracy/inchoative": 0.639, "blimp/accuracy/anaphor_gender_agreement": 0.967, "blimp/accuracy/principle_A_c_command": 0.648, "blimp/accuracy/only_npi_licensor_present": 0.495, "blimp/accuracy/expletive_it_object_raising": 0.779, "blimp/accuracy/left_branch_island_simple_question": 0.445, "blimp/accuracy/wh_questions_subject_gap": 0.942, "blimp/accuracy/existential_there_quantifiers_2": 0.506, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.933, "blimp/accuracy/sentential_negation_npi_scope": 0.645, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.808, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.872, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.898, "blimp/accuracy/principle_A_case_2": 0.963, "blimp/accuracy/distractor_agreement_relational_noun": 0.834, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.97, "blimp/accuracy/superlative_quantifiers_1": 0.728, "blimp/accuracy/wh_island": 0.773, "blimp/accuracy/principle_A_domain_1": 0.989, "blimp/accuracy/complex_NP_island": 0.555, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.969, "blimp/accuracy/irregular_past_participle_verbs": 0.875, "blimp/accuracy/drop_argument": 0.777, "blimp/accuracy/wh_questions_object_gap": 0.784, "blimp/accuracy/animate_subject_passive": 0.789, "blimp/accuracy/existential_there_quantifiers_1": 0.974, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.86, "blimp/accuracy/npi_present_2": 0.574, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.952, "blimp/accuracy/anaphor_number_agreement": 0.983, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.955, "blimp/accuracy/existential_there_object_raising": 0.779, "blimp/accuracy/matrix_question_npi_licensor_present": 0.197, "blimp/accuracy/npi_present_1": 0.598, "blimp/accuracy/wh_vs_that_no_gap": 0.972, "blimp/accuracy/left_branch_island_echo_question": 0.544, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.966, "blimp/accuracy/causative": 0.725, "blimp/accuracy/group_average": 0.7752686567164179, "blimp/accuracy/seq_average": 0.7752686567164179, "cbt/accuracy/NE": 0.7516025641025641, "cbt/accuracy/V": 0.912, "cbt/accuracy/CN": 0.8224, "cbt/accuracy/P": 0.8848, "cbt/accuracy/group_average": 0.842700641025641, "cbt/accuracy/seq_average": 0.8427370948379351, "hellaswag/accuracy/val": 0.2945628360884286, "hellaswag/accuracy/group_average": 0.2945628360884286, "hellaswag/accuracy/seq_average": 0.2945628360884286, "piqa/accuracy/val": 0.5816104461371056, "piqa/accuracy/group_average": 0.5816104461371056, "piqa/accuracy/seq_average": 0.5816104461371056, "ai2arc/accuracy/ARC-Easy": 0.33742071881606767, "ai2arc/accuracy/ARC-Challenge": 0.20772532188841203, "ai2arc/accuracy/group_average": 0.27257302035223985, "ai2arc/accuracy/seq_average": 0.29461756373937675, "mmlu/accuracy/MMLU": 0.2592062924562031, "mmlu/accuracy/group_average": 0.2592062924562031, "mmlu/accuracy/seq_average": 0.2592062924562031, "openbookqa/accuracy/test": 0.266, "openbookqa/accuracy/group_average": 0.266, "openbookqa/accuracy/seq_average": 0.266, "race/accuracy/test/high": 0.2747284162378502, "race/accuracy/test/middle": 0.34331476323119775, "race/accuracy/group_average": 0.309021589734524, "race/accuracy/seq_average": 0.29468990676935547, "siqa/accuracy/dev": 0.36284544524053225, "siqa/accuracy/group_average": 0.36284544524053225, "siqa/accuracy/seq_average": 0.36284544524053225, "winogrande/accuracy/dev": 0.516179952644041, "winogrande/accuracy/group_average": 0.516179952644041, "winogrande/accuracy/seq_average": 0.516179952644041, "commonsenseqa/accuracy/dev_rand_split": 0.25061425061425063, "commonsenseqa/accuracy/group_average": 0.25061425061425063, "commonsenseqa/accuracy/seq_average": 0.25061425061425063}