Upload folder using huggingface_hub (#3760)
Browse files- a29469e5e3ede53328d59444f7d0819ffb64f56fa6a7aa7f76678c2b04a6e803 (c80c3383fdb1f526a6695827c5e0141ac08fa389)
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-2000000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.345306396484375, "val/accuracy": 0.5136234343998016, "val/perplexity": 10.436469935414774, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.367531154466712, "lambada/accuracy/total": 0.3125, "lambada/accuracy/openai_last_token": 0.7882375776397516, "lambada/perplexity": 8.643184532218681, "lambada/lm_loss": 2.9319599466122455, "lambada/lm_perplexity": 18.764371646204605, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4130617171999008, "mean_loss": 2.3564187754755435, "blimp/accuracy/passive_2": 0.897, "blimp/accuracy/determiner_noun_agreement_2": 0.982, "blimp/accuracy/ellipsis_n_bar_1": 0.845, "blimp/accuracy/tough_vs_raising_2": 0.887, "blimp/accuracy/tough_vs_raising_1": 0.572, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.895, "blimp/accuracy/principle_A_reconstruction": 0.433, "blimp/accuracy/wh_vs_that_with_gap": 0.491, "blimp/accuracy/principle_A_domain_2": 0.886, "blimp/accuracy/determiner_noun_agreement_1": 0.993, "blimp/accuracy/ellipsis_n_bar_2": 0.891, "blimp/accuracy/principle_A_domain_3": 0.617, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.937, "blimp/accuracy/animate_subject_trans": 0.902, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.926, "blimp/accuracy/distractor_agreement_relative_clause": 0.684, "blimp/accuracy/transitive": 0.876, "blimp/accuracy/sentential_subject_island": 0.35, "blimp/accuracy/adjunct_island": 0.878, "blimp/accuracy/intransitive": 0.757, "blimp/accuracy/existential_there_subject_raising": 0.878, "blimp/accuracy/irregular_past_participle_adjectives": 0.869, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.728, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.382, "blimp/accuracy/only_npi_scope": 0.728, "blimp/accuracy/superlative_quantifiers_2": 0.741, "blimp/accuracy/passive_1": 0.903, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.933, "blimp/accuracy/inchoative": 0.624, "blimp/accuracy/anaphor_gender_agreement": 0.972, "blimp/accuracy/principle_A_c_command": 0.678, "blimp/accuracy/only_npi_licensor_present": 0.561, "blimp/accuracy/expletive_it_object_raising": 0.77, "blimp/accuracy/left_branch_island_simple_question": 0.826, "blimp/accuracy/wh_questions_subject_gap": 0.923, "blimp/accuracy/existential_there_quantifiers_2": 0.435, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.944, "blimp/accuracy/sentential_negation_npi_scope": 0.663, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.837, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.87, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.903, "blimp/accuracy/principle_A_case_2": 0.916, "blimp/accuracy/distractor_agreement_relational_noun": 0.83, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.987, "blimp/accuracy/superlative_quantifiers_1": 0.663, "blimp/accuracy/wh_island": 0.849, "blimp/accuracy/principle_A_domain_1": 0.993, "blimp/accuracy/complex_NP_island": 0.601, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.984, "blimp/accuracy/irregular_past_participle_verbs": 0.912, "blimp/accuracy/drop_argument": 0.732, "blimp/accuracy/wh_questions_object_gap": 0.827, "blimp/accuracy/animate_subject_passive": 0.781, "blimp/accuracy/existential_there_quantifiers_1": 0.965, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.912, "blimp/accuracy/npi_present_2": 0.624, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.959, "blimp/accuracy/anaphor_number_agreement": 0.984, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.969, "blimp/accuracy/existential_there_object_raising": 0.853, "blimp/accuracy/matrix_question_npi_licensor_present": 0.358, "blimp/accuracy/npi_present_1": 0.607, "blimp/accuracy/wh_vs_that_no_gap": 0.977, "blimp/accuracy/left_branch_island_echo_question": 0.552, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.965, "blimp/accuracy/causative": 0.746, "blimp/accuracy/group_average": 0.7972089552238806, "blimp/accuracy/seq_average": 0.7972089552238806, "cbt/accuracy/NE": 0.8000801282051282, "cbt/accuracy/V": 0.9356, "cbt/accuracy/CN": 0.8732, "cbt/accuracy/P": 0.9244, "cbt/accuracy/group_average": 0.8833200320512821, "cbt/accuracy/seq_average": 0.8833533413365346, "hellaswag/accuracy/val": 0.3492332204740092, "hellaswag/accuracy/group_average": 0.3492332204740092, "hellaswag/accuracy/seq_average": 0.3492332204740092, "piqa/accuracy/val": 0.6414581066376496, "piqa/accuracy/group_average": 0.6414581066376496, "piqa/accuracy/seq_average": 0.6414581066376496, "ai2arc/accuracy/ARC-Easy": 0.38308668076109936, "ai2arc/accuracy/ARC-Challenge": 0.22660944206008585, "ai2arc/accuracy/group_average": 0.3048480614105926, "ai2arc/accuracy/seq_average": 0.3314447592067989}
|
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-205000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.3490779816158236, "val/accuracy": 0.5128919813368056, "val/perplexity": 10.475906292288581, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.3582519152149652, "lambada/accuracy/total": 0.35384316770186336, "lambada/accuracy/openai_last_token": 0.796777950310559, "lambada/perplexity": 7.179454384541888, "lambada/lm_loss": 2.9343860198674525, "lambada/lm_perplexity": 18.809950653055125, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4333675745193345, "mean_loss": 2.3536649484153944, "blimp/accuracy/passive_2": 0.896, "blimp/accuracy/determiner_noun_agreement_2": 0.979, "blimp/accuracy/ellipsis_n_bar_1": 0.837, "blimp/accuracy/tough_vs_raising_2": 0.875, "blimp/accuracy/tough_vs_raising_1": 0.57, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.928, "blimp/accuracy/principle_A_reconstruction": 0.451, "blimp/accuracy/wh_vs_that_with_gap": 0.45, "blimp/accuracy/principle_A_domain_2": 0.903, "blimp/accuracy/determiner_noun_agreement_1": 0.99, "blimp/accuracy/ellipsis_n_bar_2": 0.917, "blimp/accuracy/principle_A_domain_3": 0.653, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.937, "blimp/accuracy/animate_subject_trans": 0.908, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.93, "blimp/accuracy/distractor_agreement_relative_clause": 0.658, "blimp/accuracy/transitive": 0.889, "blimp/accuracy/sentential_subject_island": 0.301, "blimp/accuracy/adjunct_island": 0.888, "blimp/accuracy/intransitive": 0.769, "blimp/accuracy/existential_there_subject_raising": 0.892, "blimp/accuracy/irregular_past_participle_adjectives": 0.93, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.694, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.279, "blimp/accuracy/only_npi_scope": 0.761, "blimp/accuracy/superlative_quantifiers_2": 0.782, "blimp/accuracy/passive_1": 0.88, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.924, "blimp/accuracy/inchoative": 0.624, "blimp/accuracy/anaphor_gender_agreement": 0.979, "blimp/accuracy/principle_A_c_command": 0.613, "blimp/accuracy/only_npi_licensor_present": 0.681, "blimp/accuracy/expletive_it_object_raising": 0.796, "blimp/accuracy/left_branch_island_simple_question": 0.789, "blimp/accuracy/wh_questions_subject_gap": 0.924, "blimp/accuracy/existential_there_quantifiers_2": 0.53, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.937, "blimp/accuracy/sentential_negation_npi_scope": 0.673, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.853, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.894, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.915, "blimp/accuracy/principle_A_case_2": 0.915, "blimp/accuracy/distractor_agreement_relational_noun": 0.811, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.99, "blimp/accuracy/superlative_quantifiers_1": 0.642, "blimp/accuracy/wh_island": 0.848, "blimp/accuracy/principle_A_domain_1": 0.994, "blimp/accuracy/complex_NP_island": 0.612, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.98, "blimp/accuracy/irregular_past_participle_verbs": 0.922, "blimp/accuracy/drop_argument": 0.727, "blimp/accuracy/wh_questions_object_gap": 0.796, "blimp/accuracy/animate_subject_passive": 0.797, "blimp/accuracy/existential_there_quantifiers_1": 0.967, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.882, "blimp/accuracy/npi_present_2": 0.612, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.952, "blimp/accuracy/anaphor_number_agreement": 0.985, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.97, "blimp/accuracy/existential_there_object_raising": 0.84, "blimp/accuracy/matrix_question_npi_licensor_present": 0.411, "blimp/accuracy/npi_present_1": 0.614, "blimp/accuracy/wh_vs_that_no_gap": 0.977, "blimp/accuracy/left_branch_island_echo_question": 0.547, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.973, "blimp/accuracy/causative": 0.746, "blimp/accuracy/group_average": 0.7998358208955221, "blimp/accuracy/seq_average": 0.7998358208955224, "cbt/accuracy/NE": 0.8169070512820513, "cbt/accuracy/V": 0.9324, "cbt/accuracy/CN": 0.8724, "cbt/accuracy/P": 0.9152, "cbt/accuracy/group_average": 0.8842267628205128, "cbt/accuracy/seq_average": 0.8842537014805922, "hellaswag/accuracy/val": 0.3480382393945429, "hellaswag/accuracy/group_average": 0.3480382393945429, "hellaswag/accuracy/seq_average": 0.3480382393945429, "piqa/accuracy/val": 0.6327529923830251, "piqa/accuracy/group_average": 0.6327529923830251, "piqa/accuracy/seq_average": 0.6327529923830251, "ai2arc/accuracy/ARC-Easy": 0.38097251585623676, "ai2arc/accuracy/ARC-Challenge": 0.22832618025751072, "ai2arc/accuracy/group_average": 0.3046493480568737, "ai2arc/accuracy/seq_average": 0.3305949008498584}
|