Upload folder using huggingface_hub
#329
by
DavidNguyen
- opened
- Pretrain_language_model/save/slimpajama_moe_no_attmoe_660M_standardlb_remoe/export/result-model-340000.pth.json +112 -0
- Pretrain_language_model/save/slimpajama_moe_no_attmoe_660M_standardlb_remoe/export/result-model-360000.pth.json +1 -0
- Pretrain_language_model/save/slimpajama_moe_no_attmoe_660M_standardlb_remoe/export/result-model-40000.pth.json +1 -0
- Pretrain_language_model/save/slimpajama_moe_no_attmoe_660M_standardlb_remoe/export/result-model-400000.pth.json +112 -0
Pretrain_language_model/save/slimpajama_moe_no_attmoe_660M_standardlb_remoe/export/result-model-340000.pth.json
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"val/loss": 2.2663046216207836,
|
| 3 |
+
"val/accuracy": 0.526702396453373,
|
| 4 |
+
"val/perplexity": 9.643697774330832,
|
| 5 |
+
"val/time_since_best_loss": 0,
|
| 6 |
+
"val/time_since_best_accuracy": 0,
|
| 7 |
+
"lambada/loss": 2.357945412582492,
|
| 8 |
+
"lambada/accuracy/total": 0.3825698757763975,
|
| 9 |
+
"lambada/accuracy/openai_last_token": 0.8060947204968945,
|
| 10 |
+
"lambada/perplexity": 6.443450138786187,
|
| 11 |
+
"lambada/lm_loss": 2.8742456989123397,
|
| 12 |
+
"lambada/lm_perplexity": 17.71205885611838,
|
| 13 |
+
"lambada/time_since_best_loss": 0,
|
| 14 |
+
"lambada/time_since_best_accuracy": 0,
|
| 15 |
+
"mean_accuracy": 0.4546361361148853,
|
| 16 |
+
"mean_loss": 2.312125017101638,
|
| 17 |
+
"blimp/accuracy/passive_2": 0.911,
|
| 18 |
+
"blimp/accuracy/determiner_noun_agreement_2": 0.984,
|
| 19 |
+
"blimp/accuracy/ellipsis_n_bar_1": 0.806,
|
| 20 |
+
"blimp/accuracy/tough_vs_raising_2": 0.899,
|
| 21 |
+
"blimp/accuracy/tough_vs_raising_1": 0.628,
|
| 22 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.899,
|
| 23 |
+
"blimp/accuracy/principle_A_reconstruction": 0.314,
|
| 24 |
+
"blimp/accuracy/wh_vs_that_with_gap": 0.485,
|
| 25 |
+
"blimp/accuracy/principle_A_domain_2": 0.875,
|
| 26 |
+
"blimp/accuracy/determiner_noun_agreement_1": 0.994,
|
| 27 |
+
"blimp/accuracy/ellipsis_n_bar_2": 0.908,
|
| 28 |
+
"blimp/accuracy/principle_A_domain_3": 0.634,
|
| 29 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.937,
|
| 30 |
+
"blimp/accuracy/animate_subject_trans": 0.928,
|
| 31 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.947,
|
| 32 |
+
"blimp/accuracy/distractor_agreement_relative_clause": 0.722,
|
| 33 |
+
"blimp/accuracy/transitive": 0.88,
|
| 34 |
+
"blimp/accuracy/sentential_subject_island": 0.352,
|
| 35 |
+
"blimp/accuracy/adjunct_island": 0.836,
|
| 36 |
+
"blimp/accuracy/intransitive": 0.751,
|
| 37 |
+
"blimp/accuracy/existential_there_subject_raising": 0.886,
|
| 38 |
+
"blimp/accuracy/irregular_past_participle_adjectives": 0.949,
|
| 39 |
+
"blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.747,
|
| 40 |
+
"blimp/accuracy/principle_A_case_1": 1.0,
|
| 41 |
+
"blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.328,
|
| 42 |
+
"blimp/accuracy/only_npi_scope": 0.638,
|
| 43 |
+
"blimp/accuracy/superlative_quantifiers_2": 0.848,
|
| 44 |
+
"blimp/accuracy/passive_1": 0.92,
|
| 45 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.941,
|
| 46 |
+
"blimp/accuracy/inchoative": 0.616,
|
| 47 |
+
"blimp/accuracy/anaphor_gender_agreement": 0.981,
|
| 48 |
+
"blimp/accuracy/principle_A_c_command": 0.661,
|
| 49 |
+
"blimp/accuracy/only_npi_licensor_present": 0.611,
|
| 50 |
+
"blimp/accuracy/expletive_it_object_raising": 0.801,
|
| 51 |
+
"blimp/accuracy/left_branch_island_simple_question": 0.787,
|
| 52 |
+
"blimp/accuracy/wh_questions_subject_gap": 0.941,
|
| 53 |
+
"blimp/accuracy/existential_there_quantifiers_2": 0.489,
|
| 54 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.939,
|
| 55 |
+
"blimp/accuracy/sentential_negation_npi_scope": 0.647,
|
| 56 |
+
"blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.839,
|
| 57 |
+
"blimp/accuracy/wh_questions_subject_gap_long_distance": 0.911,
|
| 58 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.92,
|
| 59 |
+
"blimp/accuracy/principle_A_case_2": 0.952,
|
| 60 |
+
"blimp/accuracy/distractor_agreement_relational_noun": 0.858,
|
| 61 |
+
"blimp/accuracy/sentential_negation_npi_licensor_present": 0.989,
|
| 62 |
+
"blimp/accuracy/superlative_quantifiers_1": 0.84,
|
| 63 |
+
"blimp/accuracy/wh_island": 0.805,
|
| 64 |
+
"blimp/accuracy/principle_A_domain_1": 0.988,
|
| 65 |
+
"blimp/accuracy/complex_NP_island": 0.595,
|
| 66 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_2": 0.977,
|
| 67 |
+
"blimp/accuracy/irregular_past_participle_verbs": 0.913,
|
| 68 |
+
"blimp/accuracy/drop_argument": 0.717,
|
| 69 |
+
"blimp/accuracy/wh_questions_object_gap": 0.85,
|
| 70 |
+
"blimp/accuracy/animate_subject_passive": 0.812,
|
| 71 |
+
"blimp/accuracy/existential_there_quantifiers_1": 0.979,
|
| 72 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.929,
|
| 73 |
+
"blimp/accuracy/npi_present_2": 0.57,
|
| 74 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_1": 0.964,
|
| 75 |
+
"blimp/accuracy/anaphor_number_agreement": 0.99,
|
| 76 |
+
"blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.965,
|
| 77 |
+
"blimp/accuracy/existential_there_object_raising": 0.87,
|
| 78 |
+
"blimp/accuracy/matrix_question_npi_licensor_present": 0.395,
|
| 79 |
+
"blimp/accuracy/npi_present_1": 0.587,
|
| 80 |
+
"blimp/accuracy/wh_vs_that_no_gap": 0.982,
|
| 81 |
+
"blimp/accuracy/left_branch_island_echo_question": 0.465,
|
| 82 |
+
"blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.979,
|
| 83 |
+
"blimp/accuracy/causative": 0.759,
|
| 84 |
+
"blimp/accuracy/group_average": 0.8032835820895522,
|
| 85 |
+
"blimp/accuracy/seq_average": 0.8032835820895522,
|
| 86 |
+
"cbt/accuracy/NE": 0.8277243589743589,
|
| 87 |
+
"cbt/accuracy/V": 0.9384,
|
| 88 |
+
"cbt/accuracy/CN": 0.89,
|
| 89 |
+
"cbt/accuracy/P": 0.9212,
|
| 90 |
+
"cbt/accuracy/group_average": 0.8943310897435897,
|
| 91 |
+
"cbt/accuracy/seq_average": 0.8943577430972389,
|
| 92 |
+
"hellaswag/accuracy/val": 0.3657637920732922,
|
| 93 |
+
"hellaswag/accuracy/group_average": 0.3657637920732922,
|
| 94 |
+
"hellaswag/accuracy/seq_average": 0.3657637920732922,
|
| 95 |
+
"piqa/accuracy/val": 0.6338411316648531,
|
| 96 |
+
"piqa/accuracy/group_average": 0.6338411316648531,
|
| 97 |
+
"piqa/accuracy/seq_average": 0.6338411316648531,
|
| 98 |
+
"ai2arc/accuracy/ARC-Easy": 0.37420718816067655,
|
| 99 |
+
"ai2arc/accuracy/ARC-Challenge": 0.2334763948497854,
|
| 100 |
+
"ai2arc/accuracy/group_average": 0.303841791505231,
|
| 101 |
+
"ai2arc/accuracy/seq_average": 0.3277620396600567,
|
| 102 |
+
"race/accuracy/test/high": 0.2878787878787879,
|
| 103 |
+
"race/accuracy/test/middle": 0.3767409470752089,
|
| 104 |
+
"race/accuracy/group_average": 0.3323098674769984,
|
| 105 |
+
"race/accuracy/seq_average": 0.3137413862991488,
|
| 106 |
+
"siqa/accuracy/dev": 0.37615148413510746,
|
| 107 |
+
"siqa/accuracy/group_average": 0.37615148413510746,
|
| 108 |
+
"siqa/accuracy/seq_average": 0.37615148413510746,
|
| 109 |
+
"commonsenseqa/accuracy/dev_rand_split": 0.27764127764127766,
|
| 110 |
+
"commonsenseqa/accuracy/group_average": 0.27764127764127766,
|
| 111 |
+
"commonsenseqa/accuracy/seq_average": 0.27764127764127766
|
| 112 |
+
}
|
Pretrain_language_model/save/slimpajama_moe_no_attmoe_660M_standardlb_remoe/export/result-model-360000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.260709732297867, "val/accuracy": 0.5283164372519841, "val/perplexity": 9.589893008826998, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.3385417298500584, "lambada/accuracy/total": 0.37558229813664595, "lambada/accuracy/openai_last_token": 0.8072593167701864, "lambada/perplexity": 6.459880642874371, "lambada/lm_loss": 2.8613641744698417, "lambada/lm_perplexity": 17.485363760928298, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.45194936769431504, "mean_loss": 2.299625731073963, "blimp/accuracy/passive_2": 0.907, "blimp/accuracy/determiner_noun_agreement_2": 0.988, "blimp/accuracy/ellipsis_n_bar_1": 0.826, "blimp/accuracy/tough_vs_raising_2": 0.878, "blimp/accuracy/tough_vs_raising_1": 0.621, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.894, "blimp/accuracy/principle_A_reconstruction": 0.316, "blimp/accuracy/wh_vs_that_with_gap": 0.495, "blimp/accuracy/principle_A_domain_2": 0.873, "blimp/accuracy/determiner_noun_agreement_1": 0.99, "blimp/accuracy/ellipsis_n_bar_2": 0.904, "blimp/accuracy/principle_A_domain_3": 0.63, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.93, "blimp/accuracy/animate_subject_trans": 0.917, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.943, "blimp/accuracy/distractor_agreement_relative_clause": 0.713, "blimp/accuracy/transitive": 0.892, "blimp/accuracy/sentential_subject_island": 0.354, "blimp/accuracy/adjunct_island": 0.826, "blimp/accuracy/intransitive": 0.767, "blimp/accuracy/existential_there_subject_raising": 0.892, "blimp/accuracy/irregular_past_participle_adjectives": 0.905, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.712, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.372, "blimp/accuracy/only_npi_scope": 0.656, "blimp/accuracy/superlative_quantifiers_2": 0.81, "blimp/accuracy/passive_1": 0.917, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.927, "blimp/accuracy/inchoative": 0.634, "blimp/accuracy/anaphor_gender_agreement": 0.978, "blimp/accuracy/principle_A_c_command": 0.69, "blimp/accuracy/only_npi_licensor_present": 0.689, "blimp/accuracy/expletive_it_object_raising": 0.781, "blimp/accuracy/left_branch_island_simple_question": 0.742, "blimp/accuracy/wh_questions_subject_gap": 0.952, "blimp/accuracy/existential_there_quantifiers_2": 0.466, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.944, "blimp/accuracy/sentential_negation_npi_scope": 0.61, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.848, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.907, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.906, "blimp/accuracy/principle_A_case_2": 0.95, "blimp/accuracy/distractor_agreement_relational_noun": 0.852, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.991, "blimp/accuracy/superlative_quantifiers_1": 0.853, "blimp/accuracy/wh_island": 0.776, "blimp/accuracy/principle_A_domain_1": 0.991, "blimp/accuracy/complex_NP_island": 0.57, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.98, "blimp/accuracy/irregular_past_participle_verbs": 0.909, "blimp/accuracy/drop_argument": 0.71, "blimp/accuracy/wh_questions_object_gap": 0.862, "blimp/accuracy/animate_subject_passive": 0.803, "blimp/accuracy/existential_there_quantifiers_1": 0.985, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.912, "blimp/accuracy/npi_present_2": 0.565, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.957, "blimp/accuracy/anaphor_number_agreement": 0.991, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.97, "blimp/accuracy/existential_there_object_raising": 0.868, "blimp/accuracy/matrix_question_npi_licensor_present": 0.393, "blimp/accuracy/npi_present_1": 0.556, "blimp/accuracy/wh_vs_that_no_gap": 0.978, "blimp/accuracy/left_branch_island_echo_question": 0.411, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.97, "blimp/accuracy/causative": 0.755, "blimp/accuracy/group_average": 0.7994029850746269, "blimp/accuracy/seq_average": 0.7994029850746268, "cbt/accuracy/NE": 0.8261217948717948, "cbt/accuracy/V": 0.9456, "cbt/accuracy/CN": 0.8956, "cbt/accuracy/P": 0.9216, "cbt/accuracy/group_average": 0.8972304487179488, "cbt/accuracy/seq_average": 0.8972589035614246, "hellaswag/accuracy/val": 0.3686516630153356, "hellaswag/accuracy/group_average": 0.3686516630153356, "hellaswag/accuracy/seq_average": 0.3686516630153356, "piqa/accuracy/val": 0.6398258977149075, "piqa/accuracy/group_average": 0.6398258977149075, "piqa/accuracy/seq_average": 0.6398258977149075, "ai2arc/accuracy/ARC-Easy": 0.3763213530655391, "ai2arc/accuracy/ARC-Challenge": 0.23090128755364808, "ai2arc/accuracy/group_average": 0.3036113203095936, "ai2arc/accuracy/seq_average": 0.328328611898017, "race/accuracy/test/high": 0.2904516866781018, "race/accuracy/test/middle": 0.3697771587743733, "race/accuracy/group_average": 0.33011442272623753, "race/accuracy/seq_average": 0.313538710985002, "siqa/accuracy/dev": 0.3781985670419652, "siqa/accuracy/group_average": 0.3781985670419652, "siqa/accuracy/seq_average": 0.3781985670419652, "commonsenseqa/accuracy/dev_rand_split": 0.2800982800982801, "commonsenseqa/accuracy/group_average": 0.2800982800982801, "commonsenseqa/accuracy/seq_average": 0.2800982800982801}
|
Pretrain_language_model/save/slimpajama_moe_no_attmoe_660M_standardlb_remoe/export/result-model-40000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.5876406715029763, "val/accuracy": 0.4782288566468254, "val/perplexity": 13.298359363660653, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.6465018136160716, "lambada/accuracy/total": 0.2843555900621118, "lambada/accuracy/openai_last_token": 0.7635869565217391, "lambada/perplexity": 10.601802160659874, "lambada/lm_loss": 3.1739570216002497, "lambada/lm_perplexity": 23.901877721398986, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3812922233544686, "mean_loss": 2.6170712425595237, "blimp/accuracy/passive_2": 0.908, "blimp/accuracy/determiner_noun_agreement_2": 0.979, "blimp/accuracy/ellipsis_n_bar_1": 0.785, "blimp/accuracy/tough_vs_raising_2": 0.842, "blimp/accuracy/tough_vs_raising_1": 0.656, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.897, "blimp/accuracy/principle_A_reconstruction": 0.361, "blimp/accuracy/wh_vs_that_with_gap": 0.445, "blimp/accuracy/principle_A_domain_2": 0.815, "blimp/accuracy/determiner_noun_agreement_1": 0.988, "blimp/accuracy/ellipsis_n_bar_2": 0.922, "blimp/accuracy/principle_A_domain_3": 0.633, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.914, "blimp/accuracy/animate_subject_trans": 0.894, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.897, "blimp/accuracy/distractor_agreement_relative_clause": 0.613, "blimp/accuracy/transitive": 0.851, "blimp/accuracy/sentential_subject_island": 0.339, "blimp/accuracy/adjunct_island": 0.839, "blimp/accuracy/intransitive": 0.724, "blimp/accuracy/existential_there_subject_raising": 0.831, "blimp/accuracy/irregular_past_participle_adjectives": 0.991, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.584, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.243, "blimp/accuracy/only_npi_scope": 0.643, "blimp/accuracy/superlative_quantifiers_2": 0.742, "blimp/accuracy/passive_1": 0.898, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.918, "blimp/accuracy/inchoative": 0.579, "blimp/accuracy/anaphor_gender_agreement": 0.961, "blimp/accuracy/principle_A_c_command": 0.675, "blimp/accuracy/only_npi_licensor_present": 0.639, "blimp/accuracy/expletive_it_object_raising": 0.78, "blimp/accuracy/left_branch_island_simple_question": 0.659, "blimp/accuracy/wh_questions_subject_gap": 0.944, "blimp/accuracy/existential_there_quantifiers_2": 0.381, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.926, "blimp/accuracy/sentential_negation_npi_scope": 0.563, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.792, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.93, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.883, "blimp/accuracy/principle_A_case_2": 0.916, "blimp/accuracy/distractor_agreement_relational_noun": 0.859, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.996, "blimp/accuracy/superlative_quantifiers_1": 0.729, "blimp/accuracy/wh_island": 0.726, "blimp/accuracy/principle_A_domain_1": 0.97, "blimp/accuracy/complex_NP_island": 0.569, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.969, "blimp/accuracy/irregular_past_participle_verbs": 0.885, "blimp/accuracy/drop_argument": 0.737, "blimp/accuracy/wh_questions_object_gap": 0.836, "blimp/accuracy/animate_subject_passive": 0.798, "blimp/accuracy/existential_there_quantifiers_1": 0.966, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.908, "blimp/accuracy/npi_present_2": 0.542, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.937, "blimp/accuracy/anaphor_number_agreement": 0.981, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.966, "blimp/accuracy/existential_there_object_raising": 0.832, "blimp/accuracy/matrix_question_npi_licensor_present": 0.264, "blimp/accuracy/npi_present_1": 0.484, "blimp/accuracy/wh_vs_that_no_gap": 0.985, "blimp/accuracy/left_branch_island_echo_question": 0.401, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.982, "blimp/accuracy/causative": 0.688, "blimp/accuracy/group_average": 0.7729850746268658, "blimp/accuracy/seq_average": 0.7729850746268657, "cbt/accuracy/NE": 0.7752403846153846, "cbt/accuracy/V": 0.9176, "cbt/accuracy/CN": 0.8332, "cbt/accuracy/P": 0.8952, "cbt/accuracy/group_average": 0.8553100961538461, "cbt/accuracy/seq_average": 0.8553421368547419, "hellaswag/accuracy/val": 0.30033857797251545, "hellaswag/accuracy/group_average": 0.30033857797251545, "hellaswag/accuracy/seq_average": 0.30033857797251545, "piqa/accuracy/val": 0.6050054406964092, "piqa/accuracy/group_average": 0.6050054406964092, "piqa/accuracy/seq_average": 0.6050054406964092, "ai2arc/accuracy/ARC-Easy": 0.3412262156448203, "ai2arc/accuracy/ARC-Challenge": 0.21545064377682405, "ai2arc/accuracy/group_average": 0.2783384297108222, "ai2arc/accuracy/seq_average": 0.29971671388101984, "race/accuracy/test/high": 0.26300743281875355, "race/accuracy/test/middle": 0.3363509749303621, "race/accuracy/group_average": 0.2996792038745578, "race/accuracy/seq_average": 0.2843534657478719, "siqa/accuracy/dev": 0.35516888433981575, "siqa/accuracy/group_average": 0.35516888433981575, "siqa/accuracy/seq_average": 0.35516888433981575, "commonsenseqa/accuracy/dev_rand_split": 0.26044226044226043, "commonsenseqa/accuracy/group_average": 0.26044226044226043, "commonsenseqa/accuracy/seq_average": 0.26044226044226043}
|
Pretrain_language_model/save/slimpajama_moe_no_attmoe_660M_standardlb_remoe/export/result-model-400000.pth.json
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"val/loss": 2.252528357127356,
|
| 3 |
+
"val/accuracy": 0.5296892438616071,
|
| 4 |
+
"val/perplexity": 9.511754572058493,
|
| 5 |
+
"val/time_since_best_loss": 0,
|
| 6 |
+
"val/time_since_best_accuracy": 0,
|
| 7 |
+
"lambada/loss": 2.396760170504173,
|
| 8 |
+
"lambada/accuracy/total": 0.3705357142857143,
|
| 9 |
+
"lambada/accuracy/openai_last_token": 0.8082298136645962,
|
| 10 |
+
"lambada/perplexity": 6.511672332228969,
|
| 11 |
+
"lambada/lm_loss": 2.857919794746293,
|
| 12 |
+
"lambada/lm_perplexity": 17.425241130555758,
|
| 13 |
+
"lambada/time_since_best_loss": 0,
|
| 14 |
+
"lambada/time_since_best_accuracy": 0,
|
| 15 |
+
"mean_accuracy": 0.4501124790736607,
|
| 16 |
+
"mean_loss": 2.3246442638157645,
|
| 17 |
+
"blimp/accuracy/passive_2": 0.907,
|
| 18 |
+
"blimp/accuracy/determiner_noun_agreement_2": 0.986,
|
| 19 |
+
"blimp/accuracy/ellipsis_n_bar_1": 0.813,
|
| 20 |
+
"blimp/accuracy/tough_vs_raising_2": 0.886,
|
| 21 |
+
"blimp/accuracy/tough_vs_raising_1": 0.636,
|
| 22 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.921,
|
| 23 |
+
"blimp/accuracy/principle_A_reconstruction": 0.347,
|
| 24 |
+
"blimp/accuracy/wh_vs_that_with_gap": 0.471,
|
| 25 |
+
"blimp/accuracy/principle_A_domain_2": 0.87,
|
| 26 |
+
"blimp/accuracy/determiner_noun_agreement_1": 0.995,
|
| 27 |
+
"blimp/accuracy/ellipsis_n_bar_2": 0.907,
|
| 28 |
+
"blimp/accuracy/principle_A_domain_3": 0.656,
|
| 29 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.936,
|
| 30 |
+
"blimp/accuracy/animate_subject_trans": 0.916,
|
| 31 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.941,
|
| 32 |
+
"blimp/accuracy/distractor_agreement_relative_clause": 0.719,
|
| 33 |
+
"blimp/accuracy/transitive": 0.899,
|
| 34 |
+
"blimp/accuracy/sentential_subject_island": 0.354,
|
| 35 |
+
"blimp/accuracy/adjunct_island": 0.848,
|
| 36 |
+
"blimp/accuracy/intransitive": 0.77,
|
| 37 |
+
"blimp/accuracy/existential_there_subject_raising": 0.895,
|
| 38 |
+
"blimp/accuracy/irregular_past_participle_adjectives": 0.874,
|
| 39 |
+
"blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.722,
|
| 40 |
+
"blimp/accuracy/principle_A_case_1": 1.0,
|
| 41 |
+
"blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.344,
|
| 42 |
+
"blimp/accuracy/only_npi_scope": 0.692,
|
| 43 |
+
"blimp/accuracy/superlative_quantifiers_2": 0.79,
|
| 44 |
+
"blimp/accuracy/passive_1": 0.927,
|
| 45 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.926,
|
| 46 |
+
"blimp/accuracy/inchoative": 0.618,
|
| 47 |
+
"blimp/accuracy/anaphor_gender_agreement": 0.981,
|
| 48 |
+
"blimp/accuracy/principle_A_c_command": 0.676,
|
| 49 |
+
"blimp/accuracy/only_npi_licensor_present": 0.692,
|
| 50 |
+
"blimp/accuracy/expletive_it_object_raising": 0.787,
|
| 51 |
+
"blimp/accuracy/left_branch_island_simple_question": 0.768,
|
| 52 |
+
"blimp/accuracy/wh_questions_subject_gap": 0.938,
|
| 53 |
+
"blimp/accuracy/existential_there_quantifiers_2": 0.451,
|
| 54 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.945,
|
| 55 |
+
"blimp/accuracy/sentential_negation_npi_scope": 0.616,
|
| 56 |
+
"blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.837,
|
| 57 |
+
"blimp/accuracy/wh_questions_subject_gap_long_distance": 0.909,
|
| 58 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.916,
|
| 59 |
+
"blimp/accuracy/principle_A_case_2": 0.972,
|
| 60 |
+
"blimp/accuracy/distractor_agreement_relational_noun": 0.857,
|
| 61 |
+
"blimp/accuracy/sentential_negation_npi_licensor_present": 0.991,
|
| 62 |
+
"blimp/accuracy/superlative_quantifiers_1": 0.787,
|
| 63 |
+
"blimp/accuracy/wh_island": 0.803,
|
| 64 |
+
"blimp/accuracy/principle_A_domain_1": 0.988,
|
| 65 |
+
"blimp/accuracy/complex_NP_island": 0.564,
|
| 66 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_2": 0.975,
|
| 67 |
+
"blimp/accuracy/irregular_past_participle_verbs": 0.915,
|
| 68 |
+
"blimp/accuracy/drop_argument": 0.719,
|
| 69 |
+
"blimp/accuracy/wh_questions_object_gap": 0.855,
|
| 70 |
+
"blimp/accuracy/animate_subject_passive": 0.805,
|
| 71 |
+
"blimp/accuracy/existential_there_quantifiers_1": 0.979,
|
| 72 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.903,
|
| 73 |
+
"blimp/accuracy/npi_present_2": 0.58,
|
| 74 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_1": 0.963,
|
| 75 |
+
"blimp/accuracy/anaphor_number_agreement": 0.995,
|
| 76 |
+
"blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.972,
|
| 77 |
+
"blimp/accuracy/existential_there_object_raising": 0.872,
|
| 78 |
+
"blimp/accuracy/matrix_question_npi_licensor_present": 0.417,
|
| 79 |
+
"blimp/accuracy/npi_present_1": 0.575,
|
| 80 |
+
"blimp/accuracy/wh_vs_that_no_gap": 0.981,
|
| 81 |
+
"blimp/accuracy/left_branch_island_echo_question": 0.423,
|
| 82 |
+
"blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.976,
|
| 83 |
+
"blimp/accuracy/causative": 0.744,
|
| 84 |
+
"blimp/accuracy/group_average": 0.8013880597014924,
|
| 85 |
+
"blimp/accuracy/seq_average": 0.8013880597014925,
|
| 86 |
+
"cbt/accuracy/NE": 0.8293269230769231,
|
| 87 |
+
"cbt/accuracy/V": 0.9432,
|
| 88 |
+
"cbt/accuracy/CN": 0.8912,
|
| 89 |
+
"cbt/accuracy/P": 0.9252,
|
| 90 |
+
"cbt/accuracy/group_average": 0.8972317307692308,
|
| 91 |
+
"cbt/accuracy/seq_average": 0.8972589035614246,
|
| 92 |
+
"hellaswag/accuracy/val": 0.37134037044413465,
|
| 93 |
+
"hellaswag/accuracy/group_average": 0.37134037044413465,
|
| 94 |
+
"hellaswag/accuracy/seq_average": 0.37134037044413465,
|
| 95 |
+
"piqa/accuracy/val": 0.6365614798694232,
|
| 96 |
+
"piqa/accuracy/group_average": 0.6365614798694232,
|
| 97 |
+
"piqa/accuracy/seq_average": 0.6365614798694232,
|
| 98 |
+
"ai2arc/accuracy/ARC-Easy": 0.38012684989429174,
|
| 99 |
+
"ai2arc/accuracy/ARC-Challenge": 0.2257510729613734,
|
| 100 |
+
"ai2arc/accuracy/group_average": 0.30293896142783255,
|
| 101 |
+
"ai2arc/accuracy/seq_average": 0.3291784702549575,
|
| 102 |
+
"race/accuracy/test/high": 0.29273870783304745,
|
| 103 |
+
"race/accuracy/test/middle": 0.36559888579387184,
|
| 104 |
+
"race/accuracy/group_average": 0.3291687968134597,
|
| 105 |
+
"race/accuracy/seq_average": 0.3139440616132955,
|
| 106 |
+
"siqa/accuracy/dev": 0.37871033776867963,
|
| 107 |
+
"siqa/accuracy/group_average": 0.37871033776867963,
|
| 108 |
+
"siqa/accuracy/seq_average": 0.37871033776867963,
|
| 109 |
+
"commonsenseqa/accuracy/dev_rand_split": 0.2784602784602785,
|
| 110 |
+
"commonsenseqa/accuracy/group_average": 0.2784602784602785,
|
| 111 |
+
"commonsenseqa/accuracy/seq_average": 0.2784602784602785
|
| 112 |
+
}
|